#! /usr/bin/env python3
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2020 Microsoft Corporation

"""Script to query and setup huge pages for DPDK applications."""

import argparse
import glob
import os
import re
import sys
from math import log2

# Standard binary prefix
BINARY_PREFIX = "KMG"

# systemd mount point for huge pages
HUGE_MOUNT = "/dev/hugepages"

# Naming of the per-size sysfs directories: hugepages-<N>kB
_HUGEDIR_PREFIX = 'hugepages-'
_HUGEDIR_SUFFIX = 'kB'

# Prefix of the per-node sysfs directories
_NUMA_NODE_PREFIX = '/sys/devices/system/node/node'


def fmt_memsize(kb):
    '''Format memory size in kB into conventional format.

    The value is scaled to the largest prefix in BINARY_PREFIX that does not
    exceed it and truncated to an integer, e.g. 2048 -> "2Mb".  Sizes beyond
    the largest known prefix stay expressed in that prefix (2**30 kB ->
    "1024Gb") and sizes below 1 kB are reported in "Kb".
    '''
    if kb < 1:
        # log2() is undefined for zero and negative values
        logk = 0
    else:
        # Clamp so that sizes of 1 TB and more do not index past the table
        logk = min(int(log2(kb) / 10), len(BINARY_PREFIX) - 1)
    unit = 2**(logk * 10)
    return '{}{}b'.format(int(kb // unit), BINARY_PREFIX[logk])


def get_memsize(arg):
    '''Convert memory size with suffix to kB.

    arg is a decimal number optionally followed by K, M or G (case
    insensitive).  A number without suffix is taken as bytes and rounded
    down to whole kB.  Exits the program if arg is not in that form.
    '''
    match = re.match(r'(\d+)([' + BINARY_PREFIX + r']?)$', arg.upper())
    if match is None:
        sys.exit('{} is not a valid page size'.format(arg))
    num = float(match.group(1))
    suffix = match.group(2)
    if suffix == "":
        return int(num / 1024)
    idx = BINARY_PREFIX.find(suffix)
    return int(num * (2**(idx * 10)))


def is_numa():
    '''Test if NUMA is necessary on this system'''
    return os.path.exists('/sys/devices/system/node')


def get_hugepages(path):
    '''Read number of reserved pages from the nr_hugepages file in path'''
    with open(path + '/nr_hugepages') as nr_hugepages:
        return int(nr_hugepages.read())


def set_hugepages(path, pages):
    '''Write the number of reserved huge pages.

    path is a hugepages-<N>kB directory.  Exits the program if the file
    can not be written due to permissions or if the directory (and hence
    the page size) does not exist.
    '''
    filename = path + '/nr_hugepages'
    try:
        with open(filename, 'w') as nr_hugepages:
            nr_hugepages.write('{}\n'.format(pages))
    except PermissionError:
        sys.exit('Permission denied: need to be root!')
    except FileNotFoundError:
        # Report only the size part of the hugepages-<N>kB directory name
        size = os.path.basename(path)[len(_HUGEDIR_PREFIX):]
        sys.exit('{} is not a valid system huge page size'.format(size))


def _hugedir_kb(hdir):
    '''Extract the page size in kB from a hugepages-<N>kB directory name'''
    return int(hdir[len(_HUGEDIR_PREFIX):-len(_HUGEDIR_SUFFIX)])


def show_numa_pages():
    '''Show huge page reservations on Numa system'''
    print('Node Pages Size Total')
    for numa_path in glob.glob(_NUMA_NODE_PREFIX + '*'):
        # What follows the common prefix is the node number
        node = numa_path[len(_NUMA_NODE_PREFIX):]
        path = numa_path + '/hugepages'
        for hdir in os.listdir(path):
            pages = get_hugepages(path + '/' + hdir)
            if pages > 0:
                kb = _hugedir_kb(hdir)
                print('{:<4} {:<5} {:<6} {}'.format(node, pages,
                                                    fmt_memsize(kb),
                                                    fmt_memsize(pages * kb)))


def show_non_numa_pages():
    '''Show huge page reservations on non Numa system'''
    print('Pages Size Total')
    path = '/sys/kernel/mm/hugepages'
    for hdir in os.listdir(path):
        pages = get_hugepages(path + '/' + hdir)
        if pages > 0:
            kb = _hugedir_kb(hdir)
            print('{:<5} {:<6} {}'.format(pages, fmt_memsize(kb),
                                          fmt_memsize(pages * kb)))


def show_pages():
    '''Show existing huge page settings'''
    if is_numa():
        show_numa_pages()
    else:
        show_non_numa_pages()


def clear_pages():
    '''Clear all existing huge page mappings'''
    if is_numa():
        dirs = glob.glob(
            '/sys/devices/system/node/node*/hugepages/hugepages-*')
    else:
        dirs = glob.glob('/sys/kernel/mm/hugepages/hugepages-*')

    for path in dirs:
        set_hugepages(path, 0)


def default_pagesize():
    '''Get default huge page size from /proc/meminfo.

    Returns the size in kB, or None if there is no Hugepagesize entry.
    '''
    with open('/proc/meminfo') as meminfo:
        for line in meminfo:
            if line.startswith('Hugepagesize:'):
                return int(line.split()[1])
    return None


def set_numa_pages(pages, hugepgsz, node=None):
    '''Set huge page reservation on Numa system.

    Applies to the given node only, or to every node when node is not set.
    '''
    if node:
        nodes = ['/sys/devices/system/node/node{}/hugepages'.format(node)]
    else:
        nodes = glob.glob('/sys/devices/system/node/node*/hugepages')

    for node_path in nodes:
        huge_path = '{}/hugepages-{}kB'.format(node_path, hugepgsz)
        set_hugepages(huge_path, pages)


def set_non_numa_pages(pages, hugepgsz):
    '''Set huge page reservation on non Numa system'''
    path = '/sys/kernel/mm/hugepages/hugepages-{}kB'.format(hugepgsz)
    set_hugepages(path, pages)


def reserve_pages(pages, hugepgsz, node=None):
    '''Set the number of huge pages to be reserved'''
    if node or is_numa():
        set_numa_pages(pages, hugepgsz, node=node)
    else:
        set_non_numa_pages(pages, hugepgsz)


def get_mountpoints():
    '''Get list of where hugepage filesystem is mounted'''
    mounted = []
    with open('/proc/mounts') as mounts:
        for line in mounts:
            fields = line.split()
            # Third field is the filesystem type, second the mount point
            if fields[2] != 'hugetlbfs':
                continue
            mounted.append(fields[1])
    return mounted


def mount_huge(pagesize, mountpoint):
    '''Mount the huge TLB file system.

    pagesize is in kB; when it is not set no pagesize option is passed
    to mount.  Does nothing if mountpoint is already mounted.
    '''
    if mountpoint in get_mountpoints():
        print(mountpoint, "already mounted")
        return
    cmd = "mount -t hugetlbfs"
    if pagesize:
        # The value passed in the mount option is pagesize converted to bytes
        cmd += ' -o pagesize={}'.format(pagesize * 1024)
    cmd += ' nodev ' + mountpoint
    os.system(cmd)


def umount_huge(mountpoint):
    '''Unmount the huge TLB file system (if mounted)'''
    if mountpoint in get_mountpoints():
        os.system("umount " + mountpoint)


def show_mount():
    '''Show where huge page filesystem is mounted'''
    mounted = get_mountpoints()
    if mounted:
        print("Hugepages mounted on", *mounted)
    else:
        print("Hugepages not mounted")


def main():
    '''Process the command line arguments and setup huge pages.

    All size arguments are validated before any action is performed so
    that an invalid request does not leave the system half configured.
    Requested actions then run in the order: clear, unmount, reserve,
    mount, show.
    '''
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Setup huge pages",
        epilog="""
Examples:

To display current huge page settings:
    %(prog)s -s

To a complete setup of with 2 Gigabyte of 1G huge pages:
    %(prog)s -p 1G --setup 2G
""")
    parser.add_argument(
        '--show',
        '-s',
        action='store_true',
        help="print the current huge page configuration")
    parser.add_argument(
        '--clear', '-c', action='store_true', help="clear existing huge pages")
    parser.add_argument(
        '--mount',
        '-m',
        action='store_true',
        help='mount the huge page filesystem')
    parser.add_argument(
        '--unmount',
        '-u',
        action='store_true',
        help='unmount the system huge page directory')
    parser.add_argument(
        '--node', '-n', help='select numa node to reserve pages on')
    parser.add_argument(
        '--pagesize',
        '-p',
        metavar='SIZE',
        help='choose huge page size to use')
    parser.add_argument(
        '--reserve',
        '-r',
        metavar='SIZE',
        help='reserve huge pages. Size is in bytes with K, M, or G suffix')
    parser.add_argument(
        '--setup',
        metavar='SIZE',
        help='setup huge pages by doing clear, unmount, reserve and mount')
    args = parser.parse_args()

    # --setup is shorthand for the full clear/unmount/reserve/mount sequence
    if args.setup:
        args.clear = True
        args.unmount = True
        args.reserve = args.setup
        args.mount = True

    if args.pagesize:
        pagesize_kb = get_memsize(args.pagesize)
        # Anything below 1 kB rounds down to zero and can not be a page size
        if pagesize_kb <= 0:
            sys.exit('{} is not a valid page size'.format(args.pagesize))
    else:
        pagesize_kb = default_pagesize()

    reserve_kb = None
    if args.reserve:
        reserve_kb = get_memsize(args.reserve)
        if not pagesize_kb:
            sys.exit('Unable to determine the default huge page size, '
                     'use --pagesize')
        if reserve_kb % pagesize_kb != 0:
            sys.exit(
                'Huge reservation {}kB is not a multiple of page size {}kB'.
                format(reserve_kb, pagesize_kb))

    if args.clear:
        clear_pages()
    if args.unmount:
        umount_huge(HUGE_MOUNT)
    if reserve_kb is not None:
        reserve_pages(
            reserve_kb // pagesize_kb, pagesize_kb, node=args.node)
    if args.mount:
        mount_huge(pagesize_kb, HUGE_MOUNT)
    if args.show:
        show_pages()
        print()
        show_mount()


if __name__ == "__main__":
    main()