#!/usr/bin/env python3

# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2024, Intel Corporation

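"""
Compare pmembench results produced by two PMDK builds.

The script runs a single pmembench scenario twice - once against the
'reference' build and once against the 'rival' build - with the benchmark
pinned to the NUMA node of the PMEM device mounted at the given path.
The raw outputs are stored as CSV files and then combined into
'*_combined.csv' and '*_diff.csv' files containing a normalized difference
between the two runs.

The script is meant to be run from the root of the PMDK source tree, since
it invokes ./src/benchmarks/pmembench and reads src/benchmarks/<config>.cfg.

Example invocation (script, config, scenario and path names are illustrative):

    ./compare.py \
        --reference /path/to/pmdk-ref/src/nondebug \
        --rival /path/to/pmdk-riv/src/nondebug \
        -c pmembench_map -s map_insert -p /mnt/pmem1
"""
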
import argparse
import json
import subprocess

import pandas as pd


PARSER = argparse.ArgumentParser()
PARSER.add_argument('--reference', metavar='LD_LIBRARY_PATH', required=True,
                    help='LD_LIBRARY_PATH where the first version of PMDK is built')
PARSER.add_argument('--rival', metavar='LD_LIBRARY_PATH', required=True,
                    help='LD_LIBRARY_PATH where the second version of PMDK is built')
PARSER.add_argument('-c', '--config', required=True,
                    help='Name of the .cfg file to use')
PARSER.add_argument('-s', '--scenario', required=True,
                    help='Name of the scenario to run')
PARSER.add_argument('-p', '--pmem_path', required=True,
                    help='PMEM-mounted directory to use')


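# Result metrics read from both pmembench outputs; combine() copies them from
# the ref and riv runs and computes a normalized difference for each of them.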
COLUMNS_COMBINE = [
    'ops-per-second[1/sec]',
    'total-avg[sec]',
    'total-max[sec]',
    'total-min[sec]',
    'total-median[sec]',
    'total-std-dev[sec]',
    'latency-avg[nsec]',
    'latency-min[nsec]',
    'latency-max[nsec]',
    'latency-std-dev[nsec]',
    'latency-pctl-50.0%[nsec]',
    'latency-pctl-99.0%[nsec]',
    'latency-pctl-99.9%[nsec]',
]


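# Benchmark parameters expected to be identical in both runs; combine() copies
# them verbatim (from the reference output) whenever they are present.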
COLUMNS_COPY = [
    'threads',
    'ops-per-thread',
    'data-size',
    'seed',
    'repeats',
    'thread-affinity',
    'main-affinity',
    'min-exe-time',
    'random',
    'min-size',
    'type-number',
    'operation',
    'lib',
    'nestings',
    'type',
    'max-key',
    'external-tx',
    'alloc',
]


def get_numa_node(pmem_path: str) -> int:
    """Find the NUMA node of the device mounted at the specified path"""
    # no slash at the end of the path
    if pmem_path[-1] == '/':
        pmem_path = pmem_path[0:-1]
    # find the line describing the mount point
    mount = subprocess.getoutput('mount')
    mount_line = None
    for line in mount.splitlines():
        if pmem_path in line:
            mount_line = line
            break
    if mount_line is None:
        print(mount)
        raise Exception(f'Cannot find the mounted PMEM device for: {pmem_path}')
    # Extract blockdev from the found line e.g.
    # /dev/pmem1 on /mnt/pmem1 type ext4 (rw,noatime,seclabel,nodelalloc,dax=always)
    # - Linux device is the first part of the line e.g. /dev/pmem1
    # - blockdev is the name of the device file e.g. pmem1
    blockdev = mount_line.split(' ')[0].split('/')[2]
    # Find the NDCTL namespace of the blockdev and extract its NUMA node
    numa_node = None
    namespaces = json.loads(subprocess.getoutput('ndctl list -v'))
    for namespace in namespaces:
        if namespace['mode'] == 'fsdax' and namespace['blockdev'] == blockdev:
            numa_node = namespace['numa_node']
            break
    if numa_node is None:
        print(namespaces)
        raise Exception(f'Cannot find the namespace: mode == "fsdax" and blockdev == "{blockdev}"')
    return numa_node


IDX_TO_NAME = ['ref', 'riv']


def output_name(args: argparse.Namespace, idx: int) -> str:
    """Generate a file name for the ref or riv output"""
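    # e.g. '-c pmembench_map -s map_insert' (illustrative names) yields
    # 'pmembench_map__map_insert_ref.csv' for the reference run (idx == 0)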
    return f'{args.config}__{args.scenario}_{IDX_TO_NAME[idx]}.csv'


def run(numa_node: int, args: argparse.Namespace, idx: int, ld_library_path: str) -> None:
    """Run PMEMBENCH according to the provided parameters"""
    config = f'src/benchmarks/{args.config}.cfg'
    file = f'{args.pmem_path}/testfile.obj'
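    # Pin the benchmark (CPUs and memory allocations) to the NUMA node of the
    # PMEM device so that cross-node traffic does not skew the results.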
    cmd = f'numactl --cpunodebind {numa_node} --localalloc ' \
          f'./src/benchmarks/pmembench {config} {args.scenario} --file {file}'
    env = {'LD_LIBRARY_PATH': ld_library_path}
    result = subprocess.run(cmd, env=env, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, encoding='utf-8', shell=True)
    # generate the file with the output
    if result.returncode == 0:
        # drop the first line e.g. obj_rbtree_map_insert: map_insert [1]
        out = ''.join(result.stdout.splitlines(keepends=True)[1:])
    else:
        out = result.stdout
    with open(output_name(args, idx), 'w') as output:
        output.write(out)
    # validate the run
    if result.returncode != 0:
        print(result.stdout)
        print(result.stderr)
        exit(result.returncode)


def column_name(column: str, idx: int) -> str:
    """Generate a column name with ref or riv infix"""
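    # e.g. column_name('total-avg[sec]', 0) == 'total-avg-ref[sec]'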
    return f'-{IDX_TO_NAME[idx]}['.join(column.split('['))


def combine(args: argparse.Namespace) -> None:
    """
    Combine outputs from the reference and rival runs.

    Output data files:
    - combined - contains data from ref and riv together with a normalized
      difference between them.
    - diff - just a normalized difference between ref and riv
    """
    dfs = [pd.read_csv(output_name(args, idx), sep=';') for idx in range(2)]
    combined = pd.DataFrame()
    diff = pd.DataFrame()
    for column in COLUMNS_COMBINE:
        # Copy columns to combine from both ref and riv
        for idx in range(2):
            combined[column_name(column, idx)] = dfs[idx][column]
        diff_column = f'{column}-diff'
        # Normalized difference between ref and riv:
        # diff = (riv - ref) / ref
        # Both output data frames contain diff columns.
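        # A positive diff means the riv value is higher than the ref one.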
        combined[diff_column] = (dfs[1][column] / dfs[0][column] - 1)
        diff[diff_column] = combined[diff_column]
    for column in COLUMNS_COPY:
        if column in dfs[0].columns:
            # These columns are identical in both data frames
            # so they can be copied from either data frame ref or riv.
            combined[column] = dfs[0][column]
            diff[column] = dfs[0][column]
    # Write the generated data frames to CSV files.
    prefix = f'{args.config}__{args.scenario}'
    combined.to_csv(f'{prefix}_combined.csv', index=False, float_format='%.3f')
    diff.to_csv(f'{prefix}_diff.csv', index=False, float_format='%.3f')


def main():
    args = PARSER.parse_args()
    numa_node = get_numa_node(args.pmem_path)
    for idx, ld_library_path in enumerate([args.reference, args.rival]):
        run(numa_node, args, idx, ld_library_path)
    combine(args)


if __name__ == '__main__':
    main()