Skip to content

Commit

Permalink
Performance of kernel generated from comet-py is now on par with mlir…
Browse files Browse the repository at this point in the history
…-cpu-runner (#30).

  This is achieved by passing the target triple returned by ``llvm-config --host-target`` to LLVM's ``opt``.

  Also fixed a bug where calling the kernel function in a loop caused incorrect code generation.
  Finally, made the integration tests run in parallel using multiprocessing.
  • Loading branch information
pthomadakis committed Oct 22, 2023
1 parent 37f8cd9 commit 34a69b5
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 77 deletions.
38 changes: 18 additions & 20 deletions frontends/numpy-scipy/MLIRGen/lowering.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,11 @@ def cleanup():
os.remove(f)
# pass

platform_args = ""
atexit.register(cleanup)
if("macOS" in platform.platform()):
comet_runner_util = "../build/lib/libcomet_runner_utils.dylib"
platform_args = "-isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/ "
elif("Linux" in platform.platform()):
comet_runner_util = "../build/lib/libcomet_runner_utils.so"
else:
Expand Down Expand Up @@ -476,44 +478,41 @@ def translate_and_exec_llvm_with_jit(llvm_in,func_name, inputs, outputs, uuid_s)

translate_mlir_command = "../llvm/build/bin/mlir-translate --mlir-to-llvmir " + llvm_in

p = subprocess.run(shlex.split(translate_mlir_command) , stdout=subprocess.PIPE, stderr=subprocess.PIPE,shell=False,close_fds=False)
p = subprocess.run(shlex.split(translate_mlir_command) , stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# llvmir_file = 'einsum.ll'
llvmir_file = uuid_s+'.bc'
llvmir_file = uuid_s+'.ll'
llvmir_out = p.stdout
p = subprocess.run(['../llvm/build/bin/llvm-config', '--host-target'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
llvm_cfg_out = p.stdout.decode().strip() # This only needs to run once and should be possible to generate at compile time
# llvmir_out = llvmir_out.decode()

if(os.path.exists(llvmir_file) == False):
f = open(os.path.join( os.getcwd(), llvmir_file), 'wb')
files_to_cleanup.append(os.path.join( os.getcwd(), llvmir_file))
else:
f = open(llvmir_file, 'wb')

llvmir_out = p.stdout
# with open(os.path.join( os.getcwd(),llvmir_file), 'wb') as f:
f.write(llvmir_out)
f.close()
llvmir_opt_file = llvmir_file+'.opt'
llvmir_opt_file = llvmir_file+'.opt.ll'

llvm_opt_command = "../llvm/build/bin/opt --O3 "+llvmir_file+" -S -o "+llvmir_opt_file
p = subprocess.run(shlex.split(llvm_opt_command) , stdout=subprocess.PIPE, stderr=subprocess.PIPE,shell=False)
llvm_opt_command = '../llvm/build/bin/opt --O3 --mtriple="' + llvm_cfg_out+'" ' +llvmir_file+' -S -o '+llvmir_opt_file
p = subprocess.run(shlex.split(llvm_opt_command) , stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if(p.returncode != 0):
cleanup()
raise AssertionError("opt failed with error code: {}. Error: {}".format(p.returncode, p.stderr))
files_to_cleanup.append(llvmir_opt_file)

llc_obj_file = llvmir_opt_file+".o"
llc_command = "../llvm/build/bin/llc -O3 "+llvmir_opt_file+" -filetype=obj -o " +llc_obj_file
p = subprocess.run(shlex.split(llc_command) , stdout=subprocess.PIPE, stderr=subprocess.PIPE,shell=False)
if(p.returncode != 0):
cleanup()
raise AssertionError("llc failed with error code: {}. Error: {}".format(p.returncode, p.stderr))
files_to_cleanup.append(llc_obj_file)
libname = "./lib"+llc_obj_file+func_name+".so"
libname = "./lib"+llvmir_file+func_name+".so"

gcc_command = "gcc --shared " +llc_obj_file+ " -O3 -o "+libname+" -fpic -L ../build/lib/ -Wl,-rpath,../build/lib/ -lcomet_runner_utils"
gcc_command = "../llvm/build/bin/clang -Wno-everything --shared -O3 "+platform_args+ llvmir_opt_file+ " -o "+libname+" -fpic -L ../build/lib/ -Wl,-rpath,../build/lib/ -lcomet_runner_utils"

p = subprocess.run(shlex.split(gcc_command) , stdout=subprocess.PIPE, stderr=subprocess.PIPE,shell=False)
p = subprocess.run(shlex.split(gcc_command) , stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if(p.returncode != 0):
cleanup()
raise AssertionError("gcc failed with error code: {}. Error: {}".format(p.returncode, p.stderr))
files_to_cleanup.append(os.path.join( os.getcwd(), libname))

# Load code generated from COMET
lib = ctypes.cdll.LoadLibrary(libname)
Expand Down Expand Up @@ -558,9 +557,9 @@ def translate_and_exec_llvm_with_jit(llvm_in,func_name, inputs, outputs, uuid_s)
out = ret_outputs.pop()
else:
out = ret_outputs
files_to_cleanup.append(os.path.join( os.getcwd(), libname))

return out, llvmir_file

# Invoke `func`, unpacking `args` as its positional arguments.
# The result of the call is discarded; this function returns nothing.
def func_execute(func, args):
func(*(args))

Expand Down Expand Up @@ -706,17 +705,16 @@ def lower_dialect_with_jit(ta_dialect_rep, out_dims, compile_with_flags,func_nam

# Uncomment for debugging purposes
scf_out_file = lower_ta_to_mlir_with_jit(ta_dialect_file, mlir_lower_flags, args_vals, uuid_s)

# Running --convert-ta-to-it --convert-to-loops and --convert-to-llvm in separate steps
# does not produce correct output. This is an issue with the backend.

#lower the SCF dialect to first STD dialect and then to the llvm dialect

llvm_out_file = lower_scf_to_llvm(scf_out_file, scf_lower_flags, uuid_s)
# llvm_out_file = lower_scf_to_llvm(ta_dialect_file, mlir_lower_flags + scf_lower_flags)

#lower the SCF dialect to the LLVM dialect
#result = execute_llvm(llvm_out_file)

result,llvmir_file = translate_and_exec_llvm_with_jit(llvm_out_file,func_name, args_vals, outputs, uuid_s)

return result
34 changes: 17 additions & 17 deletions frontends/numpy-scipy/comet.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,25 +58,25 @@ def get_format(A):


class NewVisitor(ast.NodeVisitor):
tsemantics = {}
isemantics = {}
inputs = []
tsymbols = {"comet": None}
isymbols = {}
tcurr = 0
icurr = 0
ops = []
iLabelsToVals = {}
valsToILabels = {}
declarations = []
uniqueLabels = []
in_args = []
returns = []
mask = None
need_opt_comp_workspace = False

def __init__(self,inputs):
self.tsemantics = {}
self.isemantics = {}
self.tsymbols = {"comet": None}
self.isymbols = {}
self.tcurr = 0
self.icurr = 0
self.ops = []
self.iLabelsToVals = {}
self.valsToILabels = {}
self.declarations = []
self.uniqueLabels = []
self.in_args = []
self.returns = []
self.mask = None
self.need_opt_comp_workspace = False

self.inputs = inputs

# Output formats when multiply matrices of different formats
self.sp_matmult_conversions = {
CSR: {
Expand Down
101 changes: 61 additions & 40 deletions frontends/numpy-scipy/integration_tests/numpy_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,58 +2,79 @@
import glob

import subprocess
import multiprocessing
import os
import sys

categories = ['ops', 'opts', 'kernels', 'compound_exps', 'semiring']
files = []
if not os.path.exists("../llvm/"):
os.symlink("../../llvm", "../llvm")
if not os.path.exists("../build/"):
os.symlink("../../build", "../build")
# Run a single test script with `python3` in a subprocess and report the outcome.
# Prints "Running <test_file> " followed by "PASSED" or "FAILED", where the
# verdict is taken from the subprocess return code (non-zero means FAILED).
# Returns a tuple: (test_file, "PASSED" or "FAILED", captured stderr decoded to str).
def run_test_case(test_file):
print("Running", test_file,end= " ")
# stdout and stderr are captured through pipes rather than echoed to the console.
p = subprocess.run(['python3', test_file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if(p.returncode != 0):
ret = "FAILED"
# print(ret)
# print("=======================================================")
# print(test_file)
# failed_tests = failed_tests + 1
# list_failed_tests.append(test_file)
else:
ret = "PASSED"
print(ret)

# NOTE(review): the six lines below (the `for c in categories:` loop) look like
# removed-side lines from the diff view rather than part of this function — confirm.
for c in categories:
if not os.path.exists("./"+c+"/comet.py"):
os.symlink("../../comet.py","./"+c+"/comet.py")
if not os.path.exists("./"+c+"/MLIRGen"):
os.symlink("../../MLIRGen","./"+c+"/MLIRGen")
files = files + glob.glob("./"+c+"/test_*.py")
# The stderr text is returned with the verdict alongside the test path.
return (test_file, ret, p.stderr.decode())


print("\nFound" , len(files), "test cases")
if __name__ == '__main__':
categories = ['ops', 'opts', 'kernels', 'compound_exps', 'semiring']
files = []
if not os.path.exists("../llvm/"):
os.symlink("../../llvm", "../llvm")
if not os.path.exists("../build/"):
os.symlink("../../build", "../build")

print("Running the tests......")
for c in categories:
if not os.path.exists("./"+c+"/comet.py"):
os.symlink("../../comet.py","./"+c+"/comet.py")
if not os.path.exists("./"+c+"/MLIRGen"):
os.symlink("../../MLIRGen","./"+c+"/MLIRGen")
files = files + glob.glob("./"+c+"/test_*.py")

failed_tests = 0
list_failed_tests = []
for test_file in files:
print("Running", test_file,end= " ")
test_result = subprocess.call(['python3', test_file])

if(test_result != 0):
print("FAILED")
print("=======================================================")
print(test_file)
failed_tests = failed_tests + 1
list_failed_tests.append(test_file)
else:
print("PASSED")

print("\nFound" , len(files), "test cases")

print("Running the tests with up to {} cores......".format(os.cpu_count()))

failed_tests = 0
list_failed_tests = []

with multiprocessing.Pool() as p:
results = p.map(run_test_case, files)
for res in results:
if(res[1] == "FAILED"):
list_failed_tests.append((res[0],res[2]))

print("**********************************************\n")
print("Passed = ", len(files) - failed_tests)
print("Passed = ", len(files) - len(list_failed_tests))

print("Failed = " , failed_tests)
print("Failed = " , len(list_failed_tests))

if(list_failed_tests):
print("Following tests failed:")
if(list_failed_tests):
print("The following tests failed:")

for failed_test in list_failed_tests:
print(' ' , failed_test)
for failed_test in list_failed_tests:
print(' ' , failed_test[0])

if len(sys.argv) == 2:
if sys.argv[1] == '-v':
print()
print("Error messages of failed tests:")
for failed_test in list_failed_tests:
print(' ' , failed_test[0])
print('='*40)
print(failed_test[1])
print('*'*40)

for c in categories:
os.unlink("./"+c+"/comet.py")
os.unlink("./"+c+"/MLIRGen")
for c in categories:
os.unlink("./"+c+"/comet.py")
os.unlink("./"+c+"/MLIRGen")

os.unlink("../llvm")
os.unlink("../build")
os.unlink("../llvm")
os.unlink("../build")

0 comments on commit 34a69b5

Please sign in to comment.