diff --git a/src/runtime/hexagon_remote/Makefile b/src/runtime/hexagon_remote/Makefile index 0cb0bff13157..52672135d1bf 100644 --- a/src/runtime/hexagon_remote/Makefile +++ b/src/runtime/hexagon_remote/Makefile @@ -106,6 +106,10 @@ bin/%/host_malloc.o: host_malloc.cpp mkdir -p $(@D) $(CXX-$*) $(CCFLAGS-$*) -fPIC -c host_malloc.cpp -o $@ +bin/%/host_shim.o: host_shim.cpp + mkdir -p $(@D) + $(CXX-$*) $(CCFLAGS-$*) -fPIC -c host_shim.cpp -o $@ + bin/%/nearbyint.o: nearbyint.cpp mkdir -p $(@D) $(CXX-$*) $(CCFLAGS-$*) -fPIC -c nearbyint.cpp -o $@ @@ -129,7 +133,7 @@ bin/%/signed_by_debug/libhalide_hexagon_remote_skel.so: bin/%/libhalide_hexagon_ mkdir -p $(@D) python $(HEXAGON_ELFSIGNER) --no_disclaimer -i $^ -o `dirname $@` -bin/%/libhalide_hexagon_host.so: bin/src/halide_hexagon_remote_stub.c bin/%/host_malloc.o +bin/%/libhalide_hexagon_host.so: bin/src/halide_hexagon_remote_stub.c bin/%/host_malloc.o bin/%/host_shim.o mkdir -p $(@D) $(CC-$*) $^ $(CCFLAGS-$*) -Wl,-soname,libhalide_hexagon_host.so -shared -o $@ diff --git a/src/runtime/hexagon_remote/bin/arm-32-android/libhalide_hexagon_host.so b/src/runtime/hexagon_remote/bin/arm-32-android/libhalide_hexagon_host.so index 446bda3c494f..dc83b23b670a 100755 Binary files a/src/runtime/hexagon_remote/bin/arm-32-android/libhalide_hexagon_host.so and b/src/runtime/hexagon_remote/bin/arm-32-android/libhalide_hexagon_host.so differ diff --git a/src/runtime/hexagon_remote/bin/arm-64-android/libhalide_hexagon_host.so b/src/runtime/hexagon_remote/bin/arm-64-android/libhalide_hexagon_host.so index 7fcf61e0e724..b3395951ef19 100755 Binary files a/src/runtime/hexagon_remote/bin/arm-64-android/libhalide_hexagon_host.so and b/src/runtime/hexagon_remote/bin/arm-64-android/libhalide_hexagon_host.so differ diff --git a/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote.h b/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote.h index 0b8d7840166b..9fbdbfd1ba75 100644 --- a/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote.h +++ b/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote.h @@ -1,5 +1,6 @@ #ifndef _HALIDE_HEXAGON_REMOTE_H #define _HALIDE_HEXAGON_REMOTE_H +#include "AEEStdDef.h" #ifndef __QAIC_HEADER #define __QAIC_HEADER(ff) ff #endif //__QAIC_HEADER @@ -33,11 +34,12 @@ struct _halide_hexagon_remote_buffer__seq_octet { int dataLen; }; typedef unsigned int halide_hexagon_remote_handle_t; +typedef uint64 halide_hexagon_remote_scalar_t; __QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_initialize_kernels_v3)(const unsigned char* code, int codeLen, halide_hexagon_remote_handle_t* module_ptr) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_get_symbol_v4)(halide_hexagon_remote_handle_t module_ptr, const char* name, int nameLen, halide_hexagon_remote_handle_t* sym_ptr) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_power_hvx_on)(void) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_power_hvx_off)(void) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_run)(halide_hexagon_remote_handle_t module_ptr, halide_hexagon_remote_handle_t symbol, const halide_hexagon_remote_buffer* input_buffers, int input_buffersLen, halide_hexagon_remote_buffer* output_buffers, int output_buffersLen, const halide_hexagon_remote_buffer* input_scalars, int input_scalarsLen) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_run_v2)(halide_hexagon_remote_handle_t module_ptr, halide_hexagon_remote_handle_t symbol, const halide_hexagon_remote_buffer* input_buffers, int input_buffersLen, halide_hexagon_remote_buffer* output_buffers, int output_buffersLen, const halide_hexagon_remote_scalar_t* scalars, int scalarsLen) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_release_kernels_v2)(halide_hexagon_remote_handle_t module_ptr) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_poll_log)(char* log, int logLen, int* read_size) __QAIC_HEADER_ATTRIBUTE; __QAIC_HEADER_EXPORT int __QAIC_HEADER(halide_hexagon_remote_poll_profiler_state)(int* func, int* threads) __QAIC_HEADER_ATTRIBUTE; diff --git a/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote_skel.c b/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote_skel.c index 9e0652f45279..fac7a14a5293 100644 --- a/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote_skel.c +++ b/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote_skel.c @@ -418,15 +418,15 @@ struct Interface { #define __QAIC_SLIM_EXPORT #endif -static const Type types[2]; +static const Type types[3]; static const SequenceType sequenceTypes[1] = {{&(types[1]),0x0,0x4,0x4,0x0}}; -static const Type types[2] = {{0x1,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x1},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8)}}; -static const Parameter parameters[8] = {{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),0,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x4,3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x4,0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(sequenceTypes[0]),0}}, 25,SLIM_IFPTR32(0x4,0x8),0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(sequenceTypes[0]),0}}, 25,SLIM_IFPTR32(0x4,0x8),3,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)1}}, 2,0x4,3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)1}}, 2,0x4,0,0}}; -static const Parameter* const parameterArrays[20] = {(&(parameters[7])),(&(parameters[2])),(&(parameters[2])),(&(parameters[7])),(&(parameters[2])),(&(parameters[2])),(&(parameters[7])),(&(parameters[7])),(&(parameters[2])),(&(parameters[2])),(&(parameters[3])),(&(parameters[4])),(&(parameters[3])),(&(parameters[2])),(&(parameters[0])),(&(parameters[1])),(&(parameters[6])),(&(parameters[6])),(&(parameters[5])),(&(parameters[6]))}; +static const Type types[3] = {{0x1,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x1},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8)},{0x8,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x8}}; +static const Parameter parameters[9] = {{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),0,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x4,3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x4,0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(sequenceTypes[0]),0}}, 25,SLIM_IFPTR32(0x4,0x8),0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(sequenceTypes[0]),0}}, 25,SLIM_IFPTR32(0x4,0x8),3,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[2]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)1}}, 2,0x4,3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)1}}, 2,0x4,0,0}}; +static const Parameter* const parameterArrays[20] = {(&(parameters[8])),(&(parameters[2])),(&(parameters[2])),(&(parameters[8])),(&(parameters[2])),(&(parameters[2])),(&(parameters[8])),(&(parameters[8])),(&(parameters[2])),(&(parameters[2])),(&(parameters[3])),(&(parameters[4])),(&(parameters[5])),(&(parameters[2])),(&(parameters[0])),(&(parameters[1])),(&(parameters[7])),(&(parameters[7])),(&(parameters[6])),(&(parameters[7]))}; static const Method methods[9] = {{REMOTE_SCALARS_MAKEX(0,0,0x2,0x1,0x0,0x0),0x4,0x4,3,2,(&(parameterArrays[14])),0x4,0x4},{REMOTE_SCALARS_MAKEX(0,0,0x2,0x1,0x0,0x0),0x8,0x4,4,3,(&(parameterArrays[13])),0x4,0x4},{REMOTE_SCALARS_MAKEX(0,0,0x0,0x0,0x0,0x0),0x0,0x0,0,0,0,0x0,0x0},{REMOTE_SCALARS_MAKEX(0,0,255,255,15,15),0x14,0x0,9,5,(&(parameterArrays[8])),0x4,0x1},{REMOTE_SCALARS_MAKEX(0,0,0x1,0x0,0x0,0x0),0x4,0x0,1,1,(&(parameterArrays[1])),0x4,0x0},{REMOTE_SCALARS_MAKEX(0,0,0x1,0x2,0x0,0x0),0x4,0x4,4,2,(&(parameterArrays[18])),0x4,0x4},{REMOTE_SCALARS_MAKEX(0,0,0x0,0x1,0x0,0x0),0x0,0x8,2,2,(&(parameterArrays[16])),0x1,0x4},{REMOTE_SCALARS_MAKEX(0,0,0x1,0x0,0x0,0x0),0x4,0x0,1,1,(&(parameterArrays[0])),0x4,0x0},{REMOTE_SCALARS_MAKEX(0,0,0x1,0x0,0x0,0x0),0x20,0x0,8,8,(&(parameterArrays[0])),0x4,0x0}}; static const Method* const methodArrays[10] = {&(methods[0]),&(methods[1]),&(methods[2]),&(methods[2]),&(methods[3]),&(methods[4]),&(methods[5]),&(methods[6]),&(methods[7]),&(methods[8])}; -static const char strings[349] = "initialize_kernels_v3\0busbwUsagePercentage\0set_performance_mode\0poll_profiler_state\0release_kernels_v2\0bwMegabytesPerSec\0set_performance\0output_buffers\0mipsPerThread\0input_scalars\0input_buffers\0power_hvx_off\0get_symbol_v4\0power_hvx_on\0set_latency\0set_bus_bw\0module_ptr\0mipsTotal\0read_size\0set_mips\0poll_log\0threads\0sym_ptr\0symbol\0func\0name\0code\0run\0"; -static const uint16_t methodStrings[34] = {121,289,152,269,247,103,22,235,239,345,258,323,180,137,166,208,258,335,315,64,330,307,298,303,279,0,340,258,43,59,84,258,194,222}; +static const char strings[346] = "initialize_kernels_v3\0busbwUsagePercentage\0set_performance_mode\0poll_profiler_state\0release_kernels_v2\0bwMegabytesPerSec\0set_performance\0output_buffers\0mipsPerThread\0input_buffers\0power_hvx_off\0get_symbol_v4\0power_hvx_on\0set_latency\0set_bus_bw\0module_ptr\0mipsTotal\0read_size\0set_mips\0poll_log\0threads\0scalars\0sym_ptr\0symbol\0run_v2\0func\0name\0code\0"; +static const uint16_t methodStrings[34] = {121,275,152,255,233,103,22,221,225,324,244,317,166,137,301,194,244,336,309,64,331,293,284,289,265,0,341,244,43,59,84,244,180,208}; static const uint16_t methodStringsArrays[10] = {25,15,33,32,9,30,22,19,28,0}; __QAIC_SLIM_EXPORT const Interface __QAIC_SLIM(halide_hexagon_remote_slim) = {10,&(methodArrays[0]),0,0,&(methodStringsArrays [0]),methodStrings,strings}; #endif //_HALIDE_HEXAGON_REMOTE_SLIM_H @@ -522,23 +522,23 @@ static __inline int _skel_method_3(int (*_pfn)(char*, uint32_t, uint32_t*), uint _CATCH(_nErr) {} return _nErr; } -static __inline int _skel_pack(remote_arg* _praROutPost, remote_arg* _ppraROutPost[1], void* _primROut, char* _in0[1], uint32_t _in0Len[1]) { +static __inline int _skel_pack(remote_arg* _praROutPost, remote_arg* _ppraROutPost[1], void* _primROut, char* _rout0[1], uint32_t _rout0Len[1]) { int _nErr = 0; remote_arg* _praROutPostStart = _praROutPost; remote_arg** _ppraROutPostStart = _ppraROutPost; _ppraROutPost = &_praROutPost; - _ppraROutPostStart[0] += (_praROutPost - _praROutPostStart) +0; + _ppraROutPostStart[0] += (_praROutPost - _praROutPostStart) +1; return _nErr; } -static __inline int _skel_pack_1(remote_arg* _praROutPost, remote_arg* _ppraROutPost[1], void* _primROut, char* _rout0[1], uint32_t _rout0Len[1]) { +static __inline int _skel_pack_1(remote_arg* _praROutPost, remote_arg* _ppraROutPost[1], void* _primROut, char* _in0[1], uint32_t _in0Len[1]) { int _nErr = 0; remote_arg* _praROutPostStart = _praROutPost; remote_arg** _ppraROutPostStart = _ppraROutPost; _ppraROutPost = &_praROutPost; - _ppraROutPostStart[0] += (_praROutPost - _praROutPostStart) +1; + _ppraROutPostStart[0] += (_praROutPost - _praROutPostStart) +0; return _nErr; } -static __inline int _skel_unpack(_allocator* _al, remote_arg* _praIn, remote_arg* _ppraIn[1], remote_arg* _praROut, remote_arg* _ppraROut[1], void* _primIn, void* _primROut, char* _in0[1], uint32_t _in0Len[1]) { +static __inline int _skel_unpack(_allocator* _al, remote_arg* _praIn, remote_arg* _ppraIn[1], remote_arg* _praROut, remote_arg* _ppraROut[1], void* _primIn, void* _primROut, char* _rout0[1], uint32_t _rout0Len[1]) { int _nErr = 0; remote_arg* _praInStart = _praIn; remote_arg** _ppraInStart = _ppraIn; @@ -546,15 +546,15 @@ static __inline int _skel_unpack(_allocator* _al, remote_arg* _praIn, remote_arg remote_arg** _ppraROutStart = _ppraROut; _ppraIn = &_praIn; _ppraROut = &_praROut; - _COPY(_in0Len, 0, _primIn, 0, 4); - _ASSERT(_nErr, (int)((_praIn[0].buf.nLen / 1)) >= (int)(_in0Len[0])); - _in0[0] = _praIn[0].buf.pv; - _ppraInStart[0] += (_praIn - _praInStart) + 1; - _ppraROutStart[0] += (_praROut - _praROutStart) +0; + _COPY(_rout0Len, 0, _primIn, 0, 4); + _ASSERT(_nErr, (int)((_praROut[0].buf.nLen / 1)) >= (int)(_rout0Len[0])); + _rout0[0] = _praROut[0].buf.pv; + _ppraInStart[0] += (_praIn - _praInStart) + 0; + _ppraROutStart[0] += (_praROut - _praROutStart) +1; _CATCH(_nErr) {} return _nErr; } -static __inline int _skel_unpack_1(_allocator* _al, remote_arg* _praIn, remote_arg* _ppraIn[1], remote_arg* _praROut, remote_arg* _ppraROut[1], void* _primIn, void* _primROut, char* _rout0[1], uint32_t _rout0Len[1]) { +static __inline int _skel_unpack_1(_allocator* _al, remote_arg* _praIn, remote_arg* _ppraIn[1], remote_arg* _praROut, remote_arg* _ppraROut[1], void* _primIn, void* _primROut, char* _in0[1], uint32_t _in0Len[1]) { int _nErr = 0; remote_arg* _praInStart = _praIn; remote_arg** _ppraInStart = _ppraIn; @@ -562,15 +562,15 @@ static __inline int _skel_unpack_1(_allocator* _al, remote_arg* _praIn, remote_a remote_arg** _ppraROutStart = _ppraROut; _ppraIn = &_praIn; _ppraROut = &_praROut; - _COPY(_rout0Len, 0, _primIn, 0, 4); - _ASSERT(_nErr, (int)((_praROut[0].buf.nLen / 1)) >= (int)(_rout0Len[0])); - _rout0[0] = _praROut[0].buf.pv; - _ppraInStart[0] += (_praIn - _praInStart) + 0; - _ppraROutStart[0] += (_praROut - _praROutStart) +1; + _COPY(_in0Len, 0, _primIn, 0, 4); + _ASSERT(_nErr, (int)((_praIn[0].buf.nLen / 1)) >= (int)(_in0Len[0])); + _in0[0] = _praIn[0].buf.pv; + _ppraInStart[0] += (_praIn - _praInStart) + 1; + _ppraROutStart[0] += (_praROut - _praROutStart) +0; _CATCH(_nErr) {} return _nErr; } -static __inline int _skel_method_4(int (*_pfn)(uint32_t, uint32_t, void*, uint32_t, void*, uint32_t, void*, uint32_t), uint32_t _sc, remote_arg* _pra) { +static __inline int _skel_method_4(int (*_pfn)(uint32_t, uint32_t, void*, uint32_t, void*, uint32_t, char*, uint32_t), uint32_t _sc, remote_arg* _pra) { remote_arg* _praEnd; uint32_t _in0[1]; uint32_t _in1[1]; @@ -578,7 +578,7 @@ static __inline int _skel_method_4(int (*_pfn)(uint32_t, uint32_t, void*, uint32 uint32_t _in2Len[1]; void* _rout3[1]; uint32_t _rout3Len[1]; - void* _in4[1]; + char* _in4[1]; uint32_t _in4Len[1]; uint32_t* _primIn; int _numIn[1]; @@ -595,8 +595,6 @@ static __inline int _skel_method_4(int (*_pfn)(uint32_t, uint32_t, void*, uint32 int _nErr = 0; char* _seq_primIn3; char* _seq_nat3; - char* _seq_primIn4; - char* _seq_nat4; _praEnd = ((_pra + REMOTE_SCALARS_INBUFS(_sc)) + REMOTE_SCALARS_OUTBUFS(_sc) + REMOTE_SCALARS_INHANDLES(_sc) + REMOTE_SCALARS_OUTHANDLES(_sc)); _ASSERT(_nErr, (_pra + ((4 + 0) + (0 + 0))) <= _praEnd); _numIn[0] = (REMOTE_SCALARS_INBUFS(_sc) - 1); @@ -613,34 +611,26 @@ static __inline int _skel_method_4(int (*_pfn)(uint32_t, uint32_t, void*, uint32 _ALLOCATE(_nErr, _al, (_in2Len[0] * SLIM_IFPTR32(8, 16)), SLIM_IFPTR32(4, 8), _in2[0]); for(_ii = 0, _seq_primIn2 = (char*)_praIn[0].buf.pv, _seq_nat2 = (char*)_in2[0];_ii < (int)_in2Len[0];++_ii, _seq_primIn2 = (_seq_primIn2 + 4), _seq_nat2 = (_seq_nat2 + SLIM_IFPTR32(8, 16))) { - _TRY(_nErr, _skel_unpack(_al, (_praIn + 1), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn2, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2])))); + _TRY(_nErr, _skel_unpack_1(_al, (_praIn + 1), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn2, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2])))); } _COPY(_rout3Len, 0, _primIn, 12, 4); _ASSERT(_nErr, (int)((_praIn[1].buf.nLen / 4)) >= (int)(_rout3Len[0])); _ALLOCATE(_nErr, _al, (_rout3Len[0] * SLIM_IFPTR32(8, 16)), SLIM_IFPTR32(4, 8), _rout3[0]); for(_ii = 0, _seq_primIn3 = (char*)_praIn[1].buf.pv, _seq_nat3 = (char*)_rout3[0];_ii < (int)_rout3Len[0];++_ii, _seq_primIn3 = (_seq_primIn3 + 4), _seq_nat3 = (_seq_nat3 + SLIM_IFPTR32(8, 16))) { - _TRY(_nErr, _skel_unpack_1(_al, (_praIn + 2), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn3, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2])))); + _TRY(_nErr, _skel_unpack(_al, (_praIn + 2), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn3, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2])))); } _COPY(_in4Len, 0, _primIn, 16, 4); - _ASSERT(_nErr, (int)((_praIn[2].buf.nLen / 4)) >= (int)(_in4Len[0])); - _ALLOCATE(_nErr, _al, (_in4Len[0] * SLIM_IFPTR32(8, 16)), SLIM_IFPTR32(4, 8), _in4[0]); - for(_ii = 0, _seq_primIn4 = (char*)_praIn[2].buf.pv, _seq_nat4 = (char*)_in4[0];_ii < (int)_in4Len[0];++_ii, _seq_primIn4 = (_seq_primIn4 + 4), _seq_nat4 = (_seq_nat4 + SLIM_IFPTR32(8, 16))) - { - _TRY(_nErr, _skel_unpack(_al, (_praIn + 3), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn4, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat4)[0]), (char**)&(((uint64_t*)_seq_nat4)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat4)[1]), (uint32_t*)&(((uint32_t*)_seq_nat4)[2])))); - } + _ASSERT(_nErr, (int)((_praIn[2].buf.nLen / 8)) >= (int)(_in4Len[0])); + _in4[0] = _praIn[2].buf.pv; _TRY(_nErr, _pfn(*_in0, *_in1, *_in2, *_in2Len, *_rout3, *_rout3Len, *_in4, *_in4Len)); for(_ii = 0, _seq_nat2 = (char*)_in2[0];_ii < (int)_in2Len[0];++_ii, _seq_nat2 = (_seq_nat2 + SLIM_IFPTR32(8, 16))) { - _TRY(_nErr, _skel_pack((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2])))); + _TRY(_nErr, _skel_pack_1((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2])))); } for(_ii = 0, _seq_nat3 = (char*)_rout3[0];_ii < (int)_rout3Len[0];++_ii, _seq_nat3 = (_seq_nat3 + SLIM_IFPTR32(8, 16))) { - _TRY(_nErr, _skel_pack_1((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2])))); - } - for(_ii = 0, _seq_nat4 = (char*)_in4[0];_ii < (int)_in4Len[0];++_ii, _seq_nat4 = (_seq_nat4 + SLIM_IFPTR32(8, 16))) - { - _TRY(_nErr, _skel_pack((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat4)[0]), (char**)&(((uint64_t*)_seq_nat4)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat4)[1]), (uint32_t*)&(((uint32_t*)_seq_nat4)[2])))); + _TRY(_nErr, _skel_pack((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2])))); } _CATCH(_nErr) {} _allocator_deinit(_al); @@ -721,7 +711,7 @@ __QAIC_SKEL_EXPORT int __QAIC_SKEL(halide_hexagon_remote_skel_invoke)(uint32_t _ case 3: return _skel_method_5((void*)__QAIC_IMPL(halide_hexagon_remote_power_hvx_off), _sc, _pra); case 4: - return _skel_method_4((void*)__QAIC_IMPL(halide_hexagon_remote_run), _sc, _pra); + return _skel_method_4((void*)__QAIC_IMPL(halide_hexagon_remote_run_v2), _sc, _pra); case 5: return _skel_method_1((void*)__QAIC_IMPL(halide_hexagon_remote_release_kernels_v2), _sc, _pra); case 6: diff --git a/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote_stub.c b/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote_stub.c index 7ce946615647..78292ddfa388 100644 --- a/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote_stub.c +++ b/src/runtime/hexagon_remote/bin/src/halide_hexagon_remote_stub.c @@ -418,15 +418,15 @@ struct Interface { #define __QAIC_SLIM_EXPORT #endif -static const Type types[2]; +static const Type types[3]; static const SequenceType sequenceTypes[1] = {{&(types[1]),0x0,0x4,0x4,0x0}}; -static const Type types[2] = {{0x1,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x1},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8)}}; -static const Parameter parameters[8] = {{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),0,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x4,3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x4,0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(sequenceTypes[0]),0}}, 25,SLIM_IFPTR32(0x4,0x8),0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(sequenceTypes[0]),0}}, 25,SLIM_IFPTR32(0x4,0x8),3,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)1}}, 2,0x4,3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)1}}, 2,0x4,0,0}}; -static const Parameter* const parameterArrays[20] = {(&(parameters[7])),(&(parameters[2])),(&(parameters[2])),(&(parameters[7])),(&(parameters[2])),(&(parameters[2])),(&(parameters[7])),(&(parameters[7])),(&(parameters[2])),(&(parameters[2])),(&(parameters[3])),(&(parameters[4])),(&(parameters[3])),(&(parameters[2])),(&(parameters[0])),(&(parameters[1])),(&(parameters[6])),(&(parameters[6])),(&(parameters[5])),(&(parameters[6]))}; +static const Type types[3] = {{0x1,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x1},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8)},{0x8,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x8}}; +static const Parameter parameters[9] = {{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),0,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x4,3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)0}}, 2,0x4,0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(sequenceTypes[0]),0}}, 25,SLIM_IFPTR32(0x4,0x8),0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(sequenceTypes[0]),0}}, 25,SLIM_IFPTR32(0x4,0x8),3,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[2]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),0,0},{SLIM_IFPTR32(0x8,0x10),{{(const uintptr_t)&(types[0]),(const uintptr_t)0x0}}, 9,SLIM_IFPTR32(0x4,0x8),3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)1}}, 2,0x4,3,0},{0x4,{{(const uintptr_t)0,(const uintptr_t)1}}, 2,0x4,0,0}}; +static const Parameter* const parameterArrays[20] = {(&(parameters[8])),(&(parameters[2])),(&(parameters[2])),(&(parameters[8])),(&(parameters[2])),(&(parameters[2])),(&(parameters[8])),(&(parameters[8])),(&(parameters[2])),(&(parameters[2])),(&(parameters[3])),(&(parameters[4])),(&(parameters[5])),(&(parameters[2])),(&(parameters[0])),(&(parameters[1])),(&(parameters[7])),(&(parameters[7])),(&(parameters[6])),(&(parameters[7]))}; static const Method methods[9] = {{REMOTE_SCALARS_MAKEX(0,0,0x2,0x1,0x0,0x0),0x4,0x4,3,2,(&(parameterArrays[14])),0x4,0x4},{REMOTE_SCALARS_MAKEX(0,0,0x2,0x1,0x0,0x0),0x8,0x4,4,3,(&(parameterArrays[13])),0x4,0x4},{REMOTE_SCALARS_MAKEX(0,0,0x0,0x0,0x0,0x0),0x0,0x0,0,0,0,0x0,0x0},{REMOTE_SCALARS_MAKEX(0,0,255,255,15,15),0x14,0x0,9,5,(&(parameterArrays[8])),0x4,0x1},{REMOTE_SCALARS_MAKEX(0,0,0x1,0x0,0x0,0x0),0x4,0x0,1,1,(&(parameterArrays[1])),0x4,0x0},{REMOTE_SCALARS_MAKEX(0,0,0x1,0x2,0x0,0x0),0x4,0x4,4,2,(&(parameterArrays[18])),0x4,0x4},{REMOTE_SCALARS_MAKEX(0,0,0x0,0x1,0x0,0x0),0x0,0x8,2,2,(&(parameterArrays[16])),0x1,0x4},{REMOTE_SCALARS_MAKEX(0,0,0x1,0x0,0x0,0x0),0x4,0x0,1,1,(&(parameterArrays[0])),0x4,0x0},{REMOTE_SCALARS_MAKEX(0,0,0x1,0x0,0x0,0x0),0x20,0x0,8,8,(&(parameterArrays[0])),0x4,0x0}}; static const Method* const methodArrays[10] = {&(methods[0]),&(methods[1]),&(methods[2]),&(methods[2]),&(methods[3]),&(methods[4]),&(methods[5]),&(methods[6]),&(methods[7]),&(methods[8])}; -static const char strings[349] = "initialize_kernels_v3\0busbwUsagePercentage\0set_performance_mode\0poll_profiler_state\0release_kernels_v2\0bwMegabytesPerSec\0set_performance\0output_buffers\0mipsPerThread\0input_scalars\0input_buffers\0power_hvx_off\0get_symbol_v4\0power_hvx_on\0set_latency\0set_bus_bw\0module_ptr\0mipsTotal\0read_size\0set_mips\0poll_log\0threads\0sym_ptr\0symbol\0func\0name\0code\0run\0"; -static const uint16_t methodStrings[34] = {121,289,152,269,247,103,22,235,239,345,258,323,180,137,166,208,258,335,315,64,330,307,298,303,279,0,340,258,43,59,84,258,194,222}; +static const char strings[346] = "initialize_kernels_v3\0busbwUsagePercentage\0set_performance_mode\0poll_profiler_state\0release_kernels_v2\0bwMegabytesPerSec\0set_performance\0output_buffers\0mipsPerThread\0input_buffers\0power_hvx_off\0get_symbol_v4\0power_hvx_on\0set_latency\0set_bus_bw\0module_ptr\0mipsTotal\0read_size\0set_mips\0poll_log\0threads\0scalars\0sym_ptr\0symbol\0run_v2\0func\0name\0code\0"; +static const uint16_t methodStrings[34] = {121,275,152,255,233,103,22,221,225,324,244,317,166,137,301,194,244,336,309,64,331,293,284,289,265,0,341,244,43,59,84,244,180,208}; static const uint16_t methodStringsArrays[10] = {25,15,33,32,9,30,22,19,28,0}; __QAIC_SLIM_EXPORT const Interface __QAIC_SLIM(halide_hexagon_remote_slim) = {10,&(methodArrays[0]),0,0,&(methodStringsArrays [0]),methodStrings,strings}; #endif //_HALIDE_HEXAGON_REMOTE_SLIM_H @@ -592,23 +592,23 @@ __QAIC_STUB_EXPORT int __QAIC_STUB(halide_hexagon_remote_power_hvx_off)(void) __ uint32_t _mid = 3; return _stub_method_2(_halide_hexagon_remote_handle(), _mid); } -static __inline int _stub_unpack(remote_arg* _praROutPost, remote_arg* _ppraROutPost[1], void* _primROut, char* _in0[1], uint32_t _in0Len[1]) { +static __inline int _stub_unpack(remote_arg* _praROutPost, remote_arg* _ppraROutPost[1], void* _primROut, char* _rout0[1], uint32_t _rout0Len[1]) { int _nErr = 0; remote_arg* _praROutPostStart = _praROutPost; remote_arg** _ppraROutPostStart = _ppraROutPost; _ppraROutPost = &_praROutPost; - _ppraROutPostStart[0] += (_praROutPost - _praROutPostStart) +0; + _ppraROutPostStart[0] += (_praROutPost - _praROutPostStart) +1; return _nErr; } -static __inline int _stub_unpack_1(remote_arg* _praROutPost, remote_arg* _ppraROutPost[1], void* _primROut, char* _rout0[1], uint32_t _rout0Len[1]) { +static __inline int _stub_unpack_1(remote_arg* _praROutPost, remote_arg* _ppraROutPost[1], void* _primROut, char* _in0[1], uint32_t _in0Len[1]) { int _nErr = 0; remote_arg* _praROutPostStart = _praROutPost; remote_arg** _ppraROutPostStart = _ppraROutPost; _ppraROutPost = &_praROutPost; - _ppraROutPostStart[0] += (_praROutPost - _praROutPostStart) +1; + _ppraROutPostStart[0] += (_praROutPost - _praROutPostStart) +0; return _nErr; } -static __inline int _stub_pack(_allocator* _al, remote_arg* _praIn, remote_arg* _ppraIn[1], remote_arg* _praROut, remote_arg* _ppraROut[1], void* _primIn, void* _primROut, char* _in0[1], uint32_t _in0Len[1]) { +static __inline int _stub_pack(_allocator* _al, remote_arg* _praIn, remote_arg* _ppraIn[1], remote_arg* _praROut, remote_arg* _ppraROut[1], void* _primIn, void* _primROut, char* _rout0[1], uint32_t _rout0Len[1]) { int _nErr = 0; remote_arg* _praInStart = _praIn; remote_arg** _ppraInStart = _ppraIn; @@ -616,14 +616,14 @@ static __inline int _stub_pack(_allocator* _al, remote_arg* _praIn, remote_arg* remote_arg** _ppraROutStart = _ppraROut; _ppraIn = &_praIn; _ppraROut = &_praROut; - _COPY(_primIn, 0, _in0Len, 0, 4); - _praIn[0].buf.pv = _in0[0]; - _praIn[0].buf.nLen = (1 * _in0Len[0]); - _ppraInStart[0] += (_praIn - _praInStart) + 1; - _ppraROutStart[0] += (_praROut - _praROutStart) +0; + _COPY(_primIn, 0, _rout0Len, 0, 4); + _praROut[0].buf.pv = _rout0[0]; + _praROut[0].buf.nLen = (1 * _rout0Len[0]); + _ppraInStart[0] += (_praIn - _praInStart) + 0; + _ppraROutStart[0] += (_praROut - _praROutStart) +1; return _nErr; } -static __inline int _stub_pack_1(_allocator* _al, remote_arg* _praIn, remote_arg* _ppraIn[1], remote_arg* _praROut, remote_arg* _ppraROut[1], void* _primIn, void* _primROut, char* _rout0[1], uint32_t _rout0Len[1]) { +static __inline int _stub_pack_1(_allocator* _al, remote_arg* _praIn, remote_arg* _ppraIn[1], remote_arg* _praROut, remote_arg* _ppraROut[1], void* _primIn, void* _primROut, char* _in0[1], uint32_t _in0Len[1]) { int _nErr = 0; remote_arg* _praInStart = _praIn; remote_arg** _ppraInStart = _ppraIn; @@ -631,29 +631,28 @@ static __inline int _stub_pack_1(_allocator* _al, remote_arg* _praIn, remote_arg remote_arg** _ppraROutStart = _ppraROut; _ppraIn = &_praIn; _ppraROut = &_praROut; - _COPY(_primIn, 0, _rout0Len, 0, 4); - _praROut[0].buf.pv = _rout0[0]; - _praROut[0].buf.nLen = (1 * _rout0Len[0]); - _ppraInStart[0] += (_praIn - _praInStart) + 0; - _ppraROutStart[0] += (_praROut - _praROutStart) +1; + _COPY(_primIn, 0, _in0Len, 0, 4); + _praIn[0].buf.pv = _in0[0]; + _praIn[0].buf.nLen = (1 * _in0Len[0]); + _ppraInStart[0] += (_praIn - _praInStart) + 1; + _ppraROutStart[0] += (_praROut - _praROutStart) +0; return _nErr; } -static __inline void _count(int _numIn[1], int _numROut[1], char* _in0[1], uint32_t _in0Len[1]) { - _numIn[0] += 1; - _numROut[0] += 0; -} -static __inline void _count_1(int _numIn[1], int _numROut[1], char* _rout0[1], uint32_t _rout0Len[1]) { +static __inline void _count(int _numIn[1], int _numROut[1], char* _rout0[1], uint32_t _rout0Len[1]) { _numIn[0] += 0; _numROut[0] += 1; } -static __inline int _stub_method_3(remote_handle _handle, uint32_t _mid, uint32_t _in0[1], uint32_t _in1[1], void* _in2[1], uint32_t _in2Len[1], void* _rout3[1], uint32_t _rout3Len[1], void* _in4[1], uint32_t _in4Len[1]) { +static __inline void _count_1(int _numIn[1], int _numROut[1], char* _in0[1], uint32_t _in0Len[1]) { + _numIn[0] += 1; + _numROut[0] += 0; +} +static __inline int _stub_method_3(remote_handle _handle, uint32_t _mid, uint32_t _in0[1], uint32_t _in1[1], void* _in2[1], uint32_t _in2Len[1], void* _rout3[1], uint32_t _rout3Len[1], char* _in4[1], uint32_t _in4Len[1]) { remote_arg* _pra; int _numIn[1]; int _numROut[1]; char* _seq_nat2; int _ii; char* _seq_nat3; - char* _seq_nat4; _allocator _al[1] = {{0}}; uint32_t _primIn[5]; remote_arg* _praIn; @@ -665,20 +664,15 @@ static __inline int _stub_method_3(remote_handle _handle, uint32_t _mid, uint32_ char* _seq_primIn2; int _nErr = 0; char* _seq_primIn3; - char* _seq_primIn4; _numIn[0] = 3; _numROut[0] = 0; for(_ii = 0, _seq_nat2 = (char*)_in2[0];_ii < (int)_in2Len[0];++_ii, _seq_nat2 = (_seq_nat2 + SLIM_IFPTR32(8, 16))) { - _count(_numIn, _numROut, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2]))); + _count_1(_numIn, _numROut, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2]))); } for(_ii = 0, _seq_nat3 = (char*)_rout3[0];_ii < (int)_rout3Len[0];++_ii, _seq_nat3 = (_seq_nat3 + SLIM_IFPTR32(8, 16))) { - _count_1(_numIn, _numROut, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2]))); - } - for(_ii = 0, _seq_nat4 = (char*)_in4[0];_ii < (int)_in4Len[0];++_ii, _seq_nat4 = (_seq_nat4 + SLIM_IFPTR32(8, 16))) - { - _count(_numIn, _numROut, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat4)[0]), (char**)&(((uint64_t*)_seq_nat4)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat4)[1]), (uint32_t*)&(((uint32_t*)_seq_nat4)[2]))); + _count(_numIn, _numROut, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2]))); } _allocator_init(_al, 0, 0); _ALLOCATE(_nErr, _al, ((((_numIn[0] + _numROut[0]) + 1) + 0) * sizeof(_pra[0])), 4, _pra); @@ -694,42 +688,34 @@ static __inline int _stub_method_3(remote_handle _handle, uint32_t _mid, uint32_ _praIn[0].buf.nLen = (4 * _in2Len[0]); for(_ii = 0, _seq_primIn2 = (char*)_praIn[0].buf.pv, _seq_nat2 = (char*)_in2[0];_ii < (int)_in2Len[0];++_ii, _seq_primIn2 = (_seq_primIn2 + 4), _seq_nat2 = (_seq_nat2 + SLIM_IFPTR32(8, 16))) { - _TRY(_nErr, _stub_pack(_al, (_praIn + 1), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn2, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2])))); + _TRY(_nErr, _stub_pack_1(_al, (_praIn + 1), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn2, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2])))); } _COPY(_primIn, 12, _rout3Len, 0, 4); _ALLOCATE(_nErr, _al, (_rout3Len[0] * 4), 4, _praIn[1].buf.pv); _praIn[1].buf.nLen = (4 * _rout3Len[0]); for(_ii = 0, _seq_primIn3 = (char*)_praIn[1].buf.pv, _seq_nat3 = (char*)_rout3[0];_ii < (int)_rout3Len[0];++_ii, _seq_primIn3 = (_seq_primIn3 + 4), _seq_nat3 = (_seq_nat3 + SLIM_IFPTR32(8, 16))) { - _TRY(_nErr, _stub_pack_1(_al, (_praIn + 2), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn3, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2])))); + _TRY(_nErr, _stub_pack(_al, (_praIn + 2), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn3, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2])))); } _COPY(_primIn, 16, _in4Len, 0, 4); - _ALLOCATE(_nErr, _al, (_in4Len[0] * 4), 4, _praIn[2].buf.pv); - _praIn[2].buf.nLen = (4 * _in4Len[0]); - for(_ii = 0, _seq_primIn4 = (char*)_praIn[2].buf.pv, _seq_nat4 = (char*)_in4[0];_ii < (int)_in4Len[0];++_ii, _seq_primIn4 = (_seq_primIn4 + 4), _seq_nat4 = (_seq_nat4 + SLIM_IFPTR32(8, 16))) - { - _TRY(_nErr, _stub_pack(_al, (_praIn + 3), _ppraIn, (_praROut + 0), _ppraROut, _seq_primIn4, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat4)[0]), (char**)&(((uint64_t*)_seq_nat4)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat4)[1]), (uint32_t*)&(((uint32_t*)_seq_nat4)[2])))); - } + _praIn[2].buf.pv = _in4[0]; + _praIn[2].buf.nLen = (8 * _in4Len[0]); _TRY(_nErr, __QAIC_REMOTE(remote_handle_invoke)(_handle, REMOTE_SCALARS_MAKEX(0, _mid, (_numIn[0] + 1), (_numROut[0] + 0), 0, 0), _pra)); for(_ii = 0, _seq_nat2 = (char*)_in2[0];_ii < (int)_in2Len[0];++_ii, _seq_nat2 = (_seq_nat2 + SLIM_IFPTR32(8, 16))) { - _TRY(_nErr, _stub_unpack((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2])))); + _TRY(_nErr, _stub_unpack_1((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat2)[0]), (char**)&(((uint64_t*)_seq_nat2)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat2)[1]), (uint32_t*)&(((uint32_t*)_seq_nat2)[2])))); } for(_ii = 0, _seq_nat3 = (char*)_rout3[0];_ii < (int)_rout3Len[0];++_ii, _seq_nat3 = (_seq_nat3 + SLIM_IFPTR32(8, 16))) { - _TRY(_nErr, _stub_unpack_1((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2])))); - } - for(_ii = 0, _seq_nat4 = (char*)_in4[0];_ii < (int)_in4Len[0];++_ii, _seq_nat4 = (_seq_nat4 + SLIM_IFPTR32(8, 16))) - { - _TRY(_nErr, _stub_unpack((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat4)[0]), (char**)&(((uint64_t*)_seq_nat4)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat4)[1]), (uint32_t*)&(((uint32_t*)_seq_nat4)[2])))); + _TRY(_nErr, _stub_unpack((_praROutPost + 0), _ppraROutPost, 0, SLIM_IFPTR32((char**)&(((uint32_t*)_seq_nat3)[0]), (char**)&(((uint64_t*)_seq_nat3)[0])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_seq_nat3)[1]), (uint32_t*)&(((uint32_t*)_seq_nat3)[2])))); } _CATCH(_nErr) {} _allocator_deinit(_al); return _nErr; } -__QAIC_STUB_EXPORT int __QAIC_STUB(halide_hexagon_remote_run)(halide_hexagon_remote_handle_t module_ptr, halide_hexagon_remote_handle_t symbol, const halide_hexagon_remote_buffer* input_buffers, int input_buffersLen, halide_hexagon_remote_buffer* output_buffers, int output_buffersLen, const halide_hexagon_remote_buffer* input_scalars, int input_scalarsLen) __QAIC_STUB_ATTRIBUTE { +__QAIC_STUB_EXPORT int __QAIC_STUB(halide_hexagon_remote_run_v2)(halide_hexagon_remote_handle_t module_ptr, halide_hexagon_remote_handle_t symbol, const halide_hexagon_remote_buffer* input_buffers, int input_buffersLen, halide_hexagon_remote_buffer* output_buffers, int output_buffersLen, const halide_hexagon_remote_scalar_t* scalars, int scalarsLen) __QAIC_STUB_ATTRIBUTE { uint32_t _mid = 4; - return _stub_method_3(_halide_hexagon_remote_handle(), _mid, (uint32_t*)&module_ptr, (uint32_t*)&symbol, (void**)&input_buffers, (uint32_t*)&input_buffersLen, (void**)&output_buffers, (uint32_t*)&output_buffersLen, (void**)&input_scalars, (uint32_t*)&input_scalarsLen); + return _stub_method_3(_halide_hexagon_remote_handle(), _mid, (uint32_t*)&module_ptr, (uint32_t*)&symbol, (void**)&input_buffers, (uint32_t*)&input_buffersLen, (void**)&output_buffers, (uint32_t*)&output_buffersLen, (char**)&scalars, (uint32_t*)&scalarsLen); } static __inline int _stub_method_4(remote_handle _handle, uint32_t _mid, uint32_t _in0[1]) { remote_arg _pra[1]; diff --git a/src/runtime/hexagon_remote/bin/v60/libhalide_hexagon_remote_skel.so b/src/runtime/hexagon_remote/bin/v60/libhalide_hexagon_remote_skel.so index 446e3936b2be..d60c45cbd11b 100755 Binary files a/src/runtime/hexagon_remote/bin/v60/libhalide_hexagon_remote_skel.so and b/src/runtime/hexagon_remote/bin/v60/libhalide_hexagon_remote_skel.so differ diff --git a/src/runtime/hexagon_remote/bin/v60/signed_by_debug/libhalide_hexagon_remote_skel.so b/src/runtime/hexagon_remote/bin/v60/signed_by_debug/libhalide_hexagon_remote_skel.so index 6a7362e405f5..c375de499745 100644 Binary files a/src/runtime/hexagon_remote/bin/v60/signed_by_debug/libhalide_hexagon_remote_skel.so and b/src/runtime/hexagon_remote/bin/v60/signed_by_debug/libhalide_hexagon_remote_skel.so differ diff --git a/src/runtime/hexagon_remote/halide_hexagon_remote.idl b/src/runtime/hexagon_remote/halide_hexagon_remote.idl index 6f571dfaf1b8..d4d02abdcd21 100644 --- a/src/runtime/hexagon_remote/halide_hexagon_remote.idl +++ b/src/runtime/hexagon_remote/halide_hexagon_remote.idl @@ -1,7 +1,10 @@ +#include "AEEStdDef.idl" + interface halide_hexagon_remote { typedef sequence buffer; typedef unsigned long handle_t; + typedef unsigned long long scalar_t; // Routine to initialize a module on the remote side. Returns a pointer to the module. long initialize_kernels_v3(in buffer code, rout handle_t module_ptr); @@ -17,10 +20,11 @@ interface halide_hexagon_remote long power_hvx_off(); // Routine to run a pipeline on the remote side. - long run(in handle_t module_ptr, in handle_t symbol, - in sequence input_buffers, - rout sequence output_buffers, - in sequence input_scalars); + // v2: Pass all scalars in a single buffer as an array of scalar_t. + long run_v2(in handle_t module_ptr, in handle_t symbol, + in sequence input_buffers, + rout sequence output_buffers, + in sequence scalars); // Routine to clean up a module on the remote side. long release_kernels_v2(in handle_t module_ptr); diff --git a/src/runtime/hexagon_remote/halide_remote.cpp b/src/runtime/hexagon_remote/halide_remote.cpp index a92b56d055f1..38d2f11e37b0 100644 --- a/src/runtime/hexagon_remote/halide_remote.cpp +++ b/src/runtime/hexagon_remote/halide_remote.cpp @@ -21,6 +21,7 @@ const int stack_size = 1024 * 1024; typedef halide_hexagon_remote_handle_t handle_t; typedef halide_hexagon_remote_buffer buffer; +typedef halide_hexagon_remote_scalar_t scalar_t; extern "C" { @@ -312,12 +313,10 @@ int halide_hexagon_remote_get_symbol_v4(handle_t module_ptr, const char* name, i return *sym_ptr != 0 ? 0 : -1; } - -int halide_hexagon_remote_run(handle_t module_ptr, handle_t function, - const buffer *input_buffersPtrs, int input_buffersLen, - buffer *output_buffersPtrs, int output_buffersLen, - const buffer *input_scalarsPtrs, int input_scalarsLen) { - +int halide_hexagon_remote_run_v2(handle_t module_ptr, handle_t function, + const buffer *input_buffersPtrs, int input_buffersLen, + buffer *output_buffersPtrs, int output_buffersLen, + const scalar_t *scalars, int scalarsLen) { // Get a pointer to the argv version of the pipeline. pipeline_argv_t pipeline = reinterpret_cast(function); @@ -330,7 +329,7 @@ int halide_hexagon_remote_run(handle_t module_ptr, handle_t function, uint64_t dev; uint8_t* host; }; - void **args = (void **)__builtin_alloca((input_buffersLen + input_scalarsLen + output_buffersLen) * sizeof(void *)); + void **args = (void **)__builtin_alloca((input_buffersLen + scalarsLen + output_buffersLen) * sizeof(void *)); buffer_t *buffers = (buffer_t *)__builtin_alloca((input_buffersLen + output_buffersLen) * sizeof(buffer_t)); void **next_arg = &args[0]; @@ -346,8 +345,8 @@ int halide_hexagon_remote_run(handle_t module_ptr, handle_t function, *next_arg = next_buffer_t; } // Input scalars are last. - for (int i = 0; i < input_scalarsLen; i++, next_arg++) { - *next_arg = input_scalarsPtrs[i].data; + for (int i = 0; i < scalarsLen; i++, next_arg++) { + *next_arg = const_cast(&scalars[i]); } // Prior to running the pipeline, power HVX on (if it was not already on). diff --git a/src/runtime/hexagon_remote/host_shim.cpp b/src/runtime/hexagon_remote/host_shim.cpp new file mode 100644 index 000000000000..8e1a0ab6b114 --- /dev/null +++ b/src/runtime/hexagon_remote/host_shim.cpp @@ -0,0 +1,36 @@ +#include +#include + +#include "bin/src/halide_hexagon_remote.h" + +typedef halide_hexagon_remote_handle_t handle_t; +typedef halide_hexagon_remote_buffer buffer; +typedef halide_hexagon_remote_scalar_t scalar_t; + +extern "C" { + +// In v2, we pass all scalars and small input buffers in a single buffer. +int halide_hexagon_remote_run(handle_t module_ptr, handle_t function, + buffer *input_buffersPtrs, int input_buffersLen, + buffer *output_buffersPtrs, int output_buffersLen, + const buffer *input_scalarsPtrs, int input_scalarsLen) { + // Pack all of the scalars into an array of scalar_t. + scalar_t *scalars = (scalar_t *)__builtin_alloca(input_scalarsLen * sizeof(scalar_t)); + for (int i = 0; i < input_scalarsLen; i++) { + int scalar_size = input_scalarsPtrs[i].dataLen; + if (scalar_size > sizeof(scalar_t)) { + __android_log_print(ANDROID_LOG_ERROR, "halide", "Scalar argument %d is larger than %d bytes (%d bytes)", + i, sizeof(scalar_t), scalar_size); + return -1; + } + memcpy(&scalars[i], input_scalarsPtrs[i].data, scalar_size); + } + + // Call v2 with the adapted arguments. + return halide_hexagon_remote_run_v2(module_ptr, function, + input_buffersPtrs, input_buffersLen, + output_buffersPtrs, output_buffersLen, + scalars, input_scalarsLen); +} + +} // extern "C"