From 7db54440b844c2b539f60493897dcbbe2776f6d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Mon, 17 Apr 2023 11:36:06 +0800 Subject: [PATCH 01/17] Add get_gpu_instance() function and Organized the instance class codes. --- src/gpu.cpp | 37 +++++++++++++------------------------ src/gpu.h | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/gpu.cpp b/src/gpu.cpp index a09a5f74453..14e43993cfe 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -41,37 +41,26 @@ #define ENABLE_VALIDATION_LAYER 0 namespace ncnn { - -// global +static __ncnn_vulkan_instance_holder g_instance; static Mutex g_instance_lock; -class __ncnn_vulkan_instance_holder +__ncnn_vulkan_instance_holder::__ncnn_vulkan_instance_holder() { -public: - __ncnn_vulkan_instance_holder() - { - instance = 0; + instance = 0; #if ENABLE_VALIDATION_LAYER - callback = 0; + callback = 0; #endif - } - - ~__ncnn_vulkan_instance_holder() - { - destroy_gpu_instance(); - } +} - operator VkInstance() - { - return instance; - } +__ncnn_vulkan_instance_holder::~__ncnn_vulkan_instance_holder() +{ + destroy_gpu_instance(); +} - VkInstance instance; -#if ENABLE_VALIDATION_LAYER - VkDebugUtilsMessengerEXT callback; -#endif -}; -static __ncnn_vulkan_instance_holder g_instance; +__ncnn_vulkan_instance_holder::operator VkInstance() +{ + return instance; +} static int g_gpu_count = 0; static int g_default_gpu_index = -1; diff --git a/src/gpu.h b/src/gpu.h index 345329f7d47..117e14b91c9 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -43,6 +43,20 @@ NCNN_EXPORT VkInstance get_gpu_instance(); // Usually called in the destructor of the main program exit NCNN_EXPORT void destroy_gpu_instance(); +class __ncnn_vulkan_instance_holder +{ +public: + __ncnn_vulkan_instance_holder(); + ~__ncnn_vulkan_instance_holder(); + + operator VkInstance(); + + VkInstance instance; +#if ENABLE_VALIDATION_LAYER + VkDebugUtilsMessengerEXT callback; +#endif +}; + // instance extension capability extern int support_VK_KHR_external_memory_capabilities; extern int support_VK_KHR_get_physical_device_properties2; From e48623dc2f0f449837c4a109ed76cb4e60784b93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Mon, 17 Apr 2023 14:50:43 +0800 Subject: [PATCH 02/17] Move class __ncnn_vulkan_instance_holder declaration from gpu.h to gpu.cpp --- src/gpu.cpp | 36 ++++++++++++++++++++++-------------- src/gpu.h | 14 -------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/src/gpu.cpp b/src/gpu.cpp index 14e43993cfe..b87a132fd45 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -41,26 +41,34 @@ #define ENABLE_VALIDATION_LAYER 0 namespace ncnn { -static __ncnn_vulkan_instance_holder g_instance; static Mutex g_instance_lock; - -__ncnn_vulkan_instance_holder::__ncnn_vulkan_instance_holder() +class __ncnn_vulkan_instance_holder { - instance = 0; +public: + __ncnn_vulkan_instance_holder() + { + instance = 0; #if ENABLE_VALIDATION_LAYER - callback = 0; + callback = 0; #endif -} + } + ~__ncnn_vulkan_instance_holder() + { + destroy_gpu_instance(); + } -__ncnn_vulkan_instance_holder::~__ncnn_vulkan_instance_holder() -{ - destroy_gpu_instance(); -} + operator VkInstance() + { + return instance; + } + + VkInstance instance; +#if ENABLE_VALIDATION_LAYER + VkDebugUtilsMessengerEXT callback; +#endif +}; +static __ncnn_vulkan_instance_holder g_instance; -__ncnn_vulkan_instance_holder::operator VkInstance() -{ - return instance; -} static int g_gpu_count = 0; static int g_default_gpu_index = -1; diff --git a/src/gpu.h b/src/gpu.h index 117e14b91c9..345329f7d47 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -43,20 +43,6 @@ NCNN_EXPORT VkInstance get_gpu_instance(); // Usually called in the destructor of the main program exit NCNN_EXPORT void destroy_gpu_instance(); -class __ncnn_vulkan_instance_holder -{ -public: - __ncnn_vulkan_instance_holder(); - ~__ncnn_vulkan_instance_holder(); - - operator VkInstance(); - - VkInstance instance; -#if ENABLE_VALIDATION_LAYER - VkDebugUtilsMessengerEXT callback; -#endif -}; - // instance extension capability extern int support_VK_KHR_external_memory_capabilities; extern int support_VK_KHR_get_physical_device_properties2; From 10425659303cc546c7afab27ca56fa35bf71f5a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Mon, 17 Apr 2023 15:20:31 +0800 Subject: [PATCH 03/17] Delete empty line changes --- src/gpu.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gpu.cpp b/src/gpu.cpp index b87a132fd45..a09a5f74453 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -41,7 +41,10 @@ #define ENABLE_VALIDATION_LAYER 0 namespace ncnn { + +// global static Mutex g_instance_lock; + class __ncnn_vulkan_instance_holder { public: @@ -52,6 +55,7 @@ class __ncnn_vulkan_instance_holder callback = 0; #endif } + ~__ncnn_vulkan_instance_holder() { destroy_gpu_instance(); @@ -69,7 +73,6 @@ class __ncnn_vulkan_instance_holder }; static __ncnn_vulkan_instance_holder g_instance; - static int g_gpu_count = 0; static int g_default_gpu_index = -1; From a479422e244499d7472e5021dd53217503ad8f7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Fri, 28 Apr 2023 14:54:36 +0800 Subject: [PATCH 04/17] Reimplement the sleep() and get_current_time() functions using modern C++. --- benchmark/benchncnn.cpp | 20 +---------- src/benchmark.cpp | 79 +++++++++++++++++++++++++++++++---------- src/benchmark.h | 9 +++++ 3 files changed, 70 insertions(+), 38 deletions(-) diff --git a/benchmark/benchncnn.cpp b/benchmark/benchncnn.cpp index 3092e8837c7..40ab53c56b2 100644 --- a/benchmark/benchncnn.cpp +++ b/benchmark/benchncnn.cpp @@ -16,13 +16,6 @@ #include #include -#ifdef _WIN32 -#include -#include // Sleep() -#else -#include // sleep() -#endif - #ifdef __EMSCRIPTEN__ #include #endif @@ -106,18 +99,7 @@ void benchmark(const char* comment, const ncnn::Mat& _in, const ncnn::Option& op if (g_enable_cooling_down) { // sleep 10 seconds for cooling down SOC :( -#ifdef _WIN32 - Sleep(10 * 1000); -#elif defined(__unix__) || defined(__APPLE__) - sleep(10); -#elif _POSIX_TIMERS - struct timespec ts; - ts.tv_sec = 10; - ts.tv_nsec = 0; - nanosleep(&ts, &ts); -#else - // TODO How to handle it ? -#endif + ncnn::sleep(10 * 1000); } ncnn::Mat out; diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 9376749b326..f4fd77fe893 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -12,12 +12,20 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#include -#else // _WIN32 -#include -#endif // _WIN32 +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) + #include + #include + #include + #include +#else + #ifdef _WIN32 + #define WIN32_LEAN_AND_MEAN + #include + #else // _WIN32 + #include //gettimeofday() + #include // sleep() + #endif // _WIN32 +#endif #include "benchmark.h" @@ -34,19 +42,52 @@ namespace ncnn { double get_current_time() { -#ifdef _WIN32 - LARGE_INTEGER freq; - LARGE_INTEGER pc; - QueryPerformanceFrequency(&freq); - QueryPerformanceCounter(&pc); - - return pc.QuadPart * 1000.0 / freq.QuadPart; -#else // _WIN32 - struct timeval tv; - gettimeofday(&tv, NULL); - - return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0; -#endif // _WIN32 +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) + auto now = std::chrono::high_resolution_clock::now(); + auto usec = std::chrono::duration_cast(now.time_since_epoch()); + return usec.count() / 1000.0; +#else + #ifdef _WIN32 + LARGE_INTEGER freq; + LARGE_INTEGER pc; + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&pc); + + return pc.QuadPart * 1000.0 / freq.QuadPart; + #else // _WIN32 + struct timeval tv; + gettimeofday(&tv, NULL); + + return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0; + #endif // _WIN32 +#endif +} + +void sleep( +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) + std::int64_t milliseconds +#else + long long milliseconds +#endif + ) +{ +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) + std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds)); +#else + #ifdef _WIN32 + Sleep(milliseconds); + #elif defined(__unix__) || defined(__APPLE__) + sleep(milliseconds * 0.001); + #elif _POSIX_TIMERS + struct timespec ts; + ts.tv_sec = milliseconds * 0.001; + ts.tv_nsec = 0; + nanosleep(&ts, &ts); + #else + // TODO How to handle it ? + #endif +#endif + return; } #if NCNN_BENCHMARK diff --git a/src/benchmark.h b/src/benchmark.h index 3d5c0cda311..44614f76a85 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -24,6 +24,15 @@ namespace ncnn { // get now timestamp in ms NCNN_EXPORT double get_current_time(); +// sleep milliseconds +NCNN_EXPORT void sleep( +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) + std::int64_t milliseconds +#else + long long milliseconds +#endif + = 100000); + #if NCNN_BENCHMARK NCNN_EXPORT void benchmark(const Layer* layer, double start, double end); From 6eb3bb75be151abaf39830854cf6e80e163ca028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Fri, 28 Apr 2023 15:25:33 +0800 Subject: [PATCH 05/17] Fixed build error for __riscv not support c++ 11 thread --- src/benchmark.cpp | 8 ++++---- src/benchmark.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index f4fd77fe893..cd860f62952 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -12,7 +12,7 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) #include #include #include @@ -42,7 +42,7 @@ namespace ncnn { double get_current_time() { -#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) auto now = std::chrono::high_resolution_clock::now(); auto usec = std::chrono::duration_cast(now.time_since_epoch()); return usec.count() / 1000.0; @@ -64,14 +64,14 @@ double get_current_time() } void sleep( -#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) std::int64_t milliseconds #else long long milliseconds #endif ) { -#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds)); #else #ifdef _WIN32 diff --git a/src/benchmark.h b/src/benchmark.h index 44614f76a85..2ec10cea75f 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -26,7 +26,7 @@ NCNN_EXPORT double get_current_time(); // sleep milliseconds NCNN_EXPORT void sleep( -#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) std::int64_t milliseconds #else long long milliseconds From ed932ba70be9e8a43e6a22df25c8052a46a94a0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Fri, 5 May 2023 10:43:16 +0800 Subject: [PATCH 06/17] Add NCNN_SIMPLESTL Macro --- src/benchmark.cpp | 8 ++++---- src/benchmark.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index cd860f62952..f4e23691a66 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -12,7 +12,7 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL #include #include #include @@ -42,7 +42,7 @@ namespace ncnn { double get_current_time() { -#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL auto now = std::chrono::high_resolution_clock::now(); auto usec = std::chrono::duration_cast(now.time_since_epoch()); return usec.count() / 1000.0; @@ -64,14 +64,14 @@ double get_current_time() } void sleep( -#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL std::int64_t milliseconds #else long long milliseconds #endif ) { -#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds)); #else #ifdef _WIN32 diff --git a/src/benchmark.h b/src/benchmark.h index 2ec10cea75f..0ef417eabcc 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -26,7 +26,7 @@ NCNN_EXPORT double get_current_time(); // sleep milliseconds NCNN_EXPORT void sleep( -#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) +#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL std::int64_t milliseconds #else long long milliseconds From 00b9f28a29058565346df5b72de3c54a0c6827a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Fri, 12 May 2023 15:37:24 +0800 Subject: [PATCH 07/17] Fix simple stl's compiler build error --- src/benchmark.cpp | 11 +++-------- src/benchmark.h | 8 +------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index f4e23691a66..e0ad09ec8d9 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -12,6 +12,7 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +#include "benchmark.h" #if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL #include #include @@ -27,7 +28,6 @@ #endif // _WIN32 #endif -#include "benchmark.h" #if NCNN_BENCHMARK #include "layer/convolution.h" @@ -36,6 +36,7 @@ #include "layer/deconvolutiondepthwise.h" #include +#include #endif // NCNN_BENCHMARK namespace ncnn { @@ -63,13 +64,7 @@ double get_current_time() #endif } -void sleep( -#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL - std::int64_t milliseconds -#else - long long milliseconds -#endif - ) +void sleep(uint64_t milliseconds) { #if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds)); diff --git a/src/benchmark.h b/src/benchmark.h index 0ef417eabcc..a2ddb3b3a17 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -25,13 +25,7 @@ namespace ncnn { NCNN_EXPORT double get_current_time(); // sleep milliseconds -NCNN_EXPORT void sleep( -#if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL - std::int64_t milliseconds -#else - long long milliseconds -#endif - = 100000); +NCNN_EXPORT void sleep(uint64_t milliseconds = 100000); #if NCNN_BENCHMARK From 67ef2834676a03763e086acc18459a6b9137ac11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Fri, 12 May 2023 17:34:38 +0800 Subject: [PATCH 08/17] Fix linux-gcc-cpp03-nostdio-nostring-simplestl build error --- src/benchmark.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/benchmark.h b/src/benchmark.h index a2ddb3b3a17..ed321c810e8 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -19,6 +19,10 @@ #include "mat.h" #include "platform.h" +#if NCNN_BENCHMARK +#include +#endif // NCNN_BENCHMARK + namespace ncnn { // get now timestamp in ms From d1e079ca74ad095a5c78db3aeba811e41d15f516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Mon, 15 May 2023 11:11:13 +0800 Subject: [PATCH 09/17] Use u_int64_t type parameters in linux-gcc-cpp03-nostdio-nostring-simplestl --- src/benchmark.cpp | 5 +++++ src/benchmark.h | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index e0ad09ec8d9..8714d47b0c1 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -24,6 +24,7 @@ #include #else // _WIN32 #include //gettimeofday() + #include //uint64_t and u_int64_t #include // sleep() #endif // _WIN32 #endif @@ -64,7 +65,11 @@ double get_current_time() #endif } +#if defined(NCNN_SIMPLESTL) && NCNN_SIMPLESTL && !defined(_WIN32) //simplestl and !defined(win32) +void sleep(u_int64_t milliseconds) +#else void sleep(uint64_t milliseconds) +#endif { #if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds)); diff --git a/src/benchmark.h b/src/benchmark.h index ed321c810e8..c5ed1d1420c 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -21,15 +21,23 @@ #if NCNN_BENCHMARK #include + #if defined(NCNN_SIMPLESTL) && NCNN_SIMPLESTL && !defined(_WIN32) //simplestl and !defined(win32) + #include //uint64_t and u_int64_t + #endif #endif // NCNN_BENCHMARK + namespace ncnn { // get now timestamp in ms NCNN_EXPORT double get_current_time(); // sleep milliseconds +#if defined(NCNN_SIMPLESTL) && NCNN_SIMPLESTL && !defined(_WIN32) //simplestl and !defined(win32) +NCNN_EXPORT void sleep(u_int64_t milliseconds = 100000); +#else NCNN_EXPORT void sleep(uint64_t milliseconds = 100000); +#endif #if NCNN_BENCHMARK From 2ad85f0bf6213542b5e294320b3cb5807cdc55ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Mon, 15 May 2023 11:40:16 +0800 Subject: [PATCH 10/17] change uint64_t&u_int64_t to unsigned long long int --- src/benchmark.cpp | 7 +------ src/benchmark.h | 9 +-------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 8714d47b0c1..365aad87c1c 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -24,7 +24,6 @@ #include #else // _WIN32 #include //gettimeofday() - #include //uint64_t and u_int64_t #include // sleep() #endif // _WIN32 #endif @@ -65,11 +64,7 @@ double get_current_time() #endif } -#if defined(NCNN_SIMPLESTL) && NCNN_SIMPLESTL && !defined(_WIN32) //simplestl and !defined(win32) -void sleep(u_int64_t milliseconds) -#else -void sleep(uint64_t milliseconds) -#endif +void sleep(unsigned long long int milliseconds) { #if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds)); diff --git a/src/benchmark.h b/src/benchmark.h index c5ed1d1420c..d01a4ba50f6 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -21,9 +21,6 @@ #if NCNN_BENCHMARK #include - #if defined(NCNN_SIMPLESTL) && NCNN_SIMPLESTL && !defined(_WIN32) //simplestl and !defined(win32) - #include //uint64_t and u_int64_t - #endif #endif // NCNN_BENCHMARK @@ -33,11 +30,7 @@ namespace ncnn { NCNN_EXPORT double get_current_time(); // sleep milliseconds -#if defined(NCNN_SIMPLESTL) && NCNN_SIMPLESTL && !defined(_WIN32) //simplestl and !defined(win32) -NCNN_EXPORT void sleep(u_int64_t milliseconds = 100000); -#else -NCNN_EXPORT void sleep(uint64_t milliseconds = 100000); -#endif +NCNN_EXPORT void sleep(unsigned long long int milliseconds = 100000); #if NCNN_BENCHMARK From cec75bc7f2381fb5422382d2e6f4c7c9945a6b19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Tue, 16 May 2023 10:40:06 +0800 Subject: [PATCH 11/17] Remove include stdint.h and change function sleep() default parameter from 100000 to 1000. --- src/benchmark.cpp | 1 - src/benchmark.h | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 365aad87c1c..964df2a09ec 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -36,7 +36,6 @@ #include "layer/deconvolutiondepthwise.h" #include -#include #endif // NCNN_BENCHMARK namespace ncnn { diff --git a/src/benchmark.h b/src/benchmark.h index d01a4ba50f6..ed42c1acb5f 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -19,18 +19,13 @@ #include "mat.h" #include "platform.h" -#if NCNN_BENCHMARK -#include -#endif // NCNN_BENCHMARK - - namespace ncnn { // get now timestamp in ms NCNN_EXPORT double get_current_time(); // sleep milliseconds -NCNN_EXPORT void sleep(unsigned long long int milliseconds = 100000); +NCNN_EXPORT void sleep(unsigned long long int milliseconds = 1000); #if NCNN_BENCHMARK From 85c15a042f2195a927e6a0c02b1586301aed75fc Mon Sep 17 00:00:00 2001 From: whyb Date: Tue, 16 May 2023 03:42:34 +0000 Subject: [PATCH 12/17] apply code-format changes --- src/benchmark.cpp | 73 +++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 964df2a09ec..4461fc9b956 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -14,21 +14,20 @@ #include "benchmark.h" #if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL - #include - #include - #include - #include +#include +#include +#include +#include #else - #ifdef _WIN32 - #define WIN32_LEAN_AND_MEAN - #include - #else // _WIN32 - #include //gettimeofday() - #include // sleep() - #endif // _WIN32 +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#else // _WIN32 +#include //gettimeofday() +#include // sleep() +#endif // _WIN32 #endif - #if NCNN_BENCHMARK #include "layer/convolution.h" #include "layer/convolutiondepthwise.h" @@ -47,19 +46,19 @@ double get_current_time() auto usec = std::chrono::duration_cast(now.time_since_epoch()); return usec.count() / 1000.0; #else - #ifdef _WIN32 - LARGE_INTEGER freq; - LARGE_INTEGER pc; - QueryPerformanceFrequency(&freq); - QueryPerformanceCounter(&pc); - - return pc.QuadPart * 1000.0 / freq.QuadPart; - #else // _WIN32 - struct timeval tv; - gettimeofday(&tv, NULL); - - return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0; - #endif // _WIN32 +#ifdef _WIN32 + LARGE_INTEGER freq; + LARGE_INTEGER pc; + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&pc); + + return pc.QuadPart * 1000.0 / freq.QuadPart; +#else // _WIN32 + struct timeval tv; + gettimeofday(&tv, NULL); + + return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0; +#endif // _WIN32 #endif } @@ -68,18 +67,18 @@ void sleep(unsigned long long int milliseconds) #if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL std::this_thread::sleep_for(std::chrono::milliseconds(milliseconds)); #else - #ifdef _WIN32 - Sleep(milliseconds); - #elif defined(__unix__) || defined(__APPLE__) - sleep(milliseconds * 0.001); - #elif _POSIX_TIMERS - struct timespec ts; - ts.tv_sec = milliseconds * 0.001; - ts.tv_nsec = 0; - nanosleep(&ts, &ts); - #else - // TODO How to handle it ? - #endif +#ifdef _WIN32 + Sleep(milliseconds); +#elif defined(__unix__) || defined(__APPLE__) + sleep(milliseconds * 0.001); +#elif _POSIX_TIMERS + struct timespec ts; + ts.tv_sec = milliseconds * 0.001; + ts.tv_nsec = 0; + nanosleep(&ts, &ts); +#else + // TODO How to handle it ? +#endif #endif return; } From be921fd0e3009f91de2f5af1c6186888968aa027 Mon Sep 17 00:00:00 2001 From: nihui Date: Tue, 16 May 2023 14:19:34 +0800 Subject: [PATCH 13/17] Update benchmark.cpp --- src/benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 4461fc9b956..d507b63c37c 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -13,6 +13,7 @@ // specific language governing permissions and limitations under the License. #include "benchmark.h" + #if (__cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)) && !defined(__riscv) && !NCNN_SIMPLESTL #include #include @@ -80,7 +81,6 @@ void sleep(unsigned long long int milliseconds) // TODO How to handle it ? #endif #endif - return; } #if NCNN_BENCHMARK From c0e5f516b11cdcb715c2d35728f33a9de3004afc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Tue, 30 May 2023 10:41:30 +0800 Subject: [PATCH 14/17] Add destroy_gpu_instance() function new parameter need_wait about it will wait for all devices to be idle before destroy --- src/gpu.cpp | 16 ++++++++++++++++ src/gpu.h | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/gpu.cpp b/src/gpu.cpp index a09a5f74453..c37f949acbb 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -1697,6 +1697,22 @@ void destroy_gpu_instance() if ((VkInstance)g_instance == 0) return; + if (need_wait) + { + for (int i = 0; i < NCNN_MAX_GPU_COUNT; i++) + { + VulkanDevice* vulkan_device = g_default_vkdev[i]; + if (vulkan_device) + { + VkDevice vkdev = g_default_vkdev[i]->vkdevice(); + if (vkdev) + { + vkDeviceWaitIdle(vkdev); + } + } + } + } + // NCNN_LOGE("destroy_gpu_instance"); glslang::FinalizeProcess(); diff --git a/src/gpu.h b/src/gpu.h index 345329f7d47..df663b26097 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -41,7 +41,8 @@ NCNN_EXPORT VkInstance get_gpu_instance(); // Destroy VkInstance object and free the memory of the associated object // Usually called in the destructor of the main program exit -NCNN_EXPORT void destroy_gpu_instance(); +// If need_wait != 0, it will wait for all devices to be idle before destroy +NCNN_EXPORT void destroy_gpu_instance(int need_wait = 0); // instance extension capability extern int support_VK_KHR_external_memory_capabilities; From 7ba55f978c6fcae8f768e530d0ceb00a95e63297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Tue, 30 May 2023 10:50:39 +0800 Subject: [PATCH 15/17] Fix build error --- src/gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu.cpp b/src/gpu.cpp index c37f949acbb..db2ddb44042 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -1690,7 +1690,7 @@ VkInstance get_gpu_instance() return (VkInstance)g_instance; } -void destroy_gpu_instance() +void destroy_gpu_instance(int need_wait) { MutexLockGuard lock(g_instance_lock); From 6ffb1f8c21f3bef9b53f2799b985c5c413441003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Tue, 30 May 2023 16:24:25 +0800 Subject: [PATCH 16/17] Fix build error for pybind --- python/src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index ef65dc243a3..34f6d202cf2 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -1208,7 +1208,7 @@ PYBIND11_MODULE(ncnn, m) #if NCNN_VULKAN m.def("create_gpu_instance", &create_gpu_instance); - m.def("destroy_gpu_instance", &destroy_gpu_instance); + m.def("destroy_gpu_instance", &destroy_gpu_instance, py::arg("need_wait") = 0); m.def("get_gpu_count", &get_gpu_count); m.def("get_default_gpu_index", &get_default_gpu_index); m.def("get_gpu_info", &get_gpu_info, py::arg("device_index") = 0, py::return_value_policy::reference); From 70f1169f283d57f8b2c4eb6b9f51dd99ea5e3947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E5=B0=8F=E5=87=A1?= <2672931+whyb@users.noreply.github.com> Date: Sun, 7 Apr 2024 17:52:16 +0800 Subject: [PATCH 17/17] destroy_gpu_instance() will internally ensure that all vulkan devices are idle before proceeding with destruction. --- python/src/main.cpp | 2 +- src/gpu.cpp | 17 +++++++---------- src/gpu.h | 4 ++-- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 42ee1050a04..a7ed0528c6a 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -1212,7 +1212,7 @@ PYBIND11_MODULE(ncnn, m) #if NCNN_VULKAN m.def("create_gpu_instance", &create_gpu_instance, py::arg("driver_path") = ((const char*)0)); - m.def("destroy_gpu_instance", &destroy_gpu_instance, py::arg("need_wait") = 0); + m.def("destroy_gpu_instance", &destroy_gpu_instance); m.def("get_gpu_count", &get_gpu_count); m.def("get_default_gpu_index", &get_default_gpu_index); m.def("get_gpu_info", &get_gpu_info, py::arg("device_index") = 0, py::return_value_policy::reference); diff --git a/src/gpu.cpp b/src/gpu.cpp index d709bb3fe8c..77a63f4b3be 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -2084,25 +2084,22 @@ VkInstance get_gpu_instance() return (VkInstance)g_instance; } -void destroy_gpu_instance(int need_wait) +void destroy_gpu_instance() { MutexLockGuard lock(g_instance_lock); if (g_instance.created == 0) return; - if (need_wait) + for (int i = 0; i < NCNN_MAX_GPU_COUNT; i++) { - for (int i = 0; i < NCNN_MAX_GPU_COUNT; i++) + VulkanDevice* vulkan_device = g_default_vkdev[i]; + if (vulkan_device) { - VulkanDevice* vulkan_device = g_default_vkdev[i]; - if (vulkan_device) + VkDevice vkdev = g_default_vkdev[i]->vkdevice(); + if (vkdev) { - VkDevice vkdev = g_default_vkdev[i]->vkdevice(); - if (vkdev) - { - vkDeviceWaitIdle(vkdev); - } + vkDeviceWaitIdle(vkdev); } } } diff --git a/src/gpu.h b/src/gpu.h index 7c5cf556eed..4d131f71c8b 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -37,8 +37,8 @@ NCNN_EXPORT VkInstance get_gpu_instance(); // Destroy VkInstance object and free the memory of the associated object // Usually called in the destructor of the main program exit -// If need_wait != 0, it will wait for all devices to be idle before destroy -NCNN_EXPORT void destroy_gpu_instance(int need_wait = 0); +// The function will internally ensure that all vulkan devices are idle before proceeding with destruction. +NCNN_EXPORT void destroy_gpu_instance(); // vulkan core extern PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers;