⬆️ tensorflow@2.8.0 (#722)

* Update TF to 2.8 * Always allocate im2col tensor in prepare stage to avoid errors * Various Bazel fixes * Update TF submodule * Update Android build tools version * Apply xnnpack armhf fix * More Bazel MLIR tablegen fixes * Update test case because of randomized output order * Update PassRegistration constructors * Remove DecodeConstantPass functionality * getValue -> getValues * Update PassRegistration constructors (part 2) * Update converter passes to match with TF's passes * Update flatbuffer options to toco options See tensorflow/tensorflow@b2b7933 * Fixes for padding in tablegen and .cc MLIR files * Fix NHWC constant * Fix linking issue with LarqDialect object * Update MLIR passes based on latest version from TF sources * Rename const dense to arith.const dense * Change tf.resource to tf_type.resource * Rename ConstantOp to Arith_ConstantOp in td files * More constant -> arith.constant fixes * Use `std.constant` for none values This doesn't make the unittests pass yet, but it fixes the `arith.constant` verification error: ``` unexpected error: 'arith.constant' op value must be an integer, float, or elements attribute. 
``` * Fix optimization unittest * Fix `prepare-tf` test case * Implement custom constant materializer to fix constant folding * Fix compiler warning * Update submodule to `v2.8.0` tag * Pin flatbuffers on CI * Add `lq.quantize(tfl.dequantize(x))` -> `lq.quantize(x)` pattern * Sync `tf_tfl_passes` with upstream TF * Work around end2end test failure by using random bias * Remove optimize `lq.quantize(tfl.dequantize(x))` in more cases * TF 2.8: Update converter to sync with upstream tensorflow (#723) * Update converter to sync with upstream tensorflow * Update larq_compute_engine/mlir/tf_to_tfl_flatbuffer.cc Co-authored-by: Tom Bannink <[email protected]> Co-authored-by: Tom Bannink <[email protected]> * Replace make build system with basic CMake * Fix the im2col_id issue properly this time * Pin manylinux2010 image to TF 2.8 * Pin Windows 2019 as build environment * Check in TF Addons `manylinux2010` toolchain * Increase timeout for Windows release builds from 6 (default) to 10 hours * Benchmark app from CMake: change name and add missing source files * Remove timeout beyond the maximum 6 hours Co-authored-by: Lukas Geiger <[email protected]> Co-authored-by: Lukas Geiger <[email protected]> Co-authored-by: Tom Bannink <[email protected]>
larq · Apr 19, 2022 · 5572b76 · 5572b76
1 parent 35c0593
commit 5572b76
Show file tree

Hide file tree

Showing 58 changed files with 2,669 additions and 761 deletions.
diff --git a/.bazelversion b/.bazelversion
@@ -1 +1 @@
-3.7.2
+4.2.1
diff --git a/.github/tools/release_linux.sh b/.github/tools/release_linux.sh
@@ -9,7 +9,7 @@ bazel build :build_pip_pkg \
   --copt=-mavx \
   --distinct_host_configuration=false \
   --verbose_failures \
-  --crosstool_top=@org_tensorflow//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11.2:toolchain
+  --crosstool_top=//third_party/toolchains/gcc7_manylinux2010-nvcc-cuda11:toolchain
 
 # Package Whl
 bazel-bin/build_pip_pkg artifacts

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -210,7 +210,7 @@ jobs:
         run: |
           docker run -e LCE_RELEASE_VERSION=${{ github.event.inputs.version }} \
             -v ${PWD}:/compute-engine -w /compute-engine \
-            tensorflow/build:latest-python${{ matrix.python-version }} \
+            tensorflow/build:2.8-python${{ matrix.python-version }} \
             .github/tools/release_linux.sh
 
           sudo apt-get -y -qq install patchelf --no-install-recommends
@@ -228,7 +228,7 @@ jobs:
 
   windows-release-wheel:
     name: Build release wheels for Windows
-    runs-on: windows-latest
+    runs-on: windows-2019
     strategy:
       matrix:
         python-version: [3.7, 3.8, 3.9]

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
@@ -25,8 +25,12 @@ jobs:
         run: pip install numpy --no-cache-dir
       - name: Run C++ Unit Tests
         run: bazelisk test larq_compute_engine/tests:cc_tests --copt=-O2 --distinct_host_configuration=false --test_output=all
-      - name: Build TF Lite Static Library with Make
-        run: larq_compute_engine/tflite/build_make/build_lce.sh --native
+      - name: Build TF Lite Static Library with CMake
+        run: |
+          mkdir build
+          cd build
+          cmake ..
+          make -j2
 
   ARM:
     runs-on: ubuntu-latest
@@ -83,7 +87,7 @@ jobs:
         if: github.ref != 'refs/heads/main'
         shell: bash
       - name: Install pip dependencies
-        run: pip install tensorflow-cpu~=2.6.2 larq~=0.11 larq_zoo~=2.0 pytest tensorflow_datasets~=4.2 flatbuffers tqdm --no-cache-dir
+        run: pip install tensorflow-cpu~=2.8.0 larq~=0.11 larq_zoo~=2.0 pytest tensorflow_datasets~=4.4 flatbuffers==1.12 tqdm --no-cache-dir
       - name: Run Interpreter test
         run: bazelisk test larq_compute_engine/tflite/tests:interpreter_test --test_output=all
       - name: Run FileCheck tests
@@ -97,15 +101,15 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        tf-version: [1.14.0, 1.15.5, 2.0.4, 2.1.3, 2.2.2, 2.3.2, 2.4.1, 2.5.0, 2.6.2, 2.7.0]
+        tf-version: [1.14.0, 1.15.5, 2.0.4, 2.1.4, 2.2.3, 2.3.3, 2.4.4, 2.5.3, 2.6.3, 2.7.1, 2.8.0]
     if: "!contains(github.event.head_commit.message, 'ci-skip')"
     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v3
         with:
           python-version: 3.7
       - name: Install dependencies
-        run: pip install tensorflow==${{matrix.tf-version}} larq~=0.11 larq_zoo~=2.0 tensorflow_datasets==1.3.2 packaging flatbuffers --no-cache-dir
+        run: pip install tensorflow==${{matrix.tf-version}} larq~=0.11 larq_zoo~=2.0 tensorflow_datasets==1.3.2 packaging flatbuffers==1.12 --no-cache-dir
       - name: Run Converter test
         run: PYTHONPATH=./ python larq_compute_engine/mlir/python/converter_test.py
 

diff --git a/.gitignore b/.gitignore
@@ -16,7 +16,8 @@ node_modules
 __pycache__
 *.swp
 .vscode/
-cmake_build/
+cmake_build*
+cmake-build*
 tensorflow/contrib/cmake/_build/
 .idea/**
 /build/

diff --git a/.tensorflow.bazelrc b/.tensorflow.bazelrc
@@ -136,6 +136,7 @@ build:elinux_aarch64 --config=elinux
 build:elinux_aarch64 --cpu=aarch64
 build:elinux_armhf --config=elinux
 build:elinux_armhf --cpu=armhf
+build:elinux_armhf --copt -mfp16-format=ieee
 
 # Address sanitizer
 # CC=clang bazel build --config asan

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,96 @@
+cmake_minimum_required(VERSION 3.16)
+project(larq_compute_engine C CXX)
+
+# Options and their default values
+option(COMPILE_EXAMPLE "Enable compilation of the minimal example" ON)
+option(COMPILE_BENCHMARK "Enable compilation of the benchmarking utility" ON)
+
+# TensorFlow dependency, see https://www.tensorflow.org/lite/guide/build_cmake
+set(TENSORFLOW_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/third_party/tensorflow/")
+set(TFLITE_SOURCE_DIR "${TENSORFLOW_SOURCE_DIR}/tensorflow/lite")
+add_subdirectory("${TFLITE_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}/tensorflow-lite" EXCLUDE_FROM_ALL)
+
+# Generic compilation options and settings
+set(CMAKE_CXX_STANDARD 14)
+include_directories(${CMAKE_CURRENT_LIST_DIR})
+
+# The LCE core files
+set(LCE_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/larq_compute_engine")
+set(LCE_CORE_SRCS
+        ${LCE_SOURCE_DIR}/tflite/kernels/bconv2d.cc
+        ${LCE_SOURCE_DIR}/tflite/kernels/bmaxpool.cc
+        ${LCE_SOURCE_DIR}/tflite/kernels/quantization.cc
+        )
+set(LCE_CORE_HDRS  # such that they can be discovered by IDEs such as CLion Visual Studio
+        ${LCE_SOURCE_DIR}/core/indirect_bgemm/kernel.h
+        ${LCE_SOURCE_DIR}/core/indirect_bgemm/kernel_4x2_portable.h
+        ${LCE_SOURCE_DIR}/core/indirect_bgemm/kernel_8x4x4_aarch64.h
+        ${LCE_SOURCE_DIR}/core/indirect_bgemm/kernel_8x4x1_aarch64.h
+        ${LCE_SOURCE_DIR}/core/indirect_bgemm/select_kernel.h
+        ${LCE_SOURCE_DIR}/core/indirect_bgemm/kernel_8x4x2_aarch64.h
+        ${LCE_SOURCE_DIR}/core/bmaxpool.h
+        ${LCE_SOURCE_DIR}/core/bitpacking/utils.h
+        ${LCE_SOURCE_DIR}/core/bitpacking/bitpack.h
+        ${LCE_SOURCE_DIR}/core/bitpacking/bitpack_aarch64.h
+        ${LCE_SOURCE_DIR}/core/types.h
+        ${LCE_SOURCE_DIR}/core/bconv2d/optimized_indirect_bgemm.h
+        ${LCE_SOURCE_DIR}/core/bconv2d/reference.h
+        ${LCE_SOURCE_DIR}/core/bconv2d/optimized_bgemm.h
+        ${LCE_SOURCE_DIR}/core/bconv2d/zero_padding_correction.h
+        ${LCE_SOURCE_DIR}/core/bconv2d/params.h
+        ${LCE_SOURCE_DIR}/core/bconv2d/output_transform.h
+        ${LCE_SOURCE_DIR}/core/bgemm/kernels_common.h
+        ${LCE_SOURCE_DIR}/core/bgemm/ruy_trmul_params.h
+        ${LCE_SOURCE_DIR}/core/bgemm/kernels_aarch64.h
+        ${LCE_SOURCE_DIR}/core/bgemm/kernels.h
+        ${LCE_SOURCE_DIR}/core/bgemm/ruy_pack.h
+        ${LCE_SOURCE_DIR}/core/bgemm/kernels_arm32.h
+        ${LCE_SOURCE_DIR}/core/bgemm/bgemm.h
+        ${LCE_SOURCE_DIR}/tflite/kernels/lce_ops_register.h
+        ${LCE_SOURCE_DIR}/tflite/kernels/utils.h
+        )
+
+# The example application
+if(COMPILE_EXAMPLE)
+    set(LCE_EXAMPLE_SRCS ${CMAKE_CURRENT_LIST_DIR}/examples/lce_minimal.cc)
+    add_executable(example ${LCE_CORE_SRCS} ${LCE_CORE_HDRS} ${LCE_EXAMPLE_SRCS})
+    target_link_libraries(example tensorflow-lite)
+endif()
+
+# The benchmarking binary
+if(COMPILE_BENCHMARK)
+    set(LCE_BENCHMARK_SRCS
+            ${LCE_SOURCE_DIR}/tflite/benchmark/lce_benchmark_tflite_model.cc
+            ${LCE_SOURCE_DIR}/tflite/benchmark/lce_benchmark_main.cc
+            )
+    set(LCE_BENCHMARK_HRDS
+            ${LCE_SOURCE_DIR}/tflite/benchmark/lce_benchmark_tflite_model.h
+            ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_model.h
+            )
+    set(TFLITE_BENCHMARK_SRCS  # from ${TFLITE_SOURCE_DIR}/tools/benchmark/CMakeLists.txt
+            ${TENSORFLOW_SOURCE_DIR}/tensorflow/core/util/stats_calculator.cc
+            ${TFLITE_SOURCE_DIR}/kernels/internal/utils/sparsity_format_converter.cc
+            ${TFLITE_SOURCE_DIR}/profiling/memory_info.cc
+            ${TFLITE_SOURCE_DIR}/profiling/memory_usage_monitor.cc
+            ${TFLITE_SOURCE_DIR}/profiling/profile_summarizer.cc
+            ${TFLITE_SOURCE_DIR}/profiling/profile_summary_formatter.cc
+            ${TFLITE_SOURCE_DIR}/profiling/time.cc
+            ${TFLITE_SOURCE_DIR}/tools/command_line_flags.cc
+            ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_model.cc
+            ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_performance_options.cc
+            ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_tflite_model.cc
+            ${TFLITE_SOURCE_DIR}/tools/benchmark/benchmark_utils.cc
+            ${TFLITE_SOURCE_DIR}/tools/benchmark/profiling_listener.cc
+            ${TFLITE_SOURCE_DIR}/tools/delegates/default_execution_provider.cc
+            ${TFLITE_SOURCE_DIR}/tools/delegates/delegate_provider.cc
+            ${TFLITE_SOURCE_DIR}/tools/delegates/xnnpack_delegate_provider.cc
+            ${TFLITE_SOURCE_DIR}/tools/evaluation/utils.cc
+            ${TFLITE_SOURCE_DIR}/tools/tool_params.cc
+            )
+    add_executable(lce_benchmark_model
+            ${TFLITE_BENCHMARK_SRCS}
+            ${LCE_CORE_SRCS} ${LCE_CORE_HDRS}
+            ${LCE_BENCHMARK_SRCS} ${LCE_BENCHMARK_HRDS}
+            )
+    target_link_libraries(lce_benchmark_model tensorflow-lite)
+endif()
diff --git a/WORKSPACE b/WORKSPACE
@@ -15,11 +15,12 @@ http_archive(
     patch_tool = "patch",
     patches = [
         "//third_party/tensorflow_patches:disable_forced_mkl.patch",
+        "//third_party/tensorflow_patches:fix_armhf_xnnpack.patch",
     ],
-    sha256 = "e68c1d346fc3d529653530ca346b2c62f5b31bd4fcca7ffc9c65bb39ab2f6ed3",
-    strip_prefix = "tensorflow-2.6.2",
+    sha256 = "66b953ae7fba61fd78969a2e24e350b26ec116cf2e6a7eb93d02c63939c6f9f7",
+    strip_prefix = "tensorflow-2.8.0",
     urls = [
-        "https://github.com/tensorflow/tensorflow/archive/v2.6.2.tar.gz",
+        "https://github.com/tensorflow/tensorflow/archive/v2.8.0.tar.gz",
     ],
 )