CI: add a self-hosted AMDGPU build-and-test job.

Summary of what this patch does:
- common-utils.sh: add ci-docker-run-amdgpu, which probes X displays :0-:9
  and runs the CI container with /dev/kfd, /dev/dri and the X11 socket exposed.
  The detected display is tracked explicitly: `$?` after the probe loop is
  always 0 (both `break` and an `if` with no true branch return 0), so it
  cannot be used to detect "no display".
- unix-build.sh: when AMDGPU_TEST is set, link clang-10/lld-10 to their plain
  names and use the LLVM at /taichi-llvm-15.0.0-linux. `ln -sf` is used so the
  step does not abort under `set -ex` when the links already exist.
- unix_test.sh: open permissions on the GPU device nodes at run time and run
  only the cpu tests for now on the AMDGPU runner.
- testing.yml: add the build_and_test_amdgpu_linux job.
- Dockerfiles: bump the prebuilt LLVM asset to v0.0.2; drop the build-time
  chmod of device nodes; use `#` (not `//`) for Dockerfile comments.

diff --git a/.github/workflows/scripts/common-utils.sh b/.github/workflows/scripts/common-utils.sh
index a0da18266ff3d..aa81fd9043d76 100644
--- a/.github/workflows/scripts/common-utils.sh
+++ b/.github/workflows/scripts/common-utils.sh
@@ -155,6 +155,38 @@ function ci-docker-run-gpu {
         $@
 }
 
+# Run a CI container with the AMD GPU devices and the host X display exposed.
+# Probes X displays :0 through :9 with xset and uses the first that answers;
+# prints "No display!" and exits with status 1 when none does.
+# Arguments: forwarded to ci-docker-run after the AMDGPU-specific options.
+function ci-docker-run-amdgpu {
+    local display=""
+    local i
+    for i in {0..9}; do
+        if xset -display ":$i" -q >/dev/null 2>&1; then
+            display=":$i"
+            break
+        fi
+    done
+
+    # $? after the loop is always 0 (either `break` or the untaken `if`),
+    # so the match is tracked explicitly instead.
+    if [ -z "$display" ]; then
+        echo "No display!"
+        exit 1
+    fi
+
+    ci-docker-run \
+        --device=/dev/kfd \
+        --device=/dev/dri \
+        --group-add=video \
+        -e DISPLAY="$display" \
+        -e GPU_TEST=ON \
+        -e AMDGPU_TEST=ON \
+        -v /tmp/.X11-unix:/tmp/.X11-unix \
+        "$@"
+}
+
 function setup-android-ndk-env {
     export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/android-sdk/ndk-bundle}
     export ANDROID_CMAKE_ARGS="-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DANDROID_NATIVE_API_LEVEL=29 -DANDROID_ABI=arm64-v8a"
diff --git a/.github/workflows/scripts/unix-build.sh b/.github/workflows/scripts/unix-build.sh
index 75ddafb970530..31cd8546c3279 100755
--- a/.github/workflows/scripts/unix-build.sh
+++ b/.github/workflows/scripts/unix-build.sh
@@ -7,19 +7,28 @@ set -ex
 [[ "$IN_DOCKER" == "true" ]] && cd taichi
 
 if [[ $OSTYPE == "linux-"* ]]; then
-    if [ ! -d ~/taichi-llvm-15 ]; then
-        pushd ~
-        if [ -f /etc/centos-release ] ; then
-            # FIXIME: prebuilt llvm15 on ubuntu didn't work on manylinux image of centos. Once that's fixed, remove this hack.
-            wget https://github.com/ailzhang/torchhub_example/releases/download/0.3/taichi-llvm-15-linux.zip
-        else
-            wget https://github.com/taichi-dev/taichi_assets/releases/download/llvm15/taichi-llvm-15-linux.zip
+    if [ -n "$AMDGPU_TEST" ]; then
+        # Expose the versioned clang-10/lld-10 binaries under their plain names;
+        # -f keeps this step re-runnable under `set -e` when the links already exist.
+        sudo ln -sf /usr/bin/clang++-10 /usr/bin/clang++
+        sudo ln -sf /usr/bin/clang-10 /usr/bin/clang
+        sudo ln -sf /usr/bin/ld.lld-10 /usr/bin/ld.lld
+        export LLVM_DIR="/taichi-llvm-15.0.0-linux"
+    else
+        if [ ! -d ~/taichi-llvm-15 ]; then
+            pushd ~
+            if [ -f /etc/centos-release ] ; then
+                # FIXME: prebuilt llvm15 on ubuntu didn't work on manylinux image of centos. Once that's fixed, remove this hack.
+                wget https://github.com/ailzhang/torchhub_example/releases/download/0.3/taichi-llvm-15-linux.zip
+            else
+                wget https://github.com/taichi-dev/taichi_assets/releases/download/llvm15/taichi-llvm-15-linux.zip
+            fi
+            unzip taichi-llvm-15-linux.zip && rm taichi-llvm-15-linux.zip
+            popd
         fi
-        unzip taichi-llvm-15-linux.zip && rm taichi-llvm-15-linux.zip
-        popd
+        export LLVM_DIR="$HOME/taichi-llvm-15"
     fi
 
-    export LLVM_DIR="$HOME/taichi-llvm-15"
 elif [ "$(uname -s):$(uname -m)" == "Darwin:arm64" ]; then
     # The following commands are done manually to save time.
     if [ ! -d ~/taichi-llvm-15-m1 ]; then
diff --git a/.github/workflows/scripts/unix_test.sh b/.github/workflows/scripts/unix_test.sh
index 64d9b1d36a0ea..a2f41fd2d9d34 100755
--- a/.github/workflows/scripts/unix_test.sh
+++ b/.github/workflows/scripts/unix_test.sh
@@ -14,6 +14,11 @@ setup_python
 
 [[ "$IN_DOCKER" == "true" ]] && cd taichi
 
+if [ -n "$AMDGPU_TEST" ]; then
+    sudo chmod 666 /dev/kfd
+    sudo chmod 666 /dev/dri/*
+fi
+
 python3 -m pip install dist/*.whl
 if [ -z "$GPU_TEST" ]; then
     python3 -m pip install -r requirements_test.txt
@@ -103,6 +108,9 @@ if [ -z "$GPU_TEST" ]; then
         fi
         python3 tests/run_tests.py -vr2 -t4 -k "not paddle" -a "$TI_WANTED_ARCHS"
     fi
+elif [ -n "$AMDGPU_TEST" ]; then
+    run-it cpu    $(nproc)
+    # run-it amdgpu 4
 else
     run-it cuda 8
     run-it cpu $(nproc)
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
index 848ec4603649d..a107f7696d96e 100644
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -268,6 +268,74 @@ jobs:
           path: taichi-release-tests/bad-compare/*
           retention-days: 7
 
+  build_and_test_amdgpu_linux:
+    name: Build and Test (AMDGPU)
+    needs: check_files
+    timeout-minutes: ${{ github.event.schedule != '0 18 * * *' && 90 || 120 }}
+
+    runs-on: [self-hosted, amdgpu]
+
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: 'recursive'
+          fetch-depth: '0'
+
+      - name: Prepare Environment
+        run: |
+          . .github/workflows/scripts/common-utils.sh
+          prepare-build-cache
+          echo CI_DOCKER_RUN_EXTRA_ARGS="-v $(pwd):/home/dev/taichi" >> $GITHUB_ENV
+
+      - name: Build & Install
+        run: |
+          [[ ${{needs.check_files.outputs.run_job}} == false ]] && exit 0
+          . .github/workflows/scripts/common-utils.sh
+
+          ci-docker-run-amdgpu --name taichi-build \
+            registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.3 \
+            /home/dev/taichi/.github/workflows/scripts/unix-build.sh
+
+        env:
+          PY: py38
+          PROJECT_NAME: taichi
+          TAICHI_CMAKE_ARGS: >-
+            -DTI_WITH_VULKAN:BOOL=OFF
+            -DTI_BUILD_TESTS:BOOL=ON
+            -DTI_WITH_CUDA:BOOL=OFF
+
+      - name: Test
+        id: test
+        run: |
+          [[ ${{needs.check_files.outputs.run_job}} == false ]] && exit 0
+          . .github/workflows/scripts/common-utils.sh
+
+          ci-docker-run-amdgpu --name taichi-test \
+            registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.3 \
+            /home/dev/taichi/.github/workflows/scripts/unix_test.sh
+        env:
+          PY: py38
+          TI_WANTED_ARCHS: 'cpu,amdgpu'
+          TI_DEVICE_MEMORY_GB: '1'
+          TI_RUN_RELEASE_TESTS: '0'
+
+      - name: Save wheel if test failed
+        if: failure() && steps.test.conclusion == 'failure'
+        uses: actions/upload-artifact@v3
+        with:
+          name: broken-wheel
+          path: dist/*
+          retention-days: 7
+
+      - name: Save Bad Captures
+        if: failure() && steps.test.conclusion == 'failure'
+        uses: actions/upload-artifact@v3
+        with:
+          name: bad-captures
+          path: taichi-release-tests/bad-compare/*
+          retention-days: 7
+
   build_and_test_windows:
     name: Build and Test Windows
 
diff --git a/ci/Dockerfile.ubuntu.18.04.amdgpu b/ci/Dockerfile.ubuntu.18.04.amdgpu
index 64aa4d29e53a6..c3eaf51a2371d 100644
--- a/ci/Dockerfile.ubuntu.18.04.amdgpu
+++ b/ci/Dockerfile.ubuntu.18.04.amdgpu
@@ -47,7 +47,7 @@ RUN apt-get update && \
 # Install LLVM 15
 WORKDIR /
 # Make sure this URL gets updated each time there is a new prebuilt bin release
-RUN wget https://github.com/GaleSeLee/assets/releases/download/v0.0.1/taichi-llvm-15.0.0-linux.zip
+RUN wget https://github.com/GaleSeLee/assets/releases/download/v0.0.2/taichi-llvm-15.0.0-linux.zip
 RUN unzip taichi-llvm-15.0.0-linux.zip && \
     rm taichi-llvm-15.0.0-linux.zip
 ENV PATH="/taichi-llvm-15.0.0-linux/bin:$PATH"
@@ -57,9 +57,7 @@ ENV CXX="clang++-10"
 
 # Create non-root user for running the container
 RUN useradd -m -s /bin/bash dev && \
-    usermod -a -G video dev && \
-    chmod 666 /dev/kfd && \
-    chmod 666 /dev/dri/*
+    usermod -a -G video dev
 WORKDIR /home/dev
 USER dev
 
diff --git a/ci/Dockerfile.ubuntu.20.04.amdgpu b/ci/Dockerfile.ubuntu.20.04.amdgpu
index 586e7ab9add8b..f8d58dc117107 100644
--- a/ci/Dockerfile.ubuntu.20.04.amdgpu
+++ b/ci/Dockerfile.ubuntu.20.04.amdgpu
@@ -1,3 +1,6 @@
+# clang++-10 -> clang++ etc
+# assets llvm v0.0.1 -> v0.0.2
+# apt install lld-10
 # Taichi Dockerfile for development
 FROM rocm/dev-ubuntu-20.04:5.2
 
@@ -38,7 +41,7 @@ RUN apt-get update && \
 # Install LLVM 15
 WORKDIR /
 # Make sure this URL gets updated each time there is a new prebuilt bin release
-RUN wget https://github.com/GaleSeLee/assets/releases/download/v0.0.1/taichi-llvm-15.0.0-linux.zip
+RUN wget https://github.com/GaleSeLee/assets/releases/download/v0.0.2/taichi-llvm-15.0.0-linux.zip
 RUN unzip taichi-llvm-15.0.0-linux.zip && \
     rm taichi-llvm-15.0.0-linux.zip
 ENV PATH="/taichi-llvm-15.0.0-linux/bin:$PATH"