From 4f8f4ade12885fbd0a0e95e0c76792e7f5e2505a Mon Sep 17 00:00:00 2001
From: Sukera <Seelengrab@users.noreply.github.com>
Date: Tue, 20 Jul 2021 09:49:43 +0200
Subject: [PATCH 01/65] Add isinf/isnan check for %g/%G formatting

(cherry picked from commit 860f99433210b2cc4b7ac8e9c4f4bfa1f9cdcf6d)
---
 stdlib/Printf/src/Printf.jl | 42 ++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl
index e3813af5dddca..b38298d35aceb 100644
--- a/stdlib/Printf/src/Printf.jl
+++ b/stdlib/Printf/src/Printf.jl
@@ -422,27 +422,31 @@ const __BIG_FLOAT_MAX__ = 8192
     elseif T == Val{'f'} || T == Val{'F'}
         newpos = Ryu.writefixed(buf, pos, x, prec, plus, space, hash, UInt8('.'))
     elseif T == Val{'g'} || T == Val{'G'}
-        # C11-compliant general format
-        prec = prec == 0 ? 1 : prec
-        # format the value in scientific notation and parse the exponent part
-        exp = let p = Ryu.writeexp(buf, pos, x, prec)
-            b1, b2, b3, b4 = buf[p-4], buf[p-3], buf[p-2], buf[p-1]
-            Z = UInt8('0')
-            if b1 == UInt8('e')
-                # two-digit exponent
-                sign = b2 == UInt8('+') ? 1 : -1
-                exp = 10 * (b3 - Z) + (b4 - Z)
+        if isinf(x) || isnan(x)
+            newpos = Ryu.writeshortest(buf, pos, x, plus, space)
+        else
+            # C11-compliant general format
+            prec = prec == 0 ? 1 : prec
+            # format the value in scientific notation and parse the exponent part
+            exp = let p = Ryu.writeexp(buf, pos, x, prec)
+                b1, b2, b3, b4 = buf[p-4], buf[p-3], buf[p-2], buf[p-1]
+                Z = UInt8('0')
+                if b1 == UInt8('e')
+                    # two-digit exponent
+                    sign = b2 == UInt8('+') ? 1 : -1
+                    exp = 10 * (b3 - Z) + (b4 - Z)
+                else
+                    # three-digit exponent
+                    sign = b1 == UInt8('+') ? 1 : -1
+                    exp = 100 * (b2 - Z) + 10 * (b3 - Z) + (b4 - Z)
+                end
+                flipsign(exp, sign)
+            end
+            if -4 ≤ exp < prec
+                newpos = Ryu.writefixed(buf, pos, x, prec - (exp + 1), plus, space, hash, UInt8('.'), !hash)
             else
-                # three-digit exponent
-                sign = b1 == UInt8('+') ? 1 : -1
-                exp = 100 * (b2 - Z) + 10 * (b3 - Z) + (b4 - Z)
+                newpos = Ryu.writeexp(buf, pos, x, prec - 1, plus, space, hash, T == Val{'g'} ? UInt8('e') : UInt8('E'), UInt8('.'), !hash)
             end
-            flipsign(exp, sign)
-        end
-        if -4 ≤ exp < prec
-            newpos = Ryu.writefixed(buf, pos, x, prec - (exp + 1), plus, space, hash, UInt8('.'), !hash)
-        else
-            newpos = Ryu.writeexp(buf, pos, x, prec - 1, plus, space, hash, T == Val{'g'} ? UInt8('e') : UInt8('E'), UInt8('.'), !hash)
         end
     elseif T == Val{'a'} || T == Val{'A'}
         x, neg = x < 0 || x === -Base.zero(x) ? (-x, true) : (x, false)

From 642719e86f02d61371552d9fce927d2f4031ee8e Mon Sep 17 00:00:00 2001
From: Sukera <Seelengrab@users.noreply.github.com>
Date: Tue, 20 Jul 2021 09:54:39 +0200
Subject: [PATCH 02/65] Add test for Inf/NaN handling in %g/%G Printf

(cherry picked from commit 6a7c78b8a5ed0423fde478268bda42b4ff62f3ff)
---
 stdlib/Printf/test/runtests.jl | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl
index f1438b0a0f2f1..5490bbf70e930 100644
--- a/stdlib/Printf/test/runtests.jl
+++ b/stdlib/Printf/test/runtests.jl
@@ -94,6 +94,15 @@ end
     @test Printf.@sprintf("%g", 123456.7) == "123457"
     @test Printf.@sprintf("%g", 1234567.8) == "1.23457e+06"
 
+    # %g regression gh #41631
+    for (val, res) in ((Inf, "Inf"),
+                       (-Inf, "-Inf"),
+                       (NaN, "NaN"),
+                       (-NaN, "NaN"))
+        @test Printf.@sprintf("%g", val) == res
+        @test Printf.@sprintf("%G", val) == res
+    end
+
     # zeros
     @test Printf.@sprintf("%.15g", 0) == "0"
     @test Printf.@sprintf("%#.15g", 0) == "0.00000000000000"

From 28caff0665214b09bebdadbd6273a05dbc22040b Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Thu, 5 Aug 2021 23:11:54 -0400
Subject: [PATCH 03/65] CI (Buildkite): on the `platform_linux` jobs, after
 building Julia, make sure that the working directory is clean (#41796)

(cherry picked from commit cb30aa7a089f4d5f1c9f834d96c3788f2e93ebcc)
---
 .buildkite/pipelines/main/platforms/linux64.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.buildkite/pipelines/main/platforms/linux64.yml b/.buildkite/pipelines/main/platforms/linux64.yml
index 1e88db1144b4c..44b0ebe29f557 100644
--- a/.buildkite/pipelines/main/platforms/linux64.yml
+++ b/.buildkite/pipelines/main/platforms/linux64.yml
@@ -35,6 +35,9 @@ steps:
       make release
       make install
 
+      echo "--- Make sure that the working directory is clean"
+      if [ -z "$(git status --short)" ]; then echo "INFO: The working directory is clean."; else echo "ERROR: The working directory is dirty."; echo "Output of git status:"; git status; exit 1; fi
+
       echo "--- Print Julia version info"
       ./julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
 

From f04c8827446e288077cbc5be3fed941005135e96 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Fri, 6 Aug 2021 22:11:27 -0400
Subject: [PATCH 04/65] CI: Disable Codecov commit statuses (#41812)

(cherry picked from commit 131b75b327557db49059aa20fc6404bc61b32148)
---
 .codecov.yml | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 .codecov.yml

diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 0000000000000..35cde5cd5e854
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,4 @@
+coverage:
+  status:
+    project: off
+    patch: off

From f309ec192485f1caa26f63a4d0e15d72ce47c9e7 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Fri, 6 Aug 2021 22:46:50 -0400
Subject: [PATCH 05/65] CI (Buildbot): As soon as a PR is created or updated,
 create pending (yellow) commit statuses for all Buildbot jobs (#41811)

(cherry picked from commit 6fce8d50da57631e33ffc316c57a99bf5597d765)
---
 .github/CODEOWNERS             |  6 ++-
 .github/workflows/statuses.yml | 97 ++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/statuses.yml

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6cab5b68b11e9..5fc00a73b47d2 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,3 +1,5 @@
-CODEOWNERS @JuliaLang/github-actions 
-/.github/ @JuliaLang/github-actions 
+CODEOWNERS @JuliaLang/github-actions
+/.github/ @JuliaLang/github-actions
 /.buildkite/ @JuliaLang/github-actions
+
+/.github/workflows/statuses.yml @DilumAluthge
diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
new file mode 100644
index 0000000000000..27422b4e69345
--- /dev/null
+++ b/.github/workflows/statuses.yml
@@ -0,0 +1,97 @@
+# Please ping @DilumAluthge when making any changes to this file.
+
+# This is just a short-term solution until we have migrated all of CI to Buildkite.
+#
+# 1. TODO: delete this file once we have migrated all of CI to Buildkite.
+#
+# 2. TODO: disable GitHub Actions on the `JuliaLang/julia` repository once we have migrated all
+# of CI to Buildkite.
+
+# Here are some steps that we take in this workflow file for security reasons:
+# 1. We do not checkout any code.
+# 2. We do not run any external actions.
+
+name: Statuses
+
+on:
+  push:
+    branches:
+      - 'master'
+      - 'release-*'
+  # When using the `pull_request_target` event, all PRs will get a `GITHUB_TOKEN` that has
+  # write permissions, even if the PR is from a fork.
+  # Therefore, for security reasons, we do not checkout any code in this workflow.
+  pull_request:
+    branches:
+      - 'master'
+      - 'release-*'
+
+# These are the permissions for the `GITHUB_TOKEN` token.
+# We should only give the token the minimum necessary set of permissions.
+permissions:
+  statuses:            write
+  actions:             none
+  checks:              none
+  contents:            none
+  deployments:         none
+  issues:              none
+  discussions:         none
+  packages:            none
+  pull-requests:       none
+  repository-projects: none
+  security-events:     none
+
+jobs:
+  statuses:
+    name: statuses
+    runs-on: ubuntu-latest
+    if: github.repository == 'JuliaLang/julia'
+    strategy:
+      fail-fast: false
+    steps:
+      - run: echo "SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
+        if: github.event_name == 'pull_request'
+
+      - run: echo "SHA=${{ env.GITHUB_SHA }}" >> $GITHUB_ENV
+        if: github.event_name != 'pull_request'
+
+      - run: echo "The SHA is ${{ env.SHA }}"
+
+      # As we incrementally migrate individual jobs from Buildbot to Buildkite, we should
+      # remove them from the `context_list`.
+      - run: |
+          declare -a CONTEXT_LIST=(
+                "buildbot/package_freebsd64"
+                "buildbot/package_linux32"
+                "buildbot/package_linuxaarch64"
+                "buildbot/package_linuxarmv7l"
+                "buildbot/package_linuxppc64le"
+                "buildbot/package_macos64"
+                "buildbot/package_macosaarch64"
+                "buildbot/package_musl64"
+                "buildbot/package_win32"
+                "buildbot/package_win64"
+                "buildbot/tester_freebsd64"
+                "buildbot/tester_linux32"
+                "buildbot/tester_linux64"
+                "buildbot/tester_linuxaarch64"
+                "buildbot/tester_linuxarmv7l"
+                "buildbot/tester_linuxppc64le"
+                "buildbot/tester_macos64"
+                "buildbot/tester_macosaarch64"
+                "buildbot/tester_musl64"
+                "buildbot/tester_win32"
+                "buildbot/tester_win64"
+                )
+          for CONTEXT in "${CONTEXT_LIST[@]}"
+          do
+            curl \
+              -X POST \
+              -H "Authorization: token $GITHUB_TOKEN" \
+              -H "Accept: application/vnd.github.v3+json" \
+              -d "{\"context\": \"$CONTEXT\", \"state\": \"$STATE\"}" \
+            https://api.github.com/repos/JuliaLang/julia/statuses/${{ env.SHA }}
+          done
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          STATE: "pending"

From d9159e471bb4a3414059f430d7cda66097dbdebd Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Fri, 6 Aug 2021 23:10:19 -0400
Subject: [PATCH 06/65] CI (Buildbot): for the "Create Pending Statuses" GitHub
 Actions workflow, use the `pull_request_target` event instead of the
 `pull_request` event (#41814)

(cherry picked from commit d62e2060584cd8005fbd04661952dff0618a6920)
---
 .github/workflows/statuses.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index 27422b4e69345..1c29288d4fe79 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -21,7 +21,7 @@ on:
   # When using the `pull_request_target` event, all PRs will get a `GITHUB_TOKEN` that has
   # write permissions, even if the PR is from a fork.
   # Therefore, for security reasons, we do not checkout any code in this workflow.
-  pull_request:
+  pull_request_target:
     branches:
       - 'master'
       - 'release-*'

From 80505366f68668400378cb3385e4345f60834284 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Fri, 6 Aug 2021 23:41:42 -0400
Subject: [PATCH 07/65] CI (Buildbot): Fix a bug in the "Create Pending
 Statuses" GitHub Actions workflow (#41815)

(cherry picked from commit ed866b5f0f36a245317d257d062bd000226bd18c)
---
 .github/workflows/statuses.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index 1c29288d4fe79..fa448e9d9fbf0 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -50,10 +50,10 @@ jobs:
       fail-fast: false
     steps:
       - run: echo "SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
-        if: github.event_name == 'pull_request'
+        if: github.event_name == 'pull_request_target'
 
       - run: echo "SHA=${{ env.GITHUB_SHA }}" >> $GITHUB_ENV
-        if: github.event_name != 'pull_request'
+        if: github.event_name != 'pull_request_target'
 
       - run: echo "The SHA is ${{ env.SHA }}"
 

From ccc994b59c3f1b9d6f2cfdd8a5212b56ec3560e5 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Sat, 7 Aug 2021 00:39:27 -0400
Subject: [PATCH 08/65] CI (Buildbot): a simpler way of getting the SHA for the
 "Statuses" action (#41817)

(cherry picked from commit 0e8bb9596e29703e65d11214284e2fcac1b19bda)
---
 .github/workflows/statuses.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index fa448e9d9fbf0..26ec245d1ad18 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -52,7 +52,7 @@ jobs:
       - run: echo "SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
         if: github.event_name == 'pull_request_target'
 
-      - run: echo "SHA=${{ env.GITHUB_SHA }}" >> $GITHUB_ENV
+      - run: echo "SHA=${{ github.sha }}" >> $GITHUB_ENV
         if: github.event_name != 'pull_request_target'
 
       - run: echo "The SHA is ${{ env.SHA }}"

From ff2d68d4ed9221b9b2b96015ae34715e40a32b41 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Sat, 7 Aug 2021 20:55:55 -0400
Subject: [PATCH 09/65] CI (Buildbot, GHA): in the "Statuses" workflow, we only
 need to create pending (yellow) statuses for the `tester_` jobs (#41822)

(cherry picked from commit 08fae511b9a616f40179f85f0d429b88fc5c815e)
---
 .github/workflows/statuses.yml | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index 26ec245d1ad18..ca358e6f79487 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -61,16 +61,6 @@ jobs:
       # remove them from the `context_list`.
       - run: |
           declare -a CONTEXT_LIST=(
-                "buildbot/package_freebsd64"
-                "buildbot/package_linux32"
-                "buildbot/package_linuxaarch64"
-                "buildbot/package_linuxarmv7l"
-                "buildbot/package_linuxppc64le"
-                "buildbot/package_macos64"
-                "buildbot/package_macosaarch64"
-                "buildbot/package_musl64"
-                "buildbot/package_win32"
-                "buildbot/package_win64"
                 "buildbot/tester_freebsd64"
                 "buildbot/tester_linux32"
                 "buildbot/tester_linux64"

From 9ffd29a59eb189337d37fe19570c9a86ac7985b8 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Sat, 7 Aug 2021 23:02:29 -0400
Subject: [PATCH 10/65] CI (Buildbot, GHA): in the "Statuses" workflow, remove
 the `macosaarch64` and `musl64` statuses (#41824)

(cherry picked from commit bdd745722716693dc141545f7a83e59d1f13769c)
---
 .github/workflows/statuses.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index ca358e6f79487..a5e8161dd3b99 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -68,8 +68,6 @@ jobs:
                 "buildbot/tester_linuxarmv7l"
                 "buildbot/tester_linuxppc64le"
                 "buildbot/tester_macos64"
-                "buildbot/tester_macosaarch64"
-                "buildbot/tester_musl64"
                 "buildbot/tester_win32"
                 "buildbot/tester_win64"
                 )

From 6c2e1bbaceb09a321240862b98f50988380042fe Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Mon, 9 Aug 2021 00:35:47 -0400
Subject: [PATCH 11/65] CI (Buildbot, GHA): in the "Statuses" workflow, remove
 the `tester_linuxppc64le` status (#41831)

(cherry picked from commit 0d1e548d0a1fe9e0fcbe8c36caac2e344fc9a5fe)
---
 .github/workflows/statuses.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index a5e8161dd3b99..431b9fdfd89bf 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -66,7 +66,6 @@ jobs:
                 "buildbot/tester_linux64"
                 "buildbot/tester_linuxaarch64"
                 "buildbot/tester_linuxarmv7l"
-                "buildbot/tester_linuxppc64le"
                 "buildbot/tester_macos64"
                 "buildbot/tester_win32"
                 "buildbot/tester_win64"

From d224a1a9d3210f0001dbd456c41190c0cbfb8864 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Mon, 9 Aug 2021 01:20:13 -0400
Subject: [PATCH 12/65] CI (Buildbot, GHA): In the "Statuses" workflow, remove
 the `tester_linuxarmv7l` status (#41832)

(cherry picked from commit 9665d2a9248d7c7cb228a9ee0d9282041c337989)
---
 .github/workflows/statuses.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index 431b9fdfd89bf..0fcb8b37586e4 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -65,7 +65,6 @@ jobs:
                 "buildbot/tester_linux32"
                 "buildbot/tester_linux64"
                 "buildbot/tester_linuxaarch64"
-                "buildbot/tester_linuxarmv7l"
                 "buildbot/tester_macos64"
                 "buildbot/tester_win32"
                 "buildbot/tester_win64"

From 37f4dbc473fc4f8a7f7bb60cd0087e2d6d3a8ce1 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Mon, 9 Aug 2021 03:57:21 -0400
Subject: [PATCH 13/65] CI (Buildbot, GHA): add a note about token permissions
 (#41825)

(cherry picked from commit 7005b7d68be7e6dddb6fba70de855df0abdf3c22)
---
 .github/workflows/statuses.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index 0fcb8b37586e4..df86caa3acee3 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -10,6 +10,7 @@
 # Here are some steps that we take in this workflow file for security reasons:
 # 1. We do not checkout any code.
 # 2. We do not run any external actions.
+# 3. We only give `GITHUB_TOKEN` the minimum necessary set of permissions.
 
 name: Statuses
 

From 929ea9f41b98e5c7d51adbe9eb12430af6b2c9ba Mon Sep 17 00:00:00 2001
From: Tim Besard <tim.besard@gmail.com>
Date: Tue, 10 Aug 2021 08:02:17 +0200
Subject: [PATCH 14/65] Include an additional patch for OpenBLAS. (#41842)

Fixes dynamic arch detection on certain ARMv8 cores.
See https://github.com/xianyi/OpenBLAS/pull/3060

(cherry picked from commit cd6e67f38bc8ffcd4fbb05971452630609649a91)
---
 deps/checksums/openblas                       | 184 +++++++++---------
 deps/openblas.mk                              |   7 +-
 .../openblas-armv8-volatile-detecion.patch    |  23 +++
 stdlib/OpenBLAS_jll/Project.toml              |   2 +-
 4 files changed, 122 insertions(+), 94 deletions(-)
 create mode 100644 deps/patches/openblas-armv8-volatile-detecion.patch

diff --git a/deps/checksums/openblas b/deps/checksums/openblas
index 4b4d477ddc3d2..5a1668aa45254 100644
--- a/deps/checksums/openblas
+++ b/deps/checksums/openblas
@@ -1,92 +1,92 @@
-OpenBLAS.v0.3.13+6.aarch64-apple-darwin-libgfortran5.tar.gz/md5/db35f9fcf744a3d86c0a20b8ab39c7c1
-OpenBLAS.v0.3.13+6.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/45ffdac8aa4150e96981cdc3d32242ebf7b7987fed5408d6cd6a9a6518cf7b7f204ca2a4226a0837cae76c2d4d4822d4ae93d2de1164428830c04e2147976341
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran3.tar.gz/md5/aecd6bc6356314108c4ef2bfd768d006
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1f5e861d3e14d64d0574a4dde4909f21cc0c22d1bfe052aae93a7ba3b3a9e564e01a7e1c921cb6175ba251a8aadfb0c4fce345803966dd7452db3d289bf144d3
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran4.tar.gz/md5/7d20f2dd20459cee45adb024adb43efd
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/b06dcbf820f74e3e3cda4cfdf76113e7f518639642f141dac13ca19b05fec09160c3728fb4f7b001a9aa63300f6e289cd2126da1179ef4efce18e814d8c32bbf
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran5.tar.gz/md5/5f0a683b55fc2f5a4dcd134a0d03c175
-OpenBLAS.v0.3.13+6.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/89c25976dd89e2e1c856790aaa4d0951f912fd7ded92223952316e40b08e4b9d9218b25a35cf9ab19021b356ccbb72c8bab2237bc62b8dac37317abe31edff6d
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran3.tar.gz/md5/5a7815b5981d30b89cb48a3e3bbf8f4d
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran3.tar.gz/sha512/358b7d25a069d50434b6621d1831903b88f6e120f10b5978235cc82f795da4d31ca4e6d02eb5eb1fd5587085828e95835e2ad84b2042865c552c5493cc272227
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran4.tar.gz/md5/02062032841900e941cfc66a0ef94dae
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran4.tar.gz/sha512/86f3072c3b8e36f3b33d90da755bf9d2a95ba0317852eaf1c74deb8a0f62a2b5c19a3b1d551c054536277da50865ef341c5e05fbab195edc4cd1fb160b4203b8
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran5.tar.gz/md5/ec50a9a3e5078726d6e3dd011b7a4713
-OpenBLAS.v0.3.13+6.aarch64-linux-musl-libgfortran5.tar.gz/sha512/548d4b893648de6c1a3d6b24f4531c4b190afc338887d1b8eb9040a9aae72cf846127e9c545841568a2f358f090451e13cf12a191456d4c27431661ca41f6e10
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/15cb058e906a1f042d51e8dcc44dac90
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/d74ba1d83199259b07787424832318e31013384d4f7217f6d7adb47dcbfe0836147997c044c8ca7a27b5a5eea435948a42d7f81a38014b7f7b3f4fb049e3578b
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/14b991b59eb27538331fae0544130d8a
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/d2c194692265325f9e5b6c09c23d7dcb45f1f2ce88edf2fbe6f9b021bfedf6b0c7c4b94a7ff5aa7095b7a131870759cd81ec80369e315660f5cbb0ac1c133e76
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/161a6506630eb035bc6afae69aea91dd
-OpenBLAS.v0.3.13+6.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/f64ce6bbaac4e16d5956b6298204628648b35e76f14a528aa8df815b0021053e4e1963438edc7e5f66fd82ea1e1d7bc38b14c52ad0ea7b90eeb1ee59d0927fd8
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b4d102165aff04f4a3ff583c754ec90c
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/f488da922deaac3fa42f5637003c9dbfd943aa267104e6fce46b77fd9f10dfc580191bd5aa4c97bf5b41ad6a92fd669daca8b11479a3a7e28f41047826f0e6bd
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/9411f83736cbcef0b840914ace71d869
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/3b0c9077255fa639d8798193fb1c5fd8ad7824f58889d0c99b388b3ddc7622122387acc49fc29f7c5b5a62ff7dd2335a47b6e60c14d613ba37e11b79faddf7d2
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/1222f66dbd5eb8dc910efe04d37fb763
-OpenBLAS.v0.3.13+6.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/a747df8a04d50ef4a4b90bb66e682cd7414b6d2f0cd9577e25b18c80d36b599e9506e8fcf24729a8bc0f5ef464c57d86a87e1e74140597466dbd862eeb9a0b18
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/35fd828c77d3e1817bebef49aa045f02
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/fd4ce90ea21f64abde4497d3d6518c341383eae4c8f5052951b5c1469c87f1464cc1c57f7047bd4881b55d70d6453ef558e6d6e1986fe463a98a0567bbb876a5
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/deaa63f74369dbf358946c6796e8bd6b
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/7b16a7f5b5710de0b38122af6ed9e4a6b3ede4cd9c18c79314fbde366ca92c2dae17d1ab9e43213b5a6f80470455afbb06d54ff326e0404d60f5454164f2c62a
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/450506080f49538628dc2407461b894d
-OpenBLAS.v0.3.13+6.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/95dc7f14c1b1f450de59a3f95673dc510bcd0e38b6d82a8657d4dbdd97158d2095002a61ecb4a4c514e530c0a9879afd232f24a71561e8516683c564406a0a55
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/cadda67c770ea3835170c63cf5c1a93f
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/ccd326df1d3ce8e138fc22db37880a0f15b3b5740b75f4d6e54c6496735dea48d1011c31d0fbf6fcaf7f4ccc565cb2aa59bac473b9b12251da1adaa992998373
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/e89c9935ed19d9b6bedd1b70cbe1ea27
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/d537e954d424240315280fe632bfa83088825dd770042a750448e1553b2887a8c3d4edf193c89d2bccb7b0c3eae560937156eb989373accca1dbecee47e32cc4
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/7072bd88910ce5402e18527f178dcd56
-OpenBLAS.v0.3.13+6.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/0e4e038f317faa7a14cc29267202ad781a2551ef444b27f841ad2a39f5fb5032d20d50749d1b5a925e6552247aca40d84a1464c268022d8b9560c6e6fcf9a9bd
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran3.tar.gz/md5/261636c2b2b3a734e0d054b67fc0e617
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran3.tar.gz/sha512/0777c0cccb6f688024756e12f8a09ca107cf6f2408d04fb1efeae67299eb8de834de158b9ada232e3e50d4bb0481810181c54f6b63238ba8d4f1a779bf30ceab
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran4.tar.gz/md5/af9998d911a0919bbc611279f6957d8f
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran4.tar.gz/sha512/639d0d837dd62f4eff32071e2ef5d95d3d1a80995dc9da0a97e0a2f8bedf4637e3082acec309744d0d36dca8e82b3f7bf792ffb9ba47c18d8b9a44aa0f368adf
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran5.tar.gz/md5/7bee1a7c3470c32c10e3776289ce730f
-OpenBLAS.v0.3.13+6.i686-linux-gnu-libgfortran5.tar.gz/sha512/ff76d5fc5ff2432dfcd9a36cfb95943fecab3e75153c12260b729a89c6bc2269f7f0ad256f6334d58445de27d32f6073e830cee4a59e9196a0b7395c3a3b7ab0
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran3.tar.gz/md5/362e299c65ed4011563caf8555f55738
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran3.tar.gz/sha512/45eeae6bc817e8d78c0daa69ca2add3c32d714766e1e1341d14c445a1beb5a5a7ae93e88649c9a62f07c5463b6ee300b60acc06d9d29974cc6725d08d9df66d9
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran4.tar.gz/md5/791075ccd19280d58209f48b487ec42b
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran4.tar.gz/sha512/44b9bf0b5d31048fe05f78a71fe9ddee799bd70f7586061fdd9a1390a894701eb96678ad9c332a21f2c2b079896924bee14d64ea89f6314babae1faac289d6eb
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran5.tar.gz/md5/712e9c7ef4640dbc150371ef3a10e249
-OpenBLAS.v0.3.13+6.i686-linux-musl-libgfortran5.tar.gz/sha512/3407fab09ae6e2b12c2b586915557d121bfa345a4bf66597bec2d5850ce33ad70dddb45ad08a975097e2a428e65abffdbd9747f1b46fa944bc52218798fd2e34
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran3.tar.gz/md5/93d7254e1e03f4ef1acb6b4e8d63c813
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran3.tar.gz/sha512/198d4d0455f981345f20ff4a196cca056fbd7c5fd4d6a2b11e0ec6ba695c362d309947b9fcc13a6c51a44cc3ea73e559c0246a98b26fd6baa6cf07a055f5c972
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran4.tar.gz/md5/728d9f80b9e6b5ecce0ffab86b7e1c52
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran4.tar.gz/sha512/1b8fc2e3e14fb172ec7d99d5beef54bcabdc807318f1b0415f1bdf7bb97a1e49c20168a9bfc0e89f4f9367dfbd1011e3cffe74b515da53fce00f06896387ca72
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran5.tar.gz/md5/b989478ab0496a27daf87f8ebb396316
-OpenBLAS.v0.3.13+6.i686-w64-mingw32-libgfortran5.tar.gz/sha512/c56ae711ecc9c6fe9e65e7610011f7189ecda4c0e94cfdd6bb150a32eac6f3d2343c671005f4008873e2f026fa312ce0257716a47fb4e91f82a6d29013dfc303
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/194ec8e4078fc6624acfefb29a9a1177
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/ecdd5b17232ae08e76f6822ec52cc96e4b5cde0748baf799799aa7946966b61f83c5b1d8a70e4f14b4e074e13e0cc72f2261f2a304ab8d8be15e68a004210be1
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/f08aad57a0d92ba7811b40deb7c40e5a
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/f5759dfce2854f929a73e11253edd37e100b9437829eca893f97a2c08a7ebc7af4815f588466cc8230985932f47b150e671d3a822e8463c1461bc3ce698f222d
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/bf291c76d9c9642e6964141eb541e4e0
-OpenBLAS.v0.3.13+6.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/0792f5d3c4c7f1ff5f43bcf6aafc8547c742e969ef4fc056f098649f7d99470538827349e5f39f0ce81ac15ec992f11d93a78f1ea9673a67ec076787b6d7b9c5
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran3.tar.gz/md5/9a1979528b2b54df3012e2182b834bbd
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/1752e0ee45107eec916a42370e19b6091b41423eb0f9443f23f78c3e8dd8db5fa0b8b72f5edf2d26e759e0f44056034dde1bce38b9c12f58d6c931ec873bd67c
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran4.tar.gz/md5/1b30b010ee8ecf949d83d98be7cd59a0
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/bab954ecbc2e9ece41807409bfef66063dc98cc7fbdbb0bbce24a331d5b121b0c63432a13cea935c5c27090f790e9fba599e1c129e0005656952805260732da6
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran5.tar.gz/md5/da031443b1bd5ed8abb8e956a05c616c
-OpenBLAS.v0.3.13+6.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/0009d10265ff16603c8552663b3c71ab619905b18fe87119a3203fe24d531148b8b18f727260fc125362c58a6226d1dca98a6517e9b7a93418a2cdbb2c66806e
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran3.tar.gz/md5/133b638a2efa22381cd70abe871e6ebe
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/98067cbaf1f5cf4a6ba01cf09ec9de044c04007f3a1953e51a75439cfb7215caa5b1a7f1b848b216926231a9511c45e78ba78abd39da06c6fbec4ce9542890f2
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran4.tar.gz/md5/3590e16f503a615a8c8886af39d3fd14
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/b7f3fd487e44a4f6cbbf035bc9fb433aa761f05bc1cf0c5351e6f9a9e5b80450ffbd11f86f904477c89aadbe24e22780ce108e228585e701d92141a735b454fd
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran5.tar.gz/md5/05472a418ff1d7f6bedb58894d6f5356
-OpenBLAS.v0.3.13+6.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/fc8a84b28db834b93a0c9a9c96ba22dfc6018cba90c0d43f4e1db7fcbda73c0aec04d7347db02b94df5375e785d447b3aeb993bf0ded69e5d43c2486c13b2aa5
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran3.tar.gz/md5/22200029744717079b3b8663d683273a
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran3.tar.gz/sha512/664bc2e95f10ac5668d51a2ffae488ad002f00995e5e7b620dd894e816bcaeeb7ccffb45f448365484f97f7aa5ac7b237ca1767e2a9421fd5c5fa39098c9fcb4
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran4.tar.gz/md5/b9fb6101fa172dd0f1a00c07673b308e
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran4.tar.gz/sha512/cf49792da8bc3e3a971b0361f2bdd835db46764c308d4ad0e20215c8bba5d6bd9b96e9e8fe2cdfb835bba4f21e62287f7b67245ff1d00a9ef3f9e44201b53412
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran5.tar.gz/md5/17c0ab204c65b252988bf873226f003d
-OpenBLAS.v0.3.13+6.x86_64-linux-musl-libgfortran5.tar.gz/sha512/02f493a6cb20c51c38203928a5a9e4890fc9285ce1907a552b61bd96bc64bc50a1932236d7617e83edc5ae1c40da84cc1d8db80c190605676869a8d1a57c4d7e
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/24e787f88452b2f304c269061ad07b0a
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/d45272120a6e15431b9a08afe5648afa903b588e2d65541f80ce123117dfc0e6d3b620ce4063211a420f1cfd398e969be69eb6a6302211fc368c4af3c9d6d3ef
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/9aa8cd86c2de41ed2ed47bccc315f19f
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/1c42e55fef774a34d3b0e0b0f899418a501cc9d56c4d38cfa0b4823a7622c7eb594f4ab222bd6994ba1c1eb7b69a37b10ec78b206a24d54276b03f69133b7b40
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/e09d926e41b3a52188cac7efe9d9aeed
-OpenBLAS.v0.3.13+6.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/eddc11f4b5535e629af6fe2705f24b142e457fd7721d6f9892e1c951d2722e996f32a59d05df803bc7a77c15ae011cc5f36a88709a7ebc9e6be00cd52789083b
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/5f09322a961185e965f8914b87fb769c
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/531860456a4604d7743b52632ca1562448e3b34015e0a7082935a12fe7537c3824fd6eca29813b8b28043c85db4c748ca2e42dfb443149e225b2ae1ebf641ece
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/68bf07ec07fab8eb000742f5b34a297a
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/5cf754e09737a9ccf67998a0dd64a6eb836784489b337bd9cd3379773ccc0d8261f6eb91ae6811dc45f3dd13480c6e0abc603f13add94bc5505ed4aa41e82951
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d30d1b10c1a98ecbed686a1d133f4abc
-OpenBLAS.v0.3.13+6.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/6a61cd1eb2b20f33bb6370d760cf98c8c3af2f323b3c83c866ab8d2e3010771da7345fccbbb94880ca0c0956b711d3127566f040bbb79166e281b9ea6d14b2c7
+OpenBLAS.v0.3.13+7.aarch64-apple-darwin-libgfortran5.tar.gz/md5/c9800f7e24105c92a5205c77ddbc4227
+OpenBLAS.v0.3.13+7.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/43703203a6d7c7aff8c8a257a2f5393d930e7e12064ff9a7890114dc250c28f0c73d735c4bbe09a811610cec6f4fe5079a806fb5898025c92067239dc70d6f62
+OpenBLAS.v0.3.13+7.aarch64-linux-gnu-libgfortran3.tar.gz/md5/ffb49e047b54a68b9de82ac3ae64a94d
+OpenBLAS.v0.3.13+7.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/745f30477562ec0bcf754a36e0c0082a60afea518dbe9fc46f95ee7547c33eece8717f6c9ee54bf1420a221ab7fb779dd4ccc98ca74164315975b7ac8e8b6979
+OpenBLAS.v0.3.13+7.aarch64-linux-gnu-libgfortran4.tar.gz/md5/ccdd01f36e19f8269750c93901c54bf7
+OpenBLAS.v0.3.13+7.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/ef619a6b5d140253a5f310f975d1aca378a8419f75b90713baf73aa1f3be9cdc43ae96a8fc40bc7d3502947bce87de4fd5d869f16220bde1ced4e6730437ea5e
+OpenBLAS.v0.3.13+7.aarch64-linux-gnu-libgfortran5.tar.gz/md5/cbe589a28fa89287502eb591d26c0a1d
+OpenBLAS.v0.3.13+7.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7d894dda5e3eb124e17dcaa6ca7b02e29177f2ac678e18b6e57f6ce34c654c0dafb81fe470e93a42fa4b63f904b2cee00139cc149c7868d3f70b69313242187b
+OpenBLAS.v0.3.13+7.aarch64-linux-musl-libgfortran3.tar.gz/md5/ebf835b3264f4b63cedac8c10f65316c
+OpenBLAS.v0.3.13+7.aarch64-linux-musl-libgfortran3.tar.gz/sha512/2de3869ae6505789ff2ebbc83672ddb4f7a3f1610a88040ac9407dbafd23b7a0bdf19328964f4d901e7389636564fefb82442a1bb89a0d18e4a6441267d886b3
+OpenBLAS.v0.3.13+7.aarch64-linux-musl-libgfortran4.tar.gz/md5/3d20fe5e4c37e40eafd2a07ac5b11cf8
+OpenBLAS.v0.3.13+7.aarch64-linux-musl-libgfortran4.tar.gz/sha512/9dadabace2cec1260b2a02b42102453fa4c7e832c89d5c4f06b8a58ead30c2d926e6f615da03016aec596324619d34b7e25dec88e1dfa9725e420fbf6397c0ca
+OpenBLAS.v0.3.13+7.aarch64-linux-musl-libgfortran5.tar.gz/md5/53c4a96fb47aed019efc780d0e9e4929
+OpenBLAS.v0.3.13+7.aarch64-linux-musl-libgfortran5.tar.gz/sha512/2e3ebec1e64f29308d7e61e80ff95dd7ff45a7bf7696639f6b786990d4e183c5cb8357e65f16b9c98058ff8d197a3e1e7c355a304e56c644fbe8b321d8941988
+OpenBLAS.v0.3.13+7.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/59142e0c7a6e50c27e084f10982402c2
+OpenBLAS.v0.3.13+7.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/4e45b9a7087840edaf515af3a9ad15f6d5c2cbdb8be244b9c65542f6f6526e5f82fcd0939070b35a19a8ec1ea7c8886c82070359d74924b90f210ab3026b1d24
+OpenBLAS.v0.3.13+7.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/2c00dddb46cd74324c023d9cea5d12e0
+OpenBLAS.v0.3.13+7.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/e4b6b07016c59727be3cc01fbfe1ceb2fc19bbddf96bffbd8a0b8bbf10e30768a7d8ed04052cbc53093d8f6d6f8d57c0d05d6692ba456bb8aa31a0f736617d33
+OpenBLAS.v0.3.13+7.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/d98b02bba19262ebb4a6eae0971e19a8
+OpenBLAS.v0.3.13+7.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/f73413a520bf204d9c369dbc910e0df4b72e583ab31a66e5e64ae31e93464d6f8cc114be7c9f15738c900fa8762905f2c9ce198c45426eab22b119ac8fc489d3
+OpenBLAS.v0.3.13+7.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/5e34312709f68e06783e890d94447646
+OpenBLAS.v0.3.13+7.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/f127d0df4a0b4d91c810990108f0f3daf89373f29002249edd6225330bdc804a0a2282e05ab128b15ec58c75098213481e26bb0e9a5a2b642da9c43457b9f9f9
+OpenBLAS.v0.3.13+7.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/02d6189238fe4ef41e791f558324e32c
+OpenBLAS.v0.3.13+7.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/fd5d4c2f43c4e98f0c28afdc43f2dfc97123fbe723c577688acaab959e96980c14b0ae76d150807da1814f9ea55dbc21a9cce9454d52804ba815cf8026d17117
+OpenBLAS.v0.3.13+7.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/14d77e7332ed0949342a4b659ce98088
+OpenBLAS.v0.3.13+7.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/354113f9877ecb04dbdbc105991ee20c58d9bd49d499ee476af87135fbd277c7729e909e528843b5fd4ae2f076996ea74284ad384fc06584882ea5fdfd7d4206
+OpenBLAS.v0.3.13+7.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/b32a6caa13c26d964a456f440a2df0ee
+OpenBLAS.v0.3.13+7.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/4b19666ca3a32c1a145f89eed063cafea2eb468cafc3b47168977749a8e484e124b5008c58d00d40e4c7ba1b09669d7499fe259bd43c88ed0d064c326c85dea9
+OpenBLAS.v0.3.13+7.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/4b15013643544594e7a8a4948b9a4db3
+OpenBLAS.v0.3.13+7.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/2fd0e4d5ec22e3ef831a60909cbf803ba88e16b7ba12269cf22bd14808fb25dccad597e0690530b704dc971c681f1c075f3663f4d7cb05998835e584cb9779b8
+OpenBLAS.v0.3.13+7.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/7217154630d07a83fa84cbaf298d84e2
+OpenBLAS.v0.3.13+7.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/a9cded7959ef98027e1da87127464ac930dec3ba9c198296bf8c64918d31a8fcdd92d4d74e3aad2f3aff02553959e141986870873e01a5302c67e7efacc33616
+OpenBLAS.v0.3.13+7.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/65f601ff699b51719aacc4a495be5b82
+OpenBLAS.v0.3.13+7.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/c96e4fadf74eea0c74ccc7d57670222d6bc097ceecc38211a742bdaf8c48491266302a96a33317ab4e8e19669b41f3051c5ca409c076ae1993f5218fa30f2cd6
+OpenBLAS.v0.3.13+7.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/7d0f0e432a372ce94cd127153b8976ee
+OpenBLAS.v0.3.13+7.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/b017bb4ffe9caec8410c72a80c5307e81cbfe84e4c148f3f99ca774efd17b47378b24ce95cfe6df45e8ee890ff0c326d480fabfbac0e494e25f620e82fdbbb8e
+OpenBLAS.v0.3.13+7.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/398c894c743c181b4edcffebb5d91340
+OpenBLAS.v0.3.13+7.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/c4c7acae9dfb49252dab6a7d0217e8caa8df707da72fcab3dcb5ea6449e42effd1a9df18531e8077d91ad8fdb323482a7fa405eda1a4660f48e59927ef33f525
+OpenBLAS.v0.3.13+7.i686-linux-gnu-libgfortran3.tar.gz/md5/ac7f07c75d419712e4ddad380fda0177
+OpenBLAS.v0.3.13+7.i686-linux-gnu-libgfortran3.tar.gz/sha512/4522c5b5924ff976e58ee30bbd0bf933baa2f705e3d68a83aaeeabaa8cd4cacf41edf05612f4616052d78ce04e70d62a411f8e0cc7ab9ab3fbc56dbae2615b67
+OpenBLAS.v0.3.13+7.i686-linux-gnu-libgfortran4.tar.gz/md5/08254b64198705d5062db0526d6d8cde
+OpenBLAS.v0.3.13+7.i686-linux-gnu-libgfortran4.tar.gz/sha512/5d8c1062f15b11066d7400811492b8e2e1c33150bda4b74d7f9d7cd64529f0c0b89d7a1a2e644dc321be18fd3e3ba7dff92fe597f3d42aad5d903c8b26fa0e87
+OpenBLAS.v0.3.13+7.i686-linux-gnu-libgfortran5.tar.gz/md5/5f610eff8a8e7b24868a2b915419d4a3
+OpenBLAS.v0.3.13+7.i686-linux-gnu-libgfortran5.tar.gz/sha512/8967a180b57ada258aea76a39b8c71b718314460eeef2bad1e407be0c60a54f04a0b733944feb036c5fd2d67a8f3761babef10530dd871006f7d7ba366b4c95b
+OpenBLAS.v0.3.13+7.i686-linux-musl-libgfortran3.tar.gz/md5/1867e826c4cdc6b6964bf2e7be43671e
+OpenBLAS.v0.3.13+7.i686-linux-musl-libgfortran3.tar.gz/sha512/c7e7206361a4ee379c40a483aae1dc5357b8ba77e561fbf961cd7269674705c83ba2b5b09891b4567a330f9621d5a7bc0c323c8ec9d2385222b4afb57e010f8e
+OpenBLAS.v0.3.13+7.i686-linux-musl-libgfortran4.tar.gz/md5/5a4511b55512387315e42555a1f35222
+OpenBLAS.v0.3.13+7.i686-linux-musl-libgfortran4.tar.gz/sha512/3381a32dd47d7106a131f8e6be89675c29b8ff496336e8f4dbc5bac13df3b1de9bd03a2c027b94ee43d66f4cb8fcab1545327552ac45280252046072dde3ebd7
+OpenBLAS.v0.3.13+7.i686-linux-musl-libgfortran5.tar.gz/md5/9766d92bb2a0bc7376d42537c0bff642
+OpenBLAS.v0.3.13+7.i686-linux-musl-libgfortran5.tar.gz/sha512/34149b0ae5ab26d4b5ec9f5a8a981eae713e355248c56af0d71eeb2720cd4e453d96fb9e885a48c94389759f5e015536b7a21d9720c6b46b167e6bbbccc717aa
+OpenBLAS.v0.3.13+7.i686-w64-mingw32-libgfortran3.tar.gz/md5/3e13fa70512da79cb19d15899e967e8b
+OpenBLAS.v0.3.13+7.i686-w64-mingw32-libgfortran3.tar.gz/sha512/c4e99d0bcfd9de4ddcdbf819872257a91e45cd778c7bf927b6a3c69e24d555d6a757236b08113160790a4a8fe0a3b258c17c8d43386a0d9ecf926e4e542b9a5b
+OpenBLAS.v0.3.13+7.i686-w64-mingw32-libgfortran4.tar.gz/md5/724db97fb05d2418325b84eff736b0d6
+OpenBLAS.v0.3.13+7.i686-w64-mingw32-libgfortran4.tar.gz/sha512/4869bc155e2c5bbca2ea6a0107a2b12e7d7ec226dfa5ab81c8f46751f5e2a9342127c0c8ed24731cbfa05904db319ffdd907b248b725043d8bd089e911cf6808
+OpenBLAS.v0.3.13+7.i686-w64-mingw32-libgfortran5.tar.gz/md5/4fd5c17e2432dfbf2fbf6930c9b08617
+OpenBLAS.v0.3.13+7.i686-w64-mingw32-libgfortran5.tar.gz/sha512/92c4b8b61d9d4dfc8cc60c05cc1a8663e2cc33f3e0683d562f723939697237cfaf3925eb903ec2b443e1a943260c91e04029e582f9f07f2b3655ee5ed9812dad
+OpenBLAS.v0.3.13+7.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/6b509bdecd697472c5c96b947a3d016f
+OpenBLAS.v0.3.13+7.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/494b8fa97c383554edd62d04b985f2fe25f1365d149af041d1d2d9c617df0fe12fff271e21a3811205e05412229e2f55043876ba6b553f22a7616653928b1908
+OpenBLAS.v0.3.13+7.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/5be5d102fd4c0537403a0fa8266a95d2
+OpenBLAS.v0.3.13+7.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/2fb952a7da419d64f41b5fdff7c49e8e750f8724348180b52e61a50b4b55e5a3c3d072455d3ce870fb240b9c50c6f4572f6211813edb965ca41fa27efc85de6a
+OpenBLAS.v0.3.13+7.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/9bb7b2ac52789cd7eba17e4b564d95d8
+OpenBLAS.v0.3.13+7.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/a50700ab72ab169c91bd10eccb4a03231d0e223bcf3e158331af4a6805e2f17ab042eb6c9db335a800ab5b69554c0b9a5aa78c1f112c8b579f5148013afa15c2
+OpenBLAS.v0.3.13+7.x86_64-apple-darwin-libgfortran3.tar.gz/md5/2bfd33949884b29e2621bd4db9ea790b
+OpenBLAS.v0.3.13+7.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/87e32e09c5fb6da7396a13646a19ddc9f0aa6070060fba1e4f4490fc447c041a0b6ae68cd338e1f1b2d610bf61e28274823c81ae229768d6825104337d82e3d5
+OpenBLAS.v0.3.13+7.x86_64-apple-darwin-libgfortran4.tar.gz/md5/e0a3f443cd840d5a9cca3748c0fd247b
+OpenBLAS.v0.3.13+7.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/b4a55211a2a64fceb2eb89c1bbb06a5f658d4d81dcc6c67c1fc6a804ba16de8a05c341b81bae8b14788b85f772555785696a2b11b1cb6873ab9fbd174eebf1c1
+OpenBLAS.v0.3.13+7.x86_64-apple-darwin-libgfortran5.tar.gz/md5/3acd992bd55a527ff96cbc1fd208300a
+OpenBLAS.v0.3.13+7.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/192300b6480efe523e3a196570743e3c091d9d29c6f16cb3f5f951b344e8ecc5a76f235c8fa2b2a9bd56c8dc394fca447c2bd1d4102ad7f9823ab71436154dbd
+OpenBLAS.v0.3.13+7.x86_64-linux-gnu-libgfortran3.tar.gz/md5/35ede94f298e95ff772e3655692da81c
+OpenBLAS.v0.3.13+7.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/109fbc3ee23528640011df500f7843bec3f42d92b6be99e2101bfd0c6bb798d5c5a0c8a9d2567d11e50bdf54213347ea204c383660d2fd6445ae0735f210d211
+OpenBLAS.v0.3.13+7.x86_64-linux-gnu-libgfortran4.tar.gz/md5/5d97a16fb7ba45d34aee0d1b55f81008
+OpenBLAS.v0.3.13+7.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/f957549494ec3c11fbb73cad4b43d8a39a7f0e509a3f797cd35898f40ed577aad27cc721a5720eb38e0ccd5064915a4ca93b67e517f7fa1cef86fbe3f88c2529
+OpenBLAS.v0.3.13+7.x86_64-linux-gnu-libgfortran5.tar.gz/md5/85903515954b13d71dfc1cfed5364000
+OpenBLAS.v0.3.13+7.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/ee3fdeede35b52987565e55c96a1a47964cf1da52b25753302c2ac4671b2921a70656a5f9e39bf1b7db0d6037c305f66b4aa22529713239ca30c322f5c3d8f97
+OpenBLAS.v0.3.13+7.x86_64-linux-musl-libgfortran3.tar.gz/md5/b9a86f939ec7d76674d67653d3acdfee
+OpenBLAS.v0.3.13+7.x86_64-linux-musl-libgfortran3.tar.gz/sha512/8a9d10605d9fad833ece78bc0c4690e43050e5e276b0c76ff194b617134e05261db2b385fe2c4b0b2313a3be107731b4cc71dae79d6e403e64a6ca635a3d4b47
+OpenBLAS.v0.3.13+7.x86_64-linux-musl-libgfortran4.tar.gz/md5/6a4a082c4d73cb6dad69460deb75afdd
+OpenBLAS.v0.3.13+7.x86_64-linux-musl-libgfortran4.tar.gz/sha512/8fa8c2f07ff044acc78caaf7e34f6e299567ac82436059c70d9a149ffd03d0d257a009592f845e22554a17dbd59fb13c23582a0e4131a186139c9be9c71b9f2d
+OpenBLAS.v0.3.13+7.x86_64-linux-musl-libgfortran5.tar.gz/md5/50148e4dc8881c9f1f4c2bc936c3f59e
+OpenBLAS.v0.3.13+7.x86_64-linux-musl-libgfortran5.tar.gz/sha512/df0019201f71a12867deb872d3a3f7b3c07919bb779b3f3f5612d52f9c02bba1c19e230c81c8421e32b5cf1140dc388f1dfae3379c081cecb4a516f1d28a3788
+OpenBLAS.v0.3.13+7.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/87b58444e006bb680e8139b2ee7bc998
+OpenBLAS.v0.3.13+7.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/c32638c812a3cc4e3c3916b6a9bf572ac202b2101946a18a7af89de1cd3171a44f9d9308a6de822bd703acd96533c963ad8a8205a1200039540191cd03fe1f6b
+OpenBLAS.v0.3.13+7.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/f8a07c961fa2b6d268ff1b3f4e02b729
+OpenBLAS.v0.3.13+7.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/b3faaa4155971047a8f58123da5dbf51cd6cac574a46e4a339b2be0570f1868fddbe9b46ce07f75fb561bb5da42bf19466035a05286832a85f1f39c58265bfb4
+OpenBLAS.v0.3.13+7.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/67d69caa1877b74ca652f0e96718ddde
+OpenBLAS.v0.3.13+7.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/bc72e2df6e0dec6962cd32b77f49ec93b05b34dbc95c0b0c31da6158d3a5f497f6321a895f3faa48c82ee956fbd8e82346bb8d8a20a9639e2e689f0b85ab5c2e
+OpenBLAS.v0.3.13+7.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/dc755ddb59511ef049daf85a38ef0804
+OpenBLAS.v0.3.13+7.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/fa5a559165b97cc5ca24d5df02dfeb1f97b8619512d8f99e3c8c8ee800aec0d3ca56ac4170569dafee7cec738e9abc5017958c153bd44d19d60fbecac53a0ee2
+OpenBLAS.v0.3.13+7.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/ff56c35358fb846827f8f869fdca4b21
+OpenBLAS.v0.3.13+7.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/c8c0029c9b95f5a11dfc84bd02b944ed96de5c96835b2e6dc5e8c401fc37b667f8ea956f3715794b01be68ea86e0c74b4ebd22f728d1f9777516bfdca944c1b3
+OpenBLAS.v0.3.13+7.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/e5bbc60da2dce3c4abe0583cff4b985e
+OpenBLAS.v0.3.13+7.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/344dac288566886013b59c40877b879578b11f57b90433e65909b56b7e06416c0ceffedf08cc3d05e08514a098309ae492fd57c30dc97737cdede1aebed2ea2d
diff --git a/deps/openblas.mk b/deps/openblas.mk
index a1ce15100ac4c..ee6712d883de6 100644
--- a/deps/openblas.mk
+++ b/deps/openblas.mk
@@ -118,7 +118,12 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-Only-filter-out-mavx-on-Sandybridge.pat
 		patch -p1 -f < $(SRCDIR)/patches/openblas-Only-filter-out-mavx-on-Sandybridge.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-Only-filter-out-mavx-on-Sandybridge.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-armv8-volatile-detecion.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-Only-filter-out-mavx-on-Sandybridge.patch-applied
+	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
+		patch -p1 -f < $(SRCDIR)/patches/openblas-armv8-volatile-detecion.patch
+	echo 1 > $@
+
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-armv8-volatile-detecion.patch-applied
 	echo 1 > $@
 
 $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
diff --git a/deps/patches/openblas-armv8-volatile-detecion.patch b/deps/patches/openblas-armv8-volatile-detecion.patch
new file mode 100644
index 0000000000000..f1fb36b6a34ab
--- /dev/null
+++ b/deps/patches/openblas-armv8-volatile-detecion.patch
@@ -0,0 +1,23 @@
+From 6fe0f1fab9d6a7f46d71d37ebb210fbf56924fbc Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Mon, 11 Jan 2021 19:05:29 +0100
+Subject: [PATCH] Label get_cpu_ftr as volatile to keep gcc from rearranging
+ the code
+
+---
+ driver/others/dynamic_arm64.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c
+index 4f1b12f27a..37c0694b6f 100644
+--- a/driver/others/dynamic_arm64.c
++++ b/driver/others/dynamic_arm64.c
+@@ -68,7 +68,7 @@ extern void openblas_warning(int verbose, const char * msg);
+ #endif
+ 
+ #define get_cpu_ftr(id, var) ({					\
+-		__asm__("mrs %0, "#id : "=r" (var));		\
++		__asm__ __volatile__("mrs %0, "#id : "=r" (var));		\
+ 	})
+ 
+ static char *corename[] = {
diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml
index f7d4dcce97cd7..ac19d703cd523 100644
--- a/stdlib/OpenBLAS_jll/Project.toml
+++ b/stdlib/OpenBLAS_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "OpenBLAS_jll"
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.13+6"
+version = "0.3.13+7"
 
 [deps]
 CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"

From 9a15a3558e807df1fe941be0554baa20253938cf Mon Sep 17 00:00:00 2001
From: Simeon Schaub <simeondavidschaub99@gmail.com>
Date: Tue, 10 Aug 2021 13:09:54 +0200
Subject: [PATCH 15/65] [IRShow] expose index information to postprinter
 (#41828)

I am experimenting with showing some other information besides just the
types here as well and I think this would be generally useful. This
passes that information as an `IOContext` so as not to break any downstream
uses of this code.

(cherry picked from commit c2b4b382c11b5668cb9091138b1fa9178c47bff5)
---
 base/compiler/ssair/show.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index 1f1c838c62ae7..a327de623bb09 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -628,7 +628,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info
         if new_node_type === UNDEF # try to be robust against errors
             printstyled(io, "::#UNDEF", color=:red)
         elseif show_type
-            line_info_postprinter(io, new_node_type, node_idx in used)
+            line_info_postprinter(IOContext(io, :idx => node_idx), new_node_type, node_idx in used)
         end
         println(io)
         i += 1
@@ -643,7 +643,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info
             # This is an error, but can happen if passes don't update their type information
             printstyled(io, "::#UNDEF", color=:red)
         elseif show_type
-            line_info_postprinter(io, type, idx in used)
+            line_info_postprinter(IOContext(io, :idx => idx), type, idx in used)
         end
     end
     println(io)

From 54ea26af63736e669de06119d5f1579ae104ae71 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Tue, 10 Aug 2021 13:33:04 -0400
Subject: [PATCH 16/65] CI (Buildbot, GHA): Simplify the `permissions` key in
 the workflow file for the "Statuses" workflow (#41851)

(cherry picked from commit 34dc0449ce5d6ffa1a936b38a64e3d1590ffff80)
---
 .github/workflows/statuses.yml | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
index df86caa3acee3..97ec290abe013 100644
--- a/.github/workflows/statuses.yml
+++ b/.github/workflows/statuses.yml
@@ -30,17 +30,7 @@ on:
 # These are the permissions for the `GITHUB_TOKEN` token.
 # We should only give the token the minimum necessary set of permissions.
 permissions:
-  statuses:            write
-  actions:             none
-  checks:              none
-  contents:            none
-  deployments:         none
-  issues:              none
-  discussions:         none
-  packages:            none
-  pull-requests:       none
-  repository-projects: none
-  security-events:     none
+  statuses: write
 
 jobs:
   statuses:

From 1b3fb60960320b33bf739591c884dfb6f712c964 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Tue, 10 Aug 2021 17:31:22 -0400
Subject: [PATCH 17/65] win: fix lock function call for backtrace collection
 (#41849)

Avoids an assert when this is triggered

(cherry picked from commit 2d8174254805b96ce936f233ce75264ad18051cf)
---
 src/signals-win.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/signals-win.c b/src/signals-win.c
index 984330dc434dc..5d1a963078df4 100644
--- a/src/signals-win.c
+++ b/src/signals-win.c
@@ -141,7 +141,7 @@ void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread)
                                               ct->gcstack);
         }
         else if (have_backtrace_fiber) {
-            JL_LOCK(&backtrace_lock);
+            JL_LOCK_NOGC(&backtrace_lock);
             stkerror_ctx = ctxThread;
             stkerror_ptls = ptls;
             jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);

From 7208ff233c564d7571313a52f7fc85c0b3def5bf Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 9 Aug 2021 06:16:34 -0500
Subject: [PATCH 18/65] Revert "Add vim bindings to TerminalMenus (#37940)"

This reverts commit 4a19b753804ed0b06fab9681309941862eaa7227.
Closes #41799.

(cherry picked from commit 702cf55497ca21274babc2708cd60535177bbb53)
---
 stdlib/REPL/src/TerminalMenus/AbstractMenu.jl |  6 +++---
 stdlib/REPL/test/TerminalMenus/runtests.jl    | 20 ++++---------------
 2 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
index ee5bd4d426795..f01df5c389324 100644
--- a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
@@ -203,9 +203,9 @@ function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Unio
             lastoption = numoptions(m)
             c = readkey(term.in_stream)
 
-            if c == Int(ARROW_UP) || c == Int('k')
+            if c == Int(ARROW_UP)
                 cursor[] = move_up!(m, cursor[], lastoption)
-            elseif c == Int(ARROW_DOWN) || c == Int('j')
+            elseif c == Int(ARROW_DOWN)
                 cursor[] = move_down!(m, cursor[], lastoption)
             elseif c == Int(PAGE_UP)
                 cursor[] = page_up!(m, cursor[], lastoption)
@@ -217,7 +217,7 @@ function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Unio
             elseif c == Int(END_KEY)
                 cursor[] = lastoption
                 m.pageoffset = lastoption - m.pagesize
-            elseif c == 13 || c == Int(' ') # <enter> or <space>
+            elseif c == 13 # <enter>
                 # will break if pick returns true
                 pick(m, cursor[]) && break
             elseif c == UInt32('q')
diff --git a/stdlib/REPL/test/TerminalMenus/runtests.jl b/stdlib/REPL/test/TerminalMenus/runtests.jl
index ac577dfd9ab27..62a91cc0a1256 100644
--- a/stdlib/REPL/test/TerminalMenus/runtests.jl
+++ b/stdlib/REPL/test/TerminalMenus/runtests.jl
@@ -6,22 +6,10 @@ using Test
 
 function simulate_input(expected, menu::TerminalMenus.AbstractMenu, keys...;
                         kwargs...)
-    keydict = Dict(:up => "\e[A",
-                   :down => "\e[B",
-                   :enter => "\r")
-    vimdict = Dict(:up => "k",
-                   :down => "j",
-                   :enter => " ")
-    errs = []
-    got = _simulate_input(keydict, deepcopy(menu), keys...; kwargs...)
-    got == expected || push!(errs, :arrows => got)
-    got = _simulate_input(vimdict, menu, keys...; kwargs...)
-    got == expected || push!(errs, :vim => got)
-    isempty(errs) || return errs
-end
+    keydict =  Dict(:up => "\e[A",
+                    :down => "\e[B",
+                    :enter => "\r")
 
-function _simulate_input(keydict, menu::TerminalMenus.AbstractMenu, keys...;
-                         kwargs...)
     for key in keys
         if isa(key, Symbol)
             write(stdin.buffer, keydict[key])
@@ -30,7 +18,7 @@ function _simulate_input(keydict, menu::TerminalMenus.AbstractMenu, keys...;
         end
     end
 
-    request(menu; suppress_output=true, kwargs...)
+    request(menu; suppress_output=true, kwargs...) == expected
 end
 
 include("radio_menu.jl")

From fc5df4462f4887988861fc9ba1accdf35de692d0 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Thu, 12 Aug 2021 13:29:12 -0400
Subject: [PATCH 19/65] CI (Buildkite): `llvmpasses`: use the latest rootfs
 image (#41864)

(cherry picked from commit 01d439e7e1d304903ccf2e37b47acea3d791238f)
---
 .buildkite/pipelines/main/misc/llvmpasses.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.buildkite/pipelines/main/misc/llvmpasses.yml b/.buildkite/pipelines/main/misc/llvmpasses.yml
index eed2f957855bb..a012ace41acff 100644
--- a/.buildkite/pipelines/main/misc/llvmpasses.yml
+++ b/.buildkite/pipelines/main/misc/llvmpasses.yml
@@ -33,8 +33,8 @@ steps:
       - JuliaCI/julia#v1:
           version: 1.6
       - staticfloat/sandbox#v1:
-          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/package_linux.x86_64.tar.gz
-          rootfs_treehash: "8c33c341a864852629b8aac01a6eb6a79b73570e"
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.8/package_linux.x86_64.tar.gz
+          rootfs_treehash: "84a323ae8fcc724f8ea5aca5901bbbf4bda3e519"
           uid: 1000
           gid: 1000
           workspaces:

From ede08aa799161f6d74d065436a4ee8dbd09f2deb Mon Sep 17 00:00:00 2001
From: Ian Butterworth <i.r.butterworth@gmail.com>
Date: Sat, 14 Aug 2021 09:25:51 -0400
Subject: [PATCH 20/65] don't look for packages in code that isn't going to be
 run this eval (#41887)

(cherry picked from commit 19629d72702d908407eaa14769def8249e4a1e71)
---
 stdlib/REPL/src/REPL.jl  | 1 +
 stdlib/REPL/test/repl.jl | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index a661ffa218e97..cb28b2dc3b339 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -177,6 +177,7 @@ function check_for_missing_packages_and_run_hooks(ast)
 end
 
 function modules_to_be_loaded(ast, mods = Symbol[])
+    ast.head == :quote && return mods # don't search if it's not going to be run during this eval
     if ast.head in [:using, :import]
         for arg in ast.args
             if first(arg.args) isa Symbol # i.e. `Foo`
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index 8c4ee75850fbf..8d67be4b773d2 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -1355,6 +1355,11 @@ end
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Core"))
         @test isempty(mods)
 
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line(":(using Foo)"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("ex = :(using Foo)"))
+        @test isempty(mods)
+
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("# comment"))
         @test isempty(mods)
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("Foo"))

From 4ef57b32feb0570ff588f3c46a5aced5fc68cfa8 Mon Sep 17 00:00:00 2001
From: Elliot Saba <staticfloat@gmail.com>
Date: Sun, 15 Aug 2021 08:20:19 -0700
Subject: [PATCH 21/65] [LinearAlgebra] flesh out LBT API a bit more (#41452)

This adds `lbt_find_backing_library()`, which is a useful debugging
routine to allow advanced users/package authors to query LBT to
determine which backing BLAS library will service a particular BLAS
call.  It also exposes the "footgun API", which allows users to directly
set/get forwarding on a per-function basis.  Because this has the
ability to generate truly bizarre setups, we do not advertise this
capability broadly (simply using `lbt_forward()` should be enough for
most use cases); however, it's nice to have it wrapped.

(cherry picked from commit b40ae6b79b51c9c83a947e4317e36f957c59ae0b)
---
 stdlib/LinearAlgebra/src/blas.jl |  2 +-
 stdlib/LinearAlgebra/src/lbt.jl  | 74 +++++++++++++++++++++++++++++---
 2 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
index 327beb020901b..c96fc9b965b1e 100644
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ b/stdlib/LinearAlgebra/src/blas.jl
@@ -79,7 +79,7 @@ import LinearAlgebra: BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismat
 include("lbt.jl")
 
 """
-get_config()
+    get_config()
 
 Return an object representing the current `libblastrampoline` configuration.
 
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
index 67ce521a9aa7e..b1a2dc24b3449 100644
--- a/stdlib/LinearAlgebra/src/lbt.jl
+++ b/stdlib/LinearAlgebra/src/lbt.jl
@@ -17,6 +17,7 @@ const LBT_INTERFACE_MAP = Dict(
     LBT_INTERFACE_ILP64   => :ilp64,
     LBT_INTERFACE_UNKNOWN => :unknown,
 )
+const LBT_INV_INTERFACE_MAP = Dict(v => k for (k, v) in LBT_INTERFACE_MAP)
 
 const LBT_F2C_PLAIN         =  0
 const LBT_F2C_REQUIRED      =  1
@@ -26,6 +27,7 @@ const LBT_F2C_MAP = Dict(
     LBT_F2C_REQUIRED => :required,
     LBT_F2C_UNKNOWN  => :unknown,
 )
+const LBT_INV_F2C_MAP = Dict(v => k for (k, v) in LBT_F2C_MAP)
 
 struct LBTLibraryInfo
     libname::String
@@ -164,14 +166,74 @@ function lbt_get_default_func()
     return ccall((:lbt_get_default_func, libblastrampoline), Ptr{Cvoid}, ())
 end
 
-#=
-Don't define footgun API (yet)
+"""
+    lbt_find_backing_library(symbol_name, interface; config::LBTConfig = lbt_get_config())
 
-function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN)
-    return ccall((:lbt_get_forward, libblastrampoline), Ptr{Cvoid}, (Cstring, Int32, Int32), symbol_name, interface, f2c)
+Return the `LBTLibraryInfo` that represents the backing library for the given symbol
+exported from libblastrampoline.  This allows us to discover which library will service
+a particular BLAS call from Julia code.  This method returns `nothing` if either of the
+following conditions are met:
+
+ * No loaded library exports the desired symbol (the default function will be called)
+ * The symbol was set via `lbt_set_forward()`, which does not track library provenance.
+
+If the given `symbol_name` is not contained within the list of exported symbols, an
+`ArgumentError` will be thrown.
+"""
+function lbt_find_backing_library(symbol_name, interface::Symbol;
+                                  config::LBTConfig = lbt_get_config())
+    if interface ∉ (:ilp64, :lp64)  # only these two interface names are accepted
+        throw(ArgumentError("Invalid interface specification: '$(interface)'"))
+    end
+    symbol_idx = findfirst(s -> s == symbol_name, config.exported_symbols)
+    if symbol_idx === nothing
+        throw(ArgumentError("Invalid exported symbol name '$(symbol_name)'"))
+    end
+    # Convert to zero-indexed so the bit position can be derived with div/mod below
+    symbol_idx -= 1
+
+    forward_byte_offset = div(symbol_idx, 8)     # zero-based byte holding this symbol's bit
+    forward_byte_mask = 1 << mod(symbol_idx, 8)  # mask selecting the bit within that byte
+    for lib in filter(l -> l.interface == interface, config.loaded_libs)
+        if lib.active_forwards[forward_byte_offset+1] & forward_byte_mask != 0x00
+            return lib
+        end
+    end
+
+    # No backing library was found among the loaded libraries with this interface
+    return nothing
+end
 
+
+## NOTE: Manually setting forwards is referred to as the 'footgun API'.  It allows truly
+## bizarre and complex setups to be created.  If you run into strange errors while using
+## it, the first thing you should ask yourself is whether you've set things up properly.
 function lbt_set_forward(symbol_name, addr, interface, f2c = LBT_F2C_PLAIN; verbose::Bool = false)
-    return ccall((:lbt_set_forward, libblastrampoline), Int32, (Cstring, Ptr{Cvoid}, Int32, Int32, Int32), symbol_name, addr, interface, f2c, verbose ? 1 : 0)
+    return ccall(
+        (:lbt_set_forward, libblastrampoline),
+        Int32,
+        (Cstring, Ptr{Cvoid}, Int32, Int32, Int32),
+        string(symbol_name),
+        addr,
+        Int32(interface),
+        Int32(f2c),
+        verbose ? Int32(1) : Int32(0),
+    )
+end
+function lbt_set_forward(symbol_name, addr, interface::Symbol, f2c::Symbol = :plain; kwargs...)
+    return lbt_set_forward(symbol_name, addr, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c]; kwargs...)
+end
+
+function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN)
+    return ccall(
+        (:lbt_get_forward, libblastrampoline),
+        Ptr{Cvoid},
+        (Cstring, Int32, Int32),
+        string(symbol_name),
+        Int32(interface),
+        Int32(f2c),
+    )
+end
+function lbt_get_forward(symbol_name, interface::Symbol, f2c::Symbol = :plain)
+    return lbt_get_forward(symbol_name, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c])
 end
-=#
\ No newline at end of file

From 550e2a1fd12120eded78829d93a8fb7e63c5a3fa Mon Sep 17 00:00:00 2001
From: Petr Vana <petvana@centrum.cz>
Date: Mon, 16 Aug 2021 19:33:49 +0200
Subject: [PATCH 22/65] docs: add v1.7 compat for `@atomic` and fix links
 (#41873)

(cherry picked from commit 9ae49bcca34b0ef28c094cc2a3400e1617064589)
---
 base/expr.jl                      | 15 ++++++++++++---
 doc/src/manual/multi-threading.md |  3 +++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/base/expr.jl b/base/expr.jl
index 84b521543111b..817e3618a0dea 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -478,7 +478,7 @@ result into the field in the first argument and return the values `(old, new)`.
 This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` call.
 
 
-See [atomics](#man-atomics) in the manual for more details.
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
 ```jldoctest
 julia> mutable struct Atomic{T}; @atomic x::T; end
@@ -507,6 +507,9 @@ julia> @atomic max(a.x, 10) # change field x of a to the max value, with sequent
 julia> @atomic a.x max 5 # again change field x of a to the max value, with sequential consistency
 (10, 10)
 ```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
 """
 macro atomic(ex)
     if !isa(ex, Symbol) && !is_expr(ex, :(::))
@@ -574,7 +577,7 @@ Stores `new` into `a.b.x` and returns the old value of `a.b.x`.
 
 This operation translates to a `swapproperty!(a.b, :x, new)` call.
 
-See [atomics](#man-atomics) in the manual for more details.
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
 ```jldoctest
 julia> mutable struct Atomic{T}; @atomic x::T; end
@@ -588,6 +591,9 @@ julia> @atomicswap a.x = 2+2 # replace field x of a with 4, with sequential cons
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 4
 ```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
 """
 macro atomicswap(order, ex)
     order isa QuoteNode || (order = esc(order))
@@ -617,7 +623,7 @@ replacement was completed.
 
 This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` call.
 
-See [atomics](#man-atomics) in the manual for more details.
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
 
 ```jldoctest
 julia> mutable struct Atomic{T}; @atomic x::T; end
@@ -642,6 +648,9 @@ julia> @atomicreplace a.x xchg
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 0
 ```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
 """
 macro atomicreplace(success_order, fail_order, ex, old_new)
     fail_order isa QuoteNode || (fail_order = esc(fail_order))
diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md
index 658bec21bbfb9..246fab74f8fe7 100644
--- a/doc/src/manual/multi-threading.md
+++ b/doc/src/manual/multi-threading.md
@@ -249,6 +249,9 @@ orderings (:monotonic, :acquire, :release, :acquire\_release, or
 with an atomic ordering constraint, or will be done with monotonic (relaxed)
 ordering if unspecified.
 
+!!! compat "Julia 1.7"
+    Per-field atomics requires at least Julia 1.7.
+
 
 ## Side effects and mutable function arguments
 

From c06cd488105cd1ea0baf625bf6c303778480d5c0 Mon Sep 17 00:00:00 2001
From: Kristoffer Carlsson <kcarlsson89@gmail.com>
Date: Tue, 17 Aug 2021 10:43:36 +0200
Subject: [PATCH 23/65] ensure that the non-REPL precompile code actually get
 precompiled (#41898)

(cherry picked from commit e8faf9d17b78cdc61146893f71b7902d9dd80d81)
---
 contrib/generate_precompile.jl | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index 278dda2812559..bb760f70ab867 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -250,16 +250,20 @@ function generate_precompile_statements()
               module $pkgname
               end
               """)
-        tmp = tempname()
+        tmp_prec = tempname()
+        tmp_proc = tempname()
         s = """
             pushfirst!(DEPOT_PATH, $(repr(prec_path)));
-            Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp));
+            Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp_prec));
             Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path)))
             $precompile_script
             """
-        run(`$(julia_exepath()) -O0 --sysimage $sysimg --startup-file=no -Cnative -e $s`)
-        for statement in split(read(tmp, String), '\n')
-            push!(statements, statement)
+        run(`$(julia_exepath()) -O0 --sysimage $sysimg --trace-compile=$tmp_proc --startup-file=no -Cnative -e $s`)
+        for f in (tmp_prec, tmp_proc)
+            for statement in split(read(f, String), '\n')
+                occursin("Main.", statement) && continue
+                push!(statements, statement)
+            end
         end
     end
 

From cd1945cdfa48ac2916f1807dcdb904ff18517bd2 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 18 Aug 2021 03:19:23 -0500
Subject: [PATCH 24/65] Fix ~370 invalidations from Expr(:ncat, ...)
 pretty-printing (#41877)

These get invalidated by loading Static.jl, specifically the method
```
Base.convert(::Type{T}, ::StaticInt{N}) where {T<:Number,N} = convert(T, N)
```

(cherry picked from commit 232ee11fe99b89a1f72e462692cb4245a186b9ce)
---
 base/show.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/show.jl b/base/show.jl
index f3465a25d4abb..4e457a2047514 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -1832,7 +1832,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
         elseif head === :hcat || head === :row
             sep = " "
         elseif head === :ncat || head === :nrow
-            sep = ";"^args[1] * " "
+            sep = ";"^args[1]::Int * " "
             args = args[2:end]
             nargs = nargs - 1
         else

From 83bf08280a98b9ceb3bb9457cea8ebd0307a9871 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 18 Aug 2021 03:25:18 -0500
Subject: [PATCH 25/65] Fix ~50 invalidations stemming from
 `modules_to_be_loaded` (#41878)

ChainRulesCore defines `==(a, b::AbstractThunk)` and its converse,
and these end up invalidating parts of the REPL (including `eval_user_input`)
via inference failures in `modules_to_be_loaded`.

Co-authored-by: Jameson Nash <vtjnash@gmail.com>

(cherry picked from commit 7a6336d41a4064c0bf3649fb3b1a2ec3531a7dd8)
---
 stdlib/REPL/src/REPL.jl  | 14 ++++++++------
 stdlib/REPL/test/repl.jl |  2 --
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index cb28b2dc3b339..3e2770ab8f4c7 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -167,6 +167,7 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend)
 end
 
 function check_for_missing_packages_and_run_hooks(ast)
+    isa(ast, Expr) || return
     mods = modules_to_be_loaded(ast)
     filter!(mod -> isnothing(Base.identify_package(String(mod))), mods) # keep missing modules
     if !isempty(mods)
@@ -176,16 +177,18 @@ function check_for_missing_packages_and_run_hooks(ast)
     end
 end
 
-function modules_to_be_loaded(ast, mods = Symbol[])
+function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
     ast.head == :quote && return mods # don't search if it's not going to be run during this eval
     if ast.head in [:using, :import]
         for arg in ast.args
-            if first(arg.args) isa Symbol # i.e. `Foo`
-                if first(arg.args) != :. # don't include local imports
-                    push!(mods, first(arg.args))
+            arg = arg::Expr
+            arg1 = first(arg.args)
+            if arg1 isa Symbol # i.e. `Foo`
+                if arg1 != :. # don't include local imports
+                    push!(mods, arg1)
                 end
             else # i.e. `Foo: bar`
-                push!(mods, first(first(arg.args).args))
+                push!(mods, first((arg1::Expr).args))
             end
         end
     end
@@ -195,7 +198,6 @@ function modules_to_be_loaded(ast, mods = Symbol[])
     filter!(mod -> !in(String(mod), ["Base", "Main", "Core"]), mods) # Exclude special non-package modules
     return mods
 end
-modules_to_be_loaded(::Nothing) = Symbol[] # comments are parsed as nothing
 
 """
     start_repl_backend(repl_channel::Channel, response_channel::Channel)
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index 8d67be4b773d2..6724eb5e13ac3 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -1360,8 +1360,6 @@ end
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("ex = :(using Foo)"))
         @test isempty(mods)
 
-        mods = REPL.modules_to_be_loaded(Base.parse_input_line("# comment"))
-        @test isempty(mods)
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("Foo"))
         @test isempty(mods)
     end

From d71bd325e69826e70ef246db12b8bffd32de89c2 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Thu, 19 Aug 2021 12:41:32 -0400
Subject: [PATCH 26/65] small optimization to subtyping (#41672)

Zero and copy only the used portion of the union state buffer.

(cherry picked from commit 0258553a82aba0a609978d1719e05a20ebdf4826)
---
 src/subtype.c | 101 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 65 insertions(+), 36 deletions(-)

diff --git a/src/subtype.c b/src/subtype.c
index 158a9dd70b3f3..152d17daeaaaa 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -42,11 +42,19 @@ extern "C" {
 // TODO: the stack probably needs to be artificially large because of some
 // deeper problem (see #21191) and could be shrunk once that is fixed
 typedef struct {
-    int depth;
-    int more;
+    int16_t depth;
+    int16_t more;
+    int16_t used;
     uint32_t stack[100];  // stack of bits represented as a bit vector
 } jl_unionstate_t;
 
+typedef struct {
+    int16_t depth;
+    int16_t more;
+    int16_t used;
+    void *stack;
+} jl_saved_unionstate_t;
+
 // Linked list storing the type variable environment. A new jl_varbinding_t
 // is pushed for each UnionAll type we encounter. `lb` and `ub` are updated
 // during the computation.
@@ -68,14 +76,14 @@ typedef struct jl_varbinding_t {
     //                     and we would need to return `intersect(var,other)`. in this case
     //                     we choose to over-estimate the intersection by returning the var.
     int8_t constraintkind;
-    int depth0;         // # of invariant constructors nested around the UnionAll type for this var
+    int8_t intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
+    int16_t depth0;         // # of invariant constructors nested around the UnionAll type for this var
     // when this variable's integer value is compared to that of another,
     // it equals `other + offset`. used by vararg length parameters.
-    int offset;
+    int16_t offset;
     // array of typevars that our bounds depend on, whose UnionAlls need to be
     // moved outside ours.
     jl_array_t *innervars;
-    int intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
     struct jl_varbinding_t *prev;
 } jl_varbinding_t;
 
@@ -129,6 +137,23 @@ static void statestack_set(jl_unionstate_t *st, int i, int val) JL_NOTSAFEPOINT
         st->stack[i>>5] &= ~(1u<<(i&31));
 }
 
+#define push_unionstate(saved, src)                                     \
+    do {                                                                \
+        (saved)->depth = (src)->depth;                                  \
+        (saved)->more = (src)->more;                                    \
+        (saved)->used = (src)->used;                                    \
+        (saved)->stack = alloca(((src)->used+7)/8);                     \
+        memcpy((saved)->stack, &(src)->stack, ((src)->used+7)/8);       \
+    } while (0);
+
+#define pop_unionstate(dst, saved)                                      \
+    do {                                                                \
+        (dst)->depth = (saved)->depth;                                  \
+        (dst)->more = (saved)->more;                                    \
+        (dst)->used = (saved)->used;                                    \
+        memcpy(&(dst)->stack, (saved)->stack, ((saved)->used+7)/8);     \
+    } while (0);
+
 typedef struct {
     int8_t *buf;
     int rdepth;
@@ -486,6 +511,10 @@ static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv
 {
     jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
     do {
+        if (state->depth >= state->used) {
+            statestack_set(state, state->used, 0);
+            state->used++;
+        }
         int ui = statestack_get(state, state->depth);
         state->depth++;
         if (ui == 0) {
@@ -514,11 +543,10 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         return 1;
     if (x == (jl_value_t*)jl_any_type && jl_is_datatype(y))
         return 0;
-    jl_unionstate_t oldLunions = e->Lunions;
-    jl_unionstate_t oldRunions = e->Runions;
+    jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
+    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int sub;
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Lunions.used = e->Runions.used = 0;
     e->Runions.depth = 0;
     e->Runions.more = 0;
     e->Lunions.depth = 0;
@@ -526,8 +554,8 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 
     sub = forall_exists_subtype(x, y, e, 0);
 
-    e->Runions = oldRunions;
-    e->Lunions = oldLunions;
+    pop_unionstate(&e->Runions, &oldRunions);
+    pop_unionstate(&e->Lunions, &oldLunions);
     return sub;
 }
 
@@ -731,8 +759,8 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
 static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
     u = unalias_unionall(u, e);
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, 0, e->vars };
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0,
+                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
     int ans;
@@ -1148,6 +1176,10 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
             // union against the variable before trying to take it apart to see if there are any
             // variables lurking inside.
             jl_unionstate_t *state = &e->Runions;
+            if (state->depth >= state->used) {
+                statestack_set(state, state->used, 0);
+                state->used++;
+            }
             ui = statestack_get(state, state->depth);
             state->depth++;
             if (ui == 0)
@@ -1310,13 +1342,13 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         (is_definite_length_tuple_type(x) && is_indefinite_length_tuple_type(y)))
         return 0;
 
-    jl_unionstate_t oldLunions = e->Lunions;
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
+    jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
+    e->Lunions.used = 0;
     int sub;
 
     if (!jl_has_free_typevars(x) || !jl_has_free_typevars(y)) {
-        jl_unionstate_t oldRunions = e->Runions;
-        memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
+        e->Runions.used = 0;
         e->Runions.depth = 0;
         e->Runions.more = 0;
         e->Lunions.depth = 0;
@@ -1324,7 +1356,7 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 
         sub = forall_exists_subtype(x, y, e, 2);
 
-        e->Runions = oldRunions;
+        pop_unionstate(&e->Runions, &oldRunions);
     }
     else {
         int lastset = 0;
@@ -1342,13 +1374,13 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
     }
 
-    e->Lunions = oldLunions;
+    pop_unionstate(&e->Lunions, &oldLunions);
     return sub && subtype(y, x, e, 0);
 }
 
 static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_value_t *saved, jl_savedenv_t *se, int param)
 {
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Runions.used = 0;
     int lastset = 0;
     while (1) {
         e->Runions.depth = 0;
@@ -1379,7 +1411,7 @@ static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, in
     JL_GC_PUSH1(&saved);
     save_env(e, &saved, &se);
 
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
+    e->Lunions.used = 0;
     int lastset = 0;
     int sub;
     while (1) {
@@ -1415,6 +1447,7 @@ static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz)
     e->emptiness_only = 0;
     e->Lunions.depth = 0;      e->Runions.depth = 0;
     e->Lunions.more = 0;       e->Runions.more = 0;
+    e->Lunions.used = 0;       e->Runions.used = 0;
 }
 
 // subtyping entry points
@@ -2084,14 +2117,14 @@ static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e,
     if (y == (jl_value_t*)jl_any_type && !jl_is_typevar(x))
         return x;
 
-    jl_unionstate_t oldRunions = e->Runions;
+    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int savedepth = e->invdepth, Rsavedepth = e->Rinvdepth;
     // TODO: this doesn't quite make sense
     e->invdepth = e->Rinvdepth = d;
 
     jl_value_t *res = intersect_all(x, y, e);
 
-    e->Runions = oldRunions;
+    pop_unionstate(&e->Runions, &oldRunions);
     e->invdepth = savedepth;
     e->Rinvdepth = Rsavedepth;
     return res;
@@ -2102,10 +2135,10 @@ static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t
     if (param == 2 || (!jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u))) {
         jl_value_t *a=NULL, *b=NULL;
         JL_GC_PUSH2(&a, &b);
-        jl_unionstate_t oldRunions = e->Runions;
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
         a = R ? intersect_all(x, u->a, e) : intersect_all(u->a, x, e);
         b = R ? intersect_all(x, u->b, e) : intersect_all(u->b, x, e);
-        e->Runions = oldRunions;
+        pop_unionstate(&e->Runions, &oldRunions);
         jl_value_t *i = simple_join(a,b);
         JL_GC_POP();
         return i;
@@ -2600,8 +2633,8 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
 {
     jl_value_t *res=NULL, *res2=NULL, *save=NULL, *save2=NULL;
     jl_savedenv_t se, se2;
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, 0, e->vars };
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0,
+                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
     JL_GC_PUSH6(&res, &save2, &vb.lb, &vb.ub, &save, &vb.innervars);
     save_env(e, &save, &se);
     res = intersect_unionall_(t, u, e, R, param, &vb);
@@ -3159,7 +3192,7 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
     e->Runions.depth = 0;
     e->Runions.more = 0;
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Runions.used = 0;
     jl_value_t **is;
     JL_GC_PUSHARGS(is, 3);
     jl_value_t **saved = &is[2];
@@ -3176,11 +3209,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         save_env(e, saved, &se);
     }
     while (e->Runions.more) {
-        if (e->emptiness_only && ii != jl_bottom_type) {
-            free_env(&se);
-            JL_GC_POP();
-            return ii;
-        }
+        if (e->emptiness_only && ii != jl_bottom_type)
+            break;
         e->Runions.depth = 0;
         int set = e->Runions.more - 1;
         e->Runions.more = 0;
@@ -3209,9 +3239,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
         total_iter++;
         if (niter > 3 || total_iter > 400000) {
-            free_env(&se);
-            JL_GC_POP();
-            return y;
+            ii = y;
+            break;
         }
     }
     free_env(&se);

From 3fa1bfc6d44edfd215f8b86d118d8288df4f8af6 Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Sun, 22 Aug 2021 22:42:24 -0400
Subject: [PATCH 27/65] README: add a link to the `base-buildkite-docs`
 repository (#41956)

(cherry picked from commit 310bf160e026f3867119bef7d14724279e4eec60)
---
 .buildkite/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.buildkite/README.md b/.buildkite/README.md
index 12887536b90c1..b3f74f2b23137 100644
--- a/.buildkite/README.md
+++ b/.buildkite/README.md
@@ -3,3 +3,5 @@
 This directory contains the Buildkite configuration files for Base Julia CI.
 
 The rootfs image definitions are located in the [rootfs-images](https://github.com/JuliaCI/rootfs-images) repository.
+
+The documentation for the Base Julia CI setup is located in the [base-buildkite-docs](https://github.com/JuliaCI/base-buildkite-docs) repository.

From 959d1fa717c3e92dbb343599908eb8ecece55327 Mon Sep 17 00:00:00 2001
From: Tim Besard <tim.besard@gmail.com>
Date: Mon, 23 Aug 2021 09:12:51 +0200
Subject: [PATCH 28/65] Derive better name for methods with external method
 tables. (#41930)

(cherry picked from commit 6e1bae4b4e07c31bc563415a8c3480f17b27dcae)
---
 src/interpreter.c | 1 +
 src/method.c      | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/interpreter.c b/src/interpreter.c
index 7858bd6ddc4ea..e169f9f829a63 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -79,6 +79,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
 {
     jl_value_t **args = jl_array_ptr_data(ex->args);
 
+    // generic function definition
     if (jl_expr_nargs(ex) == 1) {
         jl_value_t **args = jl_array_ptr_data(ex->args);
         jl_sym_t *fname = (jl_sym_t*)args[0];
diff --git a/src/method.c b/src/method.c
index 48b074e800904..22145a4349853 100644
--- a/src/method.c
+++ b/src/method.c
@@ -831,7 +831,7 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
 
     // TODO: derive our debug name from the syntax instead of the type
     name = mt->name;
-    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt) {
+    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || external_mt) {
         // our value for `name` is bad, try to guess what the syntax might have had,
         // like `jl_static_show_func_sig` might have come up with
         jl_datatype_t *dt = jl_first_argument_datatype(argtype);

From 9dd394d8a55514ae5dde1b7e639ea6129891f917 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Mon, 23 Aug 2021 14:46:34 -0500
Subject: [PATCH 29/65] Signatures: restrict color to backtrace printing
 (#41929)

Fixes #41928

(cherry picked from commit c88db4e32a8807861825a31cfe176d5ffad058b1)
---
 base/show.jl | 4 +++-
 test/show.jl | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/base/show.jl b/base/show.jl
index 4e457a2047514..1f75c5481a86a 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -2378,7 +2378,9 @@ end
 function print_type_stacktrace(io, type; color=:normal)
     str = sprint(show, type, context=io)
     i = findfirst('{', str)
-    if i === nothing || !get(io, :backtrace, false)::Bool
+    if !get(io, :backtrace, false)::Bool
+        print(io, str)
+    elseif i === nothing
         printstyled(io, str; color=color)
     else
         printstyled(io, str[1:prevind(str,i)]; color=color)
diff --git a/test/show.jl b/test/show.jl
index c00dcf523898c..ed86a5b1dedb6 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -1377,6 +1377,11 @@ let m = which(T20332{Int}(), (Int,)),
     mi = Core.Compiler.specialize_method(m, Tuple{T20332{T}, Int} where T, Core.svec())
     # test that this doesn't throw an error
     @test occursin("MethodInstance for", repr(mi))
+    # issue #41928
+    str = sprint(mi; context=:color=>true) do io, mi
+        printstyled(io, mi; color=:light_cyan)
+    end
+    @test !occursin("\U1b[0m", str)
 end
 
 @test sprint(show, Main) == "Main"

From 9bed4bdf5ecab627d5f7444a8746cb6cd64eeea3 Mon Sep 17 00:00:00 2001
From: Jakob Nybo Nissen <jakobnybonissen@gmail.com>
Date: Wed, 25 Aug 2021 00:42:41 +0200
Subject: [PATCH 30/65] Fix collect on stateful generator (#41919)

Previously this code would drop 1 from the length of some generators.

Fixes #35530

(cherry picked from commit 8364a4ccd8885fa8d8c78094c7653c58e33d9f0d)
---
 base/array.jl     | 10 +++++++---
 test/iterators.jl | 11 ++++++++++-
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/base/array.jl b/base/array.jl
index dedc717b056d3..d629064777ce0 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -698,8 +698,10 @@ else
     end
 end
 
-_array_for(::Type{T}, itr, ::HasLength) where {T} = Vector{T}(undef, Int(length(itr)::Integer))
-_array_for(::Type{T}, itr, ::HasShape{N}) where {T,N} = similar(Array{T,N}, axes(itr))
+_array_for(::Type{T}, itr, isz::HasLength) where {T} = _array_for(T, itr, isz, length(itr))
+_array_for(::Type{T}, itr, isz::HasShape{N}) where {T,N} = _array_for(T, itr, isz, axes(itr))
+_array_for(::Type{T}, itr, ::HasLength, len) where {T} = Vector{T}(undef, len)
+_array_for(::Type{T}, itr, ::HasShape{N}, axs) where {T,N} = similar(Array{T,N}, axs)
 
 function collect(itr::Generator)
     isz = IteratorSize(itr.iter)
@@ -707,12 +709,14 @@ function collect(itr::Generator)
     if isa(isz, SizeUnknown)
         return grow_to!(Vector{et}(), itr)
     else
+        shape = isz isa HasLength ? length(itr) : axes(itr)
         y = iterate(itr)
         if y === nothing
             return _array_for(et, itr.iter, isz)
         end
         v1, st = y
-        collect_to_with_first!(_array_for(typeof(v1), itr.iter, isz), v1, itr, st)
+        arr = _array_for(typeof(v1), itr.iter, isz, shape)
+        return collect_to_with_first!(arr, v1, itr, st)
     end
 end
 
diff --git a/test/iterators.jl b/test/iterators.jl
index fb8edcab92209..c7d00c4e7e2e8 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -292,6 +292,15 @@ let (a, b) = (1:3, [4 6;
     end
 end
 
+# collect stateful iterator
+let
+    itr = (i+1 for i in Base.Stateful([1,2,3]))
+    @test collect(itr) == [2, 3, 4]
+    A = zeros(Int, 0, 0)
+    itr = (i-1 for i in Base.Stateful(A))
+    @test collect(itr) == Int[] # Stateful do not preserve shape
+end
+
 # with 1D inputs
 let a = 1:2,
     b = 1.0:10.0,
@@ -860,4 +869,4 @@ end
     @test Iterators.peel(1:10)[2] |> collect == 2:10
     @test Iterators.peel(x^2 for x in 2:4)[1] == 4
     @test Iterators.peel(x^2 for x in 2:4)[2] |> collect == [9, 16]
-end
\ No newline at end of file
+end

From 7a79ed1e9f139d349c64d7a2bd88a462cdfe90fd Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Tue, 24 Aug 2021 20:50:27 -0400
Subject: [PATCH 31/65] fix a case where Vararg.T might be accessed when
 undefined (#41867)

(cherry picked from commit 7a784de2bf4df9e931fa868b22231df9cdb01801)
---
 base/compiler/typelimits.jl | 2 +-
 test/compiler/inference.jl  | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl
index 3145517630958..23045c65cc6bb 100644
--- a/base/compiler/typelimits.jl
+++ b/base/compiler/typelimits.jl
@@ -130,7 +130,7 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
     elseif isa(t, DataType)
         if isa(c, Core.TypeofVararg)
             # Tuple{Vararg{T}} --> Tuple{T} is OK
-            return _limit_type_size(t, c.T, sources, depth, 0)
+            return _limit_type_size(t, unwrapva(c), sources, depth, 0)
         elseif isType(t) # allow taking typeof as Type{...}, but ensure it doesn't start nesting
             tt = unwrap_unionall(t.parameters[1])
             (!isa(tt, DataType) || isType(tt)) && (depth += 1)
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 4d7aed372f6c8..481fd84b87932 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -44,6 +44,11 @@ let t = Tuple{Ref{T},T,T} where T, c = Tuple{Ref, T, T} where T # #36407
     @test t <: Core.Compiler.limit_type_size(t, c, Union{}, 1, 100)
 end
 
+# obtain Vararg with 2 undefined fields
+let va = ccall(:jl_type_intersection_with_env, Any, (Any, Any), Tuple{Tuple}, Tuple{Tuple{Vararg{Any, N}}} where N)[2][1]
+    @test Core.Compiler.limit_type_size(Tuple, va, Union{}, 2, 2) === Any
+end
+
 let # 40336
     t = Type{Type{Int}}
     c = Type{Int}

From f3d2bca038b8b7ab03edf20018e2f86937926780 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Tue, 24 Aug 2021 22:27:10 -0400
Subject: [PATCH 32/65] fix #41908, inference error in subst_trivial_bounds
 (#41976)

Co-authored-by: Martin Holters <martin.holters@hsu-hh.de>
(cherry picked from commit e2aeefb60eaffbd2807089155789e19585f4e6fe)
---
 base/compiler/utilities.jl | 10 +++++++++-
 test/compiler/inference.jl |  5 +++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index 3b84395c676d2..45c66a773815f 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -155,7 +155,15 @@ function subst_trivial_bounds(@nospecialize(atypes))
     end
     v = atypes.var
     if isconcretetype(v.ub) || v.lb === v.ub
-        return subst_trivial_bounds(atypes{v.ub})
+        subst = try
+            atypes{v.ub}
+        catch
+            # Note in rare cases a var bound might not be valid to substitute.
+            nothing
+        end
+        if subst !== nothing
+            return subst_trivial_bounds(subst)
+        end
     end
     return UnionAll(v, subst_trivial_bounds(atypes.body))
 end
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 481fd84b87932..2e3dd0b45f875 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -3411,3 +3411,8 @@ end
         @test @inferred(f40177(T)) == fieldtype(T, 1)
     end
 end
+
+# issue #41908
+f41908(x::Complex{T}) where {String<:T<:String} = 1
+g41908() = f41908(Any[1][1])
+@test only(Base.return_types(g41908, ())) <: Int

From a2160419d3640a062d7eb4d1f294c8d3755e9aa5 Mon Sep 17 00:00:00 2001
From: Philipp Gabler <phipsgabler@users.noreply.github.com>
Date: Wed, 11 Aug 2021 22:01:06 +0200
Subject: [PATCH 33/65] Fix firstindex in replace_ref_begin_end (fixes #41630)
 (#41695)

(cherry picked from commit 2ebbb2b3a0b54eae66549bd058ae334cb3642e50)
---
 base/views.jl       |  2 +-
 test/offsetarray.jl | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/base/views.jl b/base/views.jl
index f60dc04094a43..e26359a5c9fd7 100644
--- a/base/views.jl
+++ b/base/views.jl
@@ -42,7 +42,7 @@ function replace_ref_begin_end_!(ex, withex)
                 n = 1
                 J = lastindex(ex.args)
                 for j = 2:J
-                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S)),:($lastindex($S,$n))))
+                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S,$n)),:($lastindex($S,$n))))
                     used_S |= used
                     ex.args[j] = exj
                     if isa(exj,Expr) && exj.head === :...
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index 811d3dd26f509..deeb80f019510 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -795,3 +795,24 @@ end
     @test Iterators.partition(OffsetArray(reshape(collect(1:9),3,3), (3,3)), 5) |> collect == [1:5,6:9] #OffsetMatrix
     @test Iterators.partition(IdOffsetRange(2:7,10), 5) |> collect == [12:16,17:17] # IdOffsetRange
 end
+
+@testset "issue #41630: replace_ref_begin_end!/@view on offset-like arrays" begin
+    x = OffsetArray([1 2; 3 4], -10:-9, 9:10)  # 2×2 OffsetArray{...} with indices -10:-9×9:10
+
+    # begin/end with offset indices
+    @test (@view x[begin, 9])[] == 1
+    @test (@view x[-10, end])[] == 2
+    @test (@view x[-9, begin])[] == 3
+    @test (@view x[end, 10])[] == 4
+    @test (@view x[begin, begin])[] == 1
+    @test (@view x[begin, end])[] == 2
+    @test (@view x[end, begin])[] == 3
+    @test (@view x[end, end])[] == 4
+
+    # nested usages of begin/end
+    y = OffsetArray([-10, -9], (5,))
+    @test (@view x[begin, -y[end]])[] == 1
+    @test (@view x[y[begin], end])[] == 2
+    @test (@view x[end, -y[end]])[] == 3
+    @test (@view x[y[end], end])[] == 4
+end

From 3da1d17439414cac5e015a16b0d698eafb4b20c9 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Tue, 10 Aug 2021 17:31:58 -0400
Subject: [PATCH 34/65] atomics: switch to using exact types for return pairs
 (#41659)

This makes many more functions type-stable, by directly preserving the
element type when making the copy, and prints as `old => new`, like the
argument to atomic replace.

For modify, we use Pair{FT, FT} (like the argument to replace). For replace,
we use NamedTuple{(:old, :success), Tuple{FT, Bool}}.

(cherry picked from commit cc3fc0b27b4793b231ea537795cc5f3fbff7dbc2)
---
 base/Base.jl              |  3 ++
 base/boot.jl              | 14 ++++++++-
 base/compiler/tfuncs.jl   | 64 ++++++++++++++++++++++++++++++---------
 base/expr.jl              | 12 ++++----
 base/pair.jl              | 13 --------
 src/cgutils.cpp           | 17 +++++------
 src/codegen.cpp           |  1 +
 src/datatype.c            | 47 ++++++++++++----------------
 src/gc.c                  |  3 ++
 src/init.c                |  1 +
 src/jl_exported_data.inc  |  1 +
 src/jl_exported_funcs.inc |  4 ++-
 src/jltypes.c             | 44 +++++++++++++++++++++------
 src/julia.h               |  5 ++-
 src/runtime_intrinsics.c  | 27 ++++++++++-------
 src/staticdata.c          |  3 +-
 test/atomics.jl           | 60 +++++++++++++++++++-----------------
 test/intrinsics.jl        | 24 ++++++++-------
 test/show.jl              |  2 +-
 19 files changed, 210 insertions(+), 135 deletions(-)

diff --git a/base/Base.jl b/base/Base.jl
index 42a506479326b..09fdcb1689de3 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -107,6 +107,9 @@ include("options.jl")
 include("promotion.jl")
 include("tuple.jl")
 include("expr.jl")
+Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B} = (@_inline_meta; Pair{A, B}(convert(A, a)::A, convert(B, b)::B))
+#Pair{Any, B}(@nospecialize(a::Any), b) where {B} = (@_inline_meta; Pair{Any, B}(a, Base.convert(B, b)::B))
+#Pair{A, Any}(a, @nospecialize(b::Any)) where {A} = (@_inline_meta; Pair{A, Any}(Base.convert(A, a)::A, b))
 include("pair.jl")
 include("traits.jl")
 include("range.jl")
diff --git a/base/boot.jl b/base/boot.jl
index dcf62a0b9cab2..32ededb12be8d 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -171,7 +171,7 @@ export
     # key types
     Any, DataType, Vararg, NTuple,
     Tuple, Type, UnionAll, TypeVar, Union, Nothing, Cvoid,
-    AbstractArray, DenseArray, NamedTuple,
+    AbstractArray, DenseArray, NamedTuple, Pair,
     # special objects
     Function, Method,
     Module, Symbol, Task, Array, UndefInitializer, undef, WeakRef, VecElement,
@@ -813,4 +813,16 @@ _parse = nothing
 # support for deprecated uses of internal _apply function
 _apply(x...) = Core._apply_iterate(Main.Base.iterate, x...)
 
+struct Pair{A, B}
+    first::A
+    second::B
+    # if we didn't inline this, it's probably because the callsite was actually dynamic
+    # to avoid potentially compiling many copies of this, we mark the arguments with `@nospecialize`
+    # but also mark the whole function with `@inline` to ensure we will inline it whenever possible
+    # (even if `convert(::Type{A}, a::A)` for some reason was expensive)
+    Pair(a, b) = new{typeof(a), typeof(b)}(a, b)
+    Pair{A, B}(a::A, b::B) where {A, B} = new(a, b)
+    Pair{Any, Any}(@nospecialize(a::Any), @nospecialize(b::Any)) = new(a, b)
+end
+
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true)
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index c31b32428a297..e270555426b42 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -466,29 +466,51 @@ add_tfunc(Core._typevar, 3, 3, typevar_tfunc, 100)
 add_tfunc(applicable, 1, INT_INF, (@nospecialize(f), args...)->Bool, 100)
 add_tfunc(Core.Intrinsics.arraylen, 1, 1, @nospecialize(x)->Int, 4)
 add_tfunc(arraysize, 2, 2, (@nospecialize(a), @nospecialize(d))->Int, 4)
+
 function pointer_eltype(@nospecialize(ptr))
     a = widenconst(ptr)
-    if a <: Ptr
-        if isa(a, DataType) && isa(a.parameters[1], Type)
-            return a.parameters[1]
-        elseif isa(a, UnionAll) && !has_free_typevars(a)
-            unw = unwrap_unionall(a)
-            if isa(unw, DataType)
-                return rewrap_unionall(unw.parameters[1], a)
-            end
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            T isa Type && return rewrap_unionall(T, a)
         end
     end
     return Any
 end
+function atomic_pointermodify_tfunc(ptr, op, v, order)
+    @nospecialize
+    a = widenconst(ptr)
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            # note: we could sometimes refine this to a PartialStruct if we analyzed `op(T, T)::T`
+            T isa Type && return rewrap_unionall(Pair{T, T}, a)
+        end
+    end
+    return Pair
+end
+function atomic_pointerreplace_tfunc(ptr, x, v, success_order, failure_order)
+    @nospecialize
+    a = widenconst(ptr)
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            T isa Type && return rewrap_unionall(ccall(:jl_apply_cmpswap_type, Any, (Any,), T), a)
+        end
+    end
+    return ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+end
 add_tfunc(pointerref, 3, 3, (a, i, align) -> (@nospecialize; pointer_eltype(a)), 4)
 add_tfunc(pointerset, 4, 4, (a, v, i, align) -> (@nospecialize; a), 5)
-
 add_tfunc(atomic_fence, 1, 1, (order) -> (@nospecialize; Nothing), 4)
 add_tfunc(atomic_pointerref, 2, 2, (a, order) -> (@nospecialize; pointer_eltype(a)), 4)
 add_tfunc(atomic_pointerset, 3, 3, (a, v, order) -> (@nospecialize; a), 5)
 add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_eltype(a)), 5)
-add_tfunc(atomic_pointermodify, 4, 4, (a, op, v, order) -> (@nospecialize; T = pointer_eltype(a); Tuple{T, T}), 5)
-add_tfunc(atomic_pointerreplace, 5, 5, (a, x, v, success_order, failure_order) -> (@nospecialize; Tuple{pointer_eltype(a), Bool}), 5)
+add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5)
+add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5)
 
 # more accurate typeof_tfunc for vararg tuples abstract only in length
 function typeof_concrete_vararg(t::DataType)
@@ -911,11 +933,25 @@ setfield!_tfunc(o, f, v) = (@nospecialize; v)
 
 swapfield!_tfunc(o, f, v, order) = (@nospecialize; getfield_tfunc(o, f))
 swapfield!_tfunc(o, f, v) = (@nospecialize; getfield_tfunc(o, f))
-modifyfield!_tfunc(o, f, op, v, order) = (@nospecialize; T = getfield_tfunc(o, f); T === Bottom ? T : Tuple{T, T})
-modifyfield!_tfunc(o, f, op, v) = (@nospecialize; T = getfield_tfunc(o, f); T === Bottom ? T : Tuple{T, T}) # TODO: also model op(o.f, v) call
+modifyfield!_tfunc(o, f, op, v, order) = (@nospecialize; modifyfield!_tfunc(o, f, op, v))
+function modifyfield!_tfunc(o, f, op, v)
+    @nospecialize
+    T = _fieldtype_tfunc(o, isconcretetype(o), f)
+    T === Bottom && return Bottom
+    # note: we could sometimes refine this to a PartialStruct if we analyzed `op(o.f, v)::T`
+    PT = Const(Pair)
+    return instanceof_tfunc(apply_type_tfunc(PT, T, T))[1]
+end
 replacefield!_tfunc(o, f, x, v, success_order, failure_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
 replacefield!_tfunc(o, f, x, v, success_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
-replacefield!_tfunc(o, f, x, v) = (@nospecialize; T = getfield_tfunc(o, f); T === Bottom ? T : Tuple{widenconst(T), Bool})
+function replacefield!_tfunc(o, f, x, v)
+    @nospecialize
+    T = _fieldtype_tfunc(o, isconcretetype(o), f)
+    T === Bottom && return Bottom
+    PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T)
+    return instanceof_tfunc(apply_type_tfunc(PT, T))[1]
+end
+
 # we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial
 
 add_tfunc(getfield, 2, 4, getfield_tfunc, 1)
diff --git a/base/expr.jl b/base/expr.jl
index 817e3618a0dea..2bc59717fea47 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -496,16 +496,16 @@ julia> @atomic a.x += 1 # increment field x of a, with sequential consistency
 3
 
 julia> @atomic a.x + 1 # increment field x of a, with sequential consistency
-(3, 4)
+3 => 4
 
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 4
 
 julia> @atomic max(a.x, 10) # change field x of a to the max value, with sequential consistency
-(4, 10)
+4 => 10
 
 julia> @atomic a.x max 5 # again change field x of a to the max value, with sequential consistency
-(10, 10)
+10 => 10
 ```
 
 !!! compat "Julia 1.7"
@@ -632,18 +632,18 @@ julia> a = Atomic(1)
 Atomic{Int64}(1)
 
 julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
-(1, true)
+(old = 1, success = true)
 
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 2
 
 julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
-(2, false)
+(old = 2, success = false)
 
 julia> xchg = 2 => 0; # replace field x of a with 0 if it was 1, with sequential consistency
 
 julia> @atomicreplace a.x xchg
-(2, true)
+(old = 2, success = true)
 
 julia> @atomic a.x # fetch field x of a, with sequential consistency
 0
diff --git a/base/pair.jl b/base/pair.jl
index 7481d50b7458b..b5dffbb4e7e86 100644
--- a/base/pair.jl
+++ b/base/pair.jl
@@ -1,18 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-struct Pair{A, B}
-    first::A
-    second::B
-    function Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B}
-        @_inline_meta
-        # if we didn't inline this, it's probably because the callsite was actually dynamic
-        # to avoid potentially compiling many copies of this, we mark the arguments with `@nospecialize`
-        # but also mark the whole function with `@inline` to ensure we will inline it whenever possible
-        # (even if `convert(::Type{A}, a::A)` for some reason was expensive)
-        return new(a, b)
-    end
-end
-Pair(a, b) = Pair{typeof(a), typeof(b)}(a, b)
 const => = Pair
 
 """
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 4ae6c4b21594a..288e98e9a712b 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1560,9 +1560,8 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             Value *Success = emit_f_is(ctx, cmp, ghostValue(jltype));
             Success = ctx.builder.CreateZExt(Success, T_int8);
             jl_cgval_t argv[2] = {ghostValue(jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
-            // TODO: do better here
-            Value *instr = emit_jlcall(ctx, jltuple_func, V_rnull, argv, 2, JLCALL_F_CC);
-            return mark_julia_type(ctx, instr, true, jl_any_type);
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
         else {
             return ghostValue(jltype);
@@ -1803,10 +1802,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         }
         oldval = mark_julia_type(ctx, instr, isboxed, jltype);
         if (isreplacefield) {
-            // TODO: do better here
+            Success = ctx.builder.CreateZExt(Success, T_int8);
             jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
-            instr = emit_jlcall(ctx, jltuple_func, V_rnull, argv, 2, JLCALL_F_CC);
-            oldval = mark_julia_type(ctx, instr, true, jl_any_type);
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
     }
     return oldval;
@@ -3247,10 +3246,10 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         if (needlock)
             emit_lockstate_value(ctx, strct, false);
         if (isreplacefield) {
+            Success = ctx.builder.CreateZExt(Success, T_int8);
             jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
-            // TODO: do better here
-            Value *instr = emit_jlcall(ctx, jltuple_func, V_rnull, argv, 2, JLCALL_F_CC);
-            oldval = mark_julia_type(ctx, instr, true, jl_any_type);
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
         return oldval;
     }
diff --git a/src/codegen.cpp b/src/codegen.cpp
index e6505f7b67870..1cc26dee22f1e 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -1166,6 +1166,7 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *t
                              jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv);
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
 static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
diff --git a/src/datatype.c b/src/datatype.c
index 1a3ffa78170ac..aecbe6f407ae6 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -953,18 +953,13 @@ JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expect
     return success;
 }
 
-JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
+JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettyp, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
 {
     // dst must have the required alignment for an atomic of the given size
     // n.b.: this does not spuriously fail if there are padding bits
-    jl_value_t *params[2];
-    params[0] = (jl_value_t*)dt;
-    params[1] = (jl_value_t*)jl_bool_type;
-    jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
-    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
-    int isptr = jl_field_isptr(tuptyp, 0);
     jl_task_t *ct = jl_current_task;
-    jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : tuptyp->size, isptr ? dt : tuptyp);
+    int isptr = jl_field_isptr(rettyp, 0);
+    jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : rettyp->size, isptr ? dt : rettyp);
     int success;
     jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected);
     if (nb == 0) {
@@ -1053,7 +1048,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
     }
     if (isptr) {
         JL_GC_PUSH1(&y);
-        jl_value_t *z = jl_gc_alloc(ct->ptls, tuptyp->size, tuptyp);
+        jl_value_t *z = jl_gc_alloc(ct->ptls, rettyp->size, rettyp);
         *(jl_value_t**)z = y;
         JL_GC_POP();
         y = z;
@@ -1658,8 +1653,11 @@ jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_valu
         args[0] = r;
         jl_gc_safepoint();
     }
-    // args[0] == r (old); args[1] == y (new)
-    args[0] = jl_f_tuple(NULL, args, 2);
+    // args[0] == r (old)
+    // args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
     JL_GC_POP();
     return args[0];
 }
@@ -1671,6 +1669,8 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
         jl_type_error("replacefield!", ty, rhs);
     size_t offs = jl_field_offset(st, i);
     jl_value_t *r = expected;
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
     if (jl_field_isptr(st, i)) {
         jl_value_t **p = (jl_value_t**)((char*)v + offs);
         int success;
@@ -1683,11 +1683,8 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
             if (success || !jl_egal(r, expected))
                 break;
         }
-        jl_value_t **args;
-        JL_GC_PUSHARGS(args, 2);
-        args[0] = r;
-        args[1] = success ? jl_true : jl_false;
-        r = jl_f_tuple(NULL, args, 2);
+        JL_GC_PUSH1(&r);
+        r = jl_new_struct(rettyp, r, success ? jl_true : jl_false);
         JL_GC_POP();
     }
     else {
@@ -1695,7 +1692,7 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
         int isunion = jl_is_uniontype(ty);
         int needlock;
         jl_value_t *rty = ty;
-        size_t fsz;
+        size_t fsz = jl_field_size(st, i);
         if (isunion) {
             assert(!isatomic);
             hasptr = 0;
@@ -1708,7 +1705,7 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
             needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
         }
         if (isatomic && !needlock) {
-            r = jl_atomic_cmpswap_bits((jl_datatype_t*)rty, (char*)v + offs, r, rhs, fsz);
+            r = jl_atomic_cmpswap_bits((jl_datatype_t*)ty, rettyp, (char*)v + offs, r, rhs, fsz);
             int success = *((uint8_t*)r + fsz);
             if (success && hasptr)
                 jl_gc_multi_wb(v, rhs); // rhs is immutable
@@ -1717,23 +1714,17 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val
             jl_task_t *ct = jl_current_task;
             uint8_t *psel;
             if (isunion) {
-                size_t fsz = jl_field_size(st, i);
                 psel = &((uint8_t*)v)[offs + fsz - 1];
                 rty = jl_nth_union_component(rty, *psel);
             }
-            jl_value_t *params[2];
-            params[0] = rty;
-            params[1] = (jl_value_t*)jl_bool_type;
-            jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
-            JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
-            assert(!jl_field_isptr(tuptyp, 0));
-            r = jl_gc_alloc(ct->ptls, tuptyp->size, (jl_value_t*)tuptyp);
+            assert(!jl_field_isptr(rettyp, 0));
+            r = jl_gc_alloc(ct->ptls, rettyp->size, (jl_value_t*)rettyp);
             int success = (rty == jl_typeof(expected));
             if (needlock)
                 jl_lock_value(v);
-            size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
-            memcpy((char*)r, (char*)v + offs, fsz);
+            memcpy((char*)r, (char*)v + offs, fsz); // copy field, including union bits
             if (success) {
+                size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
                 if (((jl_datatype_t*)rty)->layout->haspadding)
                     success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
                 else
diff --git a/src/gc.c b/src/gc.c
index 5429510f08651..8b9688833c5dd 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -2778,6 +2778,7 @@ static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp
 }
 
 void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp);
+extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
 
 // mark the initial root set
 static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
@@ -2809,6 +2810,8 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
 
     // constants
     gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
+    if (cmpswap_names != NULL)
+        gc_mark_queue_obj(gc_cache, sp, cmpswap_names);
 }
 
 // find unmarked objects that need to be finalized from the finalizer list "list".
diff --git a/src/init.c b/src/init.c
index 5c0ce45a77912..c5ebd4684205c 100644
--- a/src/init.c
+++ b/src/init.c
@@ -841,6 +841,7 @@ static void post_boot_hooks(void)
     jl_methoderror_type    = (jl_datatype_t*)core("MethodError");
     jl_loaderror_type      = (jl_datatype_t*)core("LoadError");
     jl_initerror_type      = (jl_datatype_t*)core("InitError");
+    jl_pair_type           = core("Pair");
 
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
     jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
index bad61d4ade35a..0870464c7d6b5 100644
--- a/src/jl_exported_data.inc
+++ b/src/jl_exported_data.inc
@@ -71,6 +71,7 @@
     XX(jl_nothing) \
     XX(jl_nothing_type) \
     XX(jl_number_type) \
+    XX(jl_pair_type) \
     XX(jl_partial_struct_type) \
     XX(jl_partial_opaque_type) \
     XX(jl_interconditional_type) \
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index 81e68f3b78ee7..1d2a8a6bed5ac 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -11,6 +11,7 @@
     XX(jl_alloc_svec_uninit) \
     XX(jl_alloc_vec_any) \
     XX(jl_apply_array_type) \
+    XX(jl_apply_cmpswap_type) \
     XX(jl_apply_generic) \
     XX(jl_apply_tuple_type) \
     XX(jl_apply_tuple_type_v) \
@@ -542,4 +543,5 @@
     XX(jl_wakeup_thread) \
     XX(jl_yield) \
     XX(jl_print_backtrace) \
-    XX(jl_get_pgcstack)
+    XX(jl_get_pgcstack) \
+
diff --git a/src/jltypes.c b/src/jltypes.c
index 1ae49c0a32eab..aacd2ba19ccca 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -19,6 +19,8 @@
 extern "C" {
 #endif
 
+jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
+
 // compute empirical max-probe for a given size
 #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)
 #define h2index(hv, sz) (size_t)((hv) & ((sz)-1))
@@ -977,20 +979,42 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
 
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1)
 {
-    JL_GC_PUSH1(&p1);
-    jl_value_t *t = jl_apply_type(tc, &p1, 1);
-    JL_GC_POP();
-    return t;
+    return jl_apply_type(tc, &p1, 1);
 }
 
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2)
 {
-    jl_value_t **args;
-    JL_GC_PUSHARGS(args, 2);
-    args[0] = p1; args[1] = p2;
-    jl_value_t *t = jl_apply_type(tc, args, 2);
-    JL_GC_POP();
-    return t;
+    jl_value_t *args[2];
+    args[0] = p1;
+    args[1] = p2;
+    return jl_apply_type(tc, args, 2);
+}
+
+jl_datatype_t *jl_apply_modify_type(jl_value_t *dt)
+{
+    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2(jl_pair_type, dt, dt);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    return rettyp;
+}
+
+jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt)
+{
+    jl_value_t *params[2];
+    jl_value_t *names = jl_atomic_load_relaxed(&cmpswap_names);
+    if (names == NULL) {
+        params[0] = jl_symbol("old");
+        params[1] = jl_symbol("success");
+        jl_value_t *lnames = jl_f_tuple(NULL, params, 2);
+        if (jl_atomic_cmpswap(&cmpswap_names, &names, lnames))
+            names = jl_atomic_load_relaxed(&cmpswap_names); // == lnames
+    }
+    params[0] = dt;
+    params[1] = (jl_value_t*)jl_bool_type;
+    jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
+    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
+    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2(jl_namedtuple_type, names, tuptyp);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    return rettyp;
 }
 
 JL_DLLEXPORT jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v)
diff --git a/src/julia.h b/src/julia.h
index 3455817cf1a92..b2a8bd15bcb22 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -726,6 +726,7 @@ extern JL_DLLIMPORT jl_typename_t *jl_llvmpointer_typename JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_typename_t *jl_namedtuple_typename JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_unionall_t *jl_namedtuple_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_task_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_pair_type JL_GLOBALLY_ROOTED;
 
 extern JL_DLLIMPORT jl_value_t *jl_array_uint8_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_array_any_type JL_GLOBALLY_ROOTED;
@@ -1381,6 +1382,8 @@ JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p
 JL_DLLEXPORT jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n);
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1);
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2);
+JL_DLLEXPORT jl_datatype_t *jl_apply_modify_type(jl_value_t *dt);
+JL_DLLEXPORT jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt);
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params);
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np);
 JL_DLLEXPORT jl_datatype_t *jl_new_datatype(jl_sym_t *name,
@@ -1403,7 +1406,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *src);
 JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb);
 JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb);
 JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
-JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
+JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettype, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
 JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...);
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na);
 JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup);
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 7cb58bc230294..be78be74172cb 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -175,12 +175,16 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, j
         args[0] = expected;
         jl_gc_safepoint();
     }
-    // args[0] == expected (old); args[1] == y (new)
-    args[0] = jl_f_tuple(NULL, args, 2);
+    // args[0] == expected (old)
+    // args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ety);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
     JL_GC_POP();
     return args[0];
 }
 
+
 JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *expected, jl_value_t *x, jl_value_t *success_order_sym, jl_value_t *failure_order_sym)
 {
     JL_TYPECHK(atomic_pointerreplace, pointer, p);
@@ -193,20 +197,21 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp
     // TODO: filter other invalid orderings
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     char *pp = (char*)jl_unbox_long(p);
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
     if (ety == (jl_value_t*)jl_any_type) {
-        jl_value_t **result;
-        JL_GC_PUSHARGS(result, 2);
-        result[0] = expected;
+        jl_value_t *result;
+        JL_GC_PUSH1(&result);
+        result = expected;
         int success;
         while (1) {
-            success = jl_atomic_cmpswap((jl_value_t**)pp, &result[0], x);
-            if (success || !jl_egal(result[0], expected))
+            success = jl_atomic_cmpswap((jl_value_t**)pp, &result, x);
+            if (success || !jl_egal(result, expected))
                 break;
         }
-        result[1] = success ? jl_true : jl_false;
-        result[0] = jl_f_tuple(NULL, result, 2);
+        result = jl_new_struct(rettyp, result, success ? jl_true : jl_false);
         JL_GC_POP();
-        return result[0];
+        return result;
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -216,7 +221,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp
         size_t nb = jl_datatype_size(ety);
         if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
             jl_error("atomic_pointerreplace: invalid pointer for atomic operation");
-        return jl_atomic_cmpswap_bits((jl_datatype_t*)ety, pp, expected, x, nb);
+        return jl_atomic_cmpswap_bits((jl_datatype_t*)ety, rettyp, pp, expected, x, nb);
     }
 }
 
diff --git a/src/staticdata.c b/src/staticdata.c
index 8fa1613b075a8..f5892d4218e71 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -30,7 +30,7 @@ extern "C" {
 // TODO: put WeakRefs on the weak_refs list during deserialization
 // TODO: handle finalizers
 
-#define NUM_TAGS    150
+#define NUM_TAGS    151
 
 // An array of references that need to be restored from the sysimg
 // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C.
@@ -127,6 +127,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_floatingpoint_type);
         INSERT_TAG(jl_number_type);
         INSERT_TAG(jl_signed_type);
+        INSERT_TAG(jl_pair_type);
 
         // special typenames
         INSERT_TAG(jl_tuple_typename);
diff --git a/test/atomics.jl b/test/atomics.jl
index 4c32fc12d87ed..c53471ed0da26 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -4,6 +4,8 @@ using Test, Base.Threads
 using Core: ConcurrencyViolationError
 import Base: copy
 
+const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+
 mutable struct ARefxy{T}
     @atomic x::T
     y::T
@@ -86,17 +88,18 @@ Base.show(io::IO, x::Int24) = print(io, "Int24(", Core.Intrinsics.zext_int(Int,
 
 @noinline function _test_field_operators(r)
     r = r[]
+    TT = fieldtype(typeof(r), :x)
     T = typeof(getfield(r, :x))
     @test getfield(r, :x, :sequentially_consistent) === T(123_10)
     @test setfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_1)
     @test getfield(r, :x, :sequentially_consistent) === T(123_1)
-    @test replacefield!(r, :x, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === (T(123_1), false)
-    @test replacefield!(r, :x, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === (T(123_1), true)
+    @test replacefield!(r, :x, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), false))
+    @test replacefield!(r, :x, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), true))
     @test getfield(r, :x, :sequentially_consistent) === T(123_30)
-    @test replacefield!(r, :x, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === (T(123_30), false)
+    @test replacefield!(r, :x, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_30), false))
     @test getfield(r, :x, :sequentially_consistent) === T(123_30)
-    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(123_30), T(123_31))
-    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(123_31), T(123_32))
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_30), T(123_31))
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_31), T(123_32))
     @test getfield(r, :x, :sequentially_consistent) === T(123_32)
     @test swapfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_32)
     @test getfield(r, :x, :sequentially_consistent) === T(123_1)
@@ -120,6 +123,7 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
 @noinline function _test_field_orderings(r, x, y)
     @nospecialize x y
     r = r[]
+    TT = fieldtype(typeof(r), :x)
 
     @test getfield(r, :x) === x
     @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :x, :u)
@@ -199,7 +203,7 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :release)
     @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :acquire_release)
     @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :sequentially_consistent)
-    @test modifyfield!(r, :y, swap, x, :not_atomic) === (y, x)
+    @test modifyfield!(r, :y, swap, x, :not_atomic) === Pair{TT,TT}(y, x)
 
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :u, :not_atomic)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :unordered, :not_atomic)
@@ -215,10 +219,10 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :release)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :acquire_release)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :sequentially_consistent)
-    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === (x, true)
-    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === (y, x === y)
-    @test replacefield!(r, :y, y, y, :not_atomic) === (y, true)
-    @test replacefield!(r, :y, y, y) === (y, true)
+    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === ReplaceType{TT}((x, true))
+    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === ReplaceType{TT}((y, x === y))
+    @test replacefield!(r, :y, y, y, :not_atomic) === ReplaceType{TT}((y, true))
+    @test replacefield!(r, :y, y, y) === ReplaceType{TT}((y, true))
 
     @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :x, x, :u)
     @test_throws ConcurrencyViolationError("swapfield!: atomic field cannot be written non-atomically") swapfield!(r, :x, x, :not_atomic)
@@ -234,11 +238,11 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test_throws ConcurrencyViolationError("modifyfield!: atomic field cannot be written non-atomically") modifyfield!(r, :x, swap, x, :not_atomic)
     @test_throws ConcurrencyViolationError("modifyfield!: atomic field cannot be written non-atomically") modifyfield!(r, :x, swap, x)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :x, swap, x, :unordered)
-    @test modifyfield!(r, :x, swap, x, :monotonic) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :acquire) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :release) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :acquire_release) === (x, x)
-    @test modifyfield!(r, :x, swap, x, :sequentially_consistent) === (x, x)
+    @test modifyfield!(r, :x, swap, x, :monotonic) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :acquire) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :release) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :acquire_release) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :sequentially_consistent) === Pair{TT,TT}(x, x)
 
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :u, :not_atomic)
     @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be written non-atomically") replacefield!(r, :x, x, x)
@@ -256,9 +260,9 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :release)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :acquire_release)
     @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :sequentially_consistent)
-    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === (x, true)
-    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === (y, x === y)
-    @test replacefield!(r, :x, y, x, :sequentially_consistent) === (y, true)
+    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((x, true))
+    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((y, x === y))
+    @test replacefield!(r, :x, y, x, :sequentially_consistent) === ReplaceType{TT}((y, true))
     nothing
 end
 @noinline function test_field_orderings(r, x, y)
@@ -339,10 +343,10 @@ let a = ARefxy(1, -1)
     @test 12 === @atomic :monotonic a.x *= 3
 
     @test 12 === @atomic a.x
-    @test (12, 13) === @atomic a.x + 1
-    @test (13, 15) === @atomic :monotonic a.x + 2
-    @test (15, 19) === @atomic a.x max 19
-    @test (19, 20) === @atomic :monotonic a.x max 20
+    @test (12 => 13) === @atomic a.x + 1
+    @test (13 => 15) === @atomic :monotonic a.x + 2
+    @test (15 => 19) === @atomic a.x max 19
+    @test (19 => 20) === @atomic :monotonic a.x max 20
     @test_throws ConcurrencyViolationError @atomic :not_atomic a.x + 1
     @test_throws ConcurrencyViolationError @atomic :not_atomic a.x max 30
 
@@ -352,17 +356,17 @@ let a = ARefxy(1, -1)
     @test_throws ConcurrencyViolationError @atomicswap :not_atomic a.x = 1
 
     @test 2 === @atomic a.x
-    @test (2, true) === @atomicreplace a.x 2 => 1
-    @test (1, false) === @atomicreplace :monotonic a.x 2 => 1
-    @test (1, false) === @atomicreplace :monotonic :monotonic a.x 2 => 1
+    @test ReplaceType{Int}((2, true)) === @atomicreplace a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic :monotonic a.x 2 => 1
     @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x 1 => 2
     @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x 1 => 2
 
     @test 1 === @atomic a.x
     xchg = 1 => 2
-    @test (1, true) === @atomicreplace a.x xchg
-    @test (2, false) === @atomicreplace :monotonic a.x xchg
-    @test (2, false) === @atomicreplace :acquire_release :monotonic a.x xchg
+    @test ReplaceType{Int}((1, true)) === @atomicreplace a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :monotonic a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :acquire_release :monotonic a.x xchg
     @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x xchg
     @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg
 end
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index 7fa8ecb0ebe27..7fb6bd651ebc0 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -100,6 +100,8 @@ let f = Core.Intrinsics.ashr_int
     @test f(Int32(2), -1) == 0
 end
 
+const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+
 # issue #29929
 let p = Ptr{Nothing}(0)
     @test unsafe_store!(p, nothing) === C_NULL
@@ -107,9 +109,9 @@ let p = Ptr{Nothing}(0)
     @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing
     @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p
     @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing
-    @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === (nothing, nothing)
-    @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === (nothing, true)
-    @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === (nothing, false)
+    @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing)
+    @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true))
+    @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false))
 end
 
 struct GhostStruct end
@@ -199,24 +201,24 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(10)
                 @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(1)
-                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === (T(1), true)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100)
-                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === (T(100), false)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100)
-                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === (T(100), T(101))
-                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === (T(101), T(102))
+                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101))
+                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(102)
                 @test Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) === T(102)
-                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === (T(103), false)
+                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(103)
             end
             if TT === Any
-                @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === (T(103), S(103))
+                @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === Pair{TT,TT}(T(103), S(103))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(103)
                 @test Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) === p
                 @test Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) === S(1)
-                @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === (S(100), false)
-                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === (S(100), true)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false))
+                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true))
                 @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(2)
             end
         end)(TT,)
diff --git a/test/show.jl b/test/show.jl
index ed86a5b1dedb6..b9f8c45797b35 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -1774,7 +1774,7 @@ end
     # spurious binding resolutions
     show(IOContext(b, :module => TestShowType), Base.Pair)
     @test !Base.isbindingresolved(TestShowType, :Pair)
-    @test String(take!(b)) == "Base.Pair"
+    @test String(take!(b)) == "Core.Pair"
     show(IOContext(b, :module => TestShowType), Base.Complex)
     @test Base.isbindingresolved(TestShowType, :Complex)
     @test String(take!(b)) == "Complex"

From 96eab49eb7d55b08d6352fd17bacce9482b67789 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Mon, 23 Aug 2021 16:33:14 -0400
Subject: [PATCH 35/65] types: fix cache computation (#41935)

The `cacheable` flag must be computed after normalization of the type
parameters, since the purpose of the normalization is to turn them into
normal cacheable objects, when applicable.

Brokenness exposed by #36211
Fixes #41503

(cherry picked from commit 292f1a95e723c1a72011e525e050eaa971ee0085)
---
 src/jltypes.c | 68 +++++++++++++++++++++++----------------------------
 test/core.jl  |  7 ++++++
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/src/jltypes.c b/src/jltypes.c
index aacd2ba19ccca..f85c75a4a2d20 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -897,19 +897,19 @@ struct _jl_typestack_t;
 typedef struct _jl_typestack_t jl_typestack_t;
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       int cacheable, jl_typestack_t *stack, jl_typeenv_t *env);
+                                       jl_typestack_t *stack, jl_typeenv_t *env);
 
 // Build an environment mapping a TypeName's parameters to parameter values.
 // This is the environment needed for instantiating a type's supertype and field types.
 static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                     int cacheable, jl_typestack_t *stack, jl_typeenv_t *env, int c)
+                                     jl_typestack_t *stack, jl_typeenv_t *env, int c)
 {
     if (jl_is_datatype(dt))
-        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, cacheable, stack, env);
+        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env);
     assert(jl_is_unionall(dt));
     jl_unionall_t *ua = (jl_unionall_t*)dt;
     jl_typeenv_t e = { ua->var, iparams[c], env };
-    return inst_datatype_env(ua->body, p, iparams, ntp, cacheable, stack, &e, c + 1);
+    return inst_datatype_env(ua->body, p, iparams, ntp, stack, &e, c + 1);
 }
 
 jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
@@ -925,14 +925,7 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
         jl_value_t *u = jl_unwrap_unionall(tc);
         if (jl_is_datatype(u) && n == jl_nparams((jl_datatype_t*)u) &&
             ((jl_datatype_t*)u)->name->wrapper == tc) {
-            int cacheable = 1;
-            for (i = 0; i < n; i++) {
-                if (jl_has_free_typevars(params[i])) {
-                    cacheable = 0;
-                    break;
-                }
-            }
-            return inst_datatype_env(tc, NULL, params, n, cacheable, NULL, NULL, 0);
+            return inst_datatype_env(tc, NULL, params, n, NULL, NULL, 0);
         }
     }
     JL_GC_PUSH1(&tc);
@@ -1002,8 +995,8 @@ jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt)
     jl_value_t *params[2];
     jl_value_t *names = jl_atomic_load_relaxed(&cmpswap_names);
     if (names == NULL) {
-        params[0] = jl_symbol("old");
-        params[1] = jl_symbol("success");
+        params[0] = (jl_value_t*)jl_symbol("old");
+        params[1] = (jl_value_t*)jl_symbol("success");
         jl_value_t *lnames = jl_f_tuple(NULL, params, 2);
         if (jl_atomic_cmpswap(&cmpswap_names, &names, lnames))
             names = jl_atomic_load_relaxed(&cmpswap_names); // == lnames
@@ -1012,7 +1005,7 @@ jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt)
     params[1] = (jl_value_t*)jl_bool_type;
     jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
     JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
-    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2(jl_namedtuple_type, names, tuptyp);
+    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, (jl_value_t*)tuptyp);
     JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
     return rettyp;
 }
@@ -1343,18 +1336,32 @@ jl_value_t *normalize_unionalls(jl_value_t *t)
 static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack);
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       int cacheable, jl_typestack_t *stack, jl_typeenv_t *env)
+                                       jl_typestack_t *stack, jl_typeenv_t *env)
 {
     jl_typestack_t top;
     jl_typename_t *tn = dt->name;
     int istuple = (tn == jl_tuple_typename);
     int isnamedtuple = (tn == jl_namedtuple_typename);
     if (dt->name != jl_type_typename) {
-        for (size_t i = 0; i < ntp; i++)
+        size_t i;
+        for (i = 0; i < ntp; i++)
             iparams[i] = normalize_unionalls(iparams[i]);
     }
 
-    // check type cache
+    // check type cache, if applicable
+    int cacheable = 1;
+    if (istuple) {
+        size_t i;
+        for (i = 0; cacheable && i < ntp; i++)
+            if (!jl_is_concrete_type(iparams[i]) && iparams[i] != jl_bottom_type)
+                cacheable = 0;
+    }
+    else {
+        size_t i;
+        for (i = 0; cacheable && i < ntp; i++)
+            if (jl_has_free_typevars(iparams[i]))
+                cacheable = 0;
+    }
     if (cacheable) {
         size_t i;
         for (i = 0; i < ntp; i++) {
@@ -1553,13 +1560,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
 
 static jl_tupletype_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params)
 {
-    int cacheable = 1;
-    for (size_t i = 0; i < np; i++) {
-        assert(p[i]);
-        if (!jl_is_concrete_type(p[i]) && p[i] != jl_bottom_type)
-            cacheable = 0;
-    }
-    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, cacheable, NULL, NULL);
+    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL);
 }
 
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params)
@@ -1581,7 +1582,6 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
 {
     jl_tupletype_t *tt = (jl_datatype_t*)lookup_typevalue(jl_tuple_typename, arg1, args, nargs, leaf);
     if (tt == NULL) {
-        int cacheable = 1;
         size_t i;
         jl_svec_t *params = jl_alloc_svec(nargs);
         JL_GC_PUSH1(&params);
@@ -1593,14 +1593,13 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
                 // `jl_typeof(ai)`, but that will require some redesign of the caching
                 // logic.
                 ai = (jl_value_t*)jl_wrap_Type(ai);
-                cacheable = 0;
             }
             else {
                 ai = jl_typeof(ai);
             }
             jl_svecset(params, i, ai);
         }
-        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, cacheable, NULL, NULL);
+        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL);
         JL_GC_POP();
     }
     return tt;
@@ -1668,9 +1667,6 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         iparams = jl_svec_data(ip_heap);
     }
     int bound = 0;
-    int cacheable = 1;
-    if (jl_is_va_tuple(tt))
-        cacheable = 0;
     int i;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
@@ -1679,11 +1675,9 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         if (ip_heap)
             jl_gc_wb(ip_heap, pi);
         bound |= (pi != elt);
-        if (cacheable && !jl_is_concrete_type(pi))
-            cacheable = 0;
     }
     if (bound)
-        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, cacheable, stack, env);
+        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env);
     JL_GC_POP();
     return t;
 }
@@ -1770,18 +1764,16 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
     size_t ntp = jl_svec_len(tp);
     jl_value_t **iparams;
     JL_GC_PUSHARGS(iparams, ntp);
-    int cacheable = 1, bound = 0;
+    int bound = 0;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
         jl_value_t *pi = inst_type_w_(elt, env, stack, check);
         iparams[i] = pi;
         bound |= (pi != elt);
-        if (cacheable && jl_has_free_typevars(pi))
-            cacheable = 0;
     }
     // if t's parameters are not bound in the environment, return it uncopied (#9378)
     if (bound)
-        t = inst_datatype_inner(tt, NULL, iparams, ntp, cacheable, stack, env);
+        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env);
     JL_GC_POP();
     return t;
 }
diff --git a/test/core.jl b/test/core.jl
index e55705b3e6923..56ddfb42e10f1 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -7577,3 +7577,10 @@ const T35130 = Tuple{Vector{Int}, <:Any}
 end
 h35130(x) = A35130(Any[x][1]::Vector{T35130})
 @test h35130(T35130[([1],1)]) isa A35130
+
+# issue #41503
+let S = Tuple{Tuple{Tuple{K, UInt128} where K<:Tuple{Int64}, Int64}},
+    T = Tuple{Tuple{Tuple{Tuple{Int64}, UInt128}, Int64}}
+    @test pointer_from_objref(T) === pointer_from_objref(S)
+    @test isbitstype(T)
+end

From f34e241f14e06ccb27146c298432250919cbff9c Mon Sep 17 00:00:00 2001
From: Nathan Daly <nhdaly@gmail.com>
Date: Thu, 29 Jul 2021 21:52:19 -0400
Subject: [PATCH 36/65] Make jl_cumulative_compile_time_ns global (and
 reentrant).

Now, multiple tasks (on the same or different Threads) can start and stop compilation
time measurement, without interrupting each other.

* Makes jl_cumulative_compile_time into a global, atomic variable.

Instead of keeping per-task compilation time, this change keeps a
global counter of compilation time, protected with atomic mutations.

Fixes #41739

```julia
julia> include("./compilation-task-migration-17-example.jl")
start thread: 2
end thread: 2
  5.185706 seconds (3.53 M allocations: 2.570 GiB, 7.34% gc time, 15.57% compilation time)

julia> include("./compilation-task-migration-17-example.jl")
start thread: 3
WARNING: replacing module M.
end thread: 1
  4.110316 seconds (18.23 k allocations: 2.391 GiB, 5.67% gc time, 0.24% compilation time)
```

Compilation time measurement originally added in: https://github.com/JuliaLang/julia/pull/38885

Problems addressed:
- This fixes https://github.com/JuliaLang/julia/issues/41739, meaning it fixes compilation time reporting in 1.7 after task migration was enabled.
- It also fixes the race condition that existed previously, even on 1.6, where multiple Tasks on the thread measuring `@time` could break the measurement, as identified in (https://github.com/JuliaLang/julia/issues/41271#issuecomment-876564749).
  - It fixes reentrant `@time` by making the `enable` flag a _counter,_ instead of a boolean.
  - It fixes `@time` called from multiple threads by making that flag thread-safe (via atomics).

(cherry picked from commit b4ca19664c84e7a331bfc11375bec74bd0b72bdb)
---
 base/timing.jl       |  2 +-
 src/aotcompile.cpp   | 16 ++++++++--------
 src/gf.c             | 12 ++++++++----
 src/jitlayers.cpp    | 44 ++++++++++++++++++++++----------------------
 src/julia_internal.h |  5 +++--
 src/task.c           |  5 ++++-
 src/threading.c      |  6 ++----
 test/misc.jl         | 16 ++++++++++++++++
 8 files changed, 64 insertions(+), 42 deletions(-)

diff --git a/base/timing.jl b/base/timing.jl
index ab7af23048305..45a27e3378977 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -55,7 +55,7 @@ function gc_alloc_count(diff::GC_Diff)
     diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
 end
 
-# cumulative total time spent on compilation
+# cumulative total time spent on compilation, in nanoseconds
 cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
 cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
 
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 93683d320e6b9..11e35764190cc 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -286,8 +286,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
     JL_GC_PUSH1(&src);
     JL_LOCK(&codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
 
     CompilationPolicy policy = (CompilationPolicy) _policy;
@@ -415,8 +415,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
     }
 
     data->M = std::move(clone);
-    if (jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     if (policy == CompilationPolicy::ImagingMode)
         imaging_mode = 0;
     JL_UNLOCK(&codegen_lock); // Might GC
@@ -916,8 +916,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
         jl_llvm_functions_t decls;
         JL_LOCK(&codegen_lock);
         uint64_t compiler_start_time = 0;
-        int tid = jl_threadid();
-        if (jl_measure_compile_time[tid])
+        uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+        if (measure_compile_time_enabled)
             compiler_start_time = jl_hrtime();
         std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);
 
@@ -942,8 +942,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
             m.release(); // the return object `llvmf` will be the owning pointer
         }
         JL_GC_POP();
-        if (jl_measure_compile_time[tid])
-            jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+        if (measure_compile_time_enabled)
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
         JL_UNLOCK(&codegen_lock); // Might GC
         if (F)
             return F;
diff --git a/src/gf.c b/src/gf.c
index 41381ccc5178e..118a0a605c7fc 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -3160,19 +3160,23 @@ int jl_has_concrete_subtype(jl_value_t *typ)
 #define typeinf_lock codegen_lock
 
 static uint64_t inference_start_time = 0;
+static uint8_t inference_is_measuring_compile_time = 0;
 
 JL_DLLEXPORT void jl_typeinf_begin(void)
 {
     JL_LOCK(&typeinf_lock);
-    if (jl_measure_compile_time[jl_threadid()])
+    if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
         inference_start_time = jl_hrtime();
+        inference_is_measuring_compile_time = 1;
+    }
 }
 
 JL_DLLEXPORT void jl_typeinf_end(void)
 {
-    int tid = jl_threadid();
-    if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
+    if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) {
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time));
+        inference_is_measuring_compile_time = 0;
+    }
     JL_UNLOCK(&typeinf_lock);
 }
 
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index e86d6109ff427..a87e14f7a76a4 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -78,16 +78,16 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
 extern "C" JL_DLLEXPORT
 uint64_t jl_cumulative_compile_time_ns_before()
 {
-    int tid = jl_threadid();
-    jl_measure_compile_time[tid] = 1;
-    return jl_cumulative_compile_time[tid];
+    // Increment the flag to allow reentrant callers to `@time`.
+    jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
+    return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
 }
 extern "C" JL_DLLEXPORT
 uint64_t jl_cumulative_compile_time_ns_after()
 {
-    int tid = jl_threadid();
-    jl_measure_compile_time[tid] = 0;
-    return jl_cumulative_compile_time[tid];
+    // Decrement the flag when done measuring, allowing other callers to continue measuring.
+    jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
+    return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
 }
 
 // this generates llvm code for the lambda info
@@ -233,8 +233,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
 {
     JL_LOCK(&codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     jl_codegen_params_t params;
     jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
@@ -258,8 +258,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
         if (success && llvmmod == NULL)
             jl_add_to_ee(std::unique_ptr<Module>(into));
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     JL_UNLOCK(&codegen_lock);
     return success;
 }
@@ -315,8 +315,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
 {
     JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     // if we don't have any decls already, try to generate it now
     jl_code_info_t *src = NULL;
@@ -354,8 +354,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
     else {
         codeinst = NULL;
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     JL_UNLOCK(&codegen_lock);
     JL_GC_POP();
     return codeinst;
@@ -369,8 +369,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
     }
     JL_LOCK(&codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     if (unspec->invoke == NULL) {
         jl_code_info_t *src = NULL;
@@ -398,8 +398,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
         }
         JL_GC_POP();
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     JL_UNLOCK(&codegen_lock); // Might GC
 }
 
@@ -422,8 +422,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
             // so create an exception here so we can print pretty our lies
             JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
             uint64_t compiler_start_time = 0;
-            int tid = jl_threadid();
-            if (jl_measure_compile_time[tid])
+            uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+            if (measure_compile_time_enabled)
                 compiler_start_time = jl_hrtime();
             specfptr = (uintptr_t)codeinst->specptr.fptr;
             if (specfptr == 0) {
@@ -448,8 +448,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
                 }
                 JL_GC_POP();
             }
-            if (jl_measure_compile_time[tid])
-                jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+            if (measure_compile_time_enabled)
+                jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
             JL_UNLOCK(&codegen_lock);
         }
         if (specfptr != 0)
diff --git a/src/julia_internal.h b/src/julia_internal.h
index f4d63cc27cf78..a4e81453581f9 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -154,8 +154,9 @@ static inline uint64_t cycleclock(void)
 
 #include "timing.h"
 
-extern uint8_t *jl_measure_compile_time;
-extern uint64_t *jl_cumulative_compile_time;
+// Global *atomic* integers controlling *process-wide* measurement of compilation time.
+extern uint8_t jl_measure_compile_time_enabled;
+extern uint64_t jl_cumulative_compile_time;
 
 #ifdef _COMPILER_MICROSOFT_
 #  define jl_return_address() ((uintptr_t)_ReturnAddress())
diff --git a/src/task.c b/src/task.c
index 1acdb4d4e9794..0b59dec4c770d 100644
--- a/src/task.c
+++ b/src/task.c
@@ -561,7 +561,10 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M
     ptls->io_wait = 0;
     // @time needs its compile timer disabled on error,
     // and cannot use a try-finally as it would break scope for assignments
-    jl_measure_compile_time[ptls->tid] = 0;
+    // We blindly disable compilation time tracking here, for all running Tasks, even though
+    // it may cause some incorrect measurements. This is a known bug, and is being tracked
+    // here: https://github.com/JuliaLang/julia/pull/39138
+    jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0);
     JL_GC_PUSH1(&exception);
     jl_gc_unsafe_enter(ptls);
     if (exception) {
diff --git a/src/threading.c b/src/threading.c
index 235bb9f870ba1..ffe53c07b45ee 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -287,8 +287,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
 #endif
 
 jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
-uint8_t *jl_measure_compile_time = NULL;
-uint64_t *jl_cumulative_compile_time = NULL;
+uint8_t jl_measure_compile_time_enabled = 0;
+uint64_t jl_cumulative_compile_time = 0;
 
 // return calling thread's ID
 // Also update the suspended_threads list in signals-mach when changing the
@@ -467,8 +467,6 @@ void jl_init_threading(void)
     }
     if (jl_n_threads <= 0)
         jl_n_threads = 1;
-    jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time));
-    jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time));
 #ifndef __clang_analyzer__
     jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
 #endif
diff --git a/test/misc.jl b/test/misc.jl
index 411135ac63313..94c35c43ffaec 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -263,6 +263,22 @@ function timev_macro_scope()
 end
 @test timev_macro_scope() == 1
 
+before = Base.cumulative_compile_time_ns_before();
+
+# exercise concurrent calls to `@time` for reentrant compilation time measurement.
+t1 = @async @time begin
+    sleep(2)
+    @eval module M ; f(x,y) = x+y ; end
+    @eval M.f(2,3)
+end
+t2 = @async begin
+    sleep(1)
+    @time 2 + 2
+end
+
+after = Base.cumulative_compile_time_ns_after();
+@test after >= before;
+
 # interactive utilities
 
 struct ambigconvert; end # inject a problematic `convert` method to ensure it still works

From 5bd502e598fc2759591a009669e91f050b36fe2b Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliacomputing.com>
Date: Thu, 26 Aug 2021 01:40:22 -0400
Subject: [PATCH 37/65] Fix #41975 - Dropped typecheck in GotoIfNot (#42010)

Recall the reproducer from the issue:
```
julia> f() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
f (generic function with 1 method)

julia> f()
Unreachable reached at 0x7fb33bb50090

signal (4): Illegal instruction
in expression starting at REPL[13]:1
unsafe_load at ./pointer.jl:105 [inlined]
unsafe_load at ./pointer.jl:105 [inlined]
```

There were actually two places where we were dropping the
GotoIfNot, one in type annotation after inference, one in
SSA conversion. The one in SSA conversion was structural:
When both branches target the same jump destination, the
GotoIfNot would be dropped. This was fine in general, except
that as shown above, GotoIfNot can actually itself have
a side effect, namely throwing a type error if the condition
is not a boolean. Thus in order to actually drop the node
we need to prove that the error check does not fire.

The reason we want to drop the GotoIfNot node here is
that IRCode has an invariant that every basic block is
in the predecessor list only once (otherwise PhiNodes
would have to carry extra state regarding which branch
they refer to).

To fix this, insert an `Expr(:call, typeassert, _, Bool)`
when dropping the GotoIfNot. We do lose the ability to
distinguish the GotoIfNot from literal typechecks as
a result, but at the moment they generate identical
errors. If we ever wanted to distinguish them, we could
create another typecheck intrinsic that throws a different
error or use an approach like #41994.

(cherry picked from commit 24450009edcd819238cc633ec3270be768716a13)
---
 base/compiler/ssair/slot2ssa.jl | 2 +-
 base/compiler/typeinfer.jl      | 2 +-
 test/compiler/ssair.jl          | 4 ++++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index 3b6953fc53d19..21c0bf00ec755 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -823,7 +823,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             new_dest = block_for_inst(cfg, stmt.dest)
             if new_dest == bb+1
                 # Drop this node - it's a noop
-                new_code[idx] = stmt.cond
+                new_code[idx] = Expr(:call, GlobalRef(Core, :typeassert), stmt.cond, GlobalRef(Core, :Bool))
             else
                 new_code[idx] = GotoIfNot(stmt.cond, new_dest)
             end
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 4ad96ae2e72f0..25a07fbb5ee7d 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -638,7 +638,7 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
         expr = body[i]
         if isa(expr, GotoIfNot)
             if !isa(states[expr.dest], VarTable)
-                body[i] = expr.cond
+                body[i] = Expr(:call, GlobalRef(Core, :typeassert), expr.cond, GlobalRef(Core, :Bool))
             end
         end
     end
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index f90bb71e291d0..17a0753eddc64 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -310,3 +310,7 @@ let cfg = CFG(BasicBlock[
     Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3)
     @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 end
+
+# Issue #41975 - SSA conversion drops type check
+f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
+@test_throws TypeError f_if_typecheck()

From 3746e6303555e864b2d48ef51a284d5e868d6887 Mon Sep 17 00:00:00 2001
From: Troels Nielsen <bn.troels@gmail.com>
Date: Thu, 26 Aug 2021 17:44:27 +0200
Subject: [PATCH 38/65] codegen: parameter attributes on CFunction closures
 stick (#41827)

When CFunction closures are created an extra argument is added to the
function signature for holding the closure.

Make sure that the parameter attributes on already existing parameters
are not shifted when adding that parameter.

(cherry picked from commit 08f342230e056c118dedd9889c558ebcbbb26603)
---
 src/codegen.cpp | 41 ++++++++++++++++++++++++++++++++++++++++-
 test/ccall.jl   | 20 ++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/src/codegen.cpp b/src/codegen.cpp
index 1cc26dee22f1e..a8cec7abc536a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -5161,9 +5161,48 @@ static Function* gen_cfun_wrapper(
         // add nest parameter (pointer to jl_value_t* data array) after sret arg
         assert(closure_types);
         std::vector<Type*> fargt_sig(sig.fargt_sig);
+
         fargt_sig.insert(fargt_sig.begin() + sig.sret, T_pprjlvalue);
+
+        // Shift LLVM attributes for parameters one to the right, as
+        // we are adding the extra nest parameter after sret arg.
+        std::vector<std::pair<unsigned, AttributeSet>> newAttributes;
+        newAttributes.reserve(attributes.getNumAttrSets() + 1);
+        auto it = attributes.index_begin();
+
+        // Skip past FunctionIndex
+        if (it == AttributeList::AttrIndex::FunctionIndex) {
+            ++it;
+        }
+
+        // Move past ReturnValue and parameter return value
+        for (;it < AttributeList::AttrIndex::FirstArgIndex + sig.sret; ++it) {
+            if (attributes.hasAttributes(it)) {
+                newAttributes.emplace_back(it, attributes.getAttributes(it));
+            }
+        }
+
+        // Add the new nest attribute
+        AttrBuilder attrBuilder;
+        attrBuilder.addAttribute(Attribute::Nest);
+        newAttributes.emplace_back(it, AttributeSet::get(jl_LLVMContext, attrBuilder));
+
+        // Shift forward the rest of the attributes
+        for(;it < attributes.index_end(); ++it) {
+            if (attributes.hasAttributes(it)) {
+                newAttributes.emplace_back(it + 1, attributes.getAttributes(it));
+            }
+        }
+
+        // Remember to add back FunctionIndex
+        if (attributes.hasAttributes(AttributeList::AttrIndex::FunctionIndex)) {
+            newAttributes.emplace_back(AttributeList::AttrIndex::FunctionIndex,
+                                       attributes.getAttributes(AttributeList::AttrIndex::FunctionIndex));
+        }
+
+        // Create the new AttributeList
+        attributes = AttributeList::get(jl_LLVMContext, newAttributes);
         functype = FunctionType::get(sig.sret ? T_void : sig.prt, fargt_sig, /*isVa*/false);
-        attributes = attributes.addAttribute(jl_LLVMContext, 1 + sig.sret, Attribute::Nest);
     }
     else {
         functype = sig.functype();
diff --git a/test/ccall.jl b/test/ccall.jl
index 02d005108459e..01f0f4f651aa8 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -982,6 +982,26 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
     end
 end
 
+
+#issue 40164
+@testset "llvm parameter attributes on cfunction closures" begin
+    struct Struct40164
+        x::Cdouble
+        y::Cdouble
+        z::Cdouble
+    end
+
+    function test_40164()
+        ret = Struct40164[]
+        f = x::Struct40164 -> (push!(ret, x); nothing)
+        f_c = @cfunction($f, Cvoid, (Struct40164,))
+        ccall(f_c.ptr, Ptr{Cvoid}, (Struct40164,), Struct40164(0, 1, 2))
+        ret
+    end
+
+    @test test_40164() == [Struct40164(0, 1, 2)]
+end
+
 else
 
 @test_broken "cfunction: no support for closures on this platform"

From 30e82b4cfd78bbb318856a800c5b5aec9a4b7d74 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Thu, 26 Aug 2021 12:08:48 -0400
Subject: [PATCH 39/65] atomics: optimize modify operation (partially) (#41859)

Optimize the load/store portion of the operations, but not yet the
invoke part.

(cherry picked from commit 690eae23ca0f2d2a6dbca2771d94db3daf851198)
---
 src/cgutils.cpp          | 295 ++++++++++++++++++++++++++-------------
 src/codegen.cpp          |  30 ++--
 src/intrinsics.cpp       |  25 ++--
 src/runtime_intrinsics.c |  22 ++-
 test/intrinsics.jl       |   4 +-
 5 files changed, 251 insertions(+), 125 deletions(-)

diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 288e98e9a712b..4b1f842effe22 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1542,12 +1542,23 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
 }
 
 static jl_cgval_t typed_store(jl_codectx_t &ctx,
-        Value *ptr, Value *idx_0based, const jl_cgval_t &rhs, const jl_cgval_t &cmp,
+        Value *ptr, Value *idx_0based, jl_cgval_t rhs, jl_cgval_t cmp,
         jl_value_t *jltype, MDNode *tbaa, MDNode *aliasscope,
         Value *parent,  // for the write barrier, NULL if no barrier needed
         bool isboxed, AtomicOrdering Order, AtomicOrdering FailOrder, unsigned alignment,
-        bool needlock, bool issetfield, bool isreplacefield, bool maybe_null_if_boxed)
-{
+        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
+        bool maybe_null_if_boxed, const std::string &fname)
+{
+    auto newval = [&](const jl_cgval_t &lhs) {
+        jl_cgval_t argv[3] = { cmp, lhs, rhs };
+        Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+        argv[0] = mark_julia_type(ctx, callval, true, jl_any_type);
+        if (!jl_subtype(argv[0].typ, jltype)) {
+            emit_typecheck(ctx, argv[0], jltype, fname + "typed_store");
+            argv[0] = update_julia_type(ctx, argv[0], jltype);
+        }
+        return argv[0];
+    };
     assert(!needlock || parent != nullptr);
     Type *elty = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, jltype);
     if (type_is_ghost(elty)) {
@@ -1563,9 +1574,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
             return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
-        else {
+        else if (isswapfield) {
             return ghostValue(jltype);
         }
+        else { // modifyfield
+            jl_cgval_t oldval = ghostValue(jltype);
+            jl_cgval_t argv[2] = { oldval, newval(oldval) };
+            jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
     }
     Value *intcast = nullptr;
     if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
@@ -1582,13 +1599,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         if (nb != nb2)
             elty = Type::getIntNTy(jl_LLVMContext, nb2);
     }
-    Value *r;
-    if (!isboxed)
-        r = emit_unbox(ctx, realelty, rhs, jltype);
-    else
-        r = boxed(ctx, rhs);
-    if (realelty != elty)
-        r = ctx.builder.CreateZExt(r, elty);
+    Value *r = nullptr;
+    if (issetfield || isswapfield || isreplacefield)  {
+        if (!isboxed)
+            r = emit_unbox(ctx, realelty, rhs, jltype);
+        else
+            r = boxed(ctx, rhs);
+        if (realelty != elty)
+            r = ctx.builder.CreateZExt(r, elty);
+    }
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     if (ptr->getType() != ptrty)
         ptr = ctx.builder.CreateBitCast(ptr, ptrty);
@@ -1598,33 +1617,22 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         alignment = sizeof(void*);
     else if (!alignment)
         alignment = julia_alignment(jltype);
-    Instruction *instr = nullptr;
+    Value *instr = nullptr;
     Value *Compare = nullptr;
     Value *Success = nullptr;
-    BasicBlock *DoneBB = issetfield || (!isreplacefield && !isboxed) ? nullptr : BasicBlock::Create(jl_LLVMContext, "done_xchg", ctx.f);
+    BasicBlock *DoneBB = nullptr;
     if (needlock)
         emit_lockstate_value(ctx, parent, true);
     jl_cgval_t oldval = rhs;
-    if (issetfield || Order == AtomicOrdering::NotAtomic) {
-        if (!issetfield) {
-            instr = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+    if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) {
+        if (isswapfield) {
+            auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
             if (aliasscope)
-                instr->setMetadata("noalias", aliasscope);
+                load->setMetadata("noalias", aliasscope);
             if (tbaa)
-                tbaa_decorate(tbaa, instr);
+                tbaa_decorate(tbaa, load);
             assert(realelty == elty);
-            if (isreplacefield) {
-                oldval = mark_julia_type(ctx, instr, isboxed, jltype);
-                Value *first_ptr = nullptr;
-                if (maybe_null_if_boxed)
-                    first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
-                Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
-                    return emit_f_is(ctx, oldval, cmp);
-                });
-                BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
-                ctx.builder.CreateCondBr(Success, BB, DoneBB);
-                ctx.builder.SetInsertPoint(BB);
-            }
+            instr = load;
         }
         StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
         store->setOrdering(Order);
@@ -1632,20 +1640,37 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             store->setMetadata("noalias", aliasscope);
         if (tbaa)
             tbaa_decorate(tbaa, store);
-        if (DoneBB)
-            ctx.builder.CreateBr(DoneBB);
     }
-    else if (isboxed || isreplacefield) {
-        // we have to handle isboxed here as a workaround for really bad LLVM design issue: plain Xchg only works with integers
+    else if (isswapfield && !isboxed) {
+        // boxed values cannot be handled here, owing to a really bad LLVM
+        // design issue: plain Xchg only works with integers
+#if JL_LLVM_VERSION >= 130000
+        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order);
+#else
+        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Order);
+        store->setAlignment(Align(alignment));
+#endif
+        if (aliasscope)
+            store->setMetadata("noalias", aliasscope);
+        if (tbaa)
+            tbaa_decorate(tbaa, store);
+        instr = store;
+    }
+    else {
+        // replacefield, modifyfield, or swapfield (isboxed && atomic)
+        DoneBB = BasicBlock::Create(jl_LLVMContext, "done_xchg", ctx.f);
         bool needloop;
         PHINode *Succ = nullptr, *Current = nullptr;
         if (isreplacefield) {
-            if (!isboxed) {
+            if (Order == AtomicOrdering::NotAtomic) {
+                needloop = false;
+            }
+            else if (!isboxed) {
                 needloop = ((jl_datatype_t*)jltype)->layout->haspadding;
                 Value *SameType = emit_isa(ctx, cmp, jltype, nullptr).first;
                 if (SameType != ConstantInt::getTrue(jl_LLVMContext)) {
                     BasicBlock *SkipBB = BasicBlock::Create(jl_LLVMContext, "skip_xchg", ctx.f);
-                    BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+                    BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "ok_xchg", ctx.f);
                     ctx.builder.CreateCondBr(SameType, BB, SkipBB);
                     ctx.builder.SetInsertPoint(SkipBB);
                     LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
@@ -1658,7 +1683,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                     ctx.builder.CreateBr(DoneBB);
                     ctx.builder.SetInsertPoint(DoneBB);
                     Succ = ctx.builder.CreatePHI(T_int1, 2);
-                    Succ->addIncoming(ConstantInt::get(T_int1, 0), SkipBB);
+                    Succ->addIncoming(ConstantInt::get(T_int1, false), SkipBB);
                     Current = ctx.builder.CreatePHI(instr->getType(), 2);
                     Current->addIncoming(instr, SkipBB);
                     ctx.builder.SetInsertPoint(BB);
@@ -1676,50 +1701,112 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                 needloop = true;
             }
         }
-        else {
+        else { // swap or modify
             LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
-            Current->setOrdering(AtomicOrdering::Monotonic);
+            Current->setOrdering(Order == AtomicOrdering::NotAtomic ? Order : AtomicOrdering::Monotonic);
             if (aliasscope)
                 Current->setMetadata("noalias", aliasscope);
             if (tbaa)
                 tbaa_decorate(tbaa, Current);
             Compare = Current;
-            needloop = true;
+            needloop = !isswapfield || Order != AtomicOrdering::NotAtomic;
         }
         BasicBlock *BB;
+        PHINode *CmpPhi;
         if (needloop) {
             BasicBlock *From = ctx.builder.GetInsertBlock();
             BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
             ctx.builder.CreateBr(BB);
             ctx.builder.SetInsertPoint(BB);
-            PHINode *Cmp = ctx.builder.CreatePHI(r->getType(), 2);
-            Cmp->addIncoming(Compare, From);
-            Compare = Cmp;
-        }
-        if (Order == AtomicOrdering::Unordered)
-            Order = AtomicOrdering::Monotonic;
-        if (!isreplacefield)
-            FailOrder = AtomicOrdering::Monotonic;
-        else if (FailOrder == AtomicOrdering::Unordered)
-            FailOrder = AtomicOrdering::Monotonic;
+            CmpPhi = ctx.builder.CreatePHI(elty, 2);
+            CmpPhi->addIncoming(Compare, From);
+            Compare = CmpPhi;
+        }
+        if (ismodifyfield) {
+            if (needlock)
+                emit_lockstate_value(ctx, parent, false);
+            Value *realCompare = Compare;
+            if (realelty != elty)
+                realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
+            if (intcast) {
+                ctx.builder.CreateStore(realCompare, ctx.builder.CreateBitCast(intcast, realCompare->getType()->getPointerTo()));
+                if (maybe_null_if_boxed)
+                    realCompare = ctx.builder.CreateLoad(intcast);
+            }
+            if (maybe_null_if_boxed) {
+                Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
+                if (first_ptr)
+                    null_pointer_check(ctx, first_ptr, nullptr);
+            }
+            if (intcast)
+                oldval = mark_julia_slot(intcast, jltype, NULL, tbaa_stack);
+            else
+                oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
+            rhs = newval(oldval);
+            if (!isboxed)
+                r = emit_unbox(ctx, realelty, rhs, jltype);
+            else
+                r = boxed(ctx, rhs);
+            if (realelty != elty)
+                r = ctx.builder.CreateZExt(r, elty);
+            if (needlock)
+                emit_lockstate_value(ctx, parent, true);
+            cmp = oldval;
+        }
+        Value *Done;
+        if (Order == AtomicOrdering::NotAtomic) {
+            // modifyfield or replacefield
+            assert(elty == realelty && !intcast);
+            auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+            if (aliasscope)
+                load->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, load);
+            Value *first_ptr = nullptr;
+            if (maybe_null_if_boxed && !ismodifyfield)
+                first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
+            oldval = mark_julia_type(ctx, load, isboxed, jltype);
+            Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
+                return emit_f_is(ctx, oldval, cmp);
+            });
+            if (needloop && ismodifyfield)
+                CmpPhi->addIncoming(load, ctx.builder.GetInsertBlock());
+            assert(Succ == nullptr);
+            BasicBlock *XchgBB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+            ctx.builder.CreateCondBr(Success, XchgBB, needloop && ismodifyfield ? BB : DoneBB);
+            ctx.builder.SetInsertPoint(XchgBB);
+            auto *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
+            if (aliasscope)
+                store->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, store);
+            ctx.builder.CreateBr(DoneBB);
+            instr = load;
+        }
+        else {
+            if (Order == AtomicOrdering::Unordered)
+                Order = AtomicOrdering::Monotonic;
+            if (!isreplacefield)
+                FailOrder = AtomicOrdering::Monotonic;
+            else if (FailOrder == AtomicOrdering::Unordered)
+                FailOrder = AtomicOrdering::Monotonic;
 #if JL_LLVM_VERSION >= 130000
-        auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Align(alignment), Order, FailOrder);
+            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Align(alignment), Order, FailOrder);
 #else
-        auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Order, FailOrder);
-        store->setAlignment(Align(alignment));
+            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Order, FailOrder);
+            store->setAlignment(Align(alignment));
 #endif
-        if (aliasscope)
-            store->setMetadata("noalias", aliasscope);
-        if (tbaa)
-            tbaa_decorate(tbaa, store);
-        instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
-        Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
-        Value *Done = Success;
-        if (needloop) {
-            if (isreplacefield) {
+            if (aliasscope)
+                store->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, store);
+            instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
+            Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
+            Done = Success;
+            if (isreplacefield && needloop) {
                 Value *realinstr = instr;
                 if (realelty != elty)
-                    realinstr = ctx.builder.CreateTrunc(instr, realelty);
+                    realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
                 if (intcast) {
                     ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo()));
                     oldval = mark_julia_slot(intcast, jltype, NULL, tbaa_stack);
@@ -1739,7 +1826,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                 });
                 Done = ctx.builder.CreateNot(Done);
             }
-            cast<PHINode>(Compare)->addIncoming(instr, ctx.builder.GetInsertBlock());
+            if (needloop)
+                ctx.builder.CreateCondBr(Done, DoneBB, BB);
+            else
+                ctx.builder.CreateBr(DoneBB);
+            if (needloop)
+                CmpPhi->addIncoming(instr, ctx.builder.GetInsertBlock());
         }
         if (Succ != nullptr) {
             Current->addIncoming(instr, ctx.builder.GetInsertBlock());
@@ -1747,31 +1839,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             Succ->addIncoming(Success, ctx.builder.GetInsertBlock());
             Success = Succ;
         }
-        if (needloop)
-            ctx.builder.CreateCondBr(Done, DoneBB, BB);
-        else
-            ctx.builder.CreateBr(DoneBB);
-    }
-    else {
-#if JL_LLVM_VERSION >= 130000
-        instr = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order);
-#else
-        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Order);
-        store->setAlignment(Align(alignment));
-        instr = store;
-#endif
-        if (aliasscope)
-            instr->setMetadata("noalias", aliasscope);
-        if (tbaa)
-            tbaa_decorate(tbaa, instr);
-        assert(DoneBB == nullptr);
     }
     if (DoneBB)
         ctx.builder.SetInsertPoint(DoneBB);
     if (needlock)
         emit_lockstate_value(ctx, parent, false);
     if (parent != NULL) {
-        BasicBlock *DoneBB;
         if (isreplacefield) {
             // TOOD: avoid this branch if we aren't making a write barrier
             BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg_wb", ctx.f);
@@ -1788,7 +1861,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             ctx.builder.SetInsertPoint(DoneBB);
         }
     }
-    if (!issetfield) {
+    if (ismodifyfield) {
+        jl_cgval_t argv[2] = { oldval, rhs };
+        jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+        oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+    }
+    else if (!issetfield) { // swapfield or replacefield
         if (realelty != elty)
             instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
         if (intcast) {
@@ -3188,12 +3266,13 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg
 
 static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         jl_datatype_t *sty, const jl_cgval_t &strct, size_t idx0,
-        const jl_cgval_t &rhs, const jl_cgval_t &cmp,
+        jl_cgval_t rhs, jl_cgval_t cmp,
         bool checked, bool wb, AtomicOrdering Order, AtomicOrdering FailOrder,
-        bool needlock, bool issetfield, bool isreplacefield)
+        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
+        const std::string &fname)
 {
     if (!sty->name->mutabl && checked) {
-        std::string msg = "setfield!: immutable struct of type "
+        std::string msg = fname + ": immutable struct of type "
             + std::string(jl_symbol_name(sty->name->name))
             + " cannot be changed";
         emit_error(ctx, msg);
@@ -3217,29 +3296,55 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty);
         if (rhs_union.typ == jl_bottom_type)
             return jl_cgval_t();
-        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
-        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
         Value *ptindex = ctx.builder.CreateInBoundsGEP(T_int8, emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8), ConstantInt::get(T_size, fsz));
         if (needlock)
             emit_lockstate_value(ctx, strct, true);
+        // modifyfield! retries from a dedicated block entered with the lock held;
+        // looping to the current block would redo the locking and everything before it
+        BasicBlock *ModifyBB = nullptr;
+        if (ismodifyfield) {
+            ModifyBB = BasicBlock::Create(jl_LLVMContext, "modify_xchg", ctx.f);
+            ctx.builder.CreateBr(ModifyBB);
+            ctx.builder.SetInsertPoint(ModifyBB);
+        }
         jl_cgval_t oldval = rhs;
         if (!issetfield)
             oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true);
         Value *Success;
         BasicBlock *DoneBB;
-        if (isreplacefield) {
-            BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+        if (isreplacefield || ismodifyfield) {
+            if (ismodifyfield) {
+                if (needlock)
+                    emit_lockstate_value(ctx, strct, false);
+                jl_cgval_t argv[3] = { cmp, oldval, rhs };
+                Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+                rhs = mark_julia_type(ctx, callval, true, jl_any_type);
+                if (!jl_subtype(rhs.typ, jfty)) {
+                    emit_typecheck(ctx, rhs, jfty, fname);
+                    rhs = update_julia_type(ctx, rhs, jfty);
+                }
+                rhs_union = convert_julia_type(ctx, rhs, jfty);
+                if (rhs_union.typ == jl_bottom_type)
+                    return jl_cgval_t();
+                if (needlock)
+                    emit_lockstate_value(ctx, strct, true);
+                cmp = oldval;
+                oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true);
+            }
+            BasicBlock *XchgBB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
             DoneBB = BasicBlock::Create(jl_LLVMContext, "done_xchg", ctx.f);
             Success = emit_f_is(ctx, oldval, cmp);
-            ctx.builder.CreateCondBr(Success, BB, DoneBB);
-            ctx.builder.SetInsertPoint(BB);
+            ctx.builder.CreateCondBr(Success, XchgBB, ismodifyfield ? ModifyBB : DoneBB);
+            ctx.builder.SetInsertPoint(XchgBB);
         }
+        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
+        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
         tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
         // copy data
         if (!rhs.isghost) {
             emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr);
         }
-        if (isreplacefield) {
+        if (isreplacefield || ismodifyfield) {
             ctx.builder.CreateBr(DoneBB);
             ctx.builder.SetInsertPoint(DoneBB);
         }
@@ -3251,6 +3349,11 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
             jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty);
             oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
+        else if (ismodifyfield) {
+            jl_cgval_t argv[2] = {oldval, rhs};
+            jl_datatype_t *rettyp = jl_apply_modify_type(jfty);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
         return oldval;
     }
     else {
@@ -3261,7 +3364,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         return typed_store(ctx, addr, NULL, rhs, cmp, jfty, strct.tbaa, nullptr,
             wb ? maybe_bitcast(ctx, data_pointer(ctx, strct), T_pjlvalue) : nullptr,
             isboxed, Order, FailOrder, align,
-            needlock, issetfield, isreplacefield, maybe_null);
+            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, fname);
     }
 }
 
@@ -3440,7 +3543,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             else
                 need_wb = false;
             emit_typecheck(ctx, rhs, jl_svecref(sty->types, i), "new");
-            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), false, need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false);
+            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), false, need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false, false, false, "");
         }
         return strctinfo;
     }
diff --git a/src/codegen.cpp b/src/codegen.cpp
index a8cec7abc536a..5c4218a82222e 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -2986,7 +2986,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                                     false,
                                     true,
                                     false,
-                                    false);
+                                    false,
+                                    false,
+                                    false,
+                                    "");
                     }
                 }
                 *ret = ary;
@@ -3128,18 +3131,21 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
     else if ((f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) ||
              (f == jl_builtin_swapfield && (nargs == 3 || nargs == 4)) ||
-             (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6))) {
+             (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6)) ||
+             (true && f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5))) {
         bool issetfield = f == jl_builtin_setfield;
         bool isreplacefield = f == jl_builtin_replacefield;
+        bool isswapfield = f == jl_builtin_swapfield;
+        bool ismodifyfield = f == jl_builtin_modifyfield;
         const jl_cgval_t undefval;
         const jl_cgval_t &obj = argv[1];
         const jl_cgval_t &fld = argv[2];
-        jl_cgval_t val = argv[isreplacefield ? 4 : 3];
-        const jl_cgval_t &cmp = isreplacefield ? argv[3] : undefval;
+        jl_cgval_t val = argv[isreplacefield || ismodifyfield ? 4 : 3];
+        const jl_cgval_t &cmp = isreplacefield || ismodifyfield ? argv[3] : undefval;
         enum jl_memory_order order = jl_memory_order_notatomic;
-        const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : "swapfield!";
-        if (nargs >= (isreplacefield ? 5 : 4)) {
-            const jl_cgval_t &ord = argv[isreplacefield ? 5 : 4];
+        const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : "modifyfield!";
+        if (nargs >= (isreplacefield || ismodifyfield ? 5 : 4)) {
+            const jl_cgval_t &ord = argv[isreplacefield || ismodifyfield ? 5 : 4];
             emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
             if (!ord.constant)
                 return false;
@@ -3173,7 +3179,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             if (idx != -1) {
                 jl_value_t *ft = jl_svecref(uty->types, idx);
                 if (!jl_has_free_typevars(ft)) {
-                    if (!jl_subtype(val.typ, ft)) {
+                    if (!ismodifyfield && !jl_subtype(val.typ, ft)) {
                         emit_typecheck(ctx, val, ft, fname);
                         val = update_julia_type(ctx, val, ft);
                     }
@@ -3189,8 +3195,11 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                                 isreplacefield ?
                                 (isatomic ? "replacefield!: atomic field cannot be written non-atomically"
                                           : "replacefield!: non-atomic field cannot be written atomically") :
+                                isswapfield ?
                                 (isatomic ? "swapfield!: atomic field cannot be written non-atomically"
-                                          : "swapfield!: non-atomic field cannot be written atomically"));
+                                          : "swapfield!: non-atomic field cannot be written atomically") :
+                                (isatomic ? "modifyfield!: atomic field cannot be written non-atomically"
+                                          : "modifyfield!: non-atomic field cannot be written atomically"));
                         *ret = jl_cgval_t();
                         return true;
                     }
@@ -3208,7 +3217,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                             (needlock || fail_order <= jl_memory_order_notatomic)
                             ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
                             : get_llvm_atomic_order(fail_order),
-                            needlock, issetfield, isreplacefield);
+                            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield,
+                            fname);
                     return true;
                 }
             }
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index e1d821a34e42d..7883542c74a13 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -684,7 +684,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         if (!type_is_ghost(ptrty)) {
             thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
             typed_store(ctx, thePtr, im1, x, jl_cgval_t(), ety, tbaa_data, nullptr, nullptr, isboxed,
-                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false);
+                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false, false, false, "");
         }
     }
     return e;
@@ -778,15 +778,18 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
 // e[i] = x (set)
 // e[i] <= x (swap)
 // e[i] y => x (replace)
-static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs)
+// e[i] = y(e[i], x) (modify)
+static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs)
 {
     bool issetfield = f == atomic_pointerset;
     bool isreplacefield = f == atomic_pointerreplace;
+    bool isswapfield = f == atomic_pointerswap;
+    bool ismodifyfield = f == atomic_pointermodify;
     const jl_cgval_t undefval;
     const jl_cgval_t &e = argv[0];
-    const jl_cgval_t &x = isreplacefield ? argv[2] : argv[1];
-    const jl_cgval_t &y = isreplacefield ? argv[1] : undefval;
-    const jl_cgval_t &ord = isreplacefield ? argv[3] : argv[2];
+    const jl_cgval_t &x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
+    const jl_cgval_t &y = isreplacefield || ismodifyfield ? argv[1] : undefval;
+    const jl_cgval_t &ord = isreplacefield || ismodifyfield ? argv[3] : argv[2];
     const jl_cgval_t &failord = isreplacefield ? argv[4] : undefval;
 
     jl_value_t *aty = e.typ;
@@ -814,7 +817,7 @@ static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const j
         Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
         bool isboxed = true;
         jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, tbaa_data, nullptr, nullptr, isboxed,
-                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, false);
+                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, "atomic_pointermodify");
         if (issetfield)
             ret = e;
         return ret;
@@ -826,7 +829,8 @@ static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const j
         emit_error(ctx, msg);
         return jl_cgval_t();
     }
-    emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
+    if (!ismodifyfield)
+        emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
 
     size_t nb = jl_datatype_size(ety);
     if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) {
@@ -847,7 +851,7 @@ static jl_cgval_t emit_atomic_pointerset(jl_codectx_t &ctx, intrinsic f, const j
         assert(!isboxed);
         Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
         jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, tbaa_data, nullptr, nullptr, isboxed,
-                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, false);
+                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, "atomic_pointermodify");
         if (issetfield)
             ret = e;
         return ret;
@@ -1087,10 +1091,9 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         return emit_atomic_pointerref(ctx, argv);
     case atomic_pointerset:
     case atomic_pointerswap:
-    case atomic_pointerreplace:
-        return emit_atomic_pointerset(ctx, f, argv, nargs);
     case atomic_pointermodify:
-        return emit_runtime_call(ctx, f, argv, nargs);
+    case atomic_pointerreplace:
+        return emit_atomic_pointerop(ctx, f, argv, nargs);
     case bitcast:
         return generic_bitcast(ctx, argv);
     case trunc_int:
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index be78be74172cb..741bb5448b847 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -142,15 +142,25 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerswap(jl_value_t *p, jl_value_t *x, jl_
     return y;
 }
 
-JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, jl_value_t *x, jl_value_t *order_sym)
+JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, jl_value_t *x, jl_value_t *order)
 {
-    // n.b. we use seq_cst always here, but need to verify the order sym
-    // against the weaker load-only that happens first
-    if (order_sym == (jl_value_t*)acquire_release_sym)
-        order_sym = (jl_value_t*)acquire_sym;
-    jl_value_t *expected = jl_atomic_pointerref(p, order_sym);
+    JL_TYPECHK(atomic_pointermodify, pointer, p);
+    JL_TYPECHK(atomic_pointermodify, symbol, order)
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 1, 1);
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     char *pp = (char*)jl_unbox_long(p);
+    jl_value_t *expected;
+    if (ety == (jl_value_t*)jl_any_type) {
+        expected = jl_atomic_load((jl_value_t**)pp);
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointermodify: invalid pointer");
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
+            jl_error("atomic_pointermodify: invalid pointer for atomic operation");
+        expected = jl_atomic_new_bits(ety, pp);
+    }
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 2);
     args[0] = expected;
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index 7fb6bd651ebc0..589590cf78d14 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -191,8 +191,8 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co
                 @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerswap(p, T(100), :sequentially_consistent)
-                @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
-                @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent)
                 @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent)
                 @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[]

From 4d5d99d9628c5cf9a838fae71d4c5b5ac3c6bf0f Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Thu, 5 Aug 2021 18:03:39 -0400
Subject: [PATCH 40/65] simplify and improve type intersection algorithm a bit

This removes some code and makes a class of results more conservative,
fixing some potential cases of unsoundness.

(cherry picked from commit 13bcdf124f7b8baa7fca517ec015ec4b6eb34faa)
---
 src/subtype.c   | 128 +++++++++++++++++-------------------------------
 test/subtype.jl |  60 +++++++++++++++++------
 2 files changed, 92 insertions(+), 96 deletions(-)

diff --git a/src/subtype.c b/src/subtype.c
index 152d17daeaaaa..0a20b0db09b55 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -68,15 +68,14 @@ typedef struct jl_varbinding_t {
     int8_t occurs_inv;  // occurs in invariant position
     int8_t occurs_cov;  // # of occurrences in covariant position
     int8_t concrete;    // 1 if another variable has a constraint forcing this one to be concrete
-    // in covariant position, we need to try constraining a variable in different ways:
-    // 0 - unconstrained
-    // 1 - less than
-    // 2 - greater than
-    // 3 - inexpressible - occurs when the var has non-trivial overlap with another type,
-    //                     and we would need to return `intersect(var,other)`. in this case
-    //                     we choose to over-estimate the intersection by returning the var.
+    // constraintkind: in covariant position, we try three different ways to compute var ∩ type:
+    // let ub = var.ub ∩ type
+    // 0 - var.ub <: type ? var : ub
+    // 1 - var.ub = ub; return var
+    // 2 - either (var.ub = ub; return var), or return ub
     int8_t constraintkind;
     int8_t intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
+    int8_t limited;
     int16_t depth0;         // # of invariant constructors nested around the UnionAll type for this var
     // when this variable's integer value is compared to that of another,
     // it equals `other + offset`. used by vararg length parameters.
@@ -759,7 +758,7 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
 static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
     u = unalias_unionall(u, e);
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0,
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
                            R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
@@ -2318,67 +2317,31 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         JL_GC_POP();
         return ub;
     }
-    else if (bb->constraintkind == 0) {
-        if (!jl_is_typevar(bb->ub) && !jl_is_typevar(a)) {
-            if (try_subtype_in_env(bb->ub, a, e, 0, d))
-                return (jl_value_t*)b;
-        }
-        return R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-    }
-    else if (bb->concrete || bb->constraintkind == 1) {
-        jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-        if (ub == jl_bottom_type)
-            return jl_bottom_type;
-        JL_GC_PUSH1(&ub);
-        if (!R && !subtype_bounds_in_env(bb->lb, a, e, 0, d)) {
-            // this fixes issue #30122. TODO: better fix for R flag.
-            JL_GC_POP();
-            return jl_bottom_type;
-        }
-        JL_GC_POP();
-        set_bound(&bb->ub, ub, b, e);
-        return (jl_value_t*)b;
-    }
-    else if (bb->constraintkind == 2) {
-        // TODO: removing this case fixes many test_brokens in test/subtype.jl
-        // but breaks other tests.
-        if (!subtype_bounds_in_env(a, bb->ub, e, 1, d)) {
-            // mark var as unsatisfiable by making it circular
-            bb->lb = (jl_value_t*)b;
-            return jl_bottom_type;
-        }
-        jl_value_t *lb = simple_join(bb->lb, a);
-        set_bound(&bb->lb, lb, b, e);
-        return a;
-    }
-    assert(bb->constraintkind == 3);
     jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
     if (ub == jl_bottom_type)
         return jl_bottom_type;
-    if (jl_is_typevar(a))
-        return (jl_value_t*)b;
-    if (ub == a) {
-        if (bb->lb == jl_bottom_type) {
-            set_bound(&bb->ub, a, b, e);
+    if (bb->constraintkind == 0) {
+        JL_GC_PUSH1(&ub);
+        if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e, 0, d)) {
+            JL_GC_POP();
             return (jl_value_t*)b;
         }
+        JL_GC_POP();
         return ub;
     }
-    else if (bb->ub == bb->lb) {
-        return ub;
+    else if (bb->constraintkind == 1) {
+        set_bound(&bb->ub, ub, b, e);
+        return (jl_value_t*)b;
     }
-    root = NULL;
-    JL_GC_PUSH2(&root, &ub);
-    save_env(e, &root, &se);
-    jl_value_t *ii = R ? intersect_aside(a, bb->lb, e, 1, d) : intersect_aside(bb->lb, a, e, 0, d);
-    if (ii == jl_bottom_type) {
-        restore_env(e, root, &se);
-        ii = (jl_value_t*)b;
+    assert(bb->constraintkind == 2);
+    if (!jl_is_typevar(a)) {
+        if (ub == a && bb->lb != jl_bottom_type)
+            return ub;
+        else if (jl_egal(bb->ub, bb->lb))
+            return ub;
         set_bound(&bb->ub, ub, b, e);
     }
-    free_env(&se);
-    JL_GC_POP();
-    return ii;
+    return (jl_value_t*)b;
 }
 
 // test whether `var` occurs inside constructors. `want_inv` tests only inside
@@ -2422,7 +2385,7 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want
 }
 
 // Caller might not have rooted `res`
-static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_stenv_t *e)
+static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_unionall_t *u, jl_stenv_t *e)
 {
     jl_value_t *varval = NULL;
     jl_tvar_t *newvar = vb->var;
@@ -2435,7 +2398,10 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         // given x<:T<:x, substitute x for T
         varval = vb->ub;
     }
-    else if (!vb->occurs_inv && is_leaf_bound(vb->ub)) {
+    // TODO: `vb.occurs_cov == 1` here allows substituting Tuple{<:X} => Tuple{X},
+    // which is valid but changes some ambiguity errors so we don't need to do it yet.
+    else if ((/*vb->occurs_cov == 1 || */is_leaf_bound(vb->ub)) &&
+             !var_occurs_invariant(u->body, u->var, 0)) {
         // replace T<:x with x in covariant position when possible
         varval = vb->ub;
     }
@@ -2453,9 +2419,8 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         }
     }
 
-    // prefer generating a fresh typevar, to avoid repeated renaming if the result
-    // is compared to one of the intersected types later.
-    if (!varval)
+    // TODO: this can prevent us from matching typevar identities later
+    if (!varval && (vb->lb != vb->var->lb || vb->ub != vb->var->ub))
         newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub);
 
     // remove/replace/rewrap free occurrences of this var in the environment
@@ -2573,8 +2538,10 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
     int envsize = 0;
     while (btemp != NULL) {
         envsize++;
-        if (envsize > 150)
+        if (envsize > 120) {
+            vb->limited = 1;
             return t;
+        }
         if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var ||
             btemp->ub == (jl_value_t*)u->var) {
             u = rename_unionall(u);
@@ -2624,7 +2591,7 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
     }
     if (res != jl_bottom_type)
         // res is rooted by callee
-        res = finish_unionall(res, vb, e);
+        res = finish_unionall(res, vb, u, e);
     JL_GC_POP();
     return res;
 }
@@ -2633,16 +2600,21 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
 {
     jl_value_t *res=NULL, *res2=NULL, *save=NULL, *save2=NULL;
     jl_savedenv_t se, se2;
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0,
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
                            R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
     JL_GC_PUSH6(&res, &save2, &vb.lb, &vb.ub, &save, &vb.innervars);
     save_env(e, &save, &se);
     res = intersect_unionall_(t, u, e, R, param, &vb);
-    if (res != jl_bottom_type) {
+    if (vb.limited) {
+        // if the environment got too big, avoid tree recursion and propagate the flag
+        if (e->vars)
+            e->vars->limited = 1;
+    }
+    else if (res != jl_bottom_type) {
         if (vb.concrete || vb.occurs_inv>1 || u->var->lb != jl_bottom_type || (vb.occurs_inv && vb.occurs_cov)) {
             restore_env(e, NULL, &se);
             vb.occurs_cov = vb.occurs_inv = 0;
-            vb.constraintkind = 3;
+            vb.constraintkind = vb.concrete ? 1 : 2;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
         else if (vb.occurs_cov) {
@@ -2652,17 +2624,10 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
             vb.lb = u->var->lb; vb.ub = u->var->ub;
             vb.constraintkind = 1;
             res2 = intersect_unionall_(t, u, e, R, param, &vb);
-            if (res2 == jl_bottom_type) {
-                restore_env(e, save, &se);
-                vb.occurs_cov = vb.occurs_inv = 0;
-                vb.lb = u->var->lb; vb.ub = u->var->ub;
-                vb.constraintkind = 2;
-                res2 = intersect_unionall_(t, u, e, R, param, &vb);
-                if (res2 == jl_bottom_type)
-                    restore_env(e, save2, &se2);
-            }
             if (res2 != jl_bottom_type)
                 res = res2;
+            else
+                restore_env(e, save2, &se2);
             free_env(&se2);
         }
     }
@@ -3049,14 +3014,13 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                 jl_value_t *ub=NULL, *lb=NULL;
                 JL_GC_PUSH2(&lb, &ub);
                 ub = intersect_aside(xub, yub, e, 0, xx ? xx->depth0 : 0);
-                if (xlb == y)
+                if (reachable_var(xlb, (jl_tvar_t*)y, e))
                     lb = ylb;
                 else
                     lb = simple_join(xlb, ylb);
                 if (yy) {
-                    if (!subtype_by_bounds(lb, y, e))
-                        yy->lb = lb;
-                    if (!subtype_by_bounds(y, ub, e))
+                    yy->lb = lb;
+                    if (!reachable_var(ub, (jl_tvar_t*)y, e))
                         yy->ub = ub;
                     assert(yy->ub != y);
                     assert(yy->lb != y);
diff --git a/test/subtype.jl b/test/subtype.jl
index 17d2b491003fe..5b8ebc6744e72 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -587,7 +587,7 @@ function test_old()
     @test !(Type{Tuple{Nothing}} <: Tuple{Type{Nothing}})
 end
 
-const menagerie =
+const easy_menagerie =
     Any[Bottom, Any, Int, Int8, Integer, Real,
         Array{Int,1}, AbstractArray{Int,1},
         Tuple{Int,Vararg{Integer}}, Tuple{Integer,Vararg{Int}}, Tuple{},
@@ -607,12 +607,14 @@ const menagerie =
         Array{(@UnionAll T<:Int T), 1},
         (@UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S}),
         Union{Int,Ref{Union{Int,Int8}}},
-        (@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}}),
         ]
 
-let new = Any[]
-    # add variants of each type
-    for T in menagerie
+const hard_menagerie =
+    Any[(@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}})]
+
+function add_variants!(types)
+    new = Any[]
+    for T in types
         push!(new, Ref{T})
         push!(new, Tuple{T})
         push!(new, Tuple{T,T})
@@ -620,9 +622,14 @@ let new = Any[]
         push!(new, @UnionAll S<:T S)
         push!(new, @UnionAll S<:T Ref{S})
     end
-    append!(menagerie, new)
+    append!(types, new)
 end
 
+add_variants!(easy_menagerie)
+add_variants!(hard_menagerie)
+
+const menagerie = [easy_menagerie; hard_menagerie]
+
 function test_properties()
     x→y = !x || y
     ¬T = @UnionAll X>:T Ref{X}
@@ -1057,14 +1064,15 @@ function test_intersection()
 end
 
 function test_intersection_properties()
-    approx = Tuple{Vector{Vector{T}} where T, Vector{Vector{T}} where T}
-    for T in menagerie
-        for S in menagerie
+    for i in eachindex(menagerie)
+        T = menagerie[i]
+        for j in eachindex(menagerie)
+            S = menagerie[j]
             I = _type_intersect(T,S)
             I2 = _type_intersect(S,T)
             @test isequal_type(I, I2)
-            if I == approx
-                # TODO: some of these cases give a conservative answer
+            if i > length(easy_menagerie) || j > length(easy_menagerie)
+                # TODO: these cases give a conservative answer
                 @test issub(I, T) || issub(I, S)
             else
                 @test issub(I, T) && issub(I, S)
@@ -1796,7 +1804,7 @@ let X1 = Tuple{AlmostLU, Vector{T}} where T,
     # TODO: the quality of this intersection is not great; for now just test that it
     # doesn't stack overflow
     @test I<:X1 || I<:X2
-    actual = Tuple{AlmostLU{S, X} where X<:Matrix{S}, Vector{S}} where S<:Union{Float32, Float64}
+    actual = Tuple{Union{AlmostLU{S, X} where X<:Matrix{S}, AlmostLU{S, <:Matrix}}, Vector{S}} where S<:Union{Float32, Float64}
     @test I == actual
 end
 
@@ -1898,8 +1906,8 @@ end
 # issue #39948
 let A = Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector},
     I = typeintersect(A, Tuple{Vararg{Vector{T}}} where T)
-    @test_broken I <: A
-    @test_broken !Base.has_free_typevars(I)
+    @test I <: A
+    @test !Base.has_free_typevars(I)
 end
 
 # issue #8915
@@ -1927,3 +1935,27 @@ let A = Tuple{Ref{T}, Vararg{T}} where T,
     J = typeintersect(A, C)
     @test_broken J != Union{}
 end
+
+let A = Tuple{Dict{I,T}, I, T} where T where I,
+    B = Tuple{AbstractDict{I,T}, T, I} where T where I
+    # TODO: we should probably have I == T here
+    @test typeintersect(A, B) == Tuple{Dict{I,T}, I, T} where {I, T}
+end
+
+let A = Tuple{UnionAll, Vector{Any}},
+    B = Tuple{Type{T}, T} where T<:AbstractArray,
+    I = typeintersect(A, B)
+    @test !isconcretetype(I)
+    @test_broken I == Tuple{Type{T}, Vector{Any}} where T<:AbstractArray
+end
+
+@testintersect(Tuple{Type{Vector{<:T}}, T} where {T<:Integer},
+               Tuple{Type{T}, AbstractArray} where T<:Array,
+               Bottom)
+
+struct S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}
+end
+
+@testintersect(Tuple{Type{S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}} where _Z14 where _Z13 where _Z12 where _Z11 where _Z10 where _Z9 where _Z8 where _Z7 where _Z6 where _Z5 where _Z4 where _Z3 where _Z2 where _Z1 where _Z where _Y where _X where _W where _V where _U where _T where _S where _R where _Q where _P where _O where _N where _M where _L where _K where _J where _I where _H where _G where _F where _E where _D where _C where _B where _A, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any},
+               Tuple{Type{S40{A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1}, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1,
+               Bottom)

From 447f822fd19897f1d49c5eb454d0da808845da6f Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Thu, 12 Aug 2021 13:04:53 -0400
Subject: [PATCH 41/65] give wider/safer intersection result for vars used in
 both invariant and covariant position

fixes #41738

(cherry picked from commit 71757cd618472d291b168671fc3f5e75611b4fd8)
---
 src/subtype.c   | 62 ++++++++++++++++++++++++++-----------------------
 test/subtype.jl | 18 +++++++++++---
 2 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/src/subtype.c b/src/subtype.c
index 0a20b0db09b55..c3512eeb17dac 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -101,6 +101,7 @@ typedef struct jl_stenv_t {
     int ignore_free;          // treat free vars as black boxes; used during intersection
     int intersection;         // true iff subtype is being called from intersection
     int emptiness_only;       // true iff intersection only needs to test for emptiness
+    int triangular;           // when intersecting Ref{X} with Ref{<:Y}
 } jl_stenv_t;
 
 // state manipulation utilities
@@ -1444,6 +1445,7 @@ static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz)
     e->ignore_free = 0;
     e->intersection = 0;
     e->emptiness_only = 0;
+    e->triangular = 0;
     e->Lunions.depth = 0;      e->Runions.depth = 0;
     e->Lunions.more = 0;       e->Runions.more = 0;
     e->Lunions.used = 0;       e->Runions.used = 0;
@@ -2203,7 +2205,7 @@ static void set_bound(jl_value_t **bound, jl_value_t *val, jl_tvar_t *v, jl_sten
         return;
     jl_varbinding_t *btemp = e->vars;
     while (btemp != NULL) {
-        if (btemp->lb == (jl_value_t*)v && btemp->ub == (jl_value_t*)v &&
+        if ((btemp->lb == (jl_value_t*)v || btemp->ub == (jl_value_t*)v) &&
             in_union(val, (jl_value_t*)btemp->var))
             return;
         btemp = btemp->prev;
@@ -2255,6 +2257,21 @@ static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e)
     return reachable_var(xv->ub, y, e) || reachable_var(xv->lb, y, e);
 }
 
+// check whether setting v == t implies v == SomeType{v}, which is unsatisfiable.
+static int check_unsat_bound(jl_value_t *t, jl_tvar_t *v, jl_stenv_t *e) JL_NOTSAFEPOINT
+{
+    if (var_occurs_inside(t, v, 0, 0))
+        return 1;
+    jl_varbinding_t *btemp = e->vars;
+    while (btemp != NULL) {
+        if (btemp->lb == (jl_value_t*)v && btemp->ub == (jl_value_t*)v &&
+            var_occurs_inside(t, btemp->var, 0, 0))
+            return 1;
+        btemp = btemp->prev;
+    }
+    return 0;
+}
+
 static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int8_t R, int param)
 {
     jl_varbinding_t *bb = lookup(e, b);
@@ -2284,7 +2301,9 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
             ub = a;
         }
         else {
+            e->triangular++;
             ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
+            e->triangular--;
             save_env(e, &root, &se);
             int issub = subtype_in_env_existential(bb->lb, ub, e, 0, d);
             restore_env(e, root, &se);
@@ -2296,20 +2315,10 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         }
         if (ub != (jl_value_t*)b) {
             if (jl_has_free_typevars(ub)) {
-                // constraint X == Ref{X} is unsatisfiable. also check variables set equal to X.
-                if (var_occurs_inside(ub, b, 0, 0)) {
+                if (check_unsat_bound(ub, b, e)) {
                     JL_GC_POP();
                     return jl_bottom_type;
                 }
-                jl_varbinding_t *btemp = e->vars;
-                while (btemp != NULL) {
-                    if (btemp->lb == (jl_value_t*)b && btemp->ub == (jl_value_t*)b &&
-                        var_occurs_inside(ub, btemp->var, 0, 0)) {
-                        JL_GC_POP();
-                        return jl_bottom_type;
-                    }
-                    btemp = btemp->prev;
-                }
             }
             bb->ub = ub;
             bb->lb = ub;
@@ -2320,7 +2329,13 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
     jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
     if (ub == jl_bottom_type)
         return jl_bottom_type;
-    if (bb->constraintkind == 0) {
+    if (bb->constraintkind == 1 || e->triangular) {
+        if (e->triangular && check_unsat_bound(ub, b, e))
+            return jl_bottom_type;
+        set_bound(&bb->ub, ub, b, e);
+        return (jl_value_t*)b;
+    }
+    else if (bb->constraintkind == 0) {
         JL_GC_PUSH1(&ub);
         if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e, 0, d)) {
             JL_GC_POP();
@@ -2329,10 +2344,6 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         JL_GC_POP();
         return ub;
     }
-    else if (bb->constraintkind == 1) {
-        set_bound(&bb->ub, ub, b, e);
-        return (jl_value_t*)b;
-    }
     assert(bb->constraintkind == 2);
     if (!jl_is_typevar(a)) {
         if (ub == a && bb->lb != jl_bottom_type)
@@ -2598,11 +2609,11 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
 
 static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
-    jl_value_t *res=NULL, *res2=NULL, *save=NULL, *save2=NULL;
-    jl_savedenv_t se, se2;
+    jl_value_t *res=NULL, *save=NULL;
+    jl_savedenv_t se;
     jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
                            R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
-    JL_GC_PUSH6(&res, &save2, &vb.lb, &vb.ub, &save, &vb.innervars);
+    JL_GC_PUSH5(&res, &vb.lb, &vb.ub, &save, &vb.innervars);
     save_env(e, &save, &se);
     res = intersect_unionall_(t, u, e, R, param, &vb);
     if (vb.limited) {
@@ -2617,18 +2628,11 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
             vb.constraintkind = vb.concrete ? 1 : 2;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
-        else if (vb.occurs_cov) {
-            save_env(e, &save2, &se2);
+        else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0)) {
             restore_env(e, save, &se);
             vb.occurs_cov = vb.occurs_inv = 0;
-            vb.lb = u->var->lb; vb.ub = u->var->ub;
             vb.constraintkind = 1;
-            res2 = intersect_unionall_(t, u, e, R, param, &vb);
-            if (res2 != jl_bottom_type)
-                res = res2;
-            else
-                restore_env(e, save2, &se2);
-            free_env(&se2);
+            res = intersect_unionall_(t, u, e, R, param, &vb);
         }
     }
     free_env(&se);
diff --git a/test/subtype.jl b/test/subtype.jl
index 5b8ebc6744e72..d403716646de0 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -1577,7 +1577,7 @@ f31082(::Pair{B, C}, ::C, ::C) where {B, C} = 1
                Tuple{Type{Val{T}},Int,T} where T)
 @testintersect(Tuple{Type{Val{T}},Integer,T} where T,
                Tuple{Type,Int,Integer},
-               Tuple{Type{Val{T}},Int,T} where T<:Integer)
+               Tuple{Type{Val{T}},Int,Integer} where T)
 @testintersect(Tuple{Type{Val{T}},Integer,T} where T>:Integer,
                Tuple{Type,Int,Integer},
                Tuple{Type{Val{T}},Int,Integer} where T>:Integer)
@@ -1866,7 +1866,7 @@ let A = Tuple{Type{T} where T<:Ref, Ref, Union{T, Union{Ref{T}, T}} where T<:Ref
     I = typeintersect(A,B)
     # this was a case where <: disagreed with === (due to a badly-normalized type)
     @test I == typeintersect(A,B)
-    @test I == Tuple{Type{T}, Ref{T}, Union{Ref{T}, T}} where T<:Ref
+    @test I == Tuple{Type{T}, Ref{T}, Ref} where T<:Ref
 end
 
 # issue #39218
@@ -1946,7 +1946,7 @@ let A = Tuple{UnionAll, Vector{Any}},
     B = Tuple{Type{T}, T} where T<:AbstractArray,
     I = typeintersect(A, B)
     @test !isconcretetype(I)
-    @test_broken I == Tuple{Type{T}, Vector{Any}} where T<:AbstractArray
+    @test I == Tuple{Type{T}, Vector{Any}} where T<:AbstractArray
 end
 
 @testintersect(Tuple{Type{Vector{<:T}}, T} where {T<:Integer},
@@ -1959,3 +1959,15 @@ end
 @testintersect(Tuple{Type{S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}} where _Z14 where _Z13 where _Z12 where _Z11 where _Z10 where _Z9 where _Z8 where _Z7 where _Z6 where _Z5 where _Z4 where _Z3 where _Z2 where _Z1 where _Z where _Y where _X where _W where _V where _U where _T where _S where _R where _Q where _P where _O where _N where _M where _L where _K where _J where _I where _H where _G where _F where _E where _D where _C where _B where _A, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any},
                Tuple{Type{S40{A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1}, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1,
                Bottom)
+
+let A = Tuple{Any, Type{Ref{_A}} where _A},
+    B = Tuple{Type{T}, Type{<:Union{Ref{T}, T}}} where T,
+    I = typeintersect(A, B)
+    @test I != Union{}
+    # TODO: this intersection result is still too narrow
+    @test_broken Tuple{Type{Ref{Integer}}, Type{Ref{Integer}}} <: I
+end
+
+@testintersect(Tuple{Type{T}, T} where T<:(Tuple{Vararg{_A, _B}} where _B where _A),
+               Tuple{Type{Tuple{Vararg{_A, N}} where _A<:F}, Pair{N, F}} where F where N,
+               Bottom)

From 4cddbea919a3c8d8f835394e94d1d112e9adf2e5 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Thu, 26 Aug 2021 11:42:14 -0400
Subject: [PATCH 42/65] fix ptrhash_remove (#42009)

Same bug as 5e57c214f872083ccacafa0f753e794ec654a21a (#26833), same fix.

(cherry picked from commit 82c4a2739f97c942508b29e7d946a0504f45fb23)
---
 src/Makefile           |   4 +-
 src/support/htable.inc | 112 ++++++++++++++++++++++-------------------
 2 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index 8da3e4e6687d5..f5cc50e6129e9 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -259,10 +259,10 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
 $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
 
 # archive library file rules
-$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
+$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S *.inc) $(SRCDIR)/support/*.c
 	$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
-$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
+$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S *.inc) $(SRCDIR)/support/*.c
 	$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
 $(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
diff --git a/src/support/htable.inc b/src/support/htable.inc
index fa59624a4998f..7a9be2514e2f0 100644
--- a/src/support/htable.inc
+++ b/src/support/htable.inc
@@ -13,67 +13,77 @@
 static void **HTNAME##_lookup_bp_r(htable_t *h, void *key, void *ctx)   \
 {                                                                       \
     uint_t hv;                                                          \
-    size_t i, orig, index, iter;                                        \
+    size_t i, orig, index, iter, empty_slot;                            \
     size_t newsz, sz = hash_size(h);                                    \
     size_t maxprobe = max_probe(sz);                                    \
     void **tab = h->table;                                              \
     void **ol;                                                          \
                                                                         \
     hv = HFUNC((uintptr_t)key, ctx);                                    \
- retry_bp:                                                              \
-    iter = 0;                                                           \
-    index = (size_t)(hv & (sz-1)) * 2;                                  \
-    sz *= 2;                                                            \
-    orig = index;                                                       \
-                                                                        \
-    do {                                                                \
-        if (tab[index+1] == HT_NOTFOUND) {                              \
-            tab[index] = key;                                           \
-            return &tab[index+1];                                       \
+    while (1) {                                                         \
+        iter = 0;                                                       \
+        index = (size_t)(hv & (sz-1)) * 2;                              \
+        sz *= 2;                                                        \
+        orig = index;                                                   \
+        empty_slot = -1;                                                \
+                                                                        \
+        do {                                                            \
+            if (tab[index] == HT_NOTFOUND) {                            \
+                if (empty_slot == -1)                                   \
+                    empty_slot = index;                                 \
+                break;                                                  \
+            }                                                           \
+            if (tab[index+1] == HT_NOTFOUND) {                          \
+                if (empty_slot == -1)                                   \
+                    empty_slot = index;                                 \
+            }                                                           \
+                                                                        \
+            if (EQFUNC(key, tab[index], ctx))                           \
+                return &tab[index+1];                                   \
+                                                                        \
+            index = (index+2) & (sz-1);                                 \
+            iter++;                                                     \
+            if (iter > maxprobe)                                        \
+                break;                                                  \
+        } while (index != orig);                                        \
+                                                                        \
+        if (empty_slot != -1) {                                         \
+            tab[empty_slot] = key;                                      \
+            return &tab[empty_slot+1];                                  \
         }                                                               \
                                                                         \
-        if (EQFUNC(key, tab[index], ctx))                               \
-            return &tab[index+1];                                       \
-                                                                        \
-        index = (index+2) & (sz-1);                                     \
-        iter++;                                                         \
-        if (iter > maxprobe)                                            \
-            break;                                                      \
-    } while (index != orig);                                            \
-                                                                        \
-    /* table full */                                                    \
-    /* quadruple size, rehash, retry the insert */                      \
-    /* it's important to grow the table really fast; otherwise we waste */ \
-    /* lots of time rehashing all the keys over and over. */            \
-    sz = h->size;                                                       \
-    ol = h->table;                                                      \
-    if (sz < HT_N_INLINE)                                              \
-        newsz = HT_N_INLINE;                                            \
-    else if (sz >= (1<<19) || (sz <= (1<<8)))                           \
-        newsz = sz<<1;                                                  \
-    else                                                                \
-        newsz = sz<<2;                                                  \
-    /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \
-    tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                       \
-    if (tab == NULL)                                                    \
-        return NULL;                                                    \
-    for(i=0; i < newsz; i++)                                            \
-        tab[i] = HT_NOTFOUND;                                           \
-    h->table = tab;                                                     \
-    h->size = newsz;                                                    \
-    for(i=0; i < sz; i+=2) {                                            \
-        if (ol[i+1] != HT_NOTFOUND) {                                   \
-            (*HTNAME##_lookup_bp_r(h, ol[i], ctx)) = ol[i+1];           \
+        /* table full */                                                \
+        /* quadruple size, rehash, retry the insert */                  \
+        /* it's important to grow the table really fast; otherwise we waste */ \
+        /* lots of time rehashing all the keys over and over. */        \
+        sz = h->size;                                                   \
+        ol = h->table;                                                  \
+        if (sz < HT_N_INLINE)                                           \
+            newsz = HT_N_INLINE;                                        \
+        else if (sz >= (1<<19) || (sz <= (1<<8)))                       \
+            newsz = sz<<1;                                              \
+        else                                                            \
+            newsz = sz<<2;                                              \
+        /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \
+        tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                   \
+        if (tab == NULL)                                                \
+            return NULL;                                                \
+        for (i = 0; i < newsz; i++)                                     \
+            tab[i] = HT_NOTFOUND;                                       \
+        h->table = tab;                                                 \
+        h->size = newsz;                                                \
+        for (i = 0; i < sz; i += 2) {                                   \
+            if (ol[i+1] != HT_NOTFOUND) {                               \
+                (*HTNAME##_lookup_bp_r(h, ol[i], ctx)) = ol[i+1];       \
+            }                                                           \
         }                                                               \
-    }                                                                   \
-    if (ol != &h->_space[0])                                            \
-        LLT_FREE(ol);                                                   \
+        if (ol != &h->_space[0])                                        \
+            LLT_FREE(ol);                                               \
                                                                         \
-    sz = hash_size(h);                                                  \
-    maxprobe = max_probe(sz);                                           \
-    tab = h->table;                                                     \
-                                                                        \
-    goto retry_bp;                                                      \
+        sz = hash_size(h);                                              \
+        maxprobe = max_probe(sz);                                       \
+        tab = h->table;                                                 \
+    }                                                                   \
                                                                         \
     return NULL;                                                        \
 }                                                                       \

From e609a27cd27bd476556226e932890fe112f5bba6 Mon Sep 17 00:00:00 2001
From: Kristoffer <kcarlsson89@gmail.com>
Date: Tue, 31 Aug 2021 18:08:22 +0200
Subject: [PATCH 43/65] bump Pkg to latest v1.7

---
 .../Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/md5     | 1 +
 .../Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/sha512  | 1 +
 .../Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/md5     | 1 -
 .../Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/sha512  | 1 -
 stdlib/Pkg.version                                              | 2 +-
 5 files changed, 3 insertions(+), 3 deletions(-)
 create mode 100644 deps/checksums/Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/md5
 create mode 100644 deps/checksums/Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/sha512
 delete mode 100644 deps/checksums/Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/md5
 delete mode 100644 deps/checksums/Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/sha512

diff --git a/deps/checksums/Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/md5 b/deps/checksums/Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/md5
new file mode 100644
index 0000000000000..eee45b02bc589
--- /dev/null
+++ b/deps/checksums/Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/md5
@@ -0,0 +1 @@
+4f5b2832ade28f50ecb0a97bbe313749
diff --git a/deps/checksums/Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/sha512 b/deps/checksums/Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/sha512
new file mode 100644
index 0000000000000..e832a841c1f77
--- /dev/null
+++ b/deps/checksums/Pkg-9f30b81e367d7fed6ae13577cd8a434fc30e625e.tar.gz/sha512
@@ -0,0 +1 @@
+df42d57a4f438e193283e35ff307725ce548bb5742613135d63658dc070ce41d43c1d71545f64e8f0c0feaffa6b7cf9e12d5ff188c6bad83bb2ad1fb4118e177
diff --git a/deps/checksums/Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/md5 b/deps/checksums/Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/md5
deleted file mode 100644
index c44d4e836688b..0000000000000
--- a/deps/checksums/Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c848e464b9643d5396cd51ffe492f0b3
diff --git a/deps/checksums/Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/sha512 b/deps/checksums/Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/sha512
deleted file mode 100644
index e5c06cafc4e8f..0000000000000
--- a/deps/checksums/Pkg-df2476a01c01cdadd553dbf010b21fabe7e211d3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4ceb13b64bf66ae50fb0ee87e3dc3e57a52dbdf68592226d36c8d37aaa6179d205de269073fc08be137430deb6c06c6cda65f3e0215f9a32854f1ea07be7ca26
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index 937bae89a12d2..6b2533e141938 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,2 +1,2 @@
 PKG_BRANCH = release-1.7
-PKG_SHA1 = df2476a01c01cdadd553dbf010b21fabe7e211d3
+PKG_SHA1 = 9f30b81e367d7fed6ae13577cd8a434fc30e625e

From f14e50be7574b1be173f598d6d29e6ce274670f5 Mon Sep 17 00:00:00 2001
From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>
Date: Sat, 28 Aug 2021 14:36:12 +0900
Subject: [PATCH 44/65] improve atomic docs (#42024)

* improve atomic docs

* Update base/docs/basedocs.jl

Co-authored-by: Jameson Nash <vtjnash@gmail.com>

* Update base/docs/basedocs.jl

Co-authored-by: Jameson Nash <vtjnash@gmail.com>

Co-authored-by: Jameson Nash <vtjnash@gmail.com>
(cherry picked from commit f916b94298fa15407b139bbdd2d6079a65ae6ed8)
---
 base/docs/basedocs.jl | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index 35cf88e269c51..6cb4d69b0b746 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -1980,24 +1980,23 @@ setfield!
 
 These atomically perform the operations to simultaneously get and set a field:
 
-    y = getfield!(value, name)
+    y = getfield(value, name)
     setfield!(value, name, x)
     return y
-```
 """
 swapfield!
 
 """
-    modifyfield!(value, name::Symbol, op, x, [order::Symbol])
-    modifyfield!(value, i::Int, op, x, [order::Symbol])
+    modifyfield!(value, name::Symbol, op, x, [order::Symbol]) -> Pair
+    modifyfield!(value, i::Int, op, x, [order::Symbol]) -> Pair
 
 These atomically perform the operations to get and set a field after applying
 the function `op`.
 
-    y = getfield!(value, name)
+    y = getfield(value, name)
     z = op(y, x)
     setfield!(value, name, z)
-    return y, z
+    return y => z
 
 If supported by the hardware (for example, atomic increment), this may be
 optimized to the appropriate hardware instruction, otherwise it'll use a loop.
@@ -2006,18 +2005,19 @@ modifyfield!
 
 """
     replacefield!(value, name::Symbol, expected, desired,
-        [success_order::Symbol, [fail_order::Symbol=success_order]) =>
-        (old, Bool)
+                  [success_order::Symbol, [fail_order::Symbol=success_order]]) -> (; old, success::Bool)
+    replacefield!(value, i::Int, expected, desired,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]]) -> (; old, success::Bool)
 
 These atomically perform the operations to get and conditionally set a field to
 a given value.
 
-    y = getfield!(value, name, fail_order)
+    y = getfield(value, name, fail_order)
     ok = y === expected
     if ok
         setfield!(value, name, desired, success_order)
     end
-    return y, ok
+    return (; old = y, success = ok)
 
 If supported by the hardware, this may be optimized to the appropriate hardware
 instruction, otherwise it'll use a loop.

From 211692228f1f24e6bac21a56dd4f9840a9f22b1a Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Sat, 28 Aug 2021 05:55:19 -0400
Subject: [PATCH 45/65] fix missing layout allocation (#42035)

Fixes #41503

(cherry picked from commit 10755f7dccc6aa8a291b621366e8ca436d9f8d0c)
---
 src/jltypes.c            | 3 +--
 test/compiler/codegen.jl | 8 ++++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/jltypes.c b/src/jltypes.c
index f85c75a4a2d20..c4f5a1aff88bd 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -1547,9 +1547,8 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     // leading to incorrect layouts and data races (#40050: the A{T} should be
     // an isbitstype singleton of size 0)
     if (cacheable) {
-        if (dt->layout == NULL && !jl_is_primitivetype(dt) && ndt->types != NULL && ndt->isconcretetype) {
+        if (ndt->layout == NULL && ndt->types != NULL && ndt->isconcretetype)
             jl_compute_field_offsets(ndt);
-        }
         jl_cache_type_(ndt);
         JL_UNLOCK(&typecache_lock); // Might GC
     }
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index cc6095b867d41..7c89261bea92b 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -594,6 +594,14 @@ f41438(y) = y[].x
 @test f41438(Ref{A41438}(A41438(C_NULL))) === C_NULL
 @test f41438(Ref{B41438}(B41438(C_NULL))) === C_NULL
 
+const S41438 = Pair{Any, Ptr{T}} where T
+g41438() = Array{S41438,1}(undef,1)[1].first
+get_llvm(g41438, ()); # cause allocation of layout
+@test S41438.body.layout != C_NULL
+@test !Base.datatype_pointerfree(S41438.body)
+@test S41438{Int}.layout != C_NULL
+@test !Base.datatype_pointerfree(S41438{Int})
+
 # issue #41157
 f41157(a, b) = a[1] = b[1]
 @test_throws BoundsError f41157(Tuple{Int}[], Tuple{Union{}}[])

From 1a8387f550a5becdda7dca96e486c29d825afc0b Mon Sep 17 00:00:00 2001
From: Martin Holters <martin.holters@hsu-hh.de>
Date: Mon, 30 Aug 2021 22:44:44 +0200
Subject: [PATCH 46/65] Fix a precision issue in `abstract_iteration` (#41839)

If the first loop exits in the first iteration, the `statetype` is still
`Bottom`. In that case, the new `stateordonet` needs to be determined
with the two-arg version of `iterate` again.

Explicitly test that inference produces a sound (and reasonably precise)
result when splatting an iterator (in this case a long range) that
allows constant-propagation up to the `MAX_TUPLE_SPLAT` limit.

Fixes #41022

Co-authored-by: Jameson Nash <vtjnash@gmail.com>
(cherry picked from commit 92337b560a3a5b71181ed420bea1890d196517a6)
---
 base/compiler/abstractinterpretation.jl | 35 ++++++++++++++++++-------
 test/compiler/inference.jl              | 17 +++++++++++-
 2 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 0de1f45460be0..6efb1b2e4b236 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -796,9 +796,11 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
             return ret, AbstractIterationInfo(calls)
         end
         if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).MAX_TUPLE_SPLAT
+            stateordonet = stateordonet_widened
             break
         end
         if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
+            stateordonet = stateordonet_widened
             break
         end
         nstatetype = getfield_tfunc(stateordonet, Const(2))
@@ -816,27 +818,40 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
     end
     # From here on, we start asking for results on the widened types, rather than
     # the precise (potentially const) state type
-    statetype = widenconst(statetype)
-    valtype = widenconst(valtype)
+    # statetype and valtype are reinitialized in the first iteration below from the
+    # (widened) stateordonet, which has not yet been fully analyzed in the loop above
+    statetype = Bottom
+    valtype = Bottom
+    may_have_terminated = Nothing <: stateordonet
     while valtype !== Any
-        stateordonet = abstract_call_known(interp, iteratef, nothing, Any[Const(iteratef), itertype, statetype], sv).rt
-        stateordonet = widenconst(stateordonet)
-        nounion = typesubtract(stateordonet, Nothing, 0)
-        if !isa(nounion, DataType) || !(nounion <: Tuple) || isvatuple(nounion) || length(nounion.parameters) != 2
+        nounion = typeintersect(stateordonet, Tuple{Any,Any})
+        if nounion !== Union{} && !isa(nounion, DataType)
+            # nounion is of a type we cannot handle
             valtype = Any
             break
         end
-        if nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype
+        if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
+            # reached a fixpoint or iterator failed/gave invalid answer
             if typeintersect(stateordonet, Nothing) === Union{}
-                # Reached a fixpoint, but Nothing is not possible => iterator is infinite or failing
-                return Any[Bottom], nothing
+                # ... but cannot terminate
+                if !may_have_terminated
+                    #  ... and cannot have terminated prior to this loop
+                    return Any[Bottom], nothing
+                else
+                    # iterator may have terminated prior to this loop, but not during it
+                    valtype = Bottom
+                end
             end
             break
         end
         valtype = tmerge(valtype, nounion.parameters[1])
         statetype = tmerge(statetype, nounion.parameters[2])
+        stateordonet = abstract_call_known(interp, iteratef, nothing, Any[Const(iteratef), itertype, statetype], sv).rt
+        stateordonet = widenconst(stateordonet)
+    end
+    if valtype !== Union{}
+        push!(ret, Vararg{valtype})
     end
-    push!(ret, Vararg{valtype})
     return ret, nothing
 end
 
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 2e3dd0b45f875..567df71a7b16a 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -2880,9 +2880,24 @@ partial_return_2(x) = Val{partial_return_1(x)[2]}
 
 @test Base.return_types(partial_return_2, (Int,)) == Any[Type{Val{1}}]
 
-# Precision of abstract_iteration
+# Soundness and precision of abstract_iteration
+f41839() = (1:100...,)
+@test NTuple{100,Int} <: only(Base.return_types(f41839, ())) <: Tuple{Vararg{Int}}
 f_splat(x) = (x...,)
 @test Base.return_types(f_splat, (Pair{Int,Int},)) == Any[Tuple{Int, Int}]
+@test Base.return_types(f_splat, (UnitRange{Int},)) == Any[Tuple{Vararg{Int}}]
+struct Itr41839_1 end # empty or infinite
+Base.iterate(::Itr41839_1) = rand(Bool) ? (nothing, nothing) : nothing
+Base.iterate(::Itr41839_1, ::Nothing) = (nothing, nothing)
+@test Base.return_types(f_splat, (Itr41839_1,)) == Any[Tuple{}]
+struct Itr41839_2 end # empty or failing
+Base.iterate(::Itr41839_2) = rand(Bool) ? (nothing, nothing) : nothing
+Base.iterate(::Itr41839_2, ::Nothing) = error()
+@test Base.return_types(f_splat, (Itr41839_2,)) == Any[Tuple{}]
+struct Itr41839_3 end
+Base.iterate(::Itr41839_3 ) = rand(Bool) ? nothing : (nothing, 1)
+Base.iterate(::Itr41839_3 , i) = i < 16 ? (i, i + 1) : nothing
+@test only(Base.return_types(f_splat, (Itr41839_3,))) <: Tuple{Vararg{Union{Nothing, Int}}}
 
 # issue #32699
 f32699(a) = (id = a[1],).id

From 0c8508389708a7fb2a5600c6da17bb916b2ed683 Mon Sep 17 00:00:00 2001
From: Sebastian Stock <42280794+sostock@users.noreply.github.com>
Date: Tue, 31 Aug 2021 16:30:02 +0200
Subject: [PATCH 47/65] Fix depwarn grammar (#42069)

(cherry picked from commit bc4d49a67a406cb1cbb4c399d3663ea4d0998d45)
---
 stdlib/Test/src/Test.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index 461c48026944e..c916501dc2543 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -706,7 +706,7 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype)
         if isa(extype, Type)
             success =
                 if from_macroexpand && extype == LoadError && exc isa Exception
-                    Base.depwarn("macroexpand no longer throw a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
+                    Base.depwarn("macroexpand no longer throws a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
                     true
                 else
                     isa(exc, extype)

From 5e995510bd7f19fa28b47727e9c16c5f12bb51d1 Mon Sep 17 00:00:00 2001
From: Kristoffer Carlsson <kcarlsson89@gmail.com>
Date: Tue, 31 Aug 2021 17:34:01 +0200
Subject: [PATCH 48/65] make failure to precompile a method return a value
 instead of unconditionally warning (#41447)

(cherry picked from commit 613eea96ce06cf80f58f5c4c05c41e6e84058073)
---
 base/loading.jl   | 6 ++++--
 test/ambiguous.jl | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/base/loading.jl b/base/loading.jl
index 84ad47517cc31..bf444831363e2 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -1926,11 +1926,13 @@ function precompile(@nospecialize(f), args::Tuple)
     precompile(Tuple{Core.Typeof(f), args...})
 end
 
+const ENABLE_PRECOMPILE_WARNINGS = Ref(false)
 function precompile(argt::Type)
-    if ccall(:jl_compile_hint, Int32, (Any,), argt) == 0
+    ret = ccall(:jl_compile_hint, Int32, (Any,), argt) != 0
+    if !ret && ENABLE_PRECOMPILE_WARNINGS[]
         @warn "Inactive precompile statement" maxlog=100 form=argt _module=nothing _file=nothing _line=0
     end
-    true
+    return ret
 end
 
 precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing))
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index 265d97776c053..0516d9a74e436 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -66,7 +66,7 @@ end
 ## Other ways of accessing functions
 # Test that non-ambiguous cases work
 let io = IOBuffer()
-    @test @test_logs precompile(ambig, (Int, Int))
+    @test precompile(ambig, (Int, Int))
     cf = @eval @cfunction(ambig, Int, (Int, Int))
     @test ccall(cf, Int, (Int, Int), 1, 2) == 4
     @test length(code_lowered(ambig, (Int, Int))) == 1
@@ -75,7 +75,7 @@ end
 
 # Test that ambiguous cases fail appropriately
 let io = IOBuffer()
-    @test @test_logs (:warn,) precompile(ambig, (UInt8, Int))
+    @test !precompile(ambig, (UInt8, Int))
     cf = @eval @cfunction(ambig, Int, (UInt8, Int))  # test for a crash (doesn't throw an error)
     @test_throws(MethodError(ambig, (UInt8(1), Int(2)), get_world_counter()),
                  ccall(cf, Int, (UInt8, Int), 1, 2))

From 3744a02b4302f830c90e16919a490f55b4f3f09b Mon Sep 17 00:00:00 2001
From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>
Date: Sat, 28 Aug 2021 14:33:10 +0900
Subject: [PATCH 49/65] inference: add missing `LimitedAccuracy` handling
 (#42034)

I found we need to handle `LimitedAccuracy` (i.e. collect its
limitations into the current frame and unwrap its type) whenever
we do inter-procedural inference. Especially, we need to handle
it where we use `abstract_call_method` and `abstract_call_method_with_const_args`.
Otherwise we may encounter nested `LimitedAccuracy`, which is really not
expected. So this commit also adds the assertion that checks we never
form nested `LimitedAccuracy`.

I encountered errors due to this when analyzing JET itself by JET,
probably because its codebase makes heavy use of `invoke`.
I couldn't pack them up as simple test cases though.

(cherry picked from commit 6341fa5d4f7963b5b6eefe768192821e0d32cbb2)
---
 base/compiler/abstractinterpretation.jl | 11 ++++-------
 base/compiler/typelattice.jl            | 14 ++++++++++++--
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 6efb1b2e4b236..bb6702dede1db 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -211,10 +211,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         info = ConstCallInfo(info, const_results)
     end
 
-    if rettype isa LimitedAccuracy
-        union!(sv.pclimitations, rettype.causes)
-        rettype = rettype.typ
-    end
+    rettype = collect_limitations!(rettype, sv)
     # if we have argument refinement information, apply that now to get the result
     if is_lattice_bool(rettype) && conditionals !== nothing && fargs !== nothing
         slot = 0
@@ -1177,9 +1174,9 @@ function abstract_invoke(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:
     # end
     const_rt, const_result = abstract_call_method_with_const_args(interp, result, argtype_to_function(ft′), argtypes′, match, sv, false)
     if const_rt !== rt && const_rt ⊑ rt
-        return CallMeta(const_rt, InvokeCallInfo(match, const_result))
+        return CallMeta(collect_limitations!(const_rt, sv), InvokeCallInfo(match, const_result))
     else
-        return CallMeta(rt, InvokeCallInfo(match, nothing))
+        return CallMeta(collect_limitations!(rt, sv), InvokeCallInfo(match, nothing))
     end
 end
 
@@ -1296,7 +1293,7 @@ function abstract_call_opaque_closure(interp::AbstractInterpreter, closure::Part
             info = ConstCallInfo(info, Union{Nothing,InferenceResult}[const_result])
         end
     end
-    return CallMeta(rt, info)
+    return CallMeta(collect_limitations!(rt, sv), info)
 end
 
 function most_general_argtypes(closure::PartialOpaque)
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index 6391d4029b58e..2d65211c273b2 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -96,8 +96,18 @@ end
 struct LimitedAccuracy
     typ
     causes::IdSet{InferenceState}
-    LimitedAccuracy(@nospecialize(typ), causes::IdSet{InferenceState}) =
-        new(typ, causes)
+    function LimitedAccuracy(@nospecialize(typ), causes::IdSet{InferenceState})
+        @assert !isa(typ, LimitedAccuracy) "malformed LimitedAccuracy"
+        return new(typ, causes)
+    end
+end
+
+@inline function collect_limitations!(@nospecialize(typ), sv::InferenceState)
+    if isa(typ, LimitedAccuracy)
+        union!(sv.pclimitations, typ.causes)
+        return typ.typ
+    end
+    return typ
 end
 
 struct NotFound end

From 38418ad61198f5b1979f56d7ab73a06040b00b4d Mon Sep 17 00:00:00 2001
From: SamuraiAku <61489439+SamuraiAku@users.noreply.github.com>
Date: Tue, 29 Jun 2021 15:12:33 -0700
Subject: [PATCH 50/65] Refactor LICENSE.md to make it scanner friendly
 (#41095)

Create THIRDPARTY.md to hold license information for all code not covered by the main MIT license. This format allows for automated scanning and categorization of Julia's license.  The licenses were broken up this way because this is the format that many automated license scanners (including GitHub) expect.

(cherry picked from commit 161e38496dff86873c595428e4da12620e528c57)
---
 LICENSE.md    | 96 +++++++++++----------------------------------------
 THIRDPARTY.md | 56 ++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 76 deletions(-)
 create mode 100644 THIRDPARTY.md

diff --git a/LICENSE.md b/LICENSE.md
index e2b9c6606b1fd..79127224d049b 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,82 +1,26 @@
-The Julia language is licensed under the MIT License. The "language" consists
-of the compiler (the contents of src/), most of the standard library (base/),
-and some utilities (most of the rest of the files in this repository). See below
-for exceptions.
+MIT License
 
-> Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah,
-> and other contributors:
->
-> https://github.com/JuliaLang/julia/contributors
->
-> Permission is hereby granted, free of charge, to any person obtaining
-> a copy of this software and associated documentation files (the
-> "Software"), to deal in the Software without restriction, including
-> without limitation the rights to use, copy, modify, merge, publish,
-> distribute, sublicense, and/or sell copies of the Software, and to
-> permit persons to whom the Software is furnished to do so, subject to
-> the following conditions:
->
-> The above copyright notice and this permission notice shall be
-> included in all copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
 
-Julia includes code from the following projects, which have their own licenses:
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
 
-- [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
-- [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
-- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/jitlayers.cpp and src/disasm.cpp) [BSD-3, effectively]
-- [MUSL](https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT) (for getopt implementation on Windows) [MIT]
-- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
-- [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
-- [Python](https://docs.python.org/3/license.html) (for strtod and joinpath implementation on Windows) [BSD-3, effectively]
-- [Google Benchmark](https://github.com/google/benchmark) (for cyclecount implementation) [Apache 2.0]
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
 
-The following components included in Julia `Base` have their own separate licenses:
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
-- base/ryu/* [Boost] (see [ryu](https://github.com/ulfjack/ryu/blob/master/LICENSE-Boost))
-- base/grisu/* [BSD-3] (see [double-conversion](https://github.com/google/double-conversion/blob/master/LICENSE))
-- base/special/{exp,rem_pio2,hyperbolic}.jl [Freely distributable with preserved copyright notice] (see [FDLIBM](https://www.netlib.org/fdlibm))
+end of terms and conditions
 
-The Julia language links to the following external libraries, which have their
-own licenses:
-
-- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
-- [LIBUNWIND](https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=blob_plain;f=LICENSE;hb=master) [MIT]
-- [LIBUV](https://github.com/joyent/libuv/blob/master/LICENSE) [MIT]
-- [LLVM](https://releases.llvm.org/6.0.0/LICENSE.TXT) [BSD-3, effectively]
-- [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT]
-
-Julia's `stdlib` uses the following external libraries, which have their own licenses:
-
-- [DSFMT](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/LICENSE.txt) [BSD-3]
-- [OPENLIBM](https://github.com/JuliaMath/openlibm/blob/master/LICENSE.md) [MIT, BSD-2, ISC]
-- [GMP](https://gmplib.org/manual/Copying.html#Copying) [LGPL3+ or GPL2+]
-- [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception]
-- [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative]
-- [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3]
-- [MBEDTLS](https://tls.mbed.org/how-to-get) [either GPLv2 or Apache 2.0]
-- [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+]
-- [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
-- [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
-- [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
-- [SUITESPARSE](http://suitesparse.com) [mix of LGPL2+ and GPL2+; see individual module licenses]
-
-Julia's build process uses the following external tools:
-
-- [PATCHELF](https://nixos.org/patchelf.html)
-- [OBJCONV](https://www.agner.org/optimize/#objconv)
-
-Julia bundles the following external programs and libraries:
-
-- [7-Zip](https://www.7-zip.org/license.txt)
-- [ZLIB](https://zlib.net/zlib_license.html)
-
-On some platforms, distributions of Julia contain SSL certificate authority certificates,
-released under the [Mozilla Public License](https://en.wikipedia.org/wiki/Mozilla_Public_License).
+Please see THIRDPARTY.md for license information for other software used in this project.
diff --git a/THIRDPARTY.md b/THIRDPARTY.md
new file mode 100644
index 0000000000000..87304437183d6
--- /dev/null
+++ b/THIRDPARTY.md
@@ -0,0 +1,56 @@
+The Julia language is licensed under the MIT License (see `LICENSE.md`). The "language" consists
+of the compiler (the contents of src/), most of the standard library (base/),
+and some utilities (most of the rest of the files in this repository). See below
+for exceptions.
+
+- [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
+- [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
+- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/jitlayers.cpp and src/disasm.cpp) [BSD-3, effectively]
+- [MUSL](https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT) (for getopt implementation on Windows) [MIT]
+- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
+- [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
+- [Python](https://docs.python.org/3/license.html) (for strtod and joinpath implementation on Windows) [BSD-3, effectively]
+- [Google Benchmark](https://github.com/google/benchmark) (for cyclecount implementation) [Apache 2.0]
+
+The following components included in Julia `Base` have their own separate licenses:
+
+- base/ryu/* [Boost] (see [ryu](https://github.com/ulfjack/ryu/blob/master/LICENSE-Boost))
+- base/grisu/* [BSD-3] (see [double-conversion](https://github.com/google/double-conversion/blob/master/LICENSE))
+- base/special/{exp,rem_pio2,hyperbolic}.jl [Freely distributable with preserved copyright notice] (see [FDLIBM](https://www.netlib.org/fdlibm))
+
+The Julia language links to the following external libraries, which have their
+own licenses:
+
+- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
+- [LIBUNWIND](https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=blob_plain;f=LICENSE;hb=master) [MIT]
+- [LIBUV](https://github.com/joyent/libuv/blob/master/LICENSE) [MIT]
+- [LLVM](https://releases.llvm.org/6.0.0/LICENSE.TXT) [BSD-3, effectively]
+- [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT]
+
+Julia's `stdlib` uses the following external libraries, which have their own licenses:
+
+- [DSFMT](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/LICENSE.txt) [BSD-3]
+- [OPENLIBM](https://github.com/JuliaMath/openlibm/blob/master/LICENSE.md) [MIT, BSD-2, ISC]
+- [GMP](https://gmplib.org/manual/Copying.html#Copying) [LGPL3+ or GPL2+]
+- [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception]
+- [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative]
+- [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3]
+- [MBEDTLS](https://tls.mbed.org/how-to-get) [either GPLv2 or Apache 2.0]
+- [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+]
+- [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
+- [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
+- [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
+- [SUITESPARSE](http://suitesparse.com) [mix of LGPL2+ and GPL2+; see individual module licenses]
+
+Julia's build process uses the following external tools:
+
+- [PATCHELF](https://nixos.org/patchelf.html)
+- [OBJCONV](https://www.agner.org/optimize/#objconv)
+
+Julia bundles the following external programs and libraries:
+
+- [7-Zip](https://www.7-zip.org/license.txt)
+- [ZLIB](https://zlib.net/zlib_license.html)
+
+On some platforms, distributions of Julia contain SSL certificate authority certificates,
+released under the [Mozilla Public License](https://en.wikipedia.org/wiki/Mozilla_Public_License).

From 0824c1bca3f7b1e770178086f4991896cfcc291b Mon Sep 17 00:00:00 2001
From: Martin Holters <martin.holters@hsu-hh.de>
Date: Wed, 1 Sep 2021 19:43:37 +0200
Subject: [PATCH 51/65] Avoid impossible unionall normalization (#42003)

If the unionall bounds are inconsistent with the wrapper's bound, avoid
throwing due to an impossible type instantiation.

(cherry picked from commit b5b0684ec0a66c1bd50e75c25c724c20526bb702)
---
 src/jltypes.c | 11 +++++++++--
 test/core.jl  |  3 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/jltypes.c b/src/jltypes.c
index c4f5a1aff88bd..43171ee332e87 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -1326,8 +1326,15 @@ jl_value_t *normalize_unionalls(jl_value_t *t)
             u = (jl_unionall_t*)t;
         }
 
-        if (u->var->lb == u->var->ub || may_substitute_ub(body, u->var))
-            t = jl_instantiate_unionall(u, u->var->ub);
+        if (u->var->lb == u->var->ub || may_substitute_ub(body, u->var)) {
+            JL_TRY {
+                t = jl_instantiate_unionall(u, u->var->ub);
+            }
+            JL_CATCH {
+                // just skip normalization
+                // (may happen for bounds inconsistent with the wrapper's bounds)
+            }
+        }
     }
     JL_GC_POP();
     return t;
diff --git a/test/core.jl b/test/core.jl
index 56ddfb42e10f1..74edc7cddf7f4 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -7584,3 +7584,6 @@ let S = Tuple{Tuple{Tuple{K, UInt128} where K<:Tuple{Int64}, Int64}},
     @test pointer_from_objref(T) === pointer_from_objref(S)
     @test isbitstype(T)
 end
+
+# avoid impossible normalization (don't try to form Tuple{Complex{String}} here)
+@test Tuple{Complex{T} where String<:T<:String} == Tuple{Complex{T} where String<:T<:String}

From a8e6b33ff1341b917676bd8dcad6c7b101b69f10 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Wed, 1 Sep 2021 13:53:16 -0400
Subject: [PATCH 52/65] [AllocOpt] fix iterator invalidation (#42059)

We might previously accidentally visit this use after deletion, if the
orig_inst ended up back in the workqueue.

Fixes #41916

(cherry picked from commit d8a8db23a5208480fe7cd56b413c9743ea58db83)
---
 src/llvm-alloc-opt.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index ec7060bd10a5e..18b54b117c323 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -1149,6 +1149,7 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
 {
     auto tag = orig_inst->getArgOperand(2);
     // `julia.typeof` is only legal on the original pointer, no need to scan recursively
+    size_t last_deleted = removed.size();
     for (auto user: orig_inst->users()) {
         if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
@@ -1161,6 +1162,8 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
             }
         }
     }
+    while (last_deleted < removed.size())
+        removed[last_deleted++]->replaceUsesOfWith(orig_inst, UndefValue::get(orig_inst->getType()));
 }
 
 void Optimizer::splitOnStack(CallInst *orig_inst)

From 35f675d7736dcc58e687d042bb7c0f34c57c8bf6 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Wed, 1 Sep 2021 13:53:28 -0400
Subject: [PATCH 53/65] GMP: fix warning in init (#42062)

(cherry picked from commit 4598966f8b3fabac1865a8fd1b5f69b4ac1e246a)
---
 base/gmp.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/base/gmp.jl b/base/gmp.jl
index 3e80c2bca8bfa..06409dd0cfab1 100644
--- a/base/gmp.jl
+++ b/base/gmp.jl
@@ -94,10 +94,10 @@ const ALLOC_OVERFLOW_FUNCTION = Ref(false)
 function __init__()
     try
         if version().major != VERSION.major || bits_per_limb() != BITS_PER_LIMB
-            msg = bits_per_limb() != BITS_PER_LIMB ? error : warn
-            msg("The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))\n",
-                "does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).\n",
-                "Please rebuild Julia.")
+            msg = """The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))
+                     does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).
+                     Please rebuild Julia."""
+            bits_per_limb() != BITS_PER_LIMB ? @error(msg) : @warn(msg)
         end
 
         ccall((:__gmp_set_memory_functions, :libgmp), Cvoid,

From 1f34cd34df327cce13127dcc602184a14efe5e38 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Thu, 2 Sep 2021 14:20:59 -0400
Subject: [PATCH 54/65] avoid loading duplicate libraries (#42058)

We will not use the duplicate, so best to try to avoid loading it.

(cherry picked from commit c53669f664c22d45cdd0dcc375f1b18cf7379ad2)
---
 cli/loader_lib.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/cli/loader_lib.c b/cli/loader_lib.c
index 94cec50ae7e6a..d921055f08221 100644
--- a/cli/loader_lib.c
+++ b/cli/loader_lib.c
@@ -31,12 +31,27 @@ void jl_loader_print_stderr3(const char * msg1, const char * msg2, const char *
 
 /* Wrapper around dlopen(), with extra relative pathing thrown in*/
 static void * load_library(const char * rel_path, const char * src_dir) {
+    void * handle = NULL;
+
+    // See if a handle is already open to the basename (the part of rel_path after the last path separator)
+    const char *basename = rel_path + strlen(rel_path);
+    while (basename-- > rel_path)
+        if (*basename == PATHSEPSTRING[0] || *basename == '/')
+            break;
+    basename++;
+#if defined(_OS_WINDOWS_)
+    if ((handle = GetModuleHandleA(basename))) // narrow-string variant: basename is a char*, not a wchar_t*
+        return handle;
+#else
+    if ((handle = dlopen(basename, RTLD_NOLOAD | RTLD_NOW | RTLD_GLOBAL)))
+        return handle;
+#endif
+
     char path[2*PATH_MAX + 1] = {0};
     strncat(path, src_dir, sizeof(path) - 1);
     strncat(path, PATHSEPSTRING, sizeof(path) - 1);
     strncat(path, rel_path, sizeof(path) - 1);
 
-    void * handle = NULL;
 #if defined(_OS_WINDOWS_)
     wchar_t wpath[2*PATH_MAX + 1] = {0};
     if (!utf8_to_wchar(path, wpath, 2*PATH_MAX)) {

From a3a91829227efd438e30e83215bdf7a04bc5869d Mon Sep 17 00:00:00 2001
From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>
Date: Fri, 3 Sep 2021 06:41:29 +0900
Subject: [PATCH 55/65] inference: fix #42090, make sure not to wrap
 `Conditional` in `PartialStruct` (#42091)

(cherry picked from commit 03e7b23033aa8d05233893ebdb73c3f081cf0ffe)
---
 base/compiler/abstractinterpretation.jl |  2 +-
 test/compiler/inference.jl              | 33 +++++++++++++++++++++----
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index bb6702dede1db..47758f70a8715 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1476,7 +1476,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
             anyconst = false
             allconst = true
             for i = 2:length(e.args)
-                at = abstract_eval_value(interp, e.args[i], vtypes, sv)
+                at = widenconditional(abstract_eval_value(interp, e.args[i], vtypes, sv))
                 if !anyconst
                     anyconst = has_nontrivial_const_info(at)
                 end
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 567df71a7b16a..aa33f4a112aac 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -1828,16 +1828,39 @@ end
         return c, d # ::Tuple{Int,Int}
     end == Any[Tuple{Int,Int}]
 
-    # shouldn't use the old constraint when the subject of condition has changed
+    # should invalidate old constraint when the subject of condition has changed
     @test Base.return_types((Union{Nothing,Int},)) do a
-        b = a === nothing
-        c = b ? 0 : a # c::Int
+        cond = a === nothing
+        r1 = cond ? 0 : a # r1::Int
         a = 0
-        d = b ? a : 1 # d::Int, not d::Union{Nothing,Int}
-        return c, d # ::Tuple{Int,Int}
+        r2 = cond ? a : 1 # r2::Int, not r2::Union{Nothing,Int}
+        return r1, r2 # ::Tuple{Int,Int}
     end == Any[Tuple{Int,Int}]
 end
 
+# https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851
+# `PartialStruct` shouldn't wrap `Conditional`
+let M = Module()
+    @eval M begin
+        struct BePartialStruct
+            val::Int
+            cond
+        end
+    end
+
+    rt = @eval M begin
+        Base.return_types((Union{Nothing,Int},)) do a
+            cond = a === nothing
+            obj = $(Expr(:new, M.BePartialStruct, 42, :cond))
+            r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
+            a = $(gensym(:anyvar))::Any
+            r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
+            return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
+        end |> only
+    end
+    @test rt == Tuple{Union{Nothing,Int},Any}
+end
+
 @testset "conditional constraint propagation from non-`Conditional` object" begin
     @test Base.return_types((Bool,)) do b
         if b

From 0e0a974bb66d865ba1822757a82640a9a2d131b2 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Fri, 3 Sep 2021 05:18:26 -0400
Subject: [PATCH 56/65] InteractiveUtils: recurse correctly in varinfo, et
 al. (#42061)

* InteractiveUtils: recurse correctly in varinfo, et al.

Fixes #42045

(cherry picked from commit a163e374a4b8c66978ac6ff7652742f1adee1a51)
---
 .../InteractiveUtils/src/InteractiveUtils.jl  | 115 ++++++++----------
 1 file changed, 48 insertions(+), 67 deletions(-)

diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index 6f8ba9ea0b080..b116216acc80c 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -34,40 +34,40 @@ The memory consumption estimate is an approximate lower bound on the size of the
 - `sortby` : the column to sort results by. Options are `:name` (default), `:size`, and `:summary`.
 """
 function varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false)
-    @assert sortby in [:name, :size, :summary] "Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`"
-    function _populate_rows(m2::Module, allrows, include_self::Bool, prep::String)
-        newrows = Any[
-            let
-                value = getfield(m2, v)
-                ssize_str, ssize = if value===Base || value===Main || value===Core
+    sortby in (:name, :size, :summary) || throw(ArgumentError("Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`"))
+    rows = Vector{Any}[]
+    workqueue = [(m, ""),]
+    while !isempty(workqueue)
+        m2, prep = popfirst!(workqueue)
+        for v in names(m2; all, imported)
+            if !isdefined(m2, v) || !occursin(pattern, string(v))
+                continue
+            end
+            value = getfield(m2, v)
+            isbuiltin = value === Base || value === Main || value === Core
+            if recursive && !isbuiltin && isa(value, Module) && value !== m2 && nameof(value) === v && parentmodule(value) === m2
+                push!(workqueue, (value, "$prep$v."))
+            end
+            ssize_str, ssize = if isbuiltin
                     ("", typemax(Int))
                 else
                     ss = summarysize(value)
                     (format_bytes(ss), ss)
                 end
-                Any[string(prep, v), ssize_str, summary(value), ssize]
-            end
-            for v in names(m2; all, imported)
-            if (string(v) != split(string(m2), ".")[end] || include_self) && isdefined(m2, v) && occursin(pattern, string(v)) ]
-        append!(allrows, newrows)
-        if recursive
-            for row in newrows
-                if row[3] == "Module" && !in(split(row[1], ".")[end], [split(string(m2), ".")[end], "Base", "Main", "Core"])
-                    _populate_rows(getfield(m2, Symbol(split(row[1], ".")[end])), allrows, false, prep * "$(row[1]).")
-                end
-            end
+            push!(rows, Any[string(prep, v), ssize_str, summary(value), ssize])
         end
-        return allrows
     end
-    rows = _populate_rows(m, Vector{Any}[], true, "")
-    if sortby == :name
-        col, reverse = 1, false
-    elseif sortby == :size
-        col, reverse = 4, true
-    elseif sortby == :summary
-        col, reverse = 3, false
+    let (col, rev) = if sortby == :name
+            1, false
+        elseif sortby == :size
+            4, true
+        elseif sortby == :summary
+            3, false
+        else
+            @assert "unreachable"
+        end
+        sort!(rows; by=r->r[col], rev)
     end
-    rows = sort!(rows, by=r->r[col], rev=reverse)
     pushfirst!(rows, Any["name", "size", "summary"])
 
     return Markdown.MD(Any[Markdown.Table(map(r->r[1:3], rows), Symbol[:l, :r, :l])])
@@ -208,54 +208,35 @@ function methodswith(t::Type; supertypes::Bool=false)
 end
 
 # subtypes
-function _subtypes(m::Module, x::Type, sts=Base.IdSet{Any}(), visited=Base.IdSet{Module}())
-    push!(visited, m)
+function _subtypes_in!(mods::Array, x::Type)
     xt = unwrap_unionall(x)
-    if !isa(xt, DataType)
-        return sts
+    if !isabstracttype(x) || !isa(xt, DataType)
+        # Fast path: only abstract types (whose unwrapped form is a DataType) are searched
+        return Type[]
     end
-    xt = xt::DataType
-    for s in names(m, all = true)
-        if isdefined(m, s) && !isdeprecated(m, s)
-            t = getfield(m, s)
-            if isa(t, DataType)
-                t = t::DataType
-                if t.name.name === s && supertype(t).name == xt.name
-                    ti = typeintersect(t, x)
-                    ti != Bottom && push!(sts, ti)
-                end
-            elseif isa(t, UnionAll)
-                t = t::UnionAll
-                tt = unwrap_unionall(t)
-                isa(tt, DataType) || continue
-                tt = tt::DataType
-                if tt.name.name === s && supertype(tt).name == xt.name
-                    ti = typeintersect(t, x)
-                    ti != Bottom && push!(sts, ti)
+    sts = Vector{Any}()
+    while !isempty(mods)
+        m = pop!(mods)
+        xt = xt::DataType
+        for s in names(m, all = true)
+            if isdefined(m, s) && !isdeprecated(m, s)
+                t = getfield(m, s)
+                dt = isa(t, UnionAll) ? unwrap_unionall(t) : t
+                if isa(dt, DataType)
+                    if dt.name.name === s && dt.name.module == m && supertype(dt).name == xt.name
+                        ti = typeintersect(t, x)
+                        ti != Bottom && push!(sts, ti)
+                    end
+                elseif isa(t, Module) && nameof(t) === s && parentmodule(t) === m && t !== m
+                    t === Base || push!(mods, t) # exclude Base, since it is also parented by Main
                 end
-            elseif isa(t, Module)
-                t = t::Module
-                in(t, visited) || _subtypes(t, x, sts, visited)
             end
         end
     end
-    return sts
-end
-
-function _subtypes_in(mods::Array, x::Type)
-    if !isabstracttype(x)
-        # Fast path
-        return Type[]
-    end
-    sts = Base.IdSet{Any}()
-    visited = Base.IdSet{Module}()
-    for m in mods
-        _subtypes(m, x, sts, visited)
-    end
-    return sort!(collect(sts), by=string)
+    return permute!(sts, sortperm(map(string, sts)))
 end
 
-subtypes(m::Module, x::Type) = _subtypes_in([m], x)
+subtypes(m::Module, x::Type) = _subtypes_in!([m], x)
 
 """
     subtypes(T::DataType)
@@ -274,7 +255,7 @@ julia> subtypes(Integer)
  Unsigned
 ```
 """
-subtypes(x::Type) = _subtypes_in(Base.loaded_modules_array(), x)
+subtypes(x::Type) = _subtypes_in!(Base.loaded_modules_array(), x)
 
 """
     supertypes(T::Type)

From f2ecc321e10c0dc68de3ab25c2ffbe422c8f42dc Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Fri, 13 Aug 2021 12:48:01 -0400
Subject: [PATCH 57/65] threads: fix semantic error in old Threads.Atomic

(cherry picked from commit aa421fff9e6f3efceea8d7cacc223b5ef9f89239)
---
 base/atomics.jl        | 12 ++++++------
 src/llvm-alloc-opt.cpp | 31 +++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/base/atomics.jl b/base/atomics.jl
index 97405d88fd408..e6d62c3fc807b 100644
--- a/base/atomics.jl
+++ b/base/atomics.jl
@@ -356,13 +356,13 @@ for typ in atomictypes
     rt = "$lt, $lt*"
     irt = "$ilt, $ilt*"
     @eval getindex(x::Atomic{$typ}) =
-        llvmcall($"""
+        GC.@preserve x llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                  %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
                  ret $lt %rv
                  """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
     @eval setindex!(x::Atomic{$typ}, v::$typ) =
-        llvmcall($"""
+        GC.@preserve x llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                  store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
                  ret void
@@ -371,7 +371,7 @@ for typ in atomictypes
     # Note: atomic_cas! succeeded (i.e. it stored "new") if and only if the result is "cmp"
     if typ <: Integer
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            llvmcall($"""
+            GC.@preserve x llvmcall($"""
                      %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                      %rs = cmpxchg $lt* %ptr, $lt %1, $lt %2 acq_rel acquire
                      %rv = extractvalue { $lt, i1 } %rs, 0
@@ -380,7 +380,7 @@ for typ in atomictypes
                      unsafe_convert(Ptr{$typ}, x), cmp, new)
     else
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            llvmcall($"""
+            GC.@preserve x llvmcall($"""
                      %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
                      %icmp = bitcast $lt %1 to $ilt
                      %inew = bitcast $lt %2 to $ilt
@@ -403,7 +403,7 @@ for typ in atomictypes
         if rmwop in arithmetic_ops && !(typ <: ArithmeticTypes) continue end
         if typ <: Integer
             @eval $fn(x::Atomic{$typ}, v::$typ) =
-                llvmcall($"""
+                GC.@preserve x llvmcall($"""
                          %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                          %rv = atomicrmw $rmw $lt* %ptr, $lt %1 acq_rel
                          ret $lt %rv
@@ -411,7 +411,7 @@ for typ in atomictypes
         else
             rmwop === :xchg || continue
             @eval $fn(x::Atomic{$typ}, v::$typ) =
-                llvmcall($"""
+                GC.@preserve x llvmcall($"""
                          %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
                          %ival = bitcast $lt %1 to $ilt
                          %irv = atomicrmw $rmw $ilt* %iptr, $ilt %ival acq_rel
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index 18b54b117c323..f7130f6904479 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -628,6 +628,21 @@ void Optimizer::checkInst(Instruction *I)
                 use_info.hasunknownmem = true;
             return true;
         }
+        if (isa<AtomicCmpXchgInst>(inst) || isa<AtomicRMWInst>(inst)) {
+            // The allocation may only be the pointer operand; `!=` binds tighter than `?:`, hence the parentheses
+            if (use->getOperandNo() != (isa<AtomicCmpXchgInst>(inst) ? AtomicCmpXchgInst::getPointerOperandIndex() : AtomicRMWInst::getPointerOperandIndex())) {
+                use_info.escaped = true;
+                return false;
+            }
+            use_info.hasload = true;
+            auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
+            if (cur.offset == UINT32_MAX || !use_info.addMemOp(inst, use->getOperandNo(),
+                                                               cur.offset, storev->getType(),
+                                                               true, *pass.DL))
+                use_info.hasunknownmem = true;
+            use_info.refload = true;
+            return true;
+        }
         if (isa<AddrSpaceCastInst>(inst) || isa<BitCastInst>(inst)) {
             push_inst(inst);
             return true;
@@ -1331,6 +1346,22 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
             store->eraseFromParent();
             return;
         }
+        else if (isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) { // atomic user: only its pointer use is rewritten here
+            auto slot_idx = find_slot(offset);
+            auto &slot = slots[slot_idx];
+            assert(slot.offset <= offset && slot.offset + slot.size >= offset); // offset must fall inside the slot found
+            IRBuilder<> builder(user); // builder created from `user`; passed to slot_gep below
+            Value *newptr;
+            if (slot.isref) {
+                assert(slot.offset == offset); // isref slots are only addressed at their start
+                newptr = slot.slot;
+            }
+            else {
+                Value *Val = isa<AtomicCmpXchgInst>(user) ? cast<AtomicCmpXchgInst>(user)->getNewValOperand() : cast<AtomicRMWInst>(user)->getValOperand(); // value operand; only its type is used
+                newptr = slot_gep(slot, offset, Val->getType(), builder);
+            }
+            *use = newptr; // redirect this use to the slot pointer; the atomic instruction itself is kept
+        }
         else if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
             assert(callee); // makes it clear for clang analyser that `callee` is not NULL

From dc181942cd3d4d86085b674ca1257c28ca6fe294 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Thu, 12 Aug 2021 15:53:10 -0400
Subject: [PATCH 58/65] atomics: optimize atomic modify operations (mostly)

Lacking inlining, but now expressing the direct invoke:
this gets us within about 2x of a primitive atomicrmw add.

(cherry picked from commit 85518c8ab9e79e4659be615206f0928fc8f521d4)
---
 base/compiler/abstractinterpretation.jl |  23 ++-
 base/compiler/optimize.jl               |   2 +-
 base/compiler/ssair/inlining.jl         |  16 ++
 base/compiler/ssair/ir.jl               |   3 +-
 base/compiler/tfuncs.jl                 |  32 ++-
 base/compiler/validation.jl             |   8 +-
 src/ast.c                               |   2 +
 src/cgutils.cpp                         |  49 +++--
 src/codegen.cpp                         | 261 ++++++++++++++----------
 src/dump.c                              |   2 +-
 src/interpreter.c                       |   3 +
 src/intrinsics.cpp                      |  10 +-
 src/julia_internal.h                    |   1 +
 13 files changed, 266 insertions(+), 146 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 47758f70a8715..2573e38c21a60 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1193,6 +1193,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             return abstract_apply(interp, argtypes, sv, max_methods)
         elseif f === invoke
             return abstract_invoke(interp, argtypes, sv)
+        elseif f === modifyfield!
+            return abstract_modifyfield!(interp, argtypes, sv)
         end
         return CallMeta(abstract_call_builtin(interp, f, fargs, argtypes, sv, max_methods), false)
     elseif f === Core.kwfunc
@@ -1458,7 +1460,8 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
         return abstract_eval_special_value(interp, e, vtypes, sv)
     end
     e = e::Expr
-    if e.head === :call
+    ehead = e.head
+    if ehead === :call
         ea = e.args
         argtypes = collect_argtypes(interp, ea, vtypes, sv)
         if argtypes === nothing
@@ -1468,7 +1471,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
             sv.stmt_info[sv.currpc] = callinfo.info
             t = callinfo.rt
         end
-    elseif e.head === :new
+    elseif ehead === :new
         t = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))[1]
         if isconcretetype(t) && !ismutabletype(t)
             args = Vector{Any}(undef, length(e.args)-1)
@@ -1505,7 +1508,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 end
             end
         end
-    elseif e.head === :splatnew
+    elseif ehead === :splatnew
         t = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))[1]
         if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t)
             at = abstract_eval_value(interp, e.args[2], vtypes, sv)
@@ -1518,7 +1521,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = PartialStruct(t, at.fields)
             end
         end
-    elseif e.head === :new_opaque_closure
+    elseif ehead === :new_opaque_closure
         t = Union{}
         if length(e.args) >= 5
             ea = e.args
@@ -1537,7 +1540,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 end
             end
         end
-    elseif e.head === :foreigncall
+    elseif ehead === :foreigncall
         abstract_eval_value(interp, e.args[1], vtypes, sv)
         t = sp_type_rewrap(e.args[2], sv.linfo, true)
         for i = 3:length(e.args)
@@ -1545,21 +1548,21 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = Bottom
             end
         end
-    elseif e.head === :cfunction
+    elseif ehead === :cfunction
         t = e.args[1]
         isa(t, Type) || (t = Any)
         abstract_eval_cfunction(interp, e, vtypes, sv)
-    elseif e.head === :method
+    elseif ehead === :method
         t = (length(e.args) == 1) ? Any : Nothing
-    elseif e.head === :copyast
+    elseif ehead === :copyast
         t = abstract_eval_value(interp, e.args[1], vtypes, sv)
         if t isa Const && t.val isa Expr
             # `copyast` makes copies of Exprs
             t = Expr
         end
-    elseif e.head === :invoke
+    elseif ehead === :invoke || ehead === :invoke_modify
         error("type inference data-flow error: tried to double infer a function")
-    elseif e.head === :isdefined
+    elseif ehead === :isdefined
         sym = e.args[1]
         t = Bool
         if isa(sym, SlotNumber)
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 6d059247a43ea..ad0426860ece9 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -502,7 +502,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
             return 0
         end
         return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
-    elseif head === :foreigncall || head === :invoke
+    elseif head === :foreigncall || head === :invoke || head === :invoke_modify
         # Calls whose "return type" is Union{} do not actually return:
         # they are errors. Since these are not part of the typical
         # run-time of the function, we omit them from
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 78edef88439e9..05ed9511b23d8 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -1148,6 +1148,22 @@ function process_simple!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sta
         ir.stmts[idx][:inst] = res
         return nothing
     end
+    if (sig.f === modifyfield! || sig.ft ⊑ typeof(modifyfield!)) && 5 <= length(stmt.args) <= 6 # modifyfield! call with 5 or 6 entries in stmt.args
+        let info = ir.stmts[idx][:info]
+            info isa MethodResultPure && (info = info.info) # unwrap to the wrapped info
+            info isa ConstCallInfo && (info = info.call) # unwrap to the underlying call info
+            info isa MethodMatchInfo || return nothing # anything else: leave the statement untouched
+            length(info.results) == 1 || return nothing # require exactly one method match
+            match = info.results[1]::MethodMatch
+            match.fully_covers || return nothing # the single match must fully cover the call
+            case = compileable_specialization(state.et, match)
+            case === nothing && return nothing
+            stmt.head = :invoke_modify # rewrite the :call in place ...
+            pushfirst!(stmt.args, case) # ... with the specialization prepended to the original arguments
+            ir.stmts[idx][:inst] = stmt
+        end
+        return nothing # every path for a modifyfield! call returns nothing, rewritten or not
+    end
 
     check_effect_free!(ir, stmt, calltype, idx)
 
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 50483ffa79465..9d4657fcf20f4 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -403,7 +403,8 @@ function getindex(x::UseRef)
 end
 
 function is_relevant_expr(e::Expr)
-    return e.head in (:call, :invoke, :new, :splatnew, :(=), :(&),
+    return e.head in (:call, :invoke, :invoke_modify,
+                      :new, :splatnew, :(=), :(&),
                       :gc_preserve_begin, :gc_preserve_end,
                       :foreigncall, :isdefined, :copyast,
                       :undefcheck, :throw_undef_if_not,
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index e270555426b42..511af138883f4 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -938,10 +938,40 @@ function modifyfield!_tfunc(o, f, op, v)
     @nospecialize
     T = _fieldtype_tfunc(o, isconcretetype(o), f)
     T === Bottom && return Bottom
-    # note: we could sometimes refine this to a PartialStruct if we analyzed `op(o.f, v)::T`
     PT = Const(Pair)
     return instanceof_tfunc(apply_type_tfunc(PT, T, T))[1]
 end
+function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState) # infers modifyfield! calls; returns a CallMeta
+    nargs = length(argtypes) # object, field, op and value are read from positions 2..5 below
+    if !isempty(argtypes) && isvarargtype(argtypes[nargs])
+        nargs - 1 <= 6 || return CallMeta(Bottom, false) # too many entries ahead of the trailing vararg
+        nargs > 3 || return CallMeta(Any, false) # trailing vararg within the first three entries: answer Any
+    else
+        5 <= nargs <= 6 || return CallMeta(Bottom, false) # without a vararg only 5 or 6 entries are accepted
+    end
+    o = unwrapva(argtypes[2])
+    f = unwrapva(argtypes[3])
+    RT = modifyfield!_tfunc(o, f, Any, Any) # baseline result type from the tfunc
+    info = false
+    if nargs >= 5 && RT !== Bottom
+        # we may be able to refine this to a PartialStruct by analyzing `op(o.f, v)::T`
+        # as well as compute the info for the method matches
+        op = unwrapva(argtypes[4])
+        v = unwrapva(argtypes[5])
+        TF = getfield_tfunc(o, f) # inferred type of the field's current value
+        push!(sv.ssavalue_uses[sv.currpc], sv.currpc) # temporarily disable `call_result_unused` check for this call
+        callinfo = abstract_call(interp, nothing, Any[op, TF, v], sv, #=max_methods=# 1)
+        pop!(sv.ssavalue_uses[sv.currpc], sv.currpc) # undo the temporary use pushed above
+        TF2 = tmeet(callinfo.rt, widenconst(TF)) # op's result narrowed to the widened field type
+        if TF2 === Bottom
+            RT = Bottom
+        elseif isconcretetype(RT) && has_nontrivial_const_info(TF2) # isconcrete condition required to form a PartialStruct
+            RT = PartialStruct(RT, Any[TF, TF2])
+        end
+        info = callinfo.info # info from the op call is returned in the CallMeta
+    end
+    return CallMeta(RT, info)
+end
 replacefield!_tfunc(o, f, x, v, success_order, failure_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
 replacefield!_tfunc(o, f, x, v, success_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
 function replacefield!_tfunc(o, f, x, v)
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index f6b89f8f5cd04..6e0f81114744b 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -4,6 +4,7 @@
 const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange}(
     :call => 1:typemax(Int),
     :invoke => 2:typemax(Int),
+    :invoke_modify => 3:typemax(Int),
     :static_parameter => 1:1,
     :(&) => 1:1,
     :(=) => 2:2,
@@ -76,7 +77,7 @@ end
 
 function _validate_val!(@nospecialize(x), errors, ssavals::BitSet)
     if isa(x, Expr)
-        if x.head === :call || x.head === :invoke
+        if x.head === :call || x.head === :invoke || x.head === :invoke_modify
             f = x.args[1]
             if f isa GlobalRef && (f.name === :cglobal) && x.head === :call
                 # TODO: these are not yet linearized
@@ -136,7 +137,8 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
                 end
                 validate_val!(lhs)
                 validate_val!(rhs)
-            elseif head === :call || head === :invoke || head === :gc_preserve_end || head === :meta ||
+            elseif head === :call || head === :invoke || head === :invoke_modify ||
+                head === :gc_preserve_end || head === :meta ||
                 head === :inbounds || head === :foreigncall || head === :cfunction ||
                 head === :const || head === :enter || head === :leave || head === :pop_exception ||
                 head === :method || head === :global || head === :static_parameter ||
@@ -235,7 +237,7 @@ end
 
 function is_valid_rvalue(@nospecialize(x))
     is_valid_argument(x) && return true
-    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
+    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
         return true
     end
     return false
diff --git a/src/ast.c b/src/ast.c
index 2a0661e629564..de2492db08c94 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -28,6 +28,7 @@ extern "C" {
 
 // head symbols for each expression type
 jl_sym_t *call_sym;    jl_sym_t *invoke_sym;
+jl_sym_t *invoke_modify_sym;
 jl_sym_t *empty_sym;   jl_sym_t *top_sym;
 jl_sym_t *module_sym;  jl_sym_t *slot_sym;
 jl_sym_t *export_sym;  jl_sym_t *import_sym;
@@ -345,6 +346,7 @@ void jl_init_common_symbols(void)
     empty_sym = jl_symbol("");
     call_sym = jl_symbol("call");
     invoke_sym = jl_symbol("invoke");
+    invoke_modify_sym = jl_symbol("invoke_modify");
     foreigncall_sym = jl_symbol("foreigncall");
     cfunction_sym = jl_symbol("cfunction");
     quote_sym = jl_symbol("quote");
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 4b1f842effe22..aec9b59c98bda 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1547,17 +1547,23 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         Value *parent,  // for the write barrier, NULL if no barrier needed
         bool isboxed, AtomicOrdering Order, AtomicOrdering FailOrder, unsigned alignment,
         bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
-        bool maybe_null_if_boxed, const std::string &fname)
+        bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const std::string &fname)
 {
     auto newval = [&](const jl_cgval_t &lhs) {
-        jl_cgval_t argv[3] = { cmp, lhs, rhs };
-        Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
-        argv[0] = mark_julia_type(ctx, callval, true, jl_any_type);
-        if (!jl_subtype(argv[0].typ, jltype)) {
-            emit_typecheck(ctx, argv[0], jltype, fname + "typed_store");
-            argv[0] = update_julia_type(ctx, argv[0], jltype);
-        }
-        return argv[0];
+        const jl_cgval_t argv[3] = { cmp, lhs, rhs };
+        jl_cgval_t ret;
+        if (modifyop) {
+            ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+        }
+        else {
+            Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+            ret = mark_julia_type(ctx, callval, true, jl_any_type);
+        }
+        if (!jl_subtype(ret.typ, jltype)) {
+            emit_typecheck(ctx, ret, jltype, fname + "typed_store");
+            ret = update_julia_type(ctx, ret, jltype);
+        }
+        return ret;
     };
     assert(!needlock || parent != nullptr);
     Type *elty = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, jltype);
@@ -1570,7 +1576,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         else if (isreplacefield) {
             Value *Success = emit_f_is(ctx, cmp, ghostValue(jltype));
             Success = ctx.builder.CreateZExt(Success, T_int8);
-            jl_cgval_t argv[2] = {ghostValue(jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
+            const jl_cgval_t argv[2] = {ghostValue(jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
             jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
             return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
@@ -1579,7 +1585,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         }
         else { // modifyfield
             jl_cgval_t oldval = ghostValue(jltype);
-            jl_cgval_t argv[2] = { oldval, newval(oldval) };
+            const jl_cgval_t argv[2] = { oldval, newval(oldval) };
             jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
             return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
@@ -1862,7 +1868,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         }
     }
     if (ismodifyfield) {
-        jl_cgval_t argv[2] = { oldval, rhs };
+        const jl_cgval_t argv[2] = { oldval, rhs };
         jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
         oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
     }
@@ -1881,7 +1887,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         oldval = mark_julia_type(ctx, instr, isboxed, jltype);
         if (isreplacefield) {
             Success = ctx.builder.CreateZExt(Success, T_int8);
-            jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
+            const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
             jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
             oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
         }
@@ -3269,7 +3275,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         jl_cgval_t rhs, jl_cgval_t cmp,
         bool checked, bool wb, AtomicOrdering Order, AtomicOrdering FailOrder,
         bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
-        const std::string &fname)
+        const jl_cgval_t *modifyop, const std::string &fname)
 {
     if (!sty->name->mutabl && checked) {
         std::string msg = fname + "immutable struct of type "
@@ -3309,9 +3315,14 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
             if (ismodifyfield) {
                 if (needlock)
                     emit_lockstate_value(ctx, strct, false);
-                jl_cgval_t argv[3] = { cmp, oldval, rhs };
-                Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
-                rhs = mark_julia_type(ctx, callval, true, jl_any_type);
+                const jl_cgval_t argv[3] = { cmp, oldval, rhs };
+                if (modifyop) {
+                    rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+                }
+                else {
+                    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+                    rhs = mark_julia_type(ctx, callval, true, jl_any_type);
+                }
                 if (!jl_subtype(rhs.typ, jfty)) {
                     emit_typecheck(ctx, rhs, jfty, fname);
                     rhs = update_julia_type(ctx, rhs, jfty);
@@ -3364,7 +3375,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         return typed_store(ctx, addr, NULL, rhs, cmp, jfty, strct.tbaa, nullptr,
             wb ? maybe_bitcast(ctx, data_pointer(ctx, strct), T_pjlvalue) : nullptr,
             isboxed, Order, FailOrder, align,
-            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, fname);
+            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, modifyop, fname);
     }
 }
 
@@ -3543,7 +3554,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             else
                 need_wb = false;
             emit_typecheck(ctx, rhs, jl_svecref(sty->types, i), "new");
-            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), false, need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false, false, false, "");
+            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), false, need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false, false, false, nullptr, "");
         }
         return strctinfo;
     }
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 5c4218a82222e..8db4a7503b116 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -1161,12 +1161,13 @@ static Value *get_current_ptls(jl_codectx_t &ctx);
 static Value *get_current_signal_page(jl_codectx_t &ctx);
 static void CreateTrap(IRBuilder<> &irbuilder);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+                             const jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+                             const jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
 static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv);
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt);
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
 static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
@@ -2660,6 +2661,102 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     return emit_box_compare(ctx, arg1, arg2, nullcheck1, nullcheck2);
 }
 
+static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
+                           const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop)
+{ // shared codegen for setfield!/swapfield!/replacefield!/modifyfield!; returns true after writing *ret, false when it bails out without setting *ret
+    bool issetfield = f == jl_builtin_setfield;
+    bool isreplacefield = f == jl_builtin_replacefield;
+    bool isswapfield = f == jl_builtin_swapfield;
+    bool ismodifyfield = f == jl_builtin_modifyfield;
+    const jl_cgval_t undefval;
+    const jl_cgval_t &obj = argv[1];
+    const jl_cgval_t &fld = argv[2];
+    jl_cgval_t val = argv[isreplacefield || ismodifyfield ? 4 : 3]; // replacefield!/modifyfield! carry an extra argument at argv[3], so the value is at argv[4]
+    const jl_cgval_t &cmp = isreplacefield || ismodifyfield ? argv[3] : undefval; // that extra argument, handed to emit_setfield as `cmp`
+    enum jl_memory_order order = jl_memory_order_notatomic; // default when no ordering argument is given
+    const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : "modifyfield!";
+    if (nargs >= (isreplacefield || ismodifyfield ? 5 : 4)) { // an ordering argument is present
+        const jl_cgval_t &ord = argv[isreplacefield || ismodifyfield ? 5 : 4];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false; // ordering is not a known constant: bail out
+        order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
+    }
+    enum jl_memory_order fail_order = order; // failure ordering defaults to the success ordering
+    if (isreplacefield && nargs == 6) { // replacefield! with an explicit failure ordering at argv[6]
+        const jl_cgval_t &ord = argv[6];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false; // failure ordering is not a known constant: bail out
+        fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+    }
+    if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) {
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        *ret = jl_cgval_t(); // unreachable
+        return true;
+    }
+
+    jl_datatype_t *uty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
+    if (jl_is_datatype(uty) && jl_struct_try_layout(uty)) {
+        ssize_t idx = -1; // stays -1 unless the field can be resolved from a constant
+        if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) { // field given as a constant Symbol
+            idx = jl_field_index(uty, (jl_sym_t*)fld.constant, 0);
+        }
+        else if (fld.constant && fld.typ == (jl_value_t*)jl_long_type) { // field given as a constant integer
+            ssize_t i = jl_unbox_long(fld.constant);
+            if (i > 0 && i <= jl_datatype_nfields(uty))
+                idx = i - 1; // 1-based argument to 0-based index
+        }
+        if (idx != -1) {
+            jl_value_t *ft = jl_svecref(uty->types, idx);
+            if (!jl_has_free_typevars(ft)) {
+                if (!ismodifyfield && !jl_subtype(val.typ, ft)) { // skipped for modifyfield!
+                    emit_typecheck(ctx, val, ft, fname);
+                    val = update_julia_type(ctx, val, ft);
+                }
+                // TODO: attempt better codegen for approximate types
+                bool isboxed = jl_field_isptr(uty, idx);
+                bool isatomic = jl_field_isatomic(uty, idx);
+                bool needlock = isatomic && !isboxed && jl_datatype_size(jl_field_type(uty, idx)) > MAX_ATOMIC_SIZE; // inline atomic field larger than MAX_ATOMIC_SIZE
+                if (isatomic == (order == jl_memory_order_notatomic)) { // atomic field with non-atomic order, or the reverse
+                    emit_atomic_error(ctx,
+                            issetfield ?
+                            (isatomic ? "setfield!: atomic field cannot be written non-atomically"
+                                      : "setfield!: non-atomic field cannot be written atomically") :
+                            isreplacefield ?
+                            (isatomic ? "replacefield!: atomic field cannot be written non-atomically"
+                                      : "replacefield!: non-atomic field cannot be written atomically") :
+                            isswapfield ?
+                            (isatomic ? "swapfield!: atomic field cannot be written non-atomically"
+                                      : "swapfield!: non-atomic field cannot be written atomically") :
+                            (isatomic ? "modifyfield!: atomic field cannot be written non-atomically"
+                                      : "modifyfield!: non-atomic field cannot be written atomically"));
+                    *ret = jl_cgval_t();
+                    return true;
+                }
+                if (isatomic == (fail_order == jl_memory_order_notatomic)) { // same mismatch test for the failure ordering
+                    emit_atomic_error(ctx,
+                            (isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
+                                      : "replacefield!: non-atomic field cannot be accessed atomically"));
+                    *ret = jl_cgval_t();
+                    return true;
+                }
+                *ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true, true,
+                        (needlock || order <= jl_memory_order_notatomic)
+                        ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                        : get_llvm_atomic_order(order),
+                        (needlock || fail_order <= jl_memory_order_notatomic)
+                        ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                        : get_llvm_atomic_order(fail_order),
+                        needlock, issetfield, isreplacefield, isswapfield, ismodifyfield,
+                        modifyop, fname);
+                return true;
+            }
+        }
+    }
+    return false; // object type or field not resolved to a concrete layout
+}
+
 static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     emit_function(
         jl_method_instance_t *lam,
@@ -2989,6 +3086,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                                     false,
                                     false,
                                     false,
+                                    nullptr,
                                     "");
                     }
                 }
@@ -3132,97 +3230,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
     else if ((f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) ||
              (f == jl_builtin_swapfield && (nargs == 3 || nargs == 4)) ||
              (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6)) ||
-             (true && f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5))) {
-        bool issetfield = f == jl_builtin_setfield;
-        bool isreplacefield = f == jl_builtin_replacefield;
-        bool isswapfield = f == jl_builtin_swapfield;
-        bool ismodifyfield = f == jl_builtin_modifyfield;
-        const jl_cgval_t undefval;
-        const jl_cgval_t &obj = argv[1];
-        const jl_cgval_t &fld = argv[2];
-        jl_cgval_t val = argv[isreplacefield || ismodifyfield ? 4 : 3];
-        const jl_cgval_t &cmp = isreplacefield || ismodifyfield ? argv[3] : undefval;
-        enum jl_memory_order order = jl_memory_order_notatomic;
-        const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : "modifyfield!";
-        if (nargs >= (isreplacefield || ismodifyfield ? 5 : 4)) {
-            const jl_cgval_t &ord = argv[isreplacefield || ismodifyfield ? 5 : 4];
-            emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
-            if (!ord.constant)
-                return false;
-            order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
-        }
-        enum jl_memory_order fail_order = order;
-        if (isreplacefield && nargs == 6) {
-            const jl_cgval_t &ord = argv[6];
-            emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
-            if (!ord.constant)
-                return false;
-            fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
-        }
-        if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) {
-            emit_atomic_error(ctx, "invalid atomic ordering");
-            *ret = jl_cgval_t(); // unreachable
-            return true;
-        }
-
-        jl_datatype_t *uty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
-        if (jl_is_datatype(uty) && jl_struct_try_layout(uty)) {
-            ssize_t idx = -1;
-            if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
-                idx = jl_field_index(uty, (jl_sym_t*)fld.constant, 0);
-            }
-            else if (fld.constant && fld.typ == (jl_value_t*)jl_long_type) {
-                ssize_t i = jl_unbox_long(fld.constant);
-                if (i > 0 && i <= jl_datatype_nfields(uty))
-                    idx = i - 1;
-            }
-            if (idx != -1) {
-                jl_value_t *ft = jl_svecref(uty->types, idx);
-                if (!jl_has_free_typevars(ft)) {
-                    if (!ismodifyfield && !jl_subtype(val.typ, ft)) {
-                        emit_typecheck(ctx, val, ft, fname);
-                        val = update_julia_type(ctx, val, ft);
-                    }
-                    // TODO: attempt better codegen for approximate types
-                    bool isboxed = jl_field_isptr(uty, idx);
-                    bool isatomic = jl_field_isatomic(uty, idx);
-                    bool needlock = isatomic && !isboxed && jl_datatype_size(jl_field_type(uty, idx)) > MAX_ATOMIC_SIZE;
-                    if (isatomic == (order == jl_memory_order_notatomic)) {
-                        emit_atomic_error(ctx,
-                                issetfield ?
-                                (isatomic ? "setfield!: atomic field cannot be written non-atomically"
-                                          : "setfield!: non-atomic field cannot be written atomically") :
-                                isreplacefield ?
-                                (isatomic ? "replacefield!: atomic field cannot be written non-atomically"
-                                          : "replacefield!: non-atomic field cannot be written atomically") :
-                                isswapfield ?
-                                (isatomic ? "swapfield!: atomic field cannot be written non-atomically"
-                                          : "swapfield!: non-atomic field cannot be written atomically") :
-                                (isatomic ? "modifyfield!: atomic field cannot be written non-atomically"
-                                          : "modifyfield!: non-atomic field cannot be written atomically"));
-                        *ret = jl_cgval_t();
-                        return true;
-                    }
-                    if (isatomic == (fail_order == jl_memory_order_notatomic)) {
-                        emit_atomic_error(ctx,
-                                (isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
-                                          : "replacefield!: non-atomic field cannot be accessed atomically"));
-                        *ret = jl_cgval_t();
-                        return true;
-                    }
-                    *ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true, true,
-                            (needlock || order <= jl_memory_order_notatomic)
-                            ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-                            : get_llvm_atomic_order(order),
-                            (needlock || fail_order <= jl_memory_order_notatomic)
-                            ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-                            : get_llvm_atomic_order(fail_order),
-                            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield,
-                            fname);
-                    return true;
-                }
-            }
-        }
+             (f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5))) {
+        return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr);
     }
 
     else if (f == jl_builtin_nfields && nargs == 1) {
@@ -3449,7 +3458,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
 // Returns T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
+                             const jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
 {
     // emit arguments
     SmallVector<Value*, 3> theArgs;
@@ -3473,14 +3482,14 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
 }
 // Returns T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
+                             const jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
 {
     return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, cc);
 }
 
 
 static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject,
-                                          jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+                                          const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
 {
     // emit specialized call site
     bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
@@ -3560,7 +3569,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
     jl_cgval_t retval;
     switch (returninfo.cc) {
         case jl_returninfo_t::Boxed:
-            retval = mark_julia_type(ctx, call, true, inferred_retty);
+            retval = mark_julia_type(ctx, call, true, jlretty);
             break;
         case jl_returninfo_t::Register:
             retval = mark_julia_type(ctx, call, false, jlretty);
@@ -3590,20 +3599,18 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
             break;
     }
     // see if inference has a different / better type for the call than the lambda
-    if (inferred_retty != retval.typ)
-        retval = update_julia_type(ctx, retval, inferred_retty);
-    return retval;
+    return update_julia_type(ctx, retval, inferred_retty);
 }
 
-static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, StringRef specFunctionObject,
-                                          jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
+static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject,
+                                          const jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
 {
     auto theFptr = cast<Function>(
         jl_Module->getOrInsertFunction(specFunctionObject, jl_func_sig).getCallee());
     add_return_attr(theFptr, Attribute::NonNull);
     theFptr->addFnAttr(Thunk);
     Value *ret = emit_jlcall(ctx, theFptr, nullptr, argv, nargs, JLCALL_F_CC);
-    return mark_julia_type(ctx, ret, true, inferred_retty);
+    return update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty);
 }
 
 static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
@@ -3620,7 +3627,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
         if (argv[i].typ == jl_bottom_type)
             return jl_cgval_t();
     }
+    return emit_invoke(ctx, lival, argv, nargs, rt);
+}
 
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt)
+{
     bool handled = false;
     jl_cgval_t result;
     if (lival.constant) {
@@ -3632,7 +3643,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
             FunctionType *ft = ctx.f->getFunctionType();
             StringRef protoname = ctx.f->getName();
             if (ft == jl_func_sig) {
-                result = emit_call_specfun_boxed(ctx, protoname, argv, nargs, rt);
+                result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, argv, nargs, rt);
                 handled = true;
             }
             else if (ft != jl_func_sig_sparams) {
@@ -3674,7 +3685,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
                     if (specsig)
                         result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, argv, nargs, &cc, &return_roots, rt);
                     else
-                        result = emit_call_specfun_boxed(ctx, protoname, argv, nargs, rt);
+                        result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, argv, nargs, rt);
                     handled = true;
                     if (need_to_emit) {
                         Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname));
@@ -3693,6 +3704,40 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
     return result;
 }
 
+static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
+{
+    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    size_t arglen = jl_array_dim0(ex->args);
+    size_t nargs = arglen - 1;
+    assert(arglen >= 2);
+    jl_cgval_t lival = emit_expr(ctx, args[0]);
+    jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+    for (size_t i = 0; i < nargs; ++i) {
+        argv[i] = emit_expr(ctx, args[i + 1]);
+        if (argv[i].typ == jl_bottom_type)
+            return jl_cgval_t();
+    }
+    const jl_cgval_t &f = argv[0];
+    jl_cgval_t ret;
+    if (f.constant && f.constant == jl_builtin_modifyfield) {
+        if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv, nargs - 1, &lival))
+            return ret;
+        auto it = builtin_func_map.find(&jl_f_modifyfield);
+        assert(it != builtin_func_map.end());
+        Value *oldnew = emit_jlcall(ctx, it->second, V_rnull, &argv[1], nargs - 1, JLCALL_F_CC);
+        return mark_julia_type(ctx, oldnew, true, rt);
+    }
+    if (f.constant && jl_typeis(f.constant, jl_intrinsic_type)) {
+        JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant);
+        if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1)
+            return emit_atomic_pointerop(ctx, fi, argv, nargs - 1, &lival);
+    }
+
+    // emit function and arguments
+    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, JLCALL_F_CC);
+    return mark_julia_type(ctx, callval, true, rt);
+}
+
 static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
 {
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
@@ -4539,6 +4584,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaval);
         return emit_invoke(ctx, ex, expr_t);
     }
+    else if (head == invoke_modify_sym) {
+        assert(ssaval >= 0);
+        jl_value_t *expr_t = jl_is_long(ctx.source->ssavaluetypes) ? (jl_value_t*)jl_any_type :
+            jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaval);
+        return emit_invoke_modify(ctx, ex, expr_t);
+    }
     else if (head == call_sym) {
         jl_value_t *expr_t;
         if (ssaval < 0)
diff --git a/src/dump.c b/src/dump.c
index 49fa6efa431cd..f7a0ced4a6ab6 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -2719,7 +2719,7 @@ void jl_init_serializer(void)
     htable_new(&backref_table, 0);
 
     void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type,
-                     call_sym, invoke_sym, goto_ifnot_sym, return_sym, jl_symbol("tuple"),
+                     call_sym, invoke_sym, invoke_modify_sym, goto_ifnot_sym, return_sym, jl_symbol("tuple"),
                      jl_an_empty_string, jl_an_empty_vec_any,
 
                      // empirical list of very common symbols
diff --git a/src/interpreter.c b/src/interpreter.c
index e169f9f829a63..2bfa6482aa9f7 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -217,6 +217,9 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
     else if (head == invoke_sym) {
         return do_invoke(args, nargs, s);
     }
+    else if (head == invoke_modify_sym) {
+        return do_call(args + 1, nargs - 1, s);
+    }
     else if (head == isdefined_sym) {
         jl_value_t *sym = args[0];
         int defined = 0;
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 7883542c74a13..1847fc5c60e37 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -684,7 +684,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         if (!type_is_ghost(ptrty)) {
             thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
             typed_store(ctx, thePtr, im1, x, jl_cgval_t(), ety, tbaa_data, nullptr, nullptr, isboxed,
-                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false, false, false, "");
+                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false, false, false, nullptr, "");
         }
     }
     return e;
@@ -779,7 +779,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
 // e[i] <= x (swap)
 // e[i] y => x (replace)
 // x(e[i], y) (modify)
-static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs)
+static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs, const jl_cgval_t *modifyop)
 {
     bool issetfield = f == atomic_pointerset;
     bool isreplacefield = f == atomic_pointerreplace;
@@ -817,7 +817,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl
         Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
         bool isboxed = true;
         jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, tbaa_data, nullptr, nullptr, isboxed,
-                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, "atomic_pointermodify");
+                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
         if (issetfield)
             ret = e;
         return ret;
@@ -851,7 +851,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl
         assert(!isboxed);
         Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
         jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, tbaa_data, nullptr, nullptr, isboxed,
-                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, "atomic_pointermodify");
+                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
         if (issetfield)
             ret = e;
         return ret;
@@ -1093,7 +1093,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
     case atomic_pointerswap:
     case atomic_pointermodify:
     case atomic_pointerreplace:
-        return emit_atomic_pointerop(ctx, f, argv, nargs);
+        return emit_atomic_pointerop(ctx, f, argv, nargs, nullptr);
     case bitcast:
         return generic_bitcast(ctx, argv);
     case trunc_int:
diff --git a/src/julia_internal.h b/src/julia_internal.h
index a4e81453581f9..673d4459bef03 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -1326,6 +1326,7 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
 int isabspath(const char *in) JL_NOTSAFEPOINT;
 
 extern jl_sym_t *call_sym;    extern jl_sym_t *invoke_sym;
+extern jl_sym_t *invoke_modify_sym;
 extern jl_sym_t *empty_sym;   extern jl_sym_t *top_sym;
 extern jl_sym_t *module_sym;  extern jl_sym_t *slot_sym;
 extern jl_sym_t *export_sym;  extern jl_sym_t *import_sym;

From bfb05d5bbb9bb1d4321099cdfc82f125edb4c042 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Thu, 2 Sep 2021 21:21:33 -0400
Subject: [PATCH 59/65] inference: propagate variable changes to all exception
 frames (#42081)

* inference: propagate variable changes to all exception frames

Fix #42022

* Update test/compiler/inference.jl

* Update test/compiler/inference.jl

Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>

* fixup! inference: propagate variable changes to all exception frames

Co-authored-by: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>
(cherry picked from commit e83b3177bfcae6ad837896cc001a954ac16db4d9)
---
 base/compiler/abstractinterpretation.jl |  42 +++++-----
 base/compiler/inferencestate.jl         | 102 +++++++++++++++++++++---
 test/compiler/inference.jl              |  45 +++++++++++
 3 files changed, 156 insertions(+), 33 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 2573e38c21a60..aa83cc1a7aac2 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1706,18 +1706,16 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
     slottypes = frame.slottypes
     while frame.pc´´ <= n
         # make progress on the active ip set
-        local pc::Int = frame.pc´´ # current program-counter
+        local pc::Int = frame.pc´´
         while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
             #print(pc,": ",s[pc],"\n")
             local pc´::Int = pc + 1 # next program-counter (after executing instruction)
             if pc == frame.pc´´
-                # need to update pc´´ to point at the new lowest instruction in W
-                min_pc = _bits_findnext(W.bits, pc + 1)
-                frame.pc´´ = min_pc == -1 ? n + 1 : min_pc
+                # want to update pc´´ to point at the new lowest instruction in W
+                frame.pc´´ = pc´
             end
             delete!(W, pc)
             frame.currpc = pc
-            frame.cur_hand = frame.handler_at[pc]
             edges = frame.stmt_edges[pc]
             edges === nothing || empty!(edges)
             frame.stmt_info[pc] = nothing
@@ -1759,7 +1757,6 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     pc´ = l
                 else
                     # general case
-                    frame.handler_at[l] = frame.cur_hand
                     changes_else = changes
                     if isa(condt, Conditional)
                         changes_else = conditional_changes(changes_else, condt.elsetype, condt.var)
@@ -1818,7 +1815,6 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                 end
             elseif hd === :enter
                 l = stmt.args[1]::Int
-                frame.cur_hand = Pair{Any,Any}(l, frame.cur_hand)
                 # propagate type info to exception handler
                 old = states[l]
                 newstate_catch = stupdate!(old, changes)
@@ -1830,11 +1826,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     states[l] = newstate_catch
                 end
                 typeassert(states[l], VarTable)
-                frame.handler_at[l] = frame.cur_hand
             elseif hd === :leave
-                for i = 1:((stmt.args[1])::Int)
-                    frame.cur_hand = (frame.cur_hand::Pair{Any,Any}).second
-                end
             else
                 if hd === :(=)
                     t = abstract_eval_statement(interp, stmt.args[2], changes, frame)
@@ -1864,16 +1856,22 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                         frame.src.ssavaluetypes[pc] = t
                     end
                 end
-                if frame.cur_hand !== nothing && isa(changes, StateUpdate)
-                    # propagate new type info to exception handler
-                    # the handling for Expr(:enter) propagates all changes from before the try/catch
-                    # so this only needs to propagate any changes
-                    l = frame.cur_hand.first::Int
-                    if stupdate1!(states[l]::VarTable, changes::StateUpdate) !== false
-                        if l < frame.pc´´
-                            frame.pc´´ = l
+                if isa(changes, StateUpdate)
+                    let cur_hand = frame.handler_at[pc], l, enter
+                        while cur_hand != 0
+                            enter = frame.src.code[cur_hand]
+                            l = (enter::Expr).args[1]::Int
+                            # propagate new type info to exception handler
+                            # the handling for Expr(:enter) propagates all changes from before the try/catch
+                            # so this only needs to propagate any changes
+                            if stupdate1!(states[l]::VarTable, changes::StateUpdate) !== false
+                                if l < frame.pc´´
+                                    frame.pc´´ = l
+                                end
+                                push!(W, l)
+                            end
+                            cur_hand = frame.handler_at[cur_hand]
                         end
-                        push!(W, l)
                     end
                 end
             end
@@ -1886,7 +1884,6 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
             end
 
             pc´ > n && break # can't proceed with the fast-path fall-through
-            frame.handler_at[pc´] = frame.cur_hand
             newstate = stupdate!(states[pc´], changes)
             if isa(stmt, GotoNode) && frame.pc´´ < pc´
                 # if we are processing a goto node anyways,
@@ -1897,7 +1894,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     states[pc´] = newstate
                 end
                 push!(W, pc´)
-                pc = frame.pc´´
+                break
             elseif newstate !== nothing
                 states[pc´] = newstate
                 pc = pc´
@@ -1907,6 +1904,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                 break
             end
         end
+        frame.pc´´ = _bits_findnext(W.bits, frame.pc´´)::Int # next program-counter
     end
     frame.dont_work_on_me = false
     nothing
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index cb5d2009a9171..aa9a3ad1f0094 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -28,9 +28,7 @@ mutable struct InferenceState
     pc´´::LineNum
     nstmts::Int
     # current exception handler info
-    cur_hand #::Union{Nothing, Pair{LineNum, prev_handler}}
-    handler_at::Vector{Any}
-    n_handlers::Int
+    handler_at::Vector{LineNum}
     # ssavalue sparsity and restart info
     ssavalue_uses::Vector{BitSet}
     throw_blocks::BitSet
@@ -87,12 +85,9 @@ mutable struct InferenceState
         throw_blocks = find_throw_blocks(code)
 
         # exception handlers
-        cur_hand = nothing
-        handler_at = Any[ nothing for i=1:n ]
-        n_handlers = 0
-
-        W = BitSet()
-        push!(W, 1) #initial pc to visit
+        ip = BitSet()
+        handler_at = compute_trycatch(src.code, ip)
+        push!(ip, 1)
 
         if !toplevel
             meth = linfo.def
@@ -103,14 +98,14 @@ mutable struct InferenceState
 
         valid_worlds = WorldRange(src.min_world,
             src.max_world == typemax(UInt) ? get_world_counter() : src.max_world)
+
         frame = new(
             InferenceParams(interp), result, linfo,
             sp, slottypes, inmodule, 0,
             IdSet{InferenceState}(), IdSet{InferenceState}(),
             src, get_world_counter(interp), valid_worlds,
             nargs, s_types, s_edges, stmt_info,
-            Union{}, W, 1, n,
-            cur_hand, handler_at, n_handlers,
+            Union{}, ip, 1, n, handler_at,
             ssavalue_uses, throw_blocks,
             Vector{Tuple{InferenceState,LineNum}}(), # cycle_backedges
             Vector{InferenceState}(), # callers_in_cycle
@@ -124,6 +119,91 @@ mutable struct InferenceState
     end
 end
 
+function compute_trycatch(code::Vector{Any}, ip::BitSet)
+    # The goal initially is to record the frame like this for the state at exit:
+    # 1: (enter 3) # == 0
+    # 2: (expr)    # == 1
+    # 3: (leave 1) # == 1
+    # 4: (expr)    # == 0
+    # then we can find all trys by walking backwards from :enter statements,
+    # and all catches by looking at the statement after the :enter
+    n = length(code)
+    empty!(ip)
+    ip.offset = 0 # for _bits_findnext
+    push!(ip, n + 1)
+    handler_at = fill(0, n)
+
+    # start from all :enter statements and record the location of the try
+    for pc = 1:n
+        stmt = code[pc]
+        if isexpr(stmt, :enter)
+            l = stmt.args[1]::Int
+            handler_at[pc + 1] = pc
+            push!(ip, pc + 1)
+            handler_at[l] = pc
+            push!(ip, l)
+        end
+    end
+
+    # now forward those marks to all :leave statements
+    pc´´ = 0
+    while true
+        # make progress on the active ip set
+        pc = _bits_findnext(ip.bits, pc´´)::Int
+        pc > n && break
+        while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
+            pc´ = pc + 1 # next program-counter (after executing instruction)
+            if pc == pc´´
+                pc´´ = pc´
+            end
+            delete!(ip, pc)
+            cur_hand = handler_at[pc]
+            @assert cur_hand != 0 "unbalanced try/catch"
+            stmt = code[pc]
+            if isa(stmt, GotoNode)
+                pc´ = stmt.label
+            elseif isa(stmt, GotoIfNot)
+                l = stmt.dest::Int
+                if handler_at[l] != cur_hand
+                    @assert handler_at[l] == 0 "unbalanced try/catch"
+                    handler_at[l] = cur_hand
+                    if l < pc´´
+                        pc´´ = l
+                    end
+                    push!(ip, l)
+                end
+            elseif isa(stmt, ReturnNode)
+                @assert !isdefined(stmt, :val) "unbalanced try/catch"
+                break
+            elseif isa(stmt, Expr)
+                head = stmt.head
+                if head === :enter
+                    cur_hand = pc
+                elseif head === :leave
+                    l = stmt.args[1]::Int
+                    for i = 1:l
+                        cur_hand = handler_at[cur_hand]
+                    end
+                    cur_hand == 0 && break
+                end
+            end
+
+            pc´ > n && break # can't proceed with the fast-path fall-through
+            if handler_at[pc´] != cur_hand
+                @assert handler_at[pc´] == 0 "unbalanced try/catch"
+                handler_at[pc´] = cur_hand
+            elseif !in(pc´, ip)
+                break  # already visited
+            end
+            pc = pc´
+        end
+    end
+
+    @assert first(ip) == n + 1
+    return handler_at
+end
+
+
 """
     Iterate through all callers of the given InferenceState in the abstract
     interpretation stack (including the given InferenceState itself), vising
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index aa33f4a112aac..a2432169b09ad 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -3454,3 +3454,48 @@ end
 f41908(x::Complex{T}) where {String<:T<:String} = 1
 g41908() = f41908(Any[1][1])
 @test only(Base.return_types(g41908, ())) <: Int
+
+# issue #42022
+let x = Tuple{Int,Any}[
+        #= 1=# (0, Expr(:(=), Core.SlotNumber(3), 1))
+        #= 2=# (0, Expr(:enter, 18))
+        #= 3=# (2, Expr(:(=), Core.SlotNumber(3), 2.0))
+        #= 4=# (2, Expr(:enter, 12))
+        #= 5=# (4, Expr(:(=), Core.SlotNumber(3), '3'))
+        #= 6=# (4, Core.GotoIfNot(Core.SlotNumber(2), 9))
+        #= 7=# (4, Expr(:leave, 2))
+        #= 8=# (0, Core.ReturnNode(1))
+        #= 9=# (4, Expr(:call, GlobalRef(Main, :throw)))
+        #=10=# (4, Expr(:leave, 1))
+        #=11=# (2, Core.GotoNode(16))
+        #=12=# (4, Expr(:leave, 1))
+        #=13=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception)))
+        #=14=# (2, Expr(:call, GlobalRef(Main, :rethrow)))
+        #=15=# (2, Expr(:pop_exception, Core.SSAValue(4)))
+        #=16=# (2, Expr(:leave, 1))
+        #=17=# (0, Core.GotoNode(22))
+        #=18=# (2, Expr(:leave, 1))
+        #=19=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception)))
+        #=20=# (0, nothing)
+        #=21=# (0, Expr(:pop_exception, Core.SSAValue(2)))
+        #=22=# (0, Core.ReturnNode(Core.SlotNumber(3)))
+    ]
+    handler_at = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet())
+    @test handler_at == first.(x)
+end
+
+@test only(Base.return_types((Bool,)) do y
+        x = 1
+        try
+            x = 2.0
+            try
+                x = '3'
+                y ? (return 1) : throw()
+            catch ex1
+                rethrow()
+            end
+        catch ex2
+            nothing
+        end
+        return x
+    end) === Union{Int, Float64, Char}

From a5bed8bfe5f4828423519721e1d089b0e3a167c6 Mon Sep 17 00:00:00 2001
From: Ian Butterworth <i.r.butterworth@gmail.com>
Date: Mon, 6 Sep 2021 03:45:27 -0400
Subject: [PATCH 60/65] filter out duplicate modules in auto-install search,
 fixes #42133 (#42134)

(cherry picked from commit f2d03be48d4f6748a2d9ce87cacbb7b9ab6151ed)
---
 stdlib/REPL/src/REPL.jl  | 2 +-
 stdlib/REPL/test/repl.jl | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index 3e2770ab8f4c7..289b927c44212 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -196,7 +196,7 @@ function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
         arg isa Expr && modules_to_be_loaded(arg, mods)
     end
     filter!(mod -> !in(String(mod), ["Base", "Main", "Core"]), mods) # Exclude special non-package modules
-    return mods
+    return unique(mods)
 end
 
 """
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index 6724eb5e13ac3..3fbf6d8825bba 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -1332,6 +1332,8 @@ end
         @test mods == [:Foo, :Bar]
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo, Bar"))
         @test mods == [:Foo, :Bar]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo.bar, Foo.baz"))
+        @test mods == [:Foo]
 
         mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo end"))
         @test mods == [:Foo]

From c85012ae9d38cc24af5700e4b8f222559ae0536c Mon Sep 17 00:00:00 2001
From: Dilum Aluthge <dilum@aluthge.com>
Date: Mon, 6 Sep 2021 19:28:17 -0400
Subject: [PATCH 61/65] Fix the `cmdlineargs` tests on Buildkite (#42118)

(cherry picked from commit e1669b678ff648e5d90b70f1b995f6e03f12739a)
---
 test/cmdlineargs.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index c5e82b681a3c4..fb206acf03477 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -599,7 +599,7 @@ end
 
 
 # test error handling code paths of running --sysimage
-let exename = Base.julia_cmd()
+let exename = `$(Base.julia_cmd().exec[1]) -t 1`
     sysname = unsafe_string(Base.JLOptions().image_file)
     for nonexist_image in (
             joinpath(@__DIR__, "nonexistent"),

From 12320104c078a79ec309485825719feab67beb35 Mon Sep 17 00:00:00 2001
From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>
Date: Fri, 23 Jul 2021 02:22:49 +0900
Subject: [PATCH 62/65] compiler: general refactor (#41633)

Separated from compiler-plugin prototyping.

cherry-picked from 799136d6b016b73f2a88f3db3dc102fb3ca602b9
---
 base/compiler/abstractinterpretation.jl | 192 ++++++++++++++----------
 base/compiler/inferenceresult.jl        |   2 +-
 base/compiler/optimize.jl               |  25 +--
 base/compiler/ssair/inlining.jl         |  32 ++--
 base/compiler/stmtinfo.jl               |   2 +-
 base/compiler/typeinfer.jl              |  13 +-
 test/compiler/inference.jl              |   2 -
 7 files changed, 149 insertions(+), 119 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index aa83cc1a7aac2..ee96bf96418c6 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -35,73 +35,15 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         add_remark!(interp, sv, "Skipped call in throw block")
         return CallMeta(Any, false)
     end
-    valid_worlds = WorldRange()
-    # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
-    splitunions = 1 < unionsplitcost(argtypes) <= InferenceParams(interp).MAX_UNION_SPLITTING
-    mts = Core.MethodTable[]
-    fullmatch = Bool[]
-    if splitunions
-        split_argtypes = switchtupleunion(argtypes)
-        applicable = Any[]
-        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
-        infos = MethodMatchInfo[]
-        for arg_n in split_argtypes
-            sig_n = argtypes_to_type(arg_n)
-            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
-            if mt === nothing
-                add_remark!(interp, sv, "Could not identify method table for call")
-                return CallMeta(Any, false)
-            end
-            mt = mt::Core.MethodTable
-            matches = findall(sig_n, method_table(interp); limit=max_methods)
-            if matches === missing
-                add_remark!(interp, sv, "For one of the union split cases, too many methods matched")
-                return CallMeta(Any, false)
-            end
-            push!(infos, MethodMatchInfo(matches))
-            for m in matches
-                push!(applicable, m)
-                push!(applicable_argtypes, arg_n)
-            end
-            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
-            thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
-            found = false
-            for (i, mt′) in enumerate(mts)
-                if mt′ === mt
-                    fullmatch[i] &= thisfullmatch
-                    found = true
-                    break
-                end
-            end
-            if !found
-                push!(mts, mt)
-                push!(fullmatch, thisfullmatch)
-            end
-        end
-        info = UnionSplitInfo(infos)
-    else
-        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
-        if mt === nothing
-            add_remark!(interp, sv, "Could not identify method table for call")
-            return CallMeta(Any, false)
-        end
-        mt = mt::Core.MethodTable
-        matches = findall(atype, method_table(interp, sv); limit=max_methods)
-        if matches === missing
-            # this means too many methods matched
-            # (assume this will always be true, so we don't compute / update valid age in this case)
-            add_remark!(interp, sv, "Too many methods matched")
-            return CallMeta(Any, false)
-        end
-        push!(mts, mt)
-        push!(fullmatch, _any(match->(match::MethodMatch).fully_covers, matches))
-        info = MethodMatchInfo(matches)
-        applicable = matches.matches
-        valid_worlds = matches.valid_worlds
-        applicable_argtypes = nothing
+
+    matches = find_matching_methods(argtypes, atype, method_table(interp, sv), InferenceParams(interp).MAX_UNION_SPLITTING, max_methods)
+    if isa(matches, FailedMethodMatch)
+        add_remark!(interp, sv, matches.reason)
+        return CallMeta(Any, false)
     end
+
+    (; valid_worlds, applicable, info) = matches
     update_valid_age!(sv, valid_worlds)
-    applicable = applicable::Array{Any,1}
     napplicable = length(applicable)
     rettype = Bottom
     edges = MethodInstance[]
@@ -142,7 +84,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                 if edge !== nothing
                     push!(edges, edge)
                 end
-                this_argtypes = applicable_argtypes === nothing ? argtypes : applicable_argtypes[i]
+                this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
                 const_rt, const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
                 if const_rt !== rt && const_rt ⊑ rt
                     rt = const_rt
@@ -164,7 +106,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
             end
             # try constant propagation with argtypes for this match
             # this is in preparation for inlining, or improving the return result
-            this_argtypes = applicable_argtypes === nothing ? argtypes : applicable_argtypes[i]
+            this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
             const_this_rt, const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
             if const_this_rt !== this_rt && const_this_rt ⊑ this_rt
                 this_rt = const_this_rt
@@ -272,7 +214,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         # and avoid keeping track of a more complex result type.
         rettype = Any
     end
-    add_call_backedges!(interp, rettype, edges, fullmatch, mts, atype, sv)
+    add_call_backedges!(interp, rettype, edges, matches, atype, sv)
     if !isempty(sv.pclimitations) # remove self, if present
         delete!(sv.pclimitations, sv)
         for caller in sv.callers_in_cycle
@@ -283,24 +225,110 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
     return CallMeta(rettype, info)
 end
 
-function add_call_backedges!(interp::AbstractInterpreter,
-                             @nospecialize(rettype),
-                             edges::Vector{MethodInstance},
-                             fullmatch::Vector{Bool}, mts::Vector{Core.MethodTable}, @nospecialize(atype),
-                             sv::InferenceState)
-    if rettype === Any
-        # for `NativeInterpreter`, we don't add backedges when a new method couldn't refine
-        # (widen) this type
-        return
+struct FailedMethodMatch
+    reason::String
+end
+
+struct MethodMatches
+    applicable::Vector{Any}
+    info::MethodMatchInfo
+    valid_worlds::WorldRange
+    mt::Core.MethodTable
+    fullmatch::Bool
+end
+
+struct UnionSplitMethodMatches
+    applicable::Vector{Any}
+    applicable_argtypes::Vector{Vector{Any}}
+    info::UnionSplitInfo
+    valid_worlds::WorldRange
+    mts::Vector{Core.MethodTable}
+    fullmatches::Vector{Bool}
+end
+
+function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView,
+                               union_split::Int, max_methods::Int)
+    # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
+    if 1 < unionsplitcost(argtypes) <= union_split
+        split_argtypes = switchtupleunion(argtypes)
+        infos = MethodMatchInfo[]
+        applicable = Any[]
+        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
+        valid_worlds = WorldRange()
+        mts = Core.MethodTable[]
+        fullmatches = Bool[]
+        for i in 1:length(split_argtypes)
+            arg_n = split_argtypes[i]::Vector{Any}
+            sig_n = argtypes_to_type(arg_n)
+            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
+            mt === nothing && return FailedMethodMatch("Could not identify method table for call")
+            mt = mt::Core.MethodTable
+            matches = findall(sig_n, method_table; limit = max_methods)
+            if matches === missing
+                return FailedMethodMatch("For one of the union split cases, too many methods matched")
+            end
+            push!(infos, MethodMatchInfo(matches))
+            for m in matches
+                push!(applicable, m)
+                push!(applicable_argtypes, arg_n)
+            end
+            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
+            thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
+            found = false
+            for (i, mt′) in enumerate(mts)
+                if mt′ === mt
+                    fullmatches[i] &= thisfullmatch
+                    found = true
+                    break
+                end
+            end
+            if !found
+                push!(mts, mt)
+                push!(fullmatches, thisfullmatch)
+            end
+        end
+        return UnionSplitMethodMatches(applicable,
+                                       applicable_argtypes,
+                                       UnionSplitInfo(infos),
+                                       valid_worlds,
+                                       mts,
+                                       fullmatches)
+    else
+        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
+        if mt === nothing
+            return FailedMethodMatch("Could not identify method table for call")
+        end
+        mt = mt::Core.MethodTable
+        matches = findall(atype, method_table; limit = max_methods)
+        if matches === missing
+            # this means too many methods matched
+            # (assume this will always be true, so we don't compute / update valid age in this case)
+            return FailedMethodMatch("Too many methods matched")
+        end
+        fullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
+        return MethodMatches(matches.matches,
+                             MethodMatchInfo(matches),
+                             matches.valid_worlds,
+                             mt,
+                             fullmatch)
     end
+end
+
+function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), edges::Vector{MethodInstance},
+                             matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
+                             sv::InferenceState)
+    # for `NativeInterpreter`, we don't add backedges when a new method couldn't refine (widen) this type
+    rettype === Any && return
     for edge in edges
         add_backedge!(edge, sv)
     end
-    for (thisfullmatch, mt) in zip(fullmatch, mts)
-        if !thisfullmatch
-            # also need an edge to the method table in case something gets
-            # added that did not intersect with any existing method
-            add_mt_backedge!(mt, atype, sv)
+    # also need an edge to the method table in case something gets
+    # added that did not intersect with any existing method
+    if isa(matches, MethodMatches)
+        matches.fullmatch || add_mt_backedge!(matches.mt, atype, sv)
+    else
+        for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
+            thisfullmatch || add_mt_backedge!(mt, atype, sv)
         end
     end
 end
diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
index 327ab85d104f3..026b5286979cb 100644
--- a/base/compiler/inferenceresult.jl
+++ b/base/compiler/inferenceresult.jl
@@ -13,7 +13,7 @@ end
 # for the provided `linfo` and `given_argtypes`. The purpose of this function is
 # to return a valid value for `cache_lookup(linfo, argtypes, cache).argtypes`,
 # so that we can construct cache-correct `InferenceResult`s in the first place.
-function matching_cache_argtypes(linfo::MethodInstance, given_argtypes::Vector, va_override)
+function matching_cache_argtypes(linfo::MethodInstance, given_argtypes::Vector, va_override::Bool)
     @assert isa(linfo.def, Method) # ensure the next line works
     nargs::Int = linfo.def.nargs
     @assert length(given_argtypes) >= (nargs - 1)
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index ad0426860ece9..b8ec9610e0739 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -196,10 +196,11 @@ function stmt_affects_purity(@nospecialize(stmt), ir)
     return true
 end
 
-# Convert IRCode back to CodeInfo and compute inlining cost and sideeffects
+# compute inlining cost and sideeffects
 function finish(interp::AbstractInterpreter, opt::OptimizationState, params::OptimizationParams, ir::IRCode, @nospecialize(result))
-    def = opt.linfo.def
-    nargs = Int(opt.nargs) - 1
+    (; src, nargs, linfo) = opt
+    (; def, specTypes) = linfo
+    nargs = Int(nargs) - 1
 
     force_noinline = _any(@nospecialize(x) -> isexpr(x, :meta) && x.args[1] === :noinline, ir.meta)
 
@@ -221,7 +222,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
                 end
             end
             if proven_pure
-                for fl in opt.src.slotflags
+                for fl in src.slotflags
                     if (fl & SLOT_USEDUNDEF) != 0
                         proven_pure = false
                         break
@@ -230,7 +231,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
             end
         end
         if proven_pure
-            opt.src.pure = true
+            src.pure = true
         end
 
         if proven_pure
@@ -243,7 +244,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
             if !(isa(result, Const) && !is_inlineable_constant(result.val))
                 opt.const_api = true
             end
-            force_noinline || (opt.src.inlineable = true)
+            force_noinline || (src.inlineable = true)
         end
     end
 
@@ -252,7 +253,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
     # determine and cache inlineability
     union_penalties = false
     if !force_noinline
-        sig = unwrap_unionall(opt.linfo.specTypes)
+        sig = unwrap_unionall(specTypes)
         if isa(sig, DataType) && sig.name === Tuple.name
             for P in sig.parameters
                 P = unwrap_unionall(P)
@@ -264,25 +265,25 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
         else
             force_noinline = true
         end
-        if !opt.src.inlineable && result === Union{}
+        if !src.inlineable && result === Union{}
             force_noinline = true
         end
     end
     if force_noinline
-        opt.src.inlineable = false
+        src.inlineable = false
     elseif isa(def, Method)
-        if opt.src.inlineable && isdispatchtuple(opt.linfo.specTypes)
+        if src.inlineable && isdispatchtuple(specTypes)
             # obey @inline declaration if a dispatch barrier would not help
         else
             bonus = 0
             if result ⊑ Tuple && !isconcretetype(widenconst(result))
                 bonus = params.inline_tupleret_bonus
             end
-            if opt.src.inlineable
+            if src.inlineable
                 # For functions declared @inline, increase the cost threshold 20x
                 bonus += params.inline_cost_threshold*19
             end
-            opt.src.inlineable = isinlineable(def, opt, params, union_penalties, bonus)
+            src.inlineable = isinlineable(def, opt, params, union_penalties, bonus)
         end
     end
 
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 05ed9511b23d8..077a1f105d3d8 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -313,8 +313,10 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         push!(linetable, LineInfoNode(entry.module, entry.method, entry.file, entry.line,
             (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset : inlined_at)))
     end
-    nargs_def = item.mi.def.nargs
-    isva = nargs_def > 0 && item.mi.def.isva
+    (; def, sparam_vals) = item.mi
+    nargs_def = def.nargs::Int32
+    isva = nargs_def > 0 && def.isva
+    sig = def.sig
     if isva
         vararg = mk_tuplecall!(compact, argexprs[nargs_def:end], compact.result[idx][:line])
         argexprs = Any[argexprs[1:(nargs_def - 1)]..., vararg]
@@ -347,7 +349,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
             # face of rename_arguments! mutating in place - should figure out
             # something better eventually.
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.mi.def.sig, item.mi.sparam_vals, linetable_offset, boundscheck_idx, compact)
+            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck_idx, compact)
             if isa(stmt′, ReturnNode)
                 isa(stmt′.val, SSAValue) && (compact.used_ssas[stmt′.val.id] += 1)
                 return_value = SSAValue(idx′)
@@ -374,7 +376,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx)
         for ((_, idx′), stmt′) in inline_compact
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.mi.def.sig, item.mi.sparam_vals, linetable_offset, boundscheck_idx, compact)
+            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck_idx, compact)
             if isa(stmt′, ReturnNode)
                 if isdefined(stmt′, :val)
                     val = stmt′.val
@@ -709,9 +711,8 @@ function compileable_specialization(et::Union{EdgeTracker, Nothing}, match::Meth
     return mi
 end
 
-function compileable_specialization(et::Union{EdgeTracker, Nothing}, result::InferenceResult)
-    mi = specialize_method(result.linfo.def::Method, result.linfo.specTypes,
-        result.linfo.sparam_vals, false, true)
+function compileable_specialization(et::Union{EdgeTracker, Nothing}, (; linfo)::InferenceResult)
+    mi = specialize_method(linfo.def::Method, linfo.specTypes, linfo.sparam_vals, false, true)
     mi !== nothing && et !== nothing && push!(et, mi::MethodInstance)
     return mi
 end
@@ -1065,9 +1066,9 @@ function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, (; match, result):
     pushfirst!(atypes, atype0)
 
     if isa(result, InferenceResult)
-        item = InliningTodo(result, atypes, calltype)
-        validate_sparams(item.mi.sparam_vals) || return nothing
-        if argtypes_to_type(atypes) <: item.mi.def.sig
+        (; mi) = item = InliningTodo(result, atypes, calltype)
+        validate_sparams(mi.sparam_vals) || return nothing
+        if argtypes_to_type(atypes) <: mi.def.sig
             state.mi_cache !== nothing && (item = resolve_todo(item, state))
             handle_single_case!(ir, stmt, idx, item, true, todo)
             return nothing
@@ -1195,7 +1196,7 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int
     for i in 1:length(infos)
         info = infos[i]
         meth = info.results
-        if meth === missing || meth.ambig
+        if meth.ambig
             # Too many applicable methods
             # Or there is a (partial?) ambiguity
             too_many = true
@@ -1213,8 +1214,9 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int
             only_method = false
         end
         for match in meth
-            signature_union = Union{signature_union, match.spec_types}
-            if !isdispatchtuple(match.spec_types)
+            spec_types = match.spec_types
+            signature_union = Union{signature_union, spec_types}
+            if !isdispatchtuple(spec_types)
                 fully_covered = false
                 continue
             end
@@ -1222,10 +1224,10 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int
             if case === nothing
                 fully_covered = false
                 continue
-            elseif _any(p->p[1] === match.spec_types, cases)
+            elseif _any(p->p[1] === spec_types, cases)
                 continue
             end
-            push!(cases, Pair{Any,Any}(match.spec_types, case))
+            push!(cases, Pair{Any,Any}(spec_types, case))
         end
     end
 
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index a6ffee299c4f5..0c54e9359fa1a 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -9,7 +9,7 @@ to re-consult the method table. This info is illegal on any statement that is
 not a call to a generic function.
 """
 struct MethodMatchInfo
-    results::Union{Missing, MethodLookupResult}
+    results::MethodLookupResult
 end
 
 """
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 25a07fbb5ee7d..b0c0efa80d004 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# build (and start inferring) the inference frame for the linfo
+# build (and start inferring) the inference frame for the top-level MethodInstance
 function typeinf(interp::AbstractInterpreter, result::InferenceResult, cached::Bool)
     frame = InferenceState(result, cached, interp)
     frame === nothing && return false
@@ -386,17 +386,18 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
     end
     # check if the existing linfo metadata is also sufficient to describe the current inference result
     # to decide if it is worth caching this
-    already_inferred = already_inferred_quick_test(interp, result.linfo)
-    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), result.linfo)
+    linfo = result.linfo
+    already_inferred = already_inferred_quick_test(interp, linfo)
+    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo)
         already_inferred = true
     end
 
     # TODO: also don't store inferred code if we've previously decided to interpret this function
     if !already_inferred
-        inferred_result = transform_result_for_cache(interp, result.linfo, valid_worlds, result.src)
-        code_cache(interp)[result.linfo] = CodeInstance(result, inferred_result, valid_worlds)
+        inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result.src)
+        code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds)
     end
-    unlock_mi_inference(interp, result.linfo)
+    unlock_mi_inference(interp, linfo)
     nothing
 end
 
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index a2432169b09ad..acd4f5af33de0 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -2238,12 +2238,10 @@ code28279 = code_lowered(f28279, (Bool,))[1].code
 oldcode28279 = deepcopy(code28279)
 ssachangemap = fill(0, length(code28279))
 labelchangemap = fill(0, length(code28279))
-worklist = Int[]
 let i
     for i in 1:length(code28279)
         stmt = code28279[i]
         if isa(stmt, GotoIfNot)
-            push!(worklist, i)
             ssachangemap[i] = 1
             if i < length(code28279)
                 labelchangemap[i + 1] = 1

From 78dc2243eba69b397e41119c1104d5d64b050b51 Mon Sep 17 00:00:00 2001
From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com>
Date: Fri, 30 Jul 2021 04:04:33 +0900
Subject: [PATCH 63/65] more type-stable type-inference (#41697)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(this PR is the final output of my demo at [our workshop](https://github.com/aviatesk/juliacon2021-workshop-pkgdev))

This PR eliminates many of the runtime dispatches within our type inference
routine that are reported by the following JET analysis:
```julia
using JETTest

const CC = Core.Compiler

function function_filter(@nospecialize(ft))
    ft === typeof(CC.isprimitivetype) && return false
    ft === typeof(CC.ismutabletype) && return false
    ft === typeof(CC.isbitstype) && return false
    ft === typeof(CC.widenconst) && return false
    ft === typeof(CC.widenconditional) && return false
    ft === typeof(CC.widenwrappedconditional) && return false
    ft === typeof(CC.maybe_extract_const_bool) && return false
    ft === typeof(CC.ignorelimited) && return false
    return true
end

function frame_filter((; linfo) = sv)
    meth = linfo.def
    isa(meth, Method) || return true
    return occursin("compiler/", string(meth.file))
end

report_dispatch(CC.typeinf, (CC.NativeInterpreter, CC.InferenceState); function_filter, frame_filter)
```

> on master
```
═════ 137 possible errors found ═════
...
```
> on this PR
```
═════ 51 possible errors found ═════
...
```

It also seems that this PR makes JIT compilation slightly faster:
> on master
```julia
~/julia/julia master
❯ ./usr/bin/julia -e '@time using Plots; @time plot(rand(10,3));'
  3.659865 seconds (7.19 M allocations: 497.982 MiB, 3.94% gc time, 0.39% compilation time)
  2.696410 seconds (3.62 M allocations: 202.905 MiB, 7.49% gc time, 56.39% compilation time)
```
> on this PR
```julia
~/julia/julia avi/jetdemo* 7s
❯ ./usr/bin/julia -e '@time using Plots; @time plot(rand(10,3));'
  3.396974 seconds (7.16 M allocations: 491.442 MiB, 4.80% gc time, 0.28% compilation time)
  2.591130 seconds (3.48 M allocations: 196.026 MiB, 7.29% gc time, 56.72% compilation time)
```

cherry-picked from 795935fd3a2d97b2f948cfb82a18da48743b622d
---
 base/compiler/abstractinterpretation.jl | 108 ++++++++++++++----------
 base/compiler/inferencestate.jl         |   2 +-
 base/compiler/ssair/legacy.jl           |   2 +-
 base/compiler/ssair/passes.jl           |   2 +-
 base/compiler/ssair/slot2ssa.jl         |   2 +-
 base/compiler/tfuncs.jl                 |   4 +-
 base/compiler/typeinfer.jl              |  30 +++----
 base/compiler/typelattice.jl            |   4 +-
 base/compiler/validation.jl             |   9 +-
 9 files changed, 90 insertions(+), 73 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index ee96bf96418c6..f0d59694f4128 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -85,9 +85,12 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                     push!(edges, edge)
                 end
                 this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
-                const_rt, const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
-                if const_rt !== rt && const_rt ⊑ rt
-                    rt = const_rt
+                const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
+                if const_result !== nothing
+                    const_rt, const_result = const_result
+                    if const_rt !== rt && const_rt ⊑ rt
+                        rt = const_rt
+                    end
                 end
                 push!(const_results, const_result)
                 if const_result !== nothing
@@ -107,9 +110,12 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
             # try constant propagation with argtypes for this match
             # this is in preparation for inlining, or improving the return result
             this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
-            const_this_rt, const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
-            if const_this_rt !== this_rt && const_this_rt ⊑ this_rt
-                this_rt = const_this_rt
+            const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
+            if const_result !== nothing
+                const_this_rt, const_result = const_result
+                if const_this_rt !== this_rt && const_this_rt ⊑ this_rt
+                    this_rt = const_this_rt
+                end
             end
             push!(const_results, const_result)
             if const_result !== nothing
@@ -520,33 +526,35 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, resul
                                               @nospecialize(f), argtypes::Vector{Any}, match::MethodMatch,
                                               sv::InferenceState, va_override::Bool)
     mi = maybe_get_const_prop_profitable(interp, result, f, argtypes, match, sv)
-    mi === nothing && return Any, nothing
+    mi === nothing && return nothing
     # try constant prop'
     inf_cache = get_inference_cache(interp)
     inf_result = cache_lookup(mi, argtypes, inf_cache)
     if inf_result === nothing
         # if there might be a cycle, check to make sure we don't end up
         # calling ourselves here.
-        if result.edgecycle && _any(InfStackUnwind(sv)) do infstate
-                # if the type complexity limiting didn't decide to limit the call signature (`result.edgelimited = false`)
-                # we can relax the cycle detection by comparing `MethodInstance`s and allow inference to
-                # propagate different constant elements if the recursion is finite over the lattice
-                return (result.edgelimited ? match.method === infstate.linfo.def : mi === infstate.linfo) &&
-                        any(infstate.result.overridden_by_const)
+        let result = result # prevent capturing
+            if result.edgecycle && _any(InfStackUnwind(sv)) do infstate
+                    # if the type complexity limiting didn't decide to limit the call signature (`result.edgelimited = false`)
+                    # we can relax the cycle detection by comparing `MethodInstance`s and allow inference to
+                    # propagate different constant elements if the recursion is finite over the lattice
+                    return (result.edgelimited ? match.method === infstate.linfo.def : mi === infstate.linfo) &&
+                            any(infstate.result.overridden_by_const)
+                end
+                add_remark!(interp, sv, "[constprop] Edge cycle encountered")
+                return nothing
             end
-            add_remark!(interp, sv, "[constprop] Edge cycle encountered")
-            return Any, nothing
         end
         inf_result = InferenceResult(mi, argtypes, va_override)
         frame = InferenceState(inf_result, #=cache=#false, interp)
-        frame === nothing && return Any, nothing # this is probably a bad generated function (unsound), but just ignore it
+        frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it
         frame.parent = sv
         push!(inf_cache, inf_result)
-        typeinf(interp, frame) || return Any, nothing
+        typeinf(interp, frame) || return nothing
     end
     result = inf_result.result
     # if constant inference hits a cycle, just bail out
-    isa(result, InferenceState) && return Any, nothing
+    isa(result, InferenceState) && return nothing
     add_backedge!(mi, sv)
     return result, inf_result
 end
@@ -1178,7 +1186,8 @@ function abstract_invoke(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:
     nargtype === Bottom && return CallMeta(Bottom, false)
     nargtype isa DataType || return CallMeta(Any, false) # other cases are not implemented below
     isdispatchelem(ft) || return CallMeta(Any, false) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
-    types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)
+    ft = ft::DataType
+    types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)::Type
     nargtype = Tuple{ft, nargtype.parameters...}
     argtype = Tuple{ft, argtype.parameters...}
     result = findsup(types, method_table(interp))
@@ -1200,12 +1209,14 @@ function abstract_invoke(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:
     #     t, a = ti.parameters[i], argtypes′[i]
     #     argtypes′[i] = t ⊑ a ? t : a
     # end
-    const_rt, const_result = abstract_call_method_with_const_args(interp, result, argtype_to_function(ft′), argtypes′, match, sv, false)
-    if const_rt !== rt && const_rt ⊑ rt
-        return CallMeta(collect_limitations!(const_rt, sv), InvokeCallInfo(match, const_result))
-    else
-        return CallMeta(collect_limitations!(rt, sv), InvokeCallInfo(match, nothing))
+    const_result = abstract_call_method_with_const_args(interp, result, argtype_to_function(ft′), argtypes′, match, sv, false)
+    if const_result !== nothing
+        const_rt, const_result = const_result
+        if const_rt !== rt && const_rt ⊑ rt
+            return CallMeta(collect_limitations!(const_rt, sv), InvokeCallInfo(match, const_result))
+        end
     end
+    return CallMeta(collect_limitations!(rt, sv), InvokeCallInfo(match, nothing))
 end
 
 # call where the function is known exactly
@@ -1307,19 +1318,20 @@ end
 function abstract_call_opaque_closure(interp::AbstractInterpreter, closure::PartialOpaque, argtypes::Vector{Any}, sv::InferenceState)
     pushfirst!(argtypes, closure.env)
     sig = argtypes_to_type(argtypes)
-    (; rt, edge) = result = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, sv)
+    (; rt, edge) = result = abstract_call_method(interp, closure.source, sig, Core.svec(), false, sv)
     edge !== nothing && add_backedge!(edge, sv)
     tt = closure.typ
-    sigT = unwrap_unionall(tt).parameters[1]
-    match = MethodMatch(sig, Core.svec(), closure.source::Method, sig <: rewrap_unionall(sigT, tt))
+    sigT = (unwrap_unionall(tt)::DataType).parameters[1]
+    match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt))
     info = OpaqueClosureCallInfo(match)
     if !result.edgecycle
-        const_rettype, const_result = abstract_call_method_with_const_args(interp, result, closure, argtypes,
+        const_result = abstract_call_method_with_const_args(interp, result, closure, argtypes,
             match, sv, closure.isva)
-        if const_rettype ⊑ rt
-           rt = const_rettype
-        end
         if const_result !== nothing
+            const_rettype, const_result = const_result
+            if const_rettype ⊑ rt
+               rt = const_rettype
+            end
             info = ConstCallInfo(info, Union{Nothing,InferenceResult}[const_result])
         end
     end
@@ -1329,7 +1341,7 @@ end
 function most_general_argtypes(closure::PartialOpaque)
     ret = Any[]
     cc = widenconst(closure)
-    argt = unwrap_unionall(cc).parameters[1]
+    argt = (unwrap_unionall(cc)::DataType).parameters[1]
     if !isa(argt, DataType) || argt.name !== typename(Tuple)
         argt = Tuple
     end
@@ -1344,8 +1356,8 @@ function abstract_call(interp::AbstractInterpreter, fargs::Union{Nothing,Vector{
     f = argtype_to_function(ft)
     if isa(ft, PartialOpaque)
         return abstract_call_opaque_closure(interp, ft, argtypes[2:end], sv)
-    elseif isa(unwrap_unionall(ft), DataType) && unwrap_unionall(ft).name === typename(Core.OpaqueClosure)
-        return CallMeta(rewrap_unionall(unwrap_unionall(ft).parameters[2], ft), false)
+    elseif (uft = unwrap_unionall(ft); isa(uft, DataType) && uft.name === typename(Core.OpaqueClosure))
+        return CallMeta(rewrap_unionall((uft::DataType).parameters[2], ft), false)
     elseif f === nothing
         # non-constant function, but the number of arguments is known
         # and the ft is not a Builtin or IntrinsicFunction
@@ -1541,12 +1553,12 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
         if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t)
             at = abstract_eval_value(interp, e.args[2], vtypes, sv)
             n = fieldcount(t)
-            if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val) &&
-                let t = t; _all(i->getfield(at.val, i) isa fieldtype(t, i), 1:n); end
+            if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
+                let t = t; _all(i->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end
                 t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val))
-            elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields) &&
-                let t = t, at = at; _all(i->at.fields[i] ⊑ fieldtype(t, i), 1:n); end
-                t = PartialStruct(t, at.fields)
+            elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields::Vector{Any}) &&
+                let t = t, at = at; _all(i->(at.fields::Vector{Any})[i] ⊑ fieldtype(t, i), 1:n); end
+                t = PartialStruct(t, at.fields::Vector{Any})
             end
         end
     elseif ehead === :new_opaque_closure
@@ -1594,7 +1606,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
         sym = e.args[1]
         t = Bool
         if isa(sym, SlotNumber)
-            vtyp = vtypes[slot_id(sym)]
+            vtyp = vtypes[slot_id(sym)]::VarState
             if vtyp.typ === Bottom
                 t = Const(false) # never assigned previously
             elseif !vtyp.undef
@@ -1609,7 +1621,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = Const(true)
             end
         elseif isa(sym, Expr) && sym.head === :static_parameter
-            n = sym.args[1]
+            n = sym.args[1]::Int
             if 1 <= n <= length(sv.sptypes)
                 spty = sv.sptypes[n]
                 if isa(spty, Const)
@@ -1644,7 +1656,7 @@ function abstract_eval_global(M::Module, s::Symbol)
 end
 
 function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo)
-    typ = src.ssavaluetypes[s.id]
+    typ = (src.ssavaluetypes::Vector{Any})[s.id]
     if typ === NOT_FOUND
         return Bottom
     end
@@ -1732,6 +1744,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
     isva = isa(def, Method) && def.isva
     nslots = nargs - isva
     slottypes = frame.slottypes
+    ssavaluetypes = frame.src.ssavaluetypes::Vector{Any}
     while frame.pc´´ <= n
         # make progress on the active ip set
         local pc::Int = frame.pc´´
@@ -1832,7 +1845,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     for (caller, caller_pc) in frame.cycle_backedges
                         # notify backedges of updated type information
                         typeassert(caller.stmt_types[caller_pc], VarTable) # we must have visited this statement before
-                        if !(caller.src.ssavaluetypes[caller_pc] === Any)
+                        if !((caller.src.ssavaluetypes::Vector{Any})[caller_pc] === Any)
                             # no reason to revisit if that call-site doesn't affect the final result
                             if caller_pc < caller.pc´´
                                 caller.pc´´ = caller_pc
@@ -1842,6 +1855,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     end
                 end
             elseif hd === :enter
+                stmt = stmt::Expr
                 l = stmt.args[1]::Int
                 # propagate type info to exception handler
                 old = states[l]
@@ -1857,16 +1871,18 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
             elseif hd === :leave
             else
                 if hd === :(=)
+                    stmt = stmt::Expr
                     t = abstract_eval_statement(interp, stmt.args[2], changes, frame)
                     if t === Bottom
                         break
                     end
-                    frame.src.ssavaluetypes[pc] = t
+                    ssavaluetypes[pc] = t
                     lhs = stmt.args[1]
                     if isa(lhs, SlotNumber)
                         changes = StateUpdate(lhs, VarState(t, false), changes, false)
                     end
                 elseif hd === :method
+                    stmt = stmt::Expr
                     fname = stmt.args[1]
                     if isa(fname, SlotNumber)
                         changes = StateUpdate(fname, VarState(Any, false), changes, false)
@@ -1881,7 +1897,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     if !isempty(frame.ssavalue_uses[pc])
                         record_ssa_assign(pc, t, frame)
                     else
-                        frame.src.ssavaluetypes[pc] = t
+                        ssavaluetypes[pc] = t
                     end
                 end
                 if isa(changes, StateUpdate)
@@ -1908,7 +1924,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
 
             if t === nothing
                 # mark other reached expressions as `Any` to indicate they don't throw
-                frame.src.ssavaluetypes[pc] = Any
+                ssavaluetypes[pc] = Any
             end
 
             pc´ > n && break # can't proceed with the fast-path fall-through
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index aa9a3ad1f0094..f13622edb23fe 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -265,7 +265,7 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
             while temp isa UnionAll
                 temp = temp.body
             end
-            sigtypes = temp.parameters
+            sigtypes = (temp::DataType).parameters
             for j = 1:length(sigtypes)
                 tj = sigtypes[j]
                 if isType(tj) && tj.parameters[1] === Pi
diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl
index 49d9aef973e29..e9fddd1d12a02 100644
--- a/base/compiler/ssair/legacy.jl
+++ b/base/compiler/ssair/legacy.jl
@@ -47,7 +47,7 @@ function replace_code_newstyle!(ci::CodeInfo, ir::IRCode, nargs::Int)
     for metanode in ir.meta
         push!(ci.code, metanode)
         push!(ci.codelocs, 1)
-        push!(ci.ssavaluetypes, Any)
+        push!(ci.ssavaluetypes::Vector{Any}, Any)
         push!(ci.ssaflags, 0x00)
     end
     # Translate BB Edges to statement edges
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index ce20a61a4e983..7c8964d371122 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -1064,7 +1064,7 @@ function type_lift_pass!(ir::IRCode)
                                         if haskey(processed, id)
                                             val = processed[id]
                                         else
-                                            push!(worklist, (id, up_id, new_phi, i))
+                                            push!(worklist, (id, up_id, new_phi::SSAValue, i))
                                             continue
                                         end
                                     else
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index 21c0bf00ec755..91543835c8c06 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -871,7 +871,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         changed = false
         for new_idx in type_refine_phi
             node = new_nodes.stmts[new_idx]
-            new_typ = recompute_type(node[:inst], ci, ir, ir.sptypes, slottypes)
+            new_typ = recompute_type(node[:inst]::Union{PhiNode,PhiCNode}, ci, ir, ir.sptypes, slottypes)
             if !(node[:type] ⊑ new_typ) || !(new_typ ⊑ node[:type])
                 node[:type] = new_typ
                 changed = true
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index 511af138883f4..ca03710bbbd47 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -1627,7 +1627,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
         if length(argtypes) - 1 == tf[2]
             argtypes = argtypes[1:end-1]
         else
-            vatype = argtypes[end]
+            vatype = argtypes[end]::Core.TypeofVararg
             argtypes = argtypes[1:end-1]
             while length(argtypes) < tf[1]
                 push!(argtypes, unwrapva(vatype))
@@ -1733,7 +1733,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
             aft = argtypes[2]
             if isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
                    (isconcretetype(aft) && !(aft <: Builtin))
-                af_argtype = isa(tt, Const) ? tt.val : tt.parameters[1]
+                af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
                 if isa(af_argtype, DataType) && af_argtype <: Tuple
                     argtypes_vec = Any[aft, af_argtype.parameters...]
                     if contains_is(argtypes_vec, Union{})
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index b0c0efa80d004..ef6e5a161864a 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -243,7 +243,7 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     # collect results for the new expanded frame
     results = Tuple{InferenceResult, Vector{Any}, Bool}[
             ( frames[i].result,
-              frames[i].stmt_edges[1],
+              frames[i].stmt_edges[1]::Vector{Any},
               frames[i].cached )
         for i in 1:length(frames) ]
     empty!(frames)
@@ -341,7 +341,7 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta
     if cache_the_tree
         if may_compress(interp)
             nslots = length(ci.slotflags)
-            resize!(ci.slottypes, nslots)
+            resize!(ci.slottypes::Vector{Any}, nslots)
             resize!(ci.slotnames, nslots)
             return ccall(:jl_compress_ir, Any, (Any, Any), def, ci)
         else
@@ -438,7 +438,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         empty!(edges)
     end
     if me.src.edges !== nothing
-        append!(s_edges, me.src.edges)
+        append!(s_edges, me.src.edges::Vector)
         me.src.edges = nothing
     end
     # inspect whether our inference had a limited result accuracy,
@@ -447,7 +447,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
     limited_ret = me.bestguess isa LimitedAccuracy
     limited_src = false
     if !limited_ret
-        gt = me.src.ssavaluetypes
+        gt = me.src.ssavaluetypes::Vector{Any}
         for j = 1:length(gt)
             gt[j] = gtj = cycle_fix_limited(gt[j], me)
             if gtj isa LimitedAccuracy && me.parent !== nothing
@@ -511,8 +511,9 @@ end
 
 # widen all Const elements in type annotations
 function widen_all_consts!(src::CodeInfo)
-    for i = 1:length(src.ssavaluetypes)
-        src.ssavaluetypes[i] = widenconst(src.ssavaluetypes[i])
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for i = 1:length(ssavaluetypes)
+        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
     end
 
     for i = 1:length(src.code)
@@ -577,6 +578,7 @@ function record_slot_assign!(sv::InferenceState)
     states = sv.stmt_types
     body = sv.src.code::Vector{Any}
     slottypes = sv.slottypes::Vector{Any}
+    ssavaluetypes = sv.src.ssavaluetypes::Vector{Any}
     for i = 1:length(body)
         expr = body[i]
         st_i = states[i]
@@ -585,7 +587,7 @@ function record_slot_assign!(sv::InferenceState)
             lhs = expr.args[1]
             rhs = expr.args[2]
             if isa(lhs, SlotNumber)
-                vt = widenconst(sv.src.ssavaluetypes[i])
+                vt = widenconst(ssavaluetypes[i])
                 if vt !== Bottom
                     id = slot_id(lhs)
                     otherTy = slottypes[id]
@@ -608,12 +610,11 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
     # (otherwise, we'll perhaps run the optimization passes later, outside of inference)
 
     # remove all unused ssa values
-    gt = sv.src.ssavaluetypes
-    for j = 1:length(gt)
-        if gt[j] === NOT_FOUND
-            gt[j] = Union{}
-        end
-        gt[j] = widenconditional(gt[j])
+    src = sv.src
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for j = 1:length(ssavaluetypes)
+        t = ssavaluetypes[j]
+        ssavaluetypes[j] = t === NOT_FOUND ? Union{} : widenconditional(t)
     end
 
     # compute the required type for each slot
@@ -626,7 +627,6 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
     # annotate variables load types
     # remove dead code optimization
     # and compute which variables may be used undef
-    src = sv.src
     states = sv.stmt_types
     nargs = sv.nargs
     nslots = length(states[1]::VarTable)
@@ -669,7 +669,7 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
             elseif run_optimizer
                 deleteat!(body, i)
                 deleteat!(states, i)
-                deleteat!(src.ssavaluetypes, i)
+                deleteat!(ssavaluetypes, i)
                 deleteat!(src.codelocs, i)
                 deleteat!(sv.stmt_info, i)
                 nexpr -= 1
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index 2d65211c273b2..2f026d41efb35 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -309,7 +309,7 @@ function smerge(sa::Union{NotFound,VarState}, sb::Union{NotFound,VarState})
 end
 
 @inline tchanged(@nospecialize(n), @nospecialize(o)) = o === NOT_FOUND || (n !== NOT_FOUND && !(n ⊑ o))
-@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n, o)))
+@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n::VarState, o::VarState)))
 
 widenconditional(@nospecialize typ) = typ
 function widenconditional(typ::AnyConditional)
@@ -406,7 +406,7 @@ function stupdate1!(state::VarTable, change::StateUpdate)
                 if isa(oldtypetyp, Conditional) && slot_id(oldtypetyp.var) == changeid
                     oldtypetyp = widenconditional(oldtypetyp)
                     if oldtype.typ isa LimitedAccuracy
-                        oldtypetyp = LimitedAccuracy(oldtypetyp, oldtype.typ.causes)
+                        oldtypetyp = LimitedAccuracy(oldtypetyp, (oldtype.typ::LimitedAccuracy).causes)
                     end
                     state[i] = VarState(oldtypetyp, oldtype.undef)
                 end
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index 6e0f81114744b..02fb1b02c6ef0 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Expr head => argument count bounds
-const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange}(
+const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :call => 1:typemax(Int),
     :invoke => 2:typemax(Int),
     :invoke_modify => 3:typemax(Int),
@@ -182,10 +182,11 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
     !is_top_level && nslotnames == 0 && push!(errors, InvalidCodeError(EMPTY_SLOTNAMES))
     nslotnames < nslotflags && push!(errors, InvalidCodeError(SLOTFLAGS_MISMATCH, (nslotnames, nslotflags)))
     if c.inferred
-        nssavaluetypes = length(c.ssavaluetypes)
+        nssavaluetypes = length(c.ssavaluetypes::Vector{Any})
         nssavaluetypes < nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH, (nssavals, nssavaluetypes)))
     else
-        c.ssavaluetypes != nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, c.ssavaluetypes)))
+        ssavaluetypes = c.ssavaluetypes::Int
+        ssavaluetypes != nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, ssavaluetypes)))
     end
     return errors
 end
@@ -207,7 +208,7 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, mi::Core.MethodInsta
     else
         m = mi.def::Method
         mnargs = m.nargs
-        n_sig_params = length(Core.Compiler.unwrap_unionall(m.sig).parameters)
+        n_sig_params = length((unwrap_unionall(m.sig)::DataType).parameters)
         if (m.isva ? (n_sig_params < (mnargs - 1)) : (n_sig_params != mnargs))
             push!(errors, InvalidCodeError(SIGNATURE_NARGS_MISMATCH, (m.isva, n_sig_params, mnargs)))
         end

From 93f40a049f229984389f6a1713c03b4cd50415ba Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Wed, 8 Sep 2021 02:32:49 -0400
Subject: [PATCH 64/65] inference: bail from const-prop if const-prop fails
 (#42112)

Otherwise we can end up in an infinite cycle of attempting
const-prop and having it fail. Also handle `Vararg` in method-lookup
matching; not handling it was the reason we could not compute the
const-prop signature.

Fixes #42097

cherry-picked from 3a4198e91644a66b33d82921348ce4e050fae633
---
 base/compiler/abstractinterpretation.jl |  4 +++
 base/compiler/inferenceresult.jl        | 34 +++++++++++++------------
 test/compiler/inference.jl              |  7 +++++
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index f0d59694f4128..a057a1879412c 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -546,6 +546,10 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, resul
             end
         end
         inf_result = InferenceResult(mi, argtypes, va_override)
+        if !any(inf_result.overridden_by_const)
+            add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
+            return nothing
+        end
         frame = InferenceState(inf_result, #=cache=#false, interp)
         frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it
         frame.parent = sv
diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
index 026b5286979cb..483e2f38d9ee8 100644
--- a/base/compiler/inferenceresult.jl
+++ b/base/compiler/inferenceresult.jl
@@ -16,30 +16,32 @@ end
 function matching_cache_argtypes(linfo::MethodInstance, given_argtypes::Vector, va_override::Bool)
     @assert isa(linfo.def, Method) # ensure the next line works
     nargs::Int = linfo.def.nargs
-    @assert length(given_argtypes) >= (nargs - 1)
     given_argtypes = anymap(widenconditional, given_argtypes)
-    if va_override || linfo.def.isva
+    isva = va_override || linfo.def.isva
+    if isva || isvarargtype(given_argtypes[end])
         isva_given_argtypes = Vector{Any}(undef, nargs)
-        for i = 1:(nargs - 1)
+        for i = 1:(nargs - isva)
             isva_given_argtypes[i] = argtype_by_index(given_argtypes, i)
         end
-        if length(given_argtypes) >= nargs || !isvarargtype(given_argtypes[end])
-            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[nargs:end])
-        else
-            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[end:end])
+        if isva
+            if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end])
+                last = length(given_argtypes)
+            else
+                last = nargs
+            end
+            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[last:end])
         end
         given_argtypes = isva_given_argtypes
     end
+    @assert length(given_argtypes) == nargs
     cache_argtypes, overridden_by_const = matching_cache_argtypes(linfo, nothing, va_override)
-    if nargs === length(given_argtypes)
-        for i in 1:nargs
-            given_argtype = given_argtypes[i]
-            cache_argtype = cache_argtypes[i]
-            if !is_argtype_match(given_argtype, cache_argtype, overridden_by_const[i])
-                # prefer the argtype we were given over the one computed from `linfo`
-                cache_argtypes[i] = given_argtype
-                overridden_by_const[i] = true
-            end
+    for i in 1:nargs
+        given_argtype = given_argtypes[i]
+        cache_argtype = cache_argtypes[i]
+        if !is_argtype_match(given_argtype, cache_argtype, overridden_by_const[i])
+            # prefer the argtype we were given over the one computed from `linfo`
+            cache_argtypes[i] = given_argtype
+            overridden_by_const[i] = true
         end
     end
     return cache_argtypes, overridden_by_const
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index acd4f5af33de0..d4d0f6700c179 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -3497,3 +3497,10 @@ end
         end
         return x
     end) === Union{Int, Float64, Char}
+
+# issue #42097
+struct Foo42097{F} end
+Foo42097(f::F, args) where {F} = Foo42097{F}()
+Foo42097(A) = Foo42097(Base.inferencebarrier(+), Base.inferencebarrier(1)...)
+foo42097() = Foo42097([1]...)
+@test foo42097() isa Foo42097{typeof(+)}

From a8722c6c5a99b2044328ba01fc470704d4bdba1b Mon Sep 17 00:00:00 2001
From: Thibaut Lienart <tlienart@me.com>
Date: Wed, 8 Sep 2021 12:16:36 +0200
Subject: [PATCH 65/65] (#42139) Fixes _is_mailto in resolution of autolink in
 Markdown module (#42140)

(cherry picked from commit 47797a175dd6460460492ec8f8e72151e6a5fa6a)
---
 stdlib/Markdown/src/Common/inline.jl | 7 ++-----
 stdlib/Markdown/test/runtests.jl     | 8 ++++++++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/stdlib/Markdown/src/Common/inline.jl b/stdlib/Markdown/src/Common/inline.jl
index d2855f27a7add..fd5134481e113 100644
--- a/stdlib/Markdown/src/Common/inline.jl
+++ b/stdlib/Markdown/src/Common/inline.jl
@@ -146,13 +146,10 @@ function _is_link(s::AbstractString)
 end
 
 # non-normative regex from the HTML5 spec
-const _email_regex = r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
+const _email_regex = r"^mailto\:[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
 
 function _is_mailto(s::AbstractString)
-    length(s) < 6 && return false
-    # slicing strings is a bit risky, but this equality check is safe
-    lowercase(s[1:6]) == "mailto:" || return false
-    return occursin(_email_regex, s[6:end])
+    return occursin(_email_regex, s)
 end
 
 # –––––––––––
diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl
index f90eefb85310e..3de9e667e2e06 100644
--- a/stdlib/Markdown/test/runtests.jl
+++ b/stdlib/Markdown/test/runtests.jl
@@ -1230,3 +1230,11 @@ end
     @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line"
 end
 
+@testset "issue #42139: autolink" begin
+    # ok
+    @test md"<mailto:foo@bar.com>" |> html == """<p><a href="mailto:foo@bar.com">mailto:foo@bar.com</a></p>\n"""
+    # not ok
+    @test md"<mailto foo@bar.com>" |> html == """<p>&lt;mailto foo@bar.com&gt;</p>\n"""
+    # see issue #42139
+    @test md"<一轮红日初升>" |> html == """<p>&lt;一轮红日初升&gt;</p>\n"""
+end