From 6b42079ea99d6f4ad3a11042c2ca259ddbb8866b Mon Sep 17 00:00:00 2001
From: Stuart Daines
Date: Wed, 11 Dec 2024 13:09:23 +0000
Subject: [PATCH] Document and update automatic differentiation helper
 functions

Document: value_ad
Add new functions: smoothstepcubic zero_ad
---
 docs/src/Reaction API.md | 25 ++++++++++++++++
 src/PALEOboxes.jl        |  1 +
 src/Types.jl             | 12 --------
 src/utils/ADUtils.jl     | 65 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 12 deletions(-)
 create mode 100644 src/utils/ADUtils.jl

diff --git a/docs/src/Reaction API.md b/docs/src/Reaction API.md
index 2dc3efe5..7afe1cc0 100644
--- a/docs/src/Reaction API.md
+++ b/docs/src/Reaction API.md
@@ -151,6 +151,31 @@ In rare cases where it is necessary to operate on a Vector representing a quanti
     return nothing
 end
 
+#### Writing automatic-differentiation-compatible code
+
+Stiff systems of equations (with a wide range of timescales) will require a
+solver that uses Jacobian information (either calculated numerically or by automatic differentiation).
+
+For this to be numerically efficient, the Jacobian needs to exist mathematically, so any reaction rates
+should change continuously. Any discontinuous steps (eg a rate that suddenly increases to a finite value at a threshold)
+should be smoothed out, eg using the [`smoothstepcubic`](@ref) function, or ideally by reformulating the physical model
+in such a way that discontinuities are avoided.
+
+For large models, a sparse Jacobian may be required, where the sparsity pattern is generated by tracing the code with the
+model variables set eg to their initial values. Conditional logic (`if`-`elseif`-`else` blocks) may then cause this tracing to fail
+(omitting some terms from the Jacobian) if the conditional branch taken sets some rates to zero, losing dependency information.
+A workaround is to use [`zero_ad`](@ref) to generate a zero value that retains variable dependency information.
NB: the `max` and `min`
+Julia functions *are* safe to use with sparsity tracing.
+
+It is sometimes desirable to omit terms from the Jacobian, eg an insignificantly small term that would greatly reduce the
+Jacobian sparsity. This can be accomplished by using the [`value_ad`](@ref) function to drop automatic-differentiation information
+and retain only the value.
+
+```@docs
+smoothstepcubic
+zero_ad
+value_ad
+```
+
 #### Optimising loops over cells using explicit SIMD instructions
 
 Reactions with simple loops over `cellindices` that implement time-consuming per-cell calculations
diff --git a/src/PALEOboxes.jl b/src/PALEOboxes.jl
index ee3aea6c..a971ac12 100644
--- a/src/PALEOboxes.jl
+++ b/src/PALEOboxes.jl
@@ -70,6 +70,7 @@ include("utils/IteratorUtils.jl")
 include("utils/DocUtils.jl")
 include("utils/StringUtils.jl")
 include("utils/ChemistryUtils.jl")
+include("utils/ADUtils.jl")
 
 include("variableaggregators/VariableAggregator.jl")
 
diff --git a/src/Types.jl b/src/Types.jl
index 334903d9..eae29f5e 100644
--- a/src/Types.jl
+++ b/src/Types.jl
@@ -1,15 +1,3 @@
-# Get scalar value from variable x (discarding any AD derivatives)
-value_ad(x) = x
-# Model code should implement this for any AD types used, eg
-# value_ad(x::SparsityTracing.ADval) = SparsityTracing.value(x)
-# value_ad(x::ForwardDiff.Dual) = ForwardDiff.value(x)
-
-# get scalar or ad from variable x, as specified by first argument
-value_ad(::Type{T}, x::T) where {T} = x # pass through AD
-value_ad(::Type{T}, x::Float64) where {T} = x # pass through Float64
-value_ad(::Type{Float64}, x) = value_ad(x) # strip AD
-value_ad(::Type{Float64}, x::Float64) = x # avoid method ambiguity
-
 # TODO there doesn't seem to be an easy way of parameterising ModelData by an Array type ?
 const PaleoArrayType = Array
diff --git a/src/utils/ADUtils.jl b/src/utils/ADUtils.jl
new file mode 100644
index 00000000..ffca88d8
--- /dev/null
+++ b/src/utils/ADUtils.jl
@@ -0,0 +1,65 @@
+"""
+    value_ad(x::ADT) -> value
+
+Get scalar value from variable `x` (discarding any AD derivatives).
+
+This can be used to exclude `x` from automatic differentiation and hence a Jacobian calculation,
+eg where `x` is a small contribution but would make the Jacobian much denser.
+
+Model code should implement this for any AD types used, eg
+
+    value_ad(x::SparsityTracing.ADval) = SparsityTracing.value(x)
+    value_ad(x::ForwardDiff.Dual) = ForwardDiff.value(x)
+
+"""
+value_ad(x) = x
+
+# get scalar or AD value from variable x, as specified by the first argument
+value_ad(::Type{T}, x::T) where {T} = x # pass through AD
+value_ad(::Type{T}, x::Float64) where {T} = x # pass through Float64
+value_ad(::Type{Float64}, x) = value_ad(x) # strip AD
+value_ad(::Type{Float64}, x::Float64) = x # avoid method ambiguity
+
+"""
+    zero_ad(x...) -> 0.0*x
+
+Provide a zero of the type of `x` (or of the type of `x1*x2*...` if given multiple arguments), retaining AD dependency information.
+
+A workaround to enable use of conditional logic whilst retaining dependency information when tracing the Jacobian sparsity pattern.
+"""
+zero_ad(x) = 0.0*x
+zero_ad(x1, x2) = 0.0*x1*x2
+zero_ad(x1, x2, x3) = 0.0*x1*x2*x3
+zero_ad(x1, x2, x3, x4) = 0.0*x1*x2*x3*x4
+
+"""
+    smoothstepcubic(x, xedge, xwidth) -> y
+
+Smoothed step function over width `xwidth` at location `xedge`.
+
+Provides a way of smoothing a discontinuous step function so that the
+derivative exists, to help numerical solution methods that require a
+Jacobian (eg stiff ODE solvers).
+
+Uses a cubic function in the range `xedge +/- xwidth/2`, so the first derivative
+is continuous; higher derivatives are not.
+
+Uses [`zero_ad`](@ref) to retain AD dependency information for tracing the Jacobian sparsity pattern.
+
+Returns:
+- 0.0 for x < (xedge - xwidth/2)
+- 1.0 for x > (xedge + xwidth/2)
+- a smoothed step for xedge-xwidth/2 < x < xedge+xwidth/2
+"""
+function smoothstepcubic(x, xedge, xwidth)
+    # rescale to 0 < xs < 1
+    xs = (x - xedge + 0.5*xwidth)/xwidth
+    # xs smoothly steps from 0 to 1 over the interval 0 < xs < 1
+    if xs > 1.0
+        return one(x) + zero_ad(x) # 1.0, retaining dependency information from x
+    elseif xs < 0.0
+        return zero_ad(x) # 0.0, retaining dependency information from x
+    else
+        return xs*xs*(3 - 2*xs)
+    end
+end
\ No newline at end of file
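As a usage sketch of the helpers introduced in `src/utils/ADUtils.jl` above: the fragment below reproduces `zero_ad` and `smoothstepcubic` from the patch so it is self-contained, then applies them to a hypothetical threshold-activated reaction rate (the rate constant, threshold, and smoothing width are illustrative values, not from the patch):

```julia
# zero_ad and smoothstepcubic as added in src/utils/ADUtils.jl
zero_ad(x) = 0.0*x

function smoothstepcubic(x, xedge, xwidth)
    # rescale to 0 < xs < 1
    xs = (x - xedge + 0.5*xwidth)/xwidth
    if xs > 1.0
        return one(x) + zero_ad(x)  # 1.0, retaining dependency information from x
    elseif xs < 0.0
        return zero_ad(x)           # 0.0, retaining dependency information from x
    else
        return xs*xs*(3 - 2*xs)     # cubic smoothing over the edge region
    end
end

# Hypothetical rate that switches on when O2 exceeds a threshold of 0.01,
# smoothed over a width of 1e-3 so the derivative (hence Jacobian) exists:
rate(O2) = 10.0 * smoothstepcubic(O2, 0.01, 1e-3)

rate(0.0)   # well below the threshold: 0.0
rate(0.01)  # at the edge: half the full rate, 5.0
rate(1.0)   # well above the threshold: the full rate, 10.0
```

Because both branches of the conditional return `zero_ad(x)`-based values rather than a literal `0.0` or `1.0`, the dependency of the result on `x` survives sparsity tracing whichever branch is taken.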