Merge pull request #95 from andreped/tf-fix
Fixed dynamic optimizer wrapper inheritance + support tf >= 2.8
andreped authored May 9, 2023
2 parents e56b7e1 + 89a94a1 commit 6a97262
Showing 17 changed files with 43 additions and 263 deletions.
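The inheritance fix concerns GradientAccumulateOptimizer, whose base class `opt` (see accumulators.py below) is resolved at import time rather than hard-coded, because the Keras Optimizer base class moved between TF releases: TF 2.11 replaced tf.keras.optimizers.Optimizer with a new implementation and kept the old one under tf.keras.optimizers.legacy. A minimal sketch of that dynamic-inheritance pattern, assuming availability-based selection (the actual selection logic lives in accumulators.py and is not part of this diff):

import tensorflow as tf

# Pick whichever optimizer base class the running TF version provides.
try:
    opt = tf.keras.optimizers.legacy.Optimizer  # TF versions with a legacy namespace
except AttributeError:
    opt = tf.keras.optimizers.Optimizer  # older TF 2.x releases


class GradientAccumulateOptimizer(opt):  # base class chosen dynamically at import time
    """Optimizer wrapper for gradient accumulation."""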
11 changes: 7 additions & 4 deletions .github/workflows/test.yml
@@ -14,10 +14,10 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
       - uses: actions/checkout@v1
-      - name: Set up Python 3.7
+      - name: Set up Python 3.8
         uses: actions/setup-python@v2
         with:
-          python-version: 3.7
+          python-version: 3.8
 
       - name: Install lint dependencies
         run: |
@@ -49,8 +49,8 @@ jobs:
       # max-parallel: 10
       matrix:
         os: [windows-2019, ubuntu-20.04, macos-11]
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
-        tf-version: [2.8.0, 2.9.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        tf-version: [2.8, 2.11.1]
 
     steps:
       - uses: actions/checkout@v1
@@ -72,6 +72,9 @@ jobs:

       - name: Install wheel
         run: pip install --find-links=${{github.workspace}} gradient_accumulator
 
+      - name: Debug pip deps
+        run: pip list
+
       - name: Test library accessibility
         run: python -c "from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer"
4 changes: 2 additions & 2 deletions gradient_accumulator/accumulators.py
@@ -10,7 +10,7 @@

 # https://stackoverflow.com/a/66524901
 # https://keras.io/guides/customizing_what_happens_in_fit/
-@tf.keras.utils.register_keras_serializable()
+@tf.keras.utils.register_keras_serializable("gradient-accumulator")
 class GradientAccumulateModel(tf.keras.Model):
     """Model wrapper for gradient accumulation."""

@@ -195,7 +195,7 @@ def reinit_grad_accum(self):
 # Implementation was derived from:
 # https://github.com/fsx950223/addons/blob/67c1e8ea19e82c3f2a5706674dd81f15ab5002a2/tensorflow_addons/optimizers/gradient_accumulator.py  # noqa
 # https://github.com/FreddeFrallan/Multilingual-CLIP/blob/5c82118452b3b59b41bb53714d61cd4990b1588d/multilingual_clip/TeacherLearning/Utils.py#L84  # noqa
-@tf.keras.utils.register_keras_serializable()
+@tf.keras.utils.register_keras_serializable("gradient-accumulator")
 class GradientAccumulateOptimizer(opt):
     """Optimizer wrapper for gradient accumulation."""

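Passing a package name to register_keras_serializable namespaces the serialized classes as "gradient-accumulator>ClassName" in Keras's global custom-object registry, so a saved model using these wrappers can be reloaded without spelling out custom_objects. A hedged usage sketch (the model path is hypothetical):

import tensorflow as tf
import gradient_accumulator  # import side effect: runs the registration decorators

# Keras resolves "gradient-accumulator>GradientAccumulateModel" etc.
# from its registry while deserializing.
model = tf.keras.models.load_model("some_model.h5")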
4 changes: 3 additions & 1 deletion setup.py
@@ -18,7 +18,9 @@
"numpy<=1.23.2",
],
extras_require={"dev": [
"pytest",
"wheel",
"setuptools",
"pytest-cov",
"black==22.3.0",
"isort==5.10.1",
"flake8==4.0.1",
6 changes: 1 addition & 5 deletions tests/test_adaptive_gradient_clipping.py
@@ -5,11 +5,7 @@
 from gradient_accumulator import unitwise_norm
 from tensorflow.keras import mixed_precision
 import os
-
-
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
+from .utils import normalize_img
 
 
 def test_unitwise_norm():
30 changes: 1 addition & 29 deletions tests/test_batch_norm.py
@@ -6,35 +6,7 @@
 import random as python_random
 import numpy as np
 import os
-
-
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
-
-
-def reset():
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-    # disable GPU
-    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-
-    # The below is necessary for starting Numpy generated random numbers
-    # in a well-defined initial state.
-    np.random.seed(123)
-
-    # The below is necessary for starting core Python generated random numbers
-    # in a well-defined state.
-    python_random.seed(123)
-
-    # The below set_seed() will make random number generation
-    # in the TensorFlow backend have a well-defined initial state.
-    # For further details, see:
-    # https://www.tensorflow.org/api_docs/python/tf/random/set_seed
-    tf.random.set_seed(1234)
-
-    # https://stackoverflow.com/a/71311207
-    tf.config.experimental.enable_op_determinism()
+from .utils import reset, normalize_img
 
 
 def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epochs:int = 3):
1 change: 0 additions & 1 deletion tests/test_bn_convnd.py
@@ -60,7 +60,6 @@ def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
     return result
 
 
-
 def test_bn_conv3d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
     # make toy dataset
     data = np.random.randint(2, size=(16, 8, 8, 8, 1))
45 changes: 9 additions & 36 deletions tests/test_expected_result.py
@@ -2,6 +2,7 @@
 import tensorflow as tf
 import random as python_random
 import os
+from .utils import get_opt, normalize_img, reset
 import tensorflow_datasets as tfds
 from tensorflow.keras.models import load_model
 from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer
@@ -11,39 +12,6 @@
 tf_version = int(tf.version.VERSION.split(".")[1])
 
 
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
-
-
-def reset():
-    # set tf log level
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-    # disable GPU
-    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-
-    # The below is necessary for starting Numpy generated random numbers
-    # in a well-defined initial state.
-    np.random.seed(123)
-
-    # The below is necessary for starting core Python generated random numbers
-    # in a well-defined state.
-    python_random.seed(123)
-
-    # The below set_seed() will make random number generation
-    # in the TensorFlow backend have a well-defined initial state.
-    # For further details, see:
-    # https://www.tensorflow.org/api_docs/python/tf/random/set_seed
-    tf.random.set_seed(1234)
-
-    # https://stackoverflow.com/a/71311207
-    try:
-        tf.config.experimental.enable_op_determinism()  # Exist only for TF > 2.7
-    except AttributeError as e:
-        print(e)
-
-
 def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
@@ -70,7 +38,7 @@ def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     x = tf.keras.layers.Dense(128, activation='relu')(x)
     output = tf.keras.layers.Dense(10)(x)
 
-    opt = tf.keras.optimizers.SGD(1e-3)
+    opt = get_opt(opt_name="SGD", tf_version=tf_version)
 
     if accum_steps == 1:
         model = tf.keras.Model(inputs=input, outputs=output)
@@ -131,8 +99,13 @@ def test_expected_result():
     result3 = run_experiment(bs=50, accum_steps=2, epochs=2, modeloropt="model")
 
     # results should be identical (theoretically, even in practice on CPU)
-    assert result1 == result2
-    assert result2 == result3
+    if tf_version <= 10:
+        assert result1 == result2
+        assert result2 == result3
+    else:
+        # approximation worse for tf >= 2.11
+        np.testing.assert_almost_equal(result1, result2, decimal=2)
+        np.testing.assert_almost_equal(result2, result3, decimal=2)
 
 
 if __name__ == "__main__":
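Note that tf_version holds only the minor version, so `tf_version <= 10` selects TF 2.10 and older, where the three runs are still expected to be bit-identical on CPU; from TF 2.11 the comparison is relaxed to two decimals. For illustration:

# e.g. under TF 2.11.1:
# tf.version.VERSION            -> "2.11.1"
# tf.version.VERSION.split(".") -> ["2", "11", "1"]
# tf_version                    -> 11  (> 10, so the approximate branch runs)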
6 changes: 1 addition & 5 deletions tests/test_mixed_precision.py
@@ -6,14 +6,10 @@ def run_experiment():
 import tensorflow_datasets as tfds
 from tensorflow.keras import mixed_precision
 from gradient_accumulator import GradientAccumulateModel
+from .utils import normalize_img
 import os
 
 
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
-
-
 # disable GPU
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 
8 changes: 3 additions & 5 deletions tests/test_model_distribute.py
@@ -2,6 +2,7 @@
 import tensorflow_datasets as tfds
 from tensorflow.keras.models import load_model
 from gradient_accumulator import GradientAccumulateModel
+from .utils import get_opt
 
 
 def test_model_distribute():
@@ -38,14 +39,11 @@ def test_model_distribute():
     )
 
     # define optimizer - currently only SGD compatible with GAOptimizerWrapper
-    if int(tf.version.VERSION.split(".")[1]) > 10:
-        curr_opt = tf.keras.optimizers.legacy.SGD(learning_rate=1e-2)
-    else:
-        curr_opt = tf.keras.optimizers.SGD(learning_rate=1e-2)
+    opt = get_opt("SGD")
 
     # compile model
     model.compile(
-        optimizer=curr_opt,
+        optimizer=opt,
         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
         metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
     )
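get_opt replaces the inline version branching deleted above. A plausible sketch of the helper, reconstructed from that branching (the actual tests/utils.py may differ in signature and defaults; the "adam" entry mirrors its use in test_optimizer_distribute.py):

import tensorflow as tf


def get_opt(opt_name, tf_version=None):
    """Return an optimizer instance compatible with the running TF version."""
    if tf_version is None:
        tf_version = int(tf.version.VERSION.split(".")[1])
    if tf_version > 10:
        # TF >= 2.11 ships a new optimizer API; the wrappers need the legacy one
        optimizers = {
            "SGD": tf.keras.optimizers.legacy.SGD(learning_rate=1e-2),
            "adam": tf.keras.optimizers.legacy.Adam(learning_rate=1e-3),
        }
    else:
        optimizers = {
            "SGD": tf.keras.optimizers.SGD(learning_rate=1e-2),
            "adam": tf.keras.optimizers.Adam(learning_rate=1e-3),
        }
    return optimizers[opt_name]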
8 changes: 2 additions & 6 deletions tests/test_mp_batch_norm.py
@@ -11,11 +11,7 @@ def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epo
 import random as python_random
 import numpy as np
 import os
-
-
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
+from .utils import normalize_img, get_opt
 
 
 ## reset session and seed stuff before running experiment
@@ -90,7 +86,7 @@ def normalize_img(image, label):
     )
 
     # need to scale optimizer for mixed precision
-    opt = tf.keras.optimizers.SGD(1e-2)
+    opt = get_opt("SGD")
     if mixed_precision_flag:
         opt = mixed_precision.LossScaleOptimizer(opt)
 
30 changes: 1 addition & 29 deletions tests/test_multitask.py
@@ -7,41 +7,13 @@
 from gradient_accumulator import GradientAccumulateModel
 from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, UpSampling2D,\
     MaxPooling2D, Activation
-
-
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
+from .utils import normalize_img, reset
 
 
 def create_multi_input_output(image, label):
     return (image, image), (image, label)
-
-
-def reset():
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-    # The below is necessary for starting Numpy generated random numbers
-    # in a well-defined initial state.
-    np.random.seed(123)
-
-    # The below is necessary for starting core Python generated random numbers
-    # in a well-defined state.
-    python_random.seed(123)
-
-    # The below set_seed() will make random number generation
-    # in the TensorFlow backend have a well-defined initial state.
-    # For further details, see:
-    # https://www.tensorflow.org/api_docs/python/tf/random/set_seed
-    tf.random.set_seed(1234)
-
-    # https://stackoverflow.com/a/71311207
-    tf.config.experimental.enable_op_determinism()
-
-    # disable GPU
-    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 
 
 def run_experiment(bs=16, accum_steps=4, epochs=1):
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
7 changes: 2 additions & 5 deletions tests/test_optimizer_distribute.py
@@ -3,15 +3,12 @@
 from tensorflow.keras.models import load_model
 from gradient_accumulator import GradientAccumulateOptimizer
 import numpy as np
-from .utils import reset, get_opt
+from .utils import reset, get_opt, normalize_img
 
 
 # get current tf minor version
 tf_version = int(tf.version.VERSION.split(".")[1])
 
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
 
 def run_experiment(opt_name="adam", bs=100, accum_steps=1, epochs=1, strategy_name="multi"):
     # setup single/multi-GPU strategy
@@ -104,4 +101,4 @@ def test_distributed_optimizer_invariance():
     result2 = run_experiment(opt_name=opt_name, bs=50, accum_steps=2, epochs=2, strategy_name=strategy_name)
 
     # results should be "identical" (on CPU, can be different on GPU)
-    np.testing.assert_almost_equal(result1, result2, decimal=3)
+    np.testing.assert_almost_equal(result1, result2, decimal=2)
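For reference, the shared tests/utils.py module these files now import from can be reconstructed almost verbatim from the inline definitions deleted across this commit (get_opt is sketched above; the actual module may differ in detail):

import os
import random as python_random

import numpy as np
import tensorflow as tf


def normalize_img(image, label):
    """Normalizes images: `uint8` -> `float32`."""
    return tf.cast(image, tf.float32) / 255., label


def reset():
    # set tf log level and disable GPU
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    # seed NumPy, core Python, and TensorFlow RNGs for a well-defined state
    # (https://www.tensorflow.org/api_docs/python/tf/random/set_seed)
    np.random.seed(123)
    python_random.seed(123)
    tf.random.set_seed(1234)

    # https://stackoverflow.com/a/71311207
    try:
        tf.config.experimental.enable_op_determinism()  # exists only for TF > 2.7
    except AttributeError as e:
        print(e)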