Merge pull request #95 from andreped/tf-fix
Fixed dynamic optimizer wrapper inheritance + support tf >= 2.8
andreped authored May 9, 2023
2 parents e56b7e1 + 89a94a1 commit 6a97262
Showing 17 changed files with 43 additions and 263 deletions.
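The inheritance fix concerns GradientAccumulateOptimizer, whose base class `opt` (see accumulators.py below) is resolved at import time rather than hard-coded, because the Keras Optimizer base class moved between TF releases: TF 2.11 replaced tf.keras.optimizers.Optimizer with a new implementation and kept the old one under tf.keras.optimizers.legacy. A minimal sketch of that dynamic-inheritance pattern, assuming availability-based selection (the actual selection logic lives in accumulators.py and is not part of this diff):

import tensorflow as tf

# Pick whichever optimizer base class the running TF version provides.
try:
    opt = tf.keras.optimizers.legacy.Optimizer  # TF versions with a legacy namespace
except AttributeError:
    opt = tf.keras.optimizers.Optimizer  # older TF 2.x releases


class GradientAccumulateOptimizer(opt):  # base class chosen dynamically at import time
    """Optimizer wrapper for gradient accumulation."""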
11 changes: 7 additions & 4 deletions .github/workflows/test.yml
@@ -14,10 +14,10 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
       - uses: actions/checkout@v1
-      - name: Set up Python 3.7
+      - name: Set up Python 3.8
         uses: actions/setup-python@v2
         with:
-          python-version: 3.7
+          python-version: 3.8
 
       - name: Install lint dependencies
         run: |
@@ -49,8 +49,8 @@ jobs:
       # max-parallel: 10
       matrix:
         os: [windows-2019, ubuntu-20.04, macos-11]
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
-        tf-version: [2.8.0, 2.9.1]
+        python-version: ["3.8", "3.9", "3.10"]
+        tf-version: [2.8, 2.11.1]
 
     steps:
       - uses: actions/checkout@v1
@@ -72,6 +72,9 @@ jobs:

       - name: Install wheel
         run: pip install --find-links=${{github.workspace}} gradient_accumulator
 
+      - name: Debug pip deps
+        run: pip list
+
       - name: Test library accessibility
         run: python -c "from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer"
4 changes: 2 additions & 2 deletions gradient_accumulator/accumulators.py
@@ -10,7 +10,7 @@

 # https://stackoverflow.com/a/66524901
 # https://keras.io/guides/customizing_what_happens_in_fit/
-@tf.keras.utils.register_keras_serializable()
+@tf.keras.utils.register_keras_serializable("gradient-accumulator")
 class GradientAccumulateModel(tf.keras.Model):
     """Model wrapper for gradient accumulation."""

@@ -195,7 +195,7 @@ def reinit_grad_accum(self):
 # Implementation was derived from:
 # https://github.com/fsx950223/addons/blob/67c1e8ea19e82c3f2a5706674dd81f15ab5002a2/tensorflow_addons/optimizers/gradient_accumulator.py  # noqa
 # https://github.com/FreddeFrallan/Multilingual-CLIP/blob/5c82118452b3b59b41bb53714d61cd4990b1588d/multilingual_clip/TeacherLearning/Utils.py#L84  # noqa
-@tf.keras.utils.register_keras_serializable()
+@tf.keras.utils.register_keras_serializable("gradient-accumulator")
 class GradientAccumulateOptimizer(opt):
     """Optimizer wrapper for gradient accumulation."""

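Passing a package name to register_keras_serializable namespaces the serialized classes as "gradient-accumulator>ClassName" in Keras's global custom-object registry, so a saved model using these wrappers can be reloaded without spelling out custom_objects. A hedged usage sketch (the model path is hypothetical):

import tensorflow as tf
import gradient_accumulator  # import side effect: runs the registration decorators

# Keras resolves "gradient-accumulator>GradientAccumulateModel" etc.
# from its registry while deserializing.
model = tf.keras.models.load_model("some_model.h5")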
4 changes: 3 additions & 1 deletion setup.py
@@ -18,7 +18,9 @@
"numpy<=1.23.2",
],
extras_require={"dev": [
"pytest",
"wheel",
"setuptools",
"pytest-cov",
"black==22.3.0",
"isort==5.10.1",
"flake8==4.0.1",
6 changes: 1 addition & 5 deletions tests/test_adaptive_gradient_clipping.py
@@ -5,11 +5,7 @@
 from gradient_accumulator import unitwise_norm
 from tensorflow.keras import mixed_precision
 import os
-
-
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
+from .utils import normalize_img
 
 
 def test_unitwise_norm():
30 changes: 1 addition & 29 deletions tests/test_batch_norm.py
@@ -6,35 +6,7 @@
 import random as python_random
 import numpy as np
 import os
-
-
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
-
-
-def reset():
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-    # disable GPU
-    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-
-    # The below is necessary for starting Numpy generated random numbers
-    # in a well-defined initial state.
-    np.random.seed(123)
-
-    # The below is necessary for starting core Python generated random numbers
-    # in a well-defined state.
-    python_random.seed(123)
-
-    # The below set_seed() will make random number generation
-    # in the TensorFlow backend have a well-defined initial state.
-    # For further details, see:
-    # https://www.tensorflow.org/api_docs/python/tf/random/set_seed
-    tf.random.set_seed(1234)
-
-    # https://stackoverflow.com/a/71311207
-    tf.config.experimental.enable_op_determinism()
+from .utils import reset, normalize_img
 
 
 def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epochs:int = 3):
1 change: 0 additions & 1 deletion tests/test_bn_convnd.py
@@ -60,7 +60,6 @@ def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
     return result
 
 
-
 def test_bn_conv3d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
     # make toy dataset
     data = np.random.randint(2, size=(16, 8, 8, 8, 1))
45 changes: 9 additions & 36 deletions tests/test_expected_result.py
@@ -2,6 +2,7 @@
 import tensorflow as tf
 import random as python_random
 import os
+from .utils import get_opt, normalize_img, reset
 import tensorflow_datasets as tfds
 from tensorflow.keras.models import load_model
 from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer
@@ -11,39 +12,6 @@
 tf_version = int(tf.version.VERSION.split(".")[1])
 
 
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
-
-
-def reset():
-    # set tf log level
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-    # disable GPU
-    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-
-    # The below is necessary for starting Numpy generated random numbers
-    # in a well-defined initial state.
-    np.random.seed(123)
-
-    # The below is necessary for starting core Python generated random numbers
-    # in a well-defined state.
-    python_random.seed(123)
-
-    # The below set_seed() will make random number generation
-    # in the TensorFlow backend have a well-defined initial state.
-    # For further details, see:
-    # https://www.tensorflow.org/api_docs/python/tf/random/set_seed
-    tf.random.set_seed(1234)
-
-    # https://stackoverflow.com/a/71311207
-    try:
-        tf.config.experimental.enable_op_determinism()  # Exist only for TF > 2.7
-    except AttributeError as e:
-        print(e)
-
-
 def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
@@ -70,7 +38,7 @@ def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
     x = tf.keras.layers.Dense(128, activation='relu')(x)
     output = tf.keras.layers.Dense(10)(x)
 
-    opt = tf.keras.optimizers.SGD(1e-3)
+    opt = get_opt(opt_name="SGD", tf_version=tf_version)
 
     if accum_steps == 1:
         model = tf.keras.Model(inputs=input, outputs=output)
@@ -131,8 +99,13 @@ def test_expected_result():
     result3 = run_experiment(bs=50, accum_steps=2, epochs=2, modeloropt="model")
 
     # results should be identical (theoretically, even in practice on CPU)
-    assert result1 == result2
-    assert result2 == result3
+    if tf_version <= 10:
+        assert result1 == result2
+        assert result2 == result3
+    else:
+        # approximation worse for tf >= 2.11
+        np.testing.assert_almost_equal(result1, result2, decimal=2)
+        np.testing.assert_almost_equal(result2, result3, decimal=2)
 
 
 if __name__ == "__main__":
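Note that tf_version holds only the minor version, so `tf_version <= 10` selects TF 2.10 and older, where the three runs are still expected to be bit-identical on CPU; from TF 2.11 the comparison is relaxed to two decimals. For illustration:

# e.g. under TF 2.11.1:
# tf.version.VERSION            -> "2.11.1"
# tf.version.VERSION.split(".") -> ["2", "11", "1"]
# tf_version                    -> 11  (> 10, so the approximate branch runs)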
6 changes: 1 addition & 5 deletions tests/test_mixed_precision.py
@@ -6,14 +6,10 @@ def run_experiment():
 import tensorflow_datasets as tfds
 from tensorflow.keras import mixed_precision
 from gradient_accumulator import GradientAccumulateModel
+from .utils import normalize_img
 import os
 
 
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
-
-
 # disable GPU
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 
8 changes: 3 additions & 5 deletions tests/test_model_distribute.py
@@ -2,6 +2,7 @@
 import tensorflow_datasets as tfds
 from tensorflow.keras.models import load_model
 from gradient_accumulator import GradientAccumulateModel
+from .utils import get_opt
 
 
 def test_model_distribute():
@@ -38,14 +39,11 @@ def test_model_distribute():
     )
 
     # define optimizer - currently only SGD compatible with GAOptimizerWrapper
-    if int(tf.version.VERSION.split(".")[1]) > 10:
-        curr_opt = tf.keras.optimizers.legacy.SGD(learning_rate=1e-2)
-    else:
-        curr_opt = tf.keras.optimizers.SGD(learning_rate=1e-2)
+    opt = get_opt("SGD")
 
     # compile model
     model.compile(
-        optimizer=curr_opt,
+        optimizer=opt,
         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
         metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
     )
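get_opt replaces the inline version branching deleted above. A plausible sketch of the helper, reconstructed from that branching (the actual tests/utils.py may differ in signature and defaults; the "adam" entry mirrors its use in test_optimizer_distribute.py):

import tensorflow as tf


def get_opt(opt_name, tf_version=None):
    """Return an optimizer instance compatible with the running TF version."""
    if tf_version is None:
        tf_version = int(tf.version.VERSION.split(".")[1])
    if tf_version > 10:
        # TF >= 2.11 ships a new optimizer API; the wrappers need the legacy one
        optimizers = {
            "SGD": tf.keras.optimizers.legacy.SGD(learning_rate=1e-2),
            "adam": tf.keras.optimizers.legacy.Adam(learning_rate=1e-3),
        }
    else:
        optimizers = {
            "SGD": tf.keras.optimizers.SGD(learning_rate=1e-2),
            "adam": tf.keras.optimizers.Adam(learning_rate=1e-3),
        }
    return optimizers[opt_name]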
8 changes: 2 additions & 6 deletions tests/test_mp_batch_norm.py
@@ -11,11 +11,7 @@ def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epo
 import random as python_random
 import numpy as np
 import os
-
-
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
+from .utils import normalize_img, get_opt
 
 
 ## reset session and seed stuff before running experiment
@@ -90,7 +86,7 @@ def normalize_img(image, label):
     )
 
     # need to scale optimizer for mixed precision
-    opt = tf.keras.optimizers.SGD(1e-2)
+    opt = get_opt("SGD")
     if mixed_precision_flag:
         opt = mixed_precision.LossScaleOptimizer(opt)
 
30 changes: 1 addition & 29 deletions tests/test_multitask.py
@@ -7,41 +7,13 @@
 from gradient_accumulator import GradientAccumulateModel
 from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, UpSampling2D,\
     MaxPooling2D, Activation
-
-
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
+from .utils import normalize_img, reset
 
 
 def create_multi_input_output(image, label):
     return (image, image), (image, label)
-
-
-def reset():
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-    # The below is necessary for starting Numpy generated random numbers
-    # in a well-defined initial state.
-    np.random.seed(123)
-
-    # The below is necessary for starting core Python generated random numbers
-    # in a well-defined state.
-    python_random.seed(123)
-
-    # The below set_seed() will make random number generation
-    # in the TensorFlow backend have a well-defined initial state.
-    # For further details, see:
-    # https://www.tensorflow.org/api_docs/python/tf/random/set_seed
-    tf.random.set_seed(1234)
-
-    # https://stackoverflow.com/a/71311207
-    tf.config.experimental.enable_op_determinism()
-
-    # disable GPU
-    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 
 
 def run_experiment(bs=16, accum_steps=4, epochs=1):
     # load dataset
     (ds_train, ds_test), ds_info = tfds.load(
7 changes: 2 additions & 5 deletions tests/test_optimizer_distribute.py
@@ -3,15 +3,12 @@
 from tensorflow.keras.models import load_model
 from gradient_accumulator import GradientAccumulateOptimizer
 import numpy as np
-from .utils import reset, get_opt
+from .utils import reset, get_opt, normalize_img
 
 
 # get current tf minor version
 tf_version = int(tf.version.VERSION.split(".")[1])
 
-def normalize_img(image, label):
-    """Normalizes images: `uint8` -> `float32`."""
-    return tf.cast(image, tf.float32) / 255., label
 
 def run_experiment(opt_name="adam", bs=100, accum_steps=1, epochs=1, strategy_name="multi"):
     # setup single/multi-GPU strategy
@@ -104,4 +101,4 @@ def test_distributed_optimizer_invariance():
     result2 = run_experiment(opt_name=opt_name, bs=50, accum_steps=2, epochs=2, strategy_name=strategy_name)
 
     # results should be "identical" (on CPU, can be different on GPU)
-    np.testing.assert_almost_equal(result1, result2, decimal=3)
+    np.testing.assert_almost_equal(result1, result2, decimal=2)
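For reference, the shared tests/utils.py module these files now import from can be reconstructed almost verbatim from the inline definitions deleted across this commit (get_opt is sketched above; the actual module may differ in detail):

import os
import random as python_random

import numpy as np
import tensorflow as tf


def normalize_img(image, label):
    """Normalizes images: `uint8` -> `float32`."""
    return tf.cast(image, tf.float32) / 255., label


def reset():
    # set tf log level and disable GPU
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    # seed NumPy, core Python, and TensorFlow RNGs for a well-defined state
    # (https://www.tensorflow.org/api_docs/python/tf/random/set_seed)
    np.random.seed(123)
    python_random.seed(123)
    tf.random.set_seed(1234)

    # https://stackoverflow.com/a/71311207
    try:
        tf.config.experimental.enable_op_determinism()  # exists only for TF > 2.7
    except AttributeError as e:
        print(e)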