From 7246bac254e8fbc403291fa780b55acbf07b7698 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Tue, 21 Sep 2021 14:31:41 +0200 Subject: [PATCH 01/18] Initial ruck project template with actions, testing and docs --- .github/ISSUE_TEMPLATE/bug-report.md | 20 ++++++ .github/ISSUE_TEMPLATE/feature-request.md | 20 ++++++ .github/ISSUE_TEMPLATE/framework-request.md | 23 +++++++ .github/ISSUE_TEMPLATE/question.md | 13 ++++ .github/workflows/python-publish.yml | 34 ++++++++++ .github/workflows/python-test.yml | 44 +++++++++++++ .readthedocs.yml | 19 ++++++ README.md | 57 +++++++++++++++- docs/index.md | 25 +++++++ docs/requirements.txt | 7 ++ mkdocs.yml | 33 ++++++++++ pytest.ini | 9 +++ requirements-test.txt | 2 + requirements.txt | 3 + ruck/__init__.py | 2 + setup.py | 73 +++++++++++++++++++++ tests/test.py | 37 +++++++++++ tests/util.py | 70 ++++++++++++++++++++ 18 files changed, 489 insertions(+), 2 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug-report.md create mode 100644 .github/ISSUE_TEMPLATE/feature-request.md create mode 100644 .github/ISSUE_TEMPLATE/framework-request.md create mode 100644 .github/ISSUE_TEMPLATE/question.md create mode 100644 .github/workflows/python-publish.yml create mode 100644 .github/workflows/python-test.yml create mode 100644 .readthedocs.yml create mode 100644 docs/index.md create mode 100644 docs/requirements.txt create mode 100644 mkdocs.yml create mode 100644 pytest.ini create mode 100644 requirements-test.txt create mode 100644 requirements.txt create mode 100644 ruck/__init__.py create mode 100644 setup.py create mode 100644 tests/test.py create mode 100644 tests/util.py diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000..36636d2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,20 @@ +--- +name: Bug Report +about: Create a bug report to help us improve +title: "[BUG]: " +labels: bug +assignees: '' + +--- + +**Describe the bug** 
+A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behaviour. + +**Expected behaviour** +A clear and concise description of what you expected to happen. + +**Additional context** +Add any other context about the problem here, including any relevant system information and Python version. diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 0000000..5bbb680 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,20 @@ +--- +name: Feature Request +about: Suggest an idea for this project +title: "[FEATURE]: " +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/framework-request.md b/.github/ISSUE_TEMPLATE/framework-request.md new file mode 100644 index 0000000..f009de1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/framework-request.md @@ -0,0 +1,23 @@ +--- +name: Algorithm Request +about: Suggest a new algorithm to be added to this project +title: "[ALGORITHM]: " +labels: enhancement +assignees: '' + +--- + +**Which algorithm would you like added to this project?** +- algorithm name +- link to academic paper + +**Why should this algorithm be added?** +What benefit is there to adding this algorithm? + +**Short summary of algorithm** +What is the core algorithmic idea behind the algorithm in simple terms? Please give a general overview rather than advanced algorithmic concepts. 
+ +**Which algorithm does this build upon, if any?** + +**Are you willing to submit a PR?** +Are you willing to work on this implementation and submit a PR? diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md new file mode 100644 index 0000000..bddea02 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,13 @@ +--- +name: Question +about: Ask a question about this project +title: "[Q]: " +labels: question +assignees: '' + +--- + +**Ask Away!** + +- Please double-check the [docs](https://ruck.dontpanic.sh) to make sure that your question is not already answered there. +- Please double-check the issues to make sure that your question has not been answered before. diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..0c23870 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,34 @@ +# This workflow will upload a Python Package +# using Twine when a release is created + +name: publish + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install setuptools wheel twine + + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + python3 setup.py sdist bdist_wheel + python3 -m twine upload dist/* diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml new file mode 100644 index 0000000..e84d369 --- /dev/null +++ b/.github/workflows/python-test.yml @@ -0,0 +1,44 @@ +# This workflow will install Python dependencies, +# then run tests over a variety of Python versions. 
+ +name: test + +on: + push: + branches: [ main, dev ] + tags: [ '*' ] + pull_request: + branches: [ main, dev ] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] # [ubuntu-latest, windows-latest, macos-latest] + python-version: [3.8] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install -r requirements.txt + python3 -m pip install -r requirements-test.txt + + - name: Test with pytest + run: | + python3 -m pytest --cov=ruck tests/ + + - uses: codecov/codecov-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: true + # codecov automatically merges all generated files + # if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.9 diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..cc39a88 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,19 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +mkdocs: + configuration: mkdocs.yml + fail_on_warning: false + +# Optionally build your docs in additional formats such as PDF +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.8 + install: + - requirements: docs/requirements.txt diff --git a/README.md b/README.md index bee9f26..4e08483 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,55 @@ -# ruck -Efficient Parallel Genetic Algorithms For Python + +

+

🧬 Ruck

+

+ Performant evolutionary algorithms for Python +

+

+ +

+ + license + + + python versions + + + pypi version + + + tests status + +

+ +

+

+ Visit the docs for more info, or browse the releases. +

+

+ Contributions are welcome! +

+

+ +------------------------ + +## Goals + +Ruck aims to fulfil the following criteria: + +1. Provide **high quality**, **readable** implementations of algorithms. +2. Be easily **extensible** and **debuggable**. +3. Be performant while maintaining its simplicity. + +## Citing Ruck + +Please use the following citation if you use Ruck in your research: + +```bibtex +@Misc{Michlo2021Ruck, + author = {Nathan Juraj Michlo}, + title = {Ruck - Performant evolutionary algorithms for Python}, + howpublished = {Github}, + year = {2021}, + url = {https://github.com/nmichlo/ruck} +} +``` diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..65a1d35 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,25 @@ +# Ruck + +Performant evolutionary algorithms for Python. + +## Goals + +Ruck aims to fulfil the following criteria: + +1. Provide **high quality**, **readable** implementations of algorithms. +2. Be easily **extensible** and **debuggable**. +3. Be performant while maintaining its simplicity. 
+ +## Citing Ruck + +Please use the following citation if you use Ruck in your research: + +```bibtex +@Misc{Michlo2021Ruck, + author = {Nathan Juraj Michlo}, + title = {Ruck - Performant evolutionary algorithms for Python}, + howpublished = {Github}, + year = {2021}, + url = {https://github.com/nmichlo/ruck} +} +``` diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..b86d1a4 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,7 @@ + +mkdocs == 1.1.2 +mkdocstrings == 0.14.0 +mkdocs-material == 6.2.5 +mkdocs-git-revision-date-localized-plugin == 0.8 +# pygments == 2.7.4 +# pymdown-extensions == 8.1 diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..f6eef57 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,33 @@ +site_name: 🧬 Ruck Docs +repo_url: https://github.com/nmichlo/ruck +repo_name: nmichlo/ruck +theme: + name: material + palette: + scheme: default + primary: green + icon: + repo: fontawesome/brands/github + logo: material/library + favicon: images/favicon.png +plugins: + - search + - mkdocstrings # reference functions and code in markdown `::: module.class.func` + - git-revision-date-localized: # visible last edit date on each page + type: date + fallback_to_build_date: false +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.highlight + - pymdownx.inlinehilite + - pymdownx.superfences + - pymdownx.snippets + - pymdownx.tabbed + - pymdownx.arithmatex: + generic: true +# THE !! 
CURRENTLY BREAKS READTHEDOCS +# https://github.com/readthedocs/readthedocs.org/issues/7865 +# - pymdownx.emoji: +# emoji_index: !!python/name:materialx.emoji.twemoji +# emoji_generator: !!python/name:materialx.emoji.to_svg diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..a6fb8d5 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,9 @@ + +[pytest] +minversion = 6.0 +testpaths = + tests + ruck +python_files = + test_*.py + __test__*.py diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..412de73 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,2 @@ +pytest>=6.2.4 +pytest-cov>=2.12.1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9e5bf19 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pip>=21.0 +numpy>=1.21.0 +tqdm>=4.60.0 diff --git a/ruck/__init__.py b/ruck/__init__.py new file mode 100644 index 0000000..e472153 --- /dev/null +++ b/ruck/__init__.py @@ -0,0 +1,2 @@ + +# Ruck does nothing at the moment! diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..108817a --- /dev/null +++ b/setup.py @@ -0,0 +1,73 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
+
+import setuptools
+
+
+# ========================================================================= #
+# HELPER #
+# ========================================================================= #
+
+
+with open("README.md", "r", encoding="utf-8") as file:
+    long_description = file.read()
+
+with open('requirements.txt', 'r', encoding="utf-8") as f:
+    # strip trailing `# comments` and whitespace from each line, dropping lines that end up empty
+    install_requires = [req for req in (line.split('#')[0].strip() for line in f) if req]
+
+
+# ========================================================================= #
+# SETUP #
+# ========================================================================= #
+
+
+setuptools.setup(
+    name="ruck",
+    author="Nathan Juraj Michlo",
+    author_email="NathanJMichlo@gmail.com",
+
+    version="0.0.1.dev1",
+    python_requires=">=3.8",
+    packages=setuptools.find_packages(),
+
+    install_requires=install_requires,
+
+    url="https://github.com/nmichlo/ruck",
+    description="Performant evolutionary algorithms for Python.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+
+    classifiers=[
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3.8",
+        "Intended Audience :: Science/Research",
+    ],
+)
+
+
+# ========================================================================= #
+# END #
+#
========================================================================= # diff --git a/tests/test.py b/tests/test.py new file mode 100644 index 0000000..51cacab --- /dev/null +++ b/tests/test.py @@ -0,0 +1,37 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
+
+
+# ========================================================================= #
+# TESTS #
+# ========================================================================= #
+
+
+def test_tests():
+    assert True
+
+
+# ========================================================================= #
+# END #
+# ========================================================================= #
diff --git a/tests/util.py b/tests/util.py
new file mode 100644
index 0000000..c2fe6ca
--- /dev/null
+++ b/tests/util.py
@@ -0,0 +1,88 @@
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
+# MIT License
+#
+# Copyright (c) 2021 Nathan Juraj Michlo
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
+
+import contextlib
+import os
+import sys
+from contextlib import contextmanager
+
+
+# ========================================================================= #
+# TEST UTILS #
+# ========================================================================= #
+
+
+@contextmanager
+def no_stdout():
+    """Discard everything written to ``sys.stdout`` inside the ``with`` block."""
+    old_stdout = sys.stdout
+    # the devnull handle is closed and stdout is restored even if the block raises
+    with open(os.devnull, 'w') as devnull:
+        sys.stdout = devnull
+        try:
+            yield
+        finally:
+            sys.stdout = old_stdout
+
+
+@contextmanager
+def no_stderr():
+    """Discard everything written to ``sys.stderr`` inside the ``with`` block."""
+    old_stderr = sys.stderr
+    # the devnull handle is closed and stderr is restored even if the block raises
+    with open(os.devnull, 'w') as devnull:
+        sys.stderr = devnull
+        try:
+            yield
+        finally:
+            sys.stderr = old_stderr
+
+
+@contextlib.contextmanager
+def temp_wd(new_wd):
+    """Run the ``with`` block with ``new_wd`` as the current working directory."""
+    old_wd = os.getcwd()
+    os.chdir(new_wd)
+    try:
+        yield
+    finally:
+        # always return to the original directory, even if the block raises
+        os.chdir(old_wd)
+
+
+@contextlib.contextmanager
+def temp_sys_args(new_argv):
+    """Replace ``sys.argv`` with ``new_argv`` for the duration of the ``with`` block."""
+    old_argv = sys.argv
+    sys.argv = new_argv
+    try:
+        yield
+    finally:
+        # always restore the original arguments, even if the block raises
+        sys.argv = old_argv
+
+
+# ========================================================================= #
+# END #
+# ========================================================================= #
From 9c15d7ad10644be4498c73b62fc1c7f4c22a324d Mon Sep 17 00:00:00 2001
From: Nathan Michlo
Date: Thu, 23 Sep 2021 13:28:24 +0200
Subject: [PATCH 02/18] initial commit

---
 examples/onemax.py | 93 +++++++++++++++++
 ruck/__init__.py | 36 ++++++-
 ruck/_history.py | 198 ++++++++++++++++++++++++++++++++++++
 ruck/_member.py | 92 +++++++++++++++++
 ruck/_module.py | 67 ++++++++++++
 ruck/_train.py | 175 +++++++++++++++++++++++++++++++
 ruck/_util/__init__.py | 23 +++++
 ruck/_util/args.py | 67 ++++++++++++
 ruck/_util/random.py | 48 +++++++++
 ruck/functional/__init__.py | 30 ++++++
 ruck/functional/_helper.py | 111 ++++++++++++++++++++
 ruck/functional/_mate.py | 51 ++++++++++
 ruck/functional/_mutate.py | 54 ++++++++++
 ruck/functional/_select.py | 62 +++++++++++
 14 files changed, 1106 insertions(+), 1 deletion(-)
 create mode 100644 examples/onemax.py
 create mode 100644 ruck/_history.py
 create mode 100644 ruck/_member.py
 create mode 100644 ruck/_module.py
create mode 100644 ruck/_train.py create mode 100644 ruck/_util/__init__.py create mode 100644 ruck/_util/args.py create mode 100644 ruck/_util/random.py create mode 100644 ruck/functional/__init__.py create mode 100644 ruck/functional/_helper.py create mode 100644 ruck/functional/_mate.py create mode 100644 ruck/functional/_mutate.py create mode 100644 ruck/functional/_select.py diff --git a/examples/onemax.py b/examples/onemax.py new file mode 100644 index 0000000..c7676ba --- /dev/null +++ b/examples/onemax.py @@ -0,0 +1,93 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +import contextlib +import logging +import time + +import numpy as np + +from ruck import * +from ruck import EaModule +from ruck import PopulationHint + + +class OneMaxModule(EaModule): + + def __init__( + self, + generations: int = 40, + population_size: int = 128, + member_size: int = 10_000, + p_mate: float = 0.5, + p_mutate: float = 0.5, + ): + super().__init__() + self.save_hyperparameters() + + @property + def num_generations(self) -> int: + return self.hparams.generations + + def gen_starting_population(self) -> PopulationHint: + return [ + Member(np.random.random(self.hparams.member_size) < 0.5) + for _ in range(self.hparams.population_size) + ] + + def generate_offspring(self, population: PopulationHint) -> PopulationHint: + # SEE: R.factory_ea_alg -- TODO: make it easier to swap! + return R.apply_mate_and_mutate( + population=R.select_tournament(population, len(population)), # tools.selNSGA2 + mate=R.mate_crossover_1d, + mutate=R.mutate_flip_bit_types, + p_mate=self.hparams.p_mate, + p_mutate=self.hparams.p_mutate, + ) + + def select_population(self, population: PopulationHint, offspring: PopulationHint) -> PopulationHint: + return offspring + + def evaluate_member(self, value: np.ndarray) -> float: + return value.mean() + + +if __name__ == '__main__': + # about 10x faster than the onemax (0.18s vs 2.6s) + # numpy version given for deap + # -- https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py + + logging.basicConfig(level=logging.INFO) + + @contextlib.contextmanager + def Timer(name: str): + t = time.time() + yield + print(name, time.time() - t) + + with Timer('ruck:trainer'): + module = OneMaxModule(generations=40, population_size=300, member_size=100) + population, logbook, halloffame = Trainer(progress=False).fit(module) + print(logbook[0]) + print(logbook[-1]) diff --git a/ruck/__init__.py b/ruck/__init__.py index e472153..e2a8430 100644 --- 
a/ruck/__init__.py +++ b/ruck/__init__.py @@ -1,2 +1,36 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -# Ruck does nothing at the moment! 
+
+# base
+from ruck._member import Member
+from ruck._member import PopulationHint
+from ruck._module import EaModule
+
+# training
+from ruck._train import Trainer
+from ruck._train import yield_population_steps
+
+# functional utils
+from ruck import functional as R
diff --git a/ruck/_history.py b/ruck/_history.py
new file mode 100644
index 0000000..5943c64
--- /dev/null
+++ b/ruck/_history.py
@@ -0,0 +1,224 @@
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
+# MIT License
+#
+# Copyright (c) 2021 Nathan Juraj Michlo
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
+
+import dataclasses
+import heapq
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+
+
+from ruck._member import PopulationHint
+
+
+# ========================================================================= #
+# Type Hints #
+# ========================================================================= #
+
+
+ValueFnHint = Callable[[Any], Any]
+StatFnHint = Callable[[Any], Any]
+
+
+# ========================================================================= #
+# Logbook #
+# ========================================================================= #
+
+
+class StatsGroup(object):
+    """
+    A named collection of statistic functions that are all computed over the
+    same value, which is first passed through `value_fn` if one is given.
+    """
+
+    def __init__(self, value_fn: ValueFnHint = None, **stats_fns: StatFnHint):
+        assert all(str.isidentifier(key) for key in stats_fns.keys())
+        assert stats_fns
+        self._value_fn = value_fn
+        self._stats_fns = stats_fns
+
+    @property
+    def keys(self) -> List[str]:
+        return list(self._stats_fns.keys())
+
+    def compute(self, value: Any) -> Dict[str, Any]:
+        if self._value_fn is not None:
+            value = self._value_fn(value)
+        return {
+            key: stat_fn(value)
+            for key, stat_fn in self._stats_fns.items()
+        }
+
+
+class Logbook(object):
+    """
+    Ordered history of records, each combining externally supplied values with
+    the statistics computed over a population by the registered stats groups.
+    """
+
+    def __init__(self, *external_keys: str, **stats_groups: StatsGroup):
+        self._all_ordered_keys = []
+        self._external_keys = []
+        self._stats_groups = {}
+        self._history = []
+        # register values
+        for k in external_keys:
+            self.register_external_stat(k)
+        for k, v in stats_groups.items():
+            self.register_stats_group(k, v)
+
+    def _assert_key_valid(self, name: str):
+        if not str.isidentifier(name):
+            raise ValueError(f'stat name is not a valid identifier: {repr(name)}')
+        return name
+
+    def _assert_key_available(self, name: str):
+        if name in self._external_keys:
+            raise ValueError(f'external stat already named: {repr(name)}')
+        if name in self._stats_groups:
+            raise ValueError(f'stat group already named: {repr(name)}')
+        return name
+
+    def register_external_stat(self, name: str):
+        """
+        Register an external stat, a value for it must be passed to every `record` call.
+        - raises a ValueError if the name is not a valid identifier or is already taken.
+        """
+        self._assert_key_available(self._assert_key_valid(name))
+        # add stat
+        self._external_keys.append(name)
+        self._all_ordered_keys.append(name)
+        return self
+
+    def register_stats_group(self, name: str, stats_group: StatsGroup):
+        """
+        Register a stats group, its results are recorded under the keys `<name>:<stat>`.
+        - raises a ValueError if the name is not a valid identifier or is already taken.
+        """
+        self._assert_key_available(self._assert_key_valid(name))
+        assert isinstance(stats_group, StatsGroup)
+        assert stats_group not in self._stats_groups.values()
+        # add stat group
+        self._stats_groups[name] = stats_group
+        self._all_ordered_keys.extend(f'{name}:{key}' for key in stats_group.keys)
+        return self
+
+    def record(self, population: 'PopulationHint', **external_values):
+        """
+        Compute every registered stat over the population, then append the
+        combined record to the history and return a copy of it.
+        """
+        # extra stats
+        if set(external_values.keys()) != set(self._external_keys):
+            raise KeyError(f'required external_values: {sorted(self._external_keys)}, got: {sorted(external_values.keys())}')
+        # external values
+        stats = dict(external_values)
+        # generate stats
+        for name, stat_group in self._stats_groups.items():
+            for key, value in stat_group.compute(population).items():
+                stats[f'{name}:{key}'] = value
+        # order stats
+        assert set(stats.keys()) == set(self._all_ordered_keys)
+        record = {k: stats[k] for k in self._all_ordered_keys}
+        # record and return stats
+        self._history.append(record)
+        return dict(record)
+
+    @property
+    def history(self) -> List[Dict[str, Any]]:
+        return list(self._history)
+
+    def __getitem__(self, idx: int):
+        assert isinstance(idx, int)
+        return dict(self._history[idx])
+
+    def __len__(self):
+        return len(self._history)
+
+    def __iter__(self):
+        for i in range(len(self)):
+            yield self[i]
+
+
+# ========================================================================= #
+# HallOfFame #
+# ========================================================================= #
+
+
+@dataclasses.dataclass(order=True)
+class HallOfFameItem:
+    # items are ordered by fitness only, the member is excluded from comparisons
+    fitness: float
+    member: Any = dataclasses.field(compare=False)
+
+
+class HallOfFame(object):
+    """
+    Tracks the `n_best` fittest members seen over all calls to `update`,
+    storing at most one member per distinct fitness value.
+    """
+
+    def __init__(self, n_best: int = 5, maximize: bool = True):
+        self._maximize = maximize
+        assert maximize  # only maximization is currently handled
+        self._n_best = n_best
+        self._heap = []  # element 0 is always the smallest
+        self._scores = {}
+
+    def update(self, population: PopulationHint):
+        best = sorted(population, key=lambda m: m.fitness, reverse=True)[:self._n_best]
+        # add the best
+        for member in best:
+            # try add to hall of fame
+            item = HallOfFameItem(fitness=member.fitness, member=member)
+            # skip if we already have the same score ...
+            # TODO: this should not ignore members with the same scores, this is hacky
+            if item.fitness in self._scores:
+                continue
+            # checks
+            self._scores[item.fitness] = item
+            if len(self._heap) < self._n_best:
+                heapq.heappush(self._heap, item)
+            else:
+                # the heap is full: evict the worst item, which may be the new item itself
+                removed = heapq.heappushpop(self._heap, item)
+                del self._scores[removed.fitness]
+
+    @property
+    def members(self) -> PopulationHint:
+        return [m.member for m in sorted(self._heap, reverse=True)]
+
+    @property
+    def values(self) -> List[Any]:
+        return [m.value for m in self.members]
+
+    @property
+    def scores(self) -> List[Any]:
+        return [m.fitness for m in self.members]
+
+
+# ========================================================================= #
+# END #
+# ========================================================================= #
diff --git a/ruck/_member.py b/ruck/_member.py
new file mode 100644
index 0000000..69152a7
--- /dev/null
+++ b/ruck/_member.py
@@ -0,0 +1,92 @@
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
+# MIT License
+#
+# Copyright (c) 2021 Nathan Juraj Michlo
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following
conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +from typing import Any +from typing import List + +import numpy as np + + +# ========================================================================= # +# Members # +# ========================================================================= # + + +class MemberIsNotEvaluatedError(Exception): + pass + + +class MemberAlreadyEvaluatedError(Exception): + pass + + +class Member(object): + + def __init__(self, value: Any): + self._value = value + self._fitness = None + + @property + def value(self) -> Any: + return self._value + + @property + def fitness(self): + if not self.is_evaluated: + raise MemberIsNotEvaluatedError('The member has not been evaluated, the fitness has not yet been set.') + return self._fitness + + @fitness.setter + def fitness(self, value): + if self.is_evaluated: + raise MemberAlreadyEvaluatedError('The member has already been evaluated, the fitness can only ever be set once. 
Create a new member instead!') + if np.isnan(value): + raise ValueError('fitness values cannot be NaN, this is an error!') + self._fitness = value + + @property + def is_evaluated(self) -> bool: + return (self._fitness is not None) + + def __str__(self): + return repr(self) + + def __repr__(self): + if self.is_evaluated: + return f'{self.__class__.__name__}<{self.fitness}>' + else: + return f'{self.__class__.__name__}<>' + + +# ========================================================================= # +# Population # +# ========================================================================= # + + +PopulationHint = List[Member] + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/_module.py b/ruck/_module.py new file mode 100644 index 0000000..c3656b4 --- /dev/null +++ b/ruck/_module.py @@ -0,0 +1,67 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +from typing import Any +from typing import Dict + +from ruck._history import StatsGroup +from ruck._member import PopulationHint +from ruck._util.args import HParamsMixin + + +# ========================================================================= # +# Module # +# ========================================================================= # + + +class EaModule(HParamsMixin): + + # OVERRIDE + + def get_stats_groups(self) -> Dict[str, StatsGroup]: + return {} + + def get_progress_stats(self): + return ('evals', 'fit:max',) + + @property + def num_generations(self) -> int: + raise NotImplementedError + + def gen_starting_population(self) -> PopulationHint: + raise NotImplementedError + + def generate_offspring(self, population: PopulationHint) -> PopulationHint: + raise NotImplementedError + + def select_population(self, population: PopulationHint, offspring: PopulationHint) -> PopulationHint: + raise NotImplementedError + + def evaluate_member(self, value: Any) -> float: + raise NotImplementedError + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/_train.py b/ruck/_train.py new file mode 100644 index 0000000..a842a4f --- /dev/null +++ b/ruck/_train.py @@ -0,0 +1,175 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the 
Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + + +import logging +import numpy as np +from tqdm import tqdm + +from ruck._history import HallOfFame +from ruck._history import Logbook +from ruck._history import StatsGroup +from ruck._member import Member +from ruck._member import PopulationHint +from ruck._module import EaModule + + +log = logging.getLogger(__name__) + + +# ========================================================================= # +# Utils Trainer # +# ========================================================================= # + + +def _check_population(population: PopulationHint, required_size: int) -> PopulationHint: + assert len(population) > 0, 'population must not be empty' + assert len(population) == required_size, 'population size is invalid' + assert all(isinstance(member, Member) for member in population), 'items in population are not members' + return population + + +# def _get_batch_size(total: int) -> int: +# resources = ray.available_resources() +# if 'CPU' not in resources: +# return total +# else: +# cpus = 
int(resources['CPU']) +# batch_size = (total + cpus - 1) // cpus +# return batch_size + + +# ========================================================================= # +# Evaluate Helper # +# ========================================================================= # + + +def _eval_sequential(population: PopulationHint, eval_fn): + return [eval_fn(member.value) for member in population] + + +# _evaluate_ray = ray.remote(_eval_sequential) + + +# def _eval_multiproc(population: PopulationHint, eval_fn): +# member_batches = iter_chunks(population, chunk_size=_get_batch_size(len(population))) +# score_batches = ray.get([_evaluate_ray.remote(members, eval_fn=eval_fn) for members in member_batches]) +# return [score for score_batch in score_batches for score in score_batch] + + +# ========================================================================= # +# Evaluate Invalid # +# ========================================================================= # + + +def _evaluate_invalid(population: PopulationHint, eval_fn): + unevaluated = [member for member in population if not member.is_evaluated] + # get scores + scores = _eval_sequential(unevaluated, eval_fn) + # set values + for member, score in zip(unevaluated, scores): + member.fitness = score + # return number of evaluations + return len(unevaluated) + + +# ========================================================================= # +# Functional Trainer # +# ========================================================================= # + + +def yield_population_steps(module: EaModule): + # 1. create population + population = module.gen_starting_population() + population_size = len(population) + population = _check_population(population, required_size=population_size) + + # 2. evaluate population + evaluations = _evaluate_invalid(population, eval_fn=module.evaluate_member) + + # yield initial population + yield 0, population, evaluations, population + + # training loop + for i in range(1, module.num_generations+1): + # 1. 
generate offspring + offspring = module.generate_offspring(population) + # 2. evaluate + evaluations = _evaluate_invalid(offspring, eval_fn=module.evaluate_member) + # 3. select + population = module.select_population(population, offspring) + population = _check_population(population, required_size=population_size) + + # yield steps + yield i, offspring, evaluations, population + + +# ========================================================================= # +# Class Trainer # +# ========================================================================= # + + +class Trainer(object): + + def __init__( + self, + progress: bool = True, + history_n_best: int = 5, + ): + self._progress = progress + self._history_n_best = history_n_best + assert self._history_n_best > 0 + + def fit(self, module: EaModule): + assert isinstance(module, EaModule) + # history trackers + logbook, halloffame = self._create_default_trackers(module) + # progress bar and training loop + with tqdm(total=module.num_generations+1, desc='generation', disable=not self._progress, ncols=120) as p: + for gen, offspring, evals, population in yield_population_steps(module): + # update statistics with new population + halloffame.update(offspring) + stats = logbook.record(population, gen=gen, evals=evals) + # update progress bar + p.update() + p.set_postfix({k: stats[k] for k in module.get_progress_stats()}) + # done + return population, logbook, halloffame + + def _create_default_trackers(self, module: EaModule): + halloffame = HallOfFame( + n_best=self._history_n_best, + maximize=True, + ) + logbook = Logbook( + 'gen', 'evals', + fit=StatsGroup(lambda pop: [m.fitness for m in pop], min=np.min, max=np.max, mean=np.mean), + **module.get_stats_groups() + ) + return logbook, halloffame + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/_util/__init__.py 
b/ruck/_util/__init__.py new file mode 100644 index 0000000..9a05a47 --- /dev/null +++ b/ruck/_util/__init__.py @@ -0,0 +1,23 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ diff --git a/ruck/_util/args.py b/ruck/_util/args.py new file mode 100644 index 0000000..4d25765 --- /dev/null +++ b/ruck/_util/args.py @@ -0,0 +1,67 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +from argparse import Namespace +from typing import Optional +from typing import Sequence + + +class HParamsMixin(object): + + __hparams = None + + def save_hyperparameters(self, ignore: Optional[Sequence[str]] = None, include: Optional[Sequence[str]] = None): + import inspect + import warnings + # get ignored values + ignored = set() if (ignore is None) else set(ignore) + included = set() if (include is None) else set(include) + assert all(str.isidentifier(k) for k in ignored) + assert all(str.isidentifier(k) for k in included) + # get function params & signature + locals = inspect.currentframe().f_back.f_locals + params = inspect.signature(self.__class__.__init__) + # get values + (self_param, *params) = params.parameters.items() + # check that self is correct & skip it + assert self_param[0] == 'self' + assert locals[self_param[0]] is self + # get other values + values = {} + for k, v in params: + if k in ignored: continue + if v.kind == v.VAR_KEYWORD: warnings.warn('variable keywords argument saved, consider converting to explicit arguments.') + if v.kind == v.VAR_POSITIONAL: warnings.warn('variable positional argument saved, consider converting to explicit named arguments.') + values[k] = locals[k] + # get extra values + for k in included: + assert k != 'self' + assert k not in values, 'k has already been included' + values[k] = locals[k] + # done! 
+ self.__hparams = Namespace(**values) + + @property + def hparams(self): + return self.__hparams diff --git a/ruck/_util/random.py b/ruck/_util/random.py new file mode 100644 index 0000000..5fca20f --- /dev/null +++ b/ruck/_util/random.py @@ -0,0 +1,48 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + + +import numpy as np + + +# ========================================================================= # +# From https://github.com/nmichlo/disent +# ========================================================================= # + + +def random_choice_prng(a, size=None, replace=True, p=None): + # create seeded pseudo random number generator + # - built in np.random.choice cannot handle large values: https://github.com/numpy/numpy/issues/5299#issuecomment-497915672 + # - PCG64 is the default: https://numpy.org/doc/stable/reference/random/bit_generators/index.html + # - PCG64 has good statistical properties and is fast: https://numpy.org/doc/stable/reference/random/performance.html + g = np.random.Generator(np.random.PCG64(seed=np.random.randint(0, 2**32))) + # sample indices + choices = g.choice(a, size=size, replace=replace, p=p) + # done! + return choices + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/functional/__init__.py b/ruck/functional/__init__.py new file mode 100644 index 0000000..be45628 --- /dev/null +++ b/ruck/functional/__init__.py @@ -0,0 +1,30 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or 
substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +from ruck.functional._mate import * +from ruck.functional._mutate import * +from ruck.functional._select import * + +# helper -- should be replaced +from ruck.functional._helper import * diff --git a/ruck/functional/_helper.py b/ruck/functional/_helper.py new file mode 100644 index 0000000..a0e43e2 --- /dev/null +++ b/ruck/functional/_helper.py @@ -0,0 +1,111 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +from ruck import Member +from ruck import PopulationHint +from ruck.functional import MateFnHint +from ruck.functional import MutateFnHint + + +import random + + +# ========================================================================= # +# Crossover & Mutate Helpers # +# ========================================================================= # + + +def apply_mate_and_mutate( + population: PopulationHint, + mate: MateFnHint, + mutate: MutateFnHint, + p_mate: float, + p_mutate: float, +) -> PopulationHint: + """ + Apply crossover AND mutation. + Modified individuals are independent of the population, + requiring their fitness to be re-evaluated. + + NB: Mate & Mutate should return copies of the received values. + + ** Modified from DEAP ** + """ + offspring = list(population) + + # EXTRA + random.shuffle(offspring) + + # Apply crossover + for i in range(1, len(offspring), 2): + if random.random() < p_mate: + value0, value1 = mate(offspring[i - 1].value, offspring[i].value) + offspring[i - 1], offspring[i] = Member(value0), Member(value1) + + # Apply Mutation + for i in range(len(offspring)): + if random.random() < p_mutate: + value = mutate(offspring[i].value) + offspring[i] = Member(value) + + return offspring + + +def apply_mate_or_mutate_or_reproduce( + population: PopulationHint, + mate: MateFnHint, + mutate: MutateFnHint, + p_mate: float, + p_mutate: float, + num_offspring: int, # lambda_ +) -> PopulationHint: + """ + Apply crossover OR mutation OR reproduction + Modified individuals are independent of the population, + requiring their fitness to be re-evaluated. 
+ + NB: Mate & Mutate should return copies of the received values. + + ** Modified from DEAP ** + """ + assert (p_mate + p_mutate) <= 1.0, 'The sum of the crossover and mutation probabilities must be smaller or equal to 1.0.' + + offspring = [] + for _ in range(num_offspring): + op_choice = random.random() + if op_choice < p_mate: + # Apply crossover + ind1, ind2 = random.sample(population, 2) + value, _ = mate(ind1.value, ind2.value) + offspring.append(Member(value)) + elif op_choice < p_mate + p_mutate: + # Apply mutation + ind = random.choice(population) + value = mutate(ind.value) + offspring.append(Member(value)) + else: + # Apply reproduction + offspring.append(random.choice(population)) + + return offspring diff --git a/ruck/functional/_mate.py b/ruck/functional/_mate.py new file mode 100644 index 0000000..64de482 --- /dev/null +++ b/ruck/functional/_mate.py @@ -0,0 +1,51 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +from typing import Any +from typing import Callable +from typing import Tuple +import numpy as np + + +# ========================================================================= # +# Mate # +# ========================================================================= # + + +MateFnHint = Callable[[Any, Any], Tuple[Any, Any]] + + +def mate_crossover_1d(a: np.ndarray, b: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + assert a.ndim == 1 + assert a.shape == b.shape + i, j = np.random.randint(0, len(a), size=2) + i, j = min(i, j), max(i, j) + new_a = np.concatenate([a[:i], b[i:j], a[j:]], axis=0) + new_b = np.concatenate([b[:i], a[i:j], b[j:]], axis=0) + return new_a, new_b + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/functional/_mutate.py b/ruck/functional/_mutate.py new file mode 100644 index 0000000..5fffd19 --- /dev/null +++ b/ruck/functional/_mutate.py @@ -0,0 +1,54 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above 
copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +from typing import Any +from typing import Callable + +import numpy as np + + +# ========================================================================= # +# Mutate # +# ========================================================================= # + + +MutateFnHint = Callable[[Any], Any] + + +def mutate_flip_bits(a: np.ndarray, p: float = 0.05): + return a ^ (np.random.random(a.shape) < p) + + +def mutate_flip_bit_types(a: np.ndarray, p: float = 0.05): + if np.random.random() < 0.5: + # flip set bits + return a ^ ((np.random.random(a.shape) < p) & a) + else: + # flip unset bits + return a ^ ((np.random.random(a.shape) < p) & ~a) + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/functional/_select.py b/ruck/functional/_select.py new file mode 100644 index 0000000..9ea0c85 --- /dev/null +++ b/ruck/functional/_select.py @@ -0,0 +1,62 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software 
without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +import random +from typing import Callable + +from ruck import PopulationHint +from ruck._util.random import random_choice_prng + + +# ========================================================================= # +# Select # +# ========================================================================= # + + +SelectFnHint = Callable[[PopulationHint, int], PopulationHint] + + +def select_best(population: PopulationHint, num: int) -> PopulationHint: + return sorted(population, key=lambda m: m.fitness, reverse=True)[:num] + + +def select_worst(population: PopulationHint, num: int) -> PopulationHint: + return sorted(population, key=lambda m: m.fitness, reverse=False)[:num] + + +def select_random(population: PopulationHint, num: int) -> PopulationHint: + return random_choice_prng(population, size=num, replace=False) + + +def select_tournament(population: PopulationHint, num: int, k: int = 3) -> PopulationHint: + key = lambda m: m.fitness + return [ + max(random.sample(population, k=k), key=key) + for _ in range(num) 
+ ] + + +# ========================================================================= # +# Selection # +# ========================================================================= # From c7406e36ac54a6e871eb3aa4b7bcf0b9516d0a73 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Thu, 23 Sep 2021 15:27:06 +0200 Subject: [PATCH 03/18] update --- examples/onemax.py | 27 ++++--- ruck/functional/_helper.py | 144 ++++++++++++++++++++++++------------- 2 files changed, 109 insertions(+), 62 deletions(-) diff --git a/examples/onemax.py b/examples/onemax.py index c7676ba..e3b8b34 100644 --- a/examples/onemax.py +++ b/examples/onemax.py @@ -38,8 +38,8 @@ class OneMaxModule(EaModule): def __init__( self, generations: int = 40, - population_size: int = 128, - member_size: int = 10_000, + population_size: int = 300, + member_size: int = 100, p_mate: float = 0.5, p_mutate: float = 0.5, ): @@ -56,26 +56,30 @@ def gen_starting_population(self) -> PopulationHint: for _ in range(self.hparams.population_size) ] + def evaluate_member(self, value: np.ndarray) -> float: + # this is a large reason why the deap version is slow, + # it does not make use of numpy operations + return value.sum() + def generate_offspring(self, population: PopulationHint) -> PopulationHint: - # SEE: R.factory_ea_alg -- TODO: make it easier to swap! 
+ # Same as deap.algorithms.eaSimple which uses deap.algorithms.varAnd + offspring = R.select_tournament(population, len(population), k=3) # tools.selNSGA2 + # vary population return R.apply_mate_and_mutate( - population=R.select_tournament(population, len(population)), # tools.selNSGA2 - mate=R.mate_crossover_1d, - mutate=R.mutate_flip_bit_types, + population=offspring, + mate_fn=R.mate_crossover_1d, + mutate_fn=lambda a: R.mutate_flip_bits(a, p=0.05), p_mate=self.hparams.p_mate, p_mutate=self.hparams.p_mutate, ) def select_population(self, population: PopulationHint, offspring: PopulationHint) -> PopulationHint: + # Same as deap.algorithms.eaSimple return offspring - def evaluate_member(self, value: np.ndarray) -> float: - return value.mean() - if __name__ == '__main__': - # about 10x faster than the onemax (0.18s vs 2.6s) - # numpy version given for deap + # about 18x faster than deap's numpy onemax example (0.145s vs 2.6s) # -- https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py logging.basicConfig(level=logging.INFO) @@ -89,5 +93,6 @@ def Timer(name: str): with Timer('ruck:trainer'): module = OneMaxModule(generations=40, population_size=300, member_size=100) population, logbook, halloffame = Trainer(progress=False).fit(module) + print(logbook[0]) print(logbook[-1]) diff --git a/ruck/functional/_helper.py b/ruck/functional/_helper.py index a0e43e2..bcbec4f 100644 --- a/ruck/functional/_helper.py +++ b/ruck/functional/_helper.py @@ -29,6 +29,7 @@ import random +import numpy as np # ========================================================================= # @@ -36,76 +37,117 @@ # ========================================================================= # -def apply_mate_and_mutate( +def apply_mate( population: PopulationHint, - mate: MateFnHint, - mutate: MutateFnHint, - p_mate: float, - p_mutate: float, + mate_fn: MateFnHint, + p: float = 0.5, ) -> PopulationHint: - """ - Apply crossover AND mutation. 
- Modified individuals are independent of the population, - requiring their fitness to be re-evaluated. + # randomize order so we have randomized pairs + offspring = list(population) + np.random.shuffle(offspring) + # apply mating to population -- why is this faster than pre-generating the boolean mask? + for i in range(1, len(population), 2): + if random.random() < p: + v0, v1 = mate_fn(offspring[i-1].value, offspring[i].value) + offspring[i-1], offspring[i] = Member(v0), Member(v1) + # done! + return offspring - NB: Mate & Mutate should return copies of the received values. - ** Modified from DEAP ** - """ - offspring = list(population) +def apply_mutate( + population: PopulationHint, + mutate_fn: MutateFnHint, + p: float = 0.5, +) -> PopulationHint: + elem_mask = np.random.random(size=len(population)) < p + # apply mutate to population + return [ + Member(mutate_fn(m.value)) if do_mutate else m + for m, do_mutate in zip(population, elem_mask) + ] - # EXTRA - random.shuffle(offspring) - # Apply crossover - for i in range(1, len(offspring), 2): - if random.random() < p_mate: - value0, value1 = mate(offspring[i - 1].value, offspring[i].value) - offspring[i - 1], offspring[i] = Member(value0), Member(value1) +def apply_mate_and_mutate( + population: PopulationHint, + mate_fn: MateFnHint, + mutate_fn: MutateFnHint, + p_mate: float = 0.5, + p_mutate: float = 0.5, +) -> PopulationHint: + """ + Apply crossover AND mutation - # Apply Mutation - for i in range(len(offspring)): - if random.random() < p_mutate: - value = mutate(offspring[i].value) - offspring[i] = Member(value) + NOTE: + - Modified individuals need their fitness re-evaluated + - Mate & Mutate should always return copies of the received values. 
- return offspring + ** Should be equivalent to varAnd from DEAP ** + """ + population = apply_mate(population, mate_fn, p=p_mate) + population = apply_mutate(population, mutate_fn, p=p_mutate) + return population def apply_mate_or_mutate_or_reproduce( population: PopulationHint, - mate: MateFnHint, - mutate: MutateFnHint, - p_mate: float, - p_mutate: float, num_offspring: int, # lambda_ + mate_fn: MateFnHint, + mutate_fn: MutateFnHint, + p_mate: float = 0.5, + p_mutate: float = 0.5, ) -> PopulationHint: """ Apply crossover OR mutation OR reproduction - Modified individuals are independent of the population, - requiring their fitness to be re-evaluated. - NB: Mate & Mutate should return copies of the received values. + NOTE: + - Modified individuals need their fitness re-evaluated + - Mate & Mutate should always return copies of the received values. - ** Modified from DEAP ** + ** Should be equivalent to varOr from DEAP, but significantly faster for larger populations ** """ assert (p_mate + p_mutate) <= 1.0, 'The sum of the crossover and mutation probabilities must be smaller or equal to 1.0.' 
- offspring = [] - for _ in range(num_offspring): - op_choice = random.random() - if op_choice < p_mate: - # Apply crossover - ind1, ind2 = random.sample(population, 2) - value, _ = mate(ind1.value, ind2.value) - offspring.append(Member(value)) - elif op_choice < p_mate + p_mutate: - # Apply mutation - ind = random.choice(population) - value = mutate(ind.value) - offspring.append(Member(value)) - else: - # Apply reproduction - offspring.append(random.choice(population)) + pairs = np.random.randint(0, len(population), size=[2, num_offspring]) + rand = np.random.random(len(population)) - return offspring + def _fn(a: int, b: int, r: float): + if r < p_mate: return Member(mate_fn(population[a].value, population[b].value)[0]) # Apply crossover + elif r < p_mate + p_mutate: return Member(mutate_fn(population[a].value)) # Apply mutation + else: return population[a] # Apply reproduction + + # np.vectorize can help, but only about 10% faster for large populations, and 3x slower for tiny populations + return [_fn(a, b, r) for a, b, r in zip(pairs[0], pairs[1], rand)] + + +# ========================================================================= # +# Gen & Select # +# ========================================================================= # + + +# def factory_ea_alg( +# mate_fn, +# mutate_fn, +# select_fn, +# mode: str = 'simple', +# p_mate: float = 0.5, +# p_mutate: float = 0.5, +# offspring_num: int = 128, +# population_num: int = 128, +# ): +# if mode == 'simple': +# def _generate(population): return apply_mate_and_mutate(population=select_fn(population, len(population)), p_mate=p_mate, mate=mate_fn, p_mutate=p_mutate, mutate=mutate_fn) +# def _select(population, offspring): return offspring +# elif mode == 'mu_plus_lambda': +# def _generate(population): return apply_mate_or_mutate_or_reproduce(population, num_offspring=offspring_num, p_mate=p_mate, mate=mate_fn, p_mutate=p_mutate, mutate=mutate_fn) +# def _select(population, offspring): return select_fn(population + 
offspring, population_num) +# elif mode == 'mu_comma_lambda': +# def _generate(population): return apply_mate_or_mutate_or_reproduce(population, num_offspring=offspring_num, p_mate=p_mate, mate=mate_fn, p_mutate=p_mutate, mutate=mutate_fn) +# def _select(population, offspring): return select_fn(offspring, population_num) +# else: +# raise KeyError(f'invalid mode: {repr(mode)}') +# return _generate, _select + + +# # ========================================================================= # +# # END # +# # ========================================================================= # From 7381c726df81439a4f6e3955eaad0431481ce6cc Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Thu, 23 Sep 2021 18:12:45 +0200 Subject: [PATCH 04/18] adjust api for easier multi-threading --- examples/onemax.py | 17 ++++++--------- ruck/_module.py | 15 ++++++++----- ruck/_train.py | 43 ++++++++++++++++++++++---------------- ruck/functional/_helper.py | 8 +++---- ruck/functional/_select.py | 2 +- 5 files changed, 46 insertions(+), 39 deletions(-) diff --git a/examples/onemax.py b/examples/onemax.py index e3b8b34..0879b8b 100644 --- a/examples/onemax.py +++ b/examples/onemax.py @@ -37,7 +37,6 @@ class OneMaxModule(EaModule): def __init__( self, - generations: int = 40, population_size: int = 300, member_size: int = 100, p_mate: float = 0.5, @@ -46,17 +45,13 @@ def __init__( super().__init__() self.save_hyperparameters() - @property - def num_generations(self) -> int: - return self.hparams.generations - def gen_starting_population(self) -> PopulationHint: return [ Member(np.random.random(self.hparams.member_size) < 0.5) for _ in range(self.hparams.population_size) ] - def evaluate_member(self, value: np.ndarray) -> float: + def evaluate_value(self, value: np.ndarray) -> float: # this is a large reason why the deap version is slow, # it does not make use of numpy operations return value.sum() @@ -88,11 +83,11 @@ def select_population(self, population: PopulationHint, offspring: PopulationHin 
def Timer(name: str): t = time.time() yield - print(name, time.time() - t) + print(name, time.time() - t, 'seconds') with Timer('ruck:trainer'): - module = OneMaxModule(generations=40, population_size=300, member_size=100) - population, logbook, halloffame = Trainer(progress=False).fit(module) + module = OneMaxModule(population_size=300, member_size=100) + population, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) - print(logbook[0]) - print(logbook[-1]) + print('initial stats:', logbook[0]) + print('final stats:', logbook[-1]) diff --git a/ruck/_module.py b/ruck/_module.py index c3656b4..7828337 100644 --- a/ruck/_module.py +++ b/ruck/_module.py @@ -24,6 +24,7 @@ from typing import Any from typing import Dict +from typing import List from ruck._history import StatsGroup from ruck._member import PopulationHint @@ -37,17 +38,21 @@ class EaModule(HParamsMixin): - # OVERRIDE + # OVERRIDABLE DEFAULTS def get_stats_groups(self) -> Dict[str, StatsGroup]: + # additional stats to be recorded return {} def get_progress_stats(self): + # which stats are included in the progress bar return ('evals', 'fit:max',) - @property - def num_generations(self) -> int: - raise NotImplementedError + def evaluate_values(self, values: List[Any]) -> List[float]: + # we include this here so we can easily override to add multi-threading support + return [self.evaluate_value(value) for value in values] + + # REQUIRED def gen_starting_population(self) -> PopulationHint: raise NotImplementedError @@ -58,7 +63,7 @@ def generate_offspring(self, population: PopulationHint) -> PopulationHint: def select_population(self, population: PopulationHint, offspring: PopulationHint) -> PopulationHint: raise NotImplementedError - def evaluate_member(self, value: Any) -> float: + def evaluate_value(self, value: Any): raise NotImplementedError diff --git a/ruck/_train.py b/ruck/_train.py index a842a4f..c90a6b5 100644 --- a/ruck/_train.py +++ b/ruck/_train.py @@ -22,8 +22,9 @@ # 
SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ - +import itertools import logging + import numpy as np from tqdm import tqdm @@ -65,8 +66,8 @@ def _check_population(population: PopulationHint, required_size: int) -> Populat # ========================================================================= # -def _eval_sequential(population: PopulationHint, eval_fn): - return [eval_fn(member.value) for member in population] +# def _eval_sequential(population: PopulationHint, eval_fn): +# return [eval_fn(member.value) for member in population] # _evaluate_ray = ray.remote(_eval_sequential) @@ -83,14 +84,16 @@ def _eval_sequential(population: PopulationHint, eval_fn): # ========================================================================= # -def _evaluate_invalid(population: PopulationHint, eval_fn): - unevaluated = [member for member in population if not member.is_evaluated] - # get scores - scores = _eval_sequential(unevaluated, eval_fn) - # set values - for member, score in zip(unevaluated, scores): - member.fitness = score - # return number of evaluations +def _evaluate_unevaluated(module: EaModule, members: PopulationHint) -> int: + # get unevaluated members + unevaluated = [m for m in members if not m.is_evaluated] + # get fitness values + fitnesses = module.evaluate_values([m.value for m in unevaluated]) + # save fitness values + assert len(unevaluated) == len(fitnesses) + for m, f in zip(unevaluated, fitnesses): + m.fitness = f + # return the count return len(unevaluated) @@ -106,23 +109,23 @@ def yield_population_steps(module: EaModule): population = _check_population(population, required_size=population_size) # 2. 
evaluate population - evaluations = _evaluate_invalid(population, eval_fn=module.evaluate_member) + evals = _evaluate_unevaluated(module, population) # yield initial population - yield 0, population, evaluations, population + yield 0, population, population, evals # training loop - for i in range(1, module.num_generations+1): + for i in itertools.count(1): # 1. generate offspring offspring = module.generate_offspring(population) # 2. evaluate - evaluations = _evaluate_invalid(offspring, eval_fn=module.evaluate_member) + evals = _evaluate_unevaluated(module, offspring) # 3. select population = module.select_population(population, offspring) population = _check_population(population, required_size=population_size) # yield steps - yield i, offspring, evaluations, population + yield i, population, offspring, evals # ========================================================================= # @@ -134,11 +137,15 @@ class Trainer(object): def __init__( self, + generations: int = 100, progress: bool = True, history_n_best: int = 5, + offspring_generator=yield_population_steps, ): + self._generations = generations self._progress = progress self._history_n_best = history_n_best + self._offspring_generator = offspring_generator assert self._history_n_best > 0 def fit(self, module: EaModule): @@ -146,8 +153,8 @@ def fit(self, module: EaModule): # history trackers logbook, halloffame = self._create_default_trackers(module) # progress bar and training loop - with tqdm(total=module.num_generations+1, desc='generation', disable=not self._progress, ncols=120) as p: - for gen, offspring, evals, population in yield_population_steps(module): + with tqdm(total=self._generations+1, desc='generation', disable=not self._progress, ncols=120) as p: + for gen, population, offspring, evals in itertools.islice(self._offspring_generator(module), self._generations): # update statistics with new population halloffame.update(offspring) stats = logbook.record(population, gen=gen, evals=evals) diff 
--git a/ruck/functional/_helper.py b/ruck/functional/_helper.py index bcbec4f..fac5e47 100644 --- a/ruck/functional/_helper.py +++ b/ruck/functional/_helper.py @@ -22,10 +22,10 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -from ruck import Member -from ruck import PopulationHint -from ruck.functional import MateFnHint -from ruck.functional import MutateFnHint +from ruck._member import Member +from ruck._member import PopulationHint +from ruck.functional._mate import MateFnHint +from ruck.functional._mutate import MutateFnHint import random diff --git a/ruck/functional/_select.py b/ruck/functional/_select.py index 9ea0c85..f3e09cc 100644 --- a/ruck/functional/_select.py +++ b/ruck/functional/_select.py @@ -25,7 +25,7 @@ import random from typing import Callable -from ruck import PopulationHint +from ruck._member import PopulationHint from ruck._util.random import random_choice_prng From 55d9dc54b974892c0c8416e11a8d9c26c0a0e7ca Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Thu, 23 Sep 2021 18:16:13 +0200 Subject: [PATCH 05/18] renamed PopulationHint to Population --- examples/onemax.py | 8 ++++---- ruck/__init__.py | 2 +- ruck/_history.py | 8 ++++---- ruck/_member.py | 2 +- ruck/_module.py | 8 ++++---- ruck/_train.py | 6 +++--- ruck/functional/_helper.py | 18 +++++++++--------- ruck/functional/_select.py | 12 ++++++------ 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/examples/onemax.py b/examples/onemax.py index 0879b8b..9b82096 100644 --- a/examples/onemax.py +++ b/examples/onemax.py @@ -30,7 +30,7 @@ from ruck import * from ruck import EaModule -from ruck import PopulationHint +from ruck import Population class OneMaxModule(EaModule): @@ -45,7 +45,7 @@ def __init__( super().__init__() self.save_hyperparameters() - def gen_starting_population(self) -> PopulationHint: + def gen_starting_population(self) -> Population: return [ Member(np.random.random(self.hparams.member_size) < 0.5) for _ in 
range(self.hparams.population_size) @@ -56,7 +56,7 @@ def evaluate_value(self, value: np.ndarray) -> float: # it does not make use of numpy operations return value.sum() - def generate_offspring(self, population: PopulationHint) -> PopulationHint: + def generate_offspring(self, population: Population) -> Population: # Same as deap.algorithms.eaSimple which uses deap.algorithms.varAnd offspring = R.select_tournament(population, len(population), k=3) # tools.selNSGA2 # vary population @@ -68,7 +68,7 @@ def generate_offspring(self, population: PopulationHint) -> PopulationHint: p_mutate=self.hparams.p_mutate, ) - def select_population(self, population: PopulationHint, offspring: PopulationHint) -> PopulationHint: + def select_population(self, population: Population, offspring: Population) -> Population: # Same as deap.algorithms.eaSimple return offspring diff --git a/ruck/__init__.py b/ruck/__init__.py index e2a8430..e1ba995 100644 --- a/ruck/__init__.py +++ b/ruck/__init__.py @@ -25,7 +25,7 @@ # base from ruck._member import Member -from ruck._member import PopulationHint +from ruck._member import Population from ruck._module import EaModule # training diff --git a/ruck/_history.py b/ruck/_history.py index 5943c64..d881e04 100644 --- a/ruck/_history.py +++ b/ruck/_history.py @@ -30,7 +30,7 @@ from typing import List -from ruck._member import PopulationHint +from ruck._member import Population # ========================================================================= # @@ -109,7 +109,7 @@ def register_stats_group(self, name: str, stats_group: StatsGroup): self._all_ordered_keys.extend(f'{name}:{key}' for key in stats_group.keys) return self - def record(self, population: 'PopulationHint', **external_values): + def record(self, population: 'Population', **external_values): # extra stats if set(external_values.keys()) != set(self._external_keys): raise KeyError(f'required external_values: {sorted(self._external_keys)}, got: {sorted(external_values.keys())}') @@ -162,7 
+162,7 @@ def __init__(self, n_best: int = 5, maximize: bool = True): self._heap = [] # element 0 is always the smallest self._scores = {} - def update(self, population: PopulationHint): + def update(self, population: Population): best = sorted(population, key=lambda m: m.fitness, reverse=True)[:self._n_best] # add the best for member in best: @@ -181,7 +181,7 @@ def update(self, population: PopulationHint): del self._scores[removed.fitness] @property - def members(self) -> PopulationHint: + def members(self) -> Population: return [m.member for m in sorted(self._heap, reverse=True)] @property diff --git a/ruck/_member.py b/ruck/_member.py index 69152a7..79b74ff 100644 --- a/ruck/_member.py +++ b/ruck/_member.py @@ -84,7 +84,7 @@ def __repr__(self): # ========================================================================= # -PopulationHint = List[Member] +Population = List[Member] # ========================================================================= # diff --git a/ruck/_module.py b/ruck/_module.py index 7828337..4d3fb1b 100644 --- a/ruck/_module.py +++ b/ruck/_module.py @@ -27,7 +27,7 @@ from typing import List from ruck._history import StatsGroup -from ruck._member import PopulationHint +from ruck._member import Population from ruck._util.args import HParamsMixin @@ -54,13 +54,13 @@ def evaluate_values(self, values: List[Any]) -> List[float]: # REQUIRED - def gen_starting_population(self) -> PopulationHint: + def gen_starting_population(self) -> Population: raise NotImplementedError - def generate_offspring(self, population: PopulationHint) -> PopulationHint: + def generate_offspring(self, population: Population) -> Population: raise NotImplementedError - def select_population(self, population: PopulationHint, offspring: PopulationHint) -> PopulationHint: + def select_population(self, population: Population, offspring: Population) -> Population: raise NotImplementedError def evaluate_value(self, value: Any): diff --git a/ruck/_train.py b/ruck/_train.py 
index c90a6b5..cc395d9 100644 --- a/ruck/_train.py +++ b/ruck/_train.py @@ -32,7 +32,7 @@ from ruck._history import Logbook from ruck._history import StatsGroup from ruck._member import Member -from ruck._member import PopulationHint +from ruck._member import Population from ruck._module import EaModule @@ -44,7 +44,7 @@ # ========================================================================= # -def _check_population(population: PopulationHint, required_size: int) -> PopulationHint: +def _check_population(population: Population, required_size: int) -> Population: assert len(population) > 0, 'population must not be empty' assert len(population) == required_size, 'population size is invalid' assert all(isinstance(member, Member) for member in population), 'items in population are not members' @@ -84,7 +84,7 @@ def _check_population(population: PopulationHint, required_size: int) -> Populat # ========================================================================= # -def _evaluate_unevaluated(module: EaModule, members: PopulationHint) -> int: +def _evaluate_unevaluated(module: EaModule, members: Population) -> int: # get unevaluated members unevaluated = [m for m in members if not m.is_evaluated] # get fitness values diff --git a/ruck/functional/_helper.py b/ruck/functional/_helper.py index fac5e47..8e4c386 100644 --- a/ruck/functional/_helper.py +++ b/ruck/functional/_helper.py @@ -23,7 +23,7 @@ # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ from ruck._member import Member -from ruck._member import PopulationHint +from ruck._member import Population from ruck.functional._mate import MateFnHint from ruck.functional._mutate import MutateFnHint @@ -38,10 +38,10 @@ def apply_mate( - population: PopulationHint, + population: Population, mate_fn: MateFnHint, p: float = 0.5, -) -> PopulationHint: +) -> Population: # randomize order so we have randomized pairs offspring = list(population) np.random.shuffle(offspring) @@ -55,10 +55,10 @@ 
def apply_mate( def apply_mutate( - population: PopulationHint, + population: Population, mutate_fn: MutateFnHint, p: float = 0.5, -) -> PopulationHint: +) -> Population: elem_mask = np.random.random(size=len(population)) < p # apply mutate to population return [ @@ -68,12 +68,12 @@ def apply_mutate( def apply_mate_and_mutate( - population: PopulationHint, + population: Population, mate_fn: MateFnHint, mutate_fn: MutateFnHint, p_mate: float = 0.5, p_mutate: float = 0.5, -) -> PopulationHint: +) -> Population: """ Apply crossover AND mutation @@ -89,13 +89,13 @@ def apply_mate_and_mutate( def apply_mate_or_mutate_or_reproduce( - population: PopulationHint, + population: Population, num_offspring: int, # lambda_ mate_fn: MateFnHint, mutate_fn: MutateFnHint, p_mate: float = 0.5, p_mutate: float = 0.5, -) -> PopulationHint: +) -> Population: """ Apply crossover OR mutation OR reproduction diff --git a/ruck/functional/_select.py b/ruck/functional/_select.py index f3e09cc..b9b8804 100644 --- a/ruck/functional/_select.py +++ b/ruck/functional/_select.py @@ -25,7 +25,7 @@ import random from typing import Callable -from ruck._member import PopulationHint +from ruck._member import Population from ruck._util.random import random_choice_prng @@ -34,22 +34,22 @@ # ========================================================================= # -SelectFnHint = Callable[[PopulationHint, int], PopulationHint] +SelectFnHint = Callable[[Population, int], Population] -def select_best(population: PopulationHint, num: int) -> PopulationHint: +def select_best(population: Population, num: int) -> Population: return sorted(population, key=lambda m: m.fitness, reverse=True)[:num] -def select_worst(population: PopulationHint, num: int) -> PopulationHint: +def select_worst(population: Population, num: int) -> Population: return sorted(population, key=lambda m: m.fitness, reverse=False)[:num] -def select_random(population: PopulationHint, num: int) -> PopulationHint: +def select_random(population: 
Population, num: int) -> Population: return random_choice_prng(population, size=num, replace=False) -def select_tournament(population: PopulationHint, num: int, k: int = 3) -> PopulationHint: +def select_tournament(population: Population, num: int, k: int = 3) -> Population: key = lambda m: m.fitness return [ max(random.sample(population, k=k), key=key) From 4024343e07882d132dd1bea5a2f6c4d9c957a7d8 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Fri, 24 Sep 2021 12:32:16 +0200 Subject: [PATCH 06/18] typing --- examples/onemax.py | 32 +++++++++++---- ruck/_history.py | 35 +++++++++-------- ruck/_member.py | 34 +++++++++++----- ruck/_module.py | 26 +++++++------ ruck/_train.py | 47 +++++++---------------- ruck/functional/_select.py | 2 +- ruck/{_util => util}/__init__.py | 8 ++++ ruck/{_util/args.py => util/_args.py} | 0 ruck/{_util/random.py => util/_random.py} | 0 9 files changed, 105 insertions(+), 79 deletions(-) rename ruck/{_util => util}/__init__.py (87%) rename ruck/{_util/args.py => util/_args.py} (100%) rename ruck/{_util/random.py => util/_random.py} (100%) diff --git a/examples/onemax.py b/examples/onemax.py index 9b82096..4ead9d0 100644 --- a/examples/onemax.py +++ b/examples/onemax.py @@ -22,9 +22,9 @@ # SOFTWARE. 
# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -import contextlib import logging -import time +from typing import Any +from typing import List import numpy as np @@ -33,8 +33,14 @@ from ruck import Population +# ========================================================================= # +# Module # +# ========================================================================= # + + class OneMaxModule(EaModule): + def __init__( self, population_size: int = 300, @@ -45,17 +51,17 @@ def __init__( super().__init__() self.save_hyperparameters() + def evaluate_values(self, values: List[Any]) -> List[float]: + # this is a large reason why the deap version is slow, + # it does not make use of numpy operations + return [value.sum() for value in values] + def gen_starting_population(self) -> Population: return [ Member(np.random.random(self.hparams.member_size) < 0.5) for _ in range(self.hparams.population_size) ] - def evaluate_value(self, value: np.ndarray) -> float: - # this is a large reason why the deap version is slow, - # it does not make use of numpy operations - return value.sum() - def generate_offspring(self, population: Population) -> Population: # Same as deap.algorithms.eaSimple which uses deap.algorithms.varAnd offspring = R.select_tournament(population, len(population), k=3) # tools.selNSGA2 @@ -73,6 +79,11 @@ def select_population(self, population: Population, offspring: Population) -> Po return offspring +# ========================================================================= # +# Main # +# ========================================================================= # + + if __name__ == '__main__': # about 18x faster than deap's numpy onemax example (0.145s vs 2.6s) # -- https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py @@ -87,7 +98,12 @@ def Timer(name: str): with Timer('ruck:trainer'): module = OneMaxModule(population_size=300, member_size=100) - population, logbook, halloffame = Trainer(generations=40, 
progress=False).fit(module) + pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) print('initial stats:', logbook[0]) print('final stats:', logbook[-1]) + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/_history.py b/ruck/_history.py index d881e04..723b48e 100644 --- a/ruck/_history.py +++ b/ruck/_history.py @@ -27,19 +27,24 @@ from typing import Any from typing import Callable from typing import Dict +from typing import Generic from typing import List - +from typing import TypeVar from ruck._member import Population +T = TypeVar('T') +V = TypeVar('V') + + # ========================================================================= # # Type Hints # # ========================================================================= # -ValueFnHint = Callable[[Any], Any] -StatFnHint = Callable[[Any], Any] +ValueFnHint = Callable[[T], V] +StatFnHint = Callable[[V], Any] # ========================================================================= # @@ -47,9 +52,9 @@ # ========================================================================= # -class StatsGroup(object): +class StatsGroup(Generic[T, V]): - def __init__(self, value_fn: ValueFnHint = None, **stats_fns: StatFnHint): + def __init__(self, value_fn: ValueFnHint[T, V] = None, **stats_fns: StatFnHint[V]): assert all(str.isidentifier(key) for key in stats_fns.keys()) assert stats_fns self._value_fn = value_fn @@ -59,7 +64,7 @@ def __init__(self, value_fn: ValueFnHint = None, **stats_fns: StatFnHint): def keys(self) -> List[str]: return list(self._stats_fns.keys()) - def compute(self, value: Any) -> Dict[str, Any]: + def compute(self, value: T) -> Dict[str, Any]: if self._value_fn is not None: value = self._value_fn(value) return { @@ -68,9 +73,9 @@ def compute(self, value: Any) -> Dict[str, Any]: } -class Logbook(object): +class Logbook(Generic[T]): - def 
__init__(self, *external_keys: str, **stats_groups: StatsGroup): + def __init__(self, *external_keys: str, **stats_groups: StatsGroup[T, Any]): self._all_ordered_keys = [] self._external_keys = [] self._stats_groups = {} @@ -100,7 +105,7 @@ def register_external_stat(self, name: str): self._all_ordered_keys.append(name) return self - def register_stats_group(self, name: str, stats_group: StatsGroup): + def register_stats_group(self, name: str, stats_group: StatsGroup[T, Any]): self._assert_key_available(self._assert_key_available(name)) assert isinstance(stats_group, StatsGroup) assert stats_group not in self._stats_groups.values() @@ -109,7 +114,7 @@ def register_stats_group(self, name: str, stats_group: StatsGroup): self._all_ordered_keys.extend(f'{name}:{key}' for key in stats_group.keys) return self - def record(self, population: 'Population', **external_values): + def record(self, population: Population[T], **external_values): # extra stats if set(external_values.keys()) != set(self._external_keys): raise KeyError(f'required external_values: {sorted(self._external_keys)}, got: {sorted(external_values.keys())}') @@ -153,7 +158,7 @@ class HallOfFameItem: member: Any = dataclasses.field(compare=False) -class HallOfFame(object): +class HallOfFame(Generic[T]): def __init__(self, n_best: int = 5, maximize: bool = True): self._maximize = maximize @@ -162,7 +167,7 @@ def __init__(self, n_best: int = 5, maximize: bool = True): self._heap = [] # element 0 is always the smallest self._scores = {} - def update(self, population: Population): + def update(self, population: Population[T]): best = sorted(population, key=lambda m: m.fitness, reverse=True)[:self._n_best] # add the best for member in best: @@ -181,15 +186,15 @@ def update(self, population: Population): del self._scores[removed.fitness] @property - def members(self) -> Population: + def members(self) -> Population[T]: return [m.member for m in sorted(self._heap, reverse=True)] @property - def values(self) -> 
List[Any]: + def values(self) -> List[T]: return [m.value for m in self.members] @property - def scores(self) -> List[Any]: + def scores(self) -> List[float]: return [m.fitness for m in self.members] diff --git a/ruck/_member.py b/ruck/_member.py index 79b74ff..9864e3a 100644 --- a/ruck/_member.py +++ b/ruck/_member.py @@ -22,8 +22,10 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -from typing import Any +from typing import Generic from typing import List +from typing import Optional +from typing import TypeVar import numpy as np @@ -41,29 +43,43 @@ class MemberAlreadyEvaluatedError(Exception): pass -class Member(object): +T = TypeVar('T') - def __init__(self, value: Any): + +class Member(Generic[T]): + + def __init__(self, value: T, fitness: float = None): self._value = value self._fitness = None + # set fitness + if fitness is not None: + self.fitness = fitness @property - def value(self) -> Any: + def value(self) -> T: return self._value @property - def fitness(self): + def fitness_unsafe(self) -> Optional[float]: + return self._fitness + + @property + def fitness(self) -> float: if not self.is_evaluated: raise MemberIsNotEvaluatedError('The member has not been evaluated, the fitness has not yet been set.') return self._fitness @fitness.setter - def fitness(self, value): + def fitness(self, fitness: float): if self.is_evaluated: raise MemberAlreadyEvaluatedError('The member has already been evaluated, the fitness can only ever be set once. 
Create a new member instead!') - if np.isnan(value): + if np.isnan(fitness): raise ValueError('fitness values cannot be NaN, this is an error!') - self._fitness = value + self._fitness = float(fitness) + + def set_fitness(self, fitness: float) -> 'Member[T]': + self.fitness = fitness + return self @property def is_evaluated(self) -> bool: @@ -84,7 +100,7 @@ def __repr__(self): # ========================================================================= # -Population = List[Member] +Population = List[Member[T]] # ========================================================================= # diff --git a/ruck/_module.py b/ruck/_module.py index 4d3fb1b..34c4f06 100644 --- a/ruck/_module.py +++ b/ruck/_module.py @@ -24,11 +24,14 @@ from typing import Any from typing import Dict +from typing import Generic from typing import List +from typing import Sequence +from typing import TypeVar from ruck._history import StatsGroup from ruck._member import Population -from ruck._util.args import HParamsMixin +from ruck.util._args import HParamsMixin # ========================================================================= # @@ -36,34 +39,33 @@ # ========================================================================= # -class EaModule(HParamsMixin): +T = TypeVar('T') + + +class EaModule(Generic[T], HParamsMixin): # OVERRIDABLE DEFAULTS - def get_stats_groups(self) -> Dict[str, StatsGroup]: + def get_stats_groups(self) -> Dict[str, StatsGroup[T, Any]]: # additional stats to be recorded return {} - def get_progress_stats(self): + def get_progress_stats(self) -> Sequence[str]: # which stats are included in the progress bar return ('evals', 'fit:max',) - def evaluate_values(self, values: List[Any]) -> List[float]: - # we include this here so we can easily override to add multi-threading support - return [self.evaluate_value(value) for value in values] - # REQUIRED - def gen_starting_population(self) -> Population: + def gen_starting_population(self) -> Population[T]: raise 
NotImplementedError - def generate_offspring(self, population: Population) -> Population: + def generate_offspring(self, population: Population[T]) -> Population[T]: raise NotImplementedError - def select_population(self, population: Population, offspring: Population) -> Population: + def select_population(self, population: Population[T], offspring: Population[T]) -> Population[T]: raise NotImplementedError - def evaluate_value(self, value: Any): + def evaluate_values(self, values: List[T]) -> List[float]: raise NotImplementedError diff --git a/ruck/_train.py b/ruck/_train.py index cc395d9..4eb327b 100644 --- a/ruck/_train.py +++ b/ruck/_train.py @@ -24,6 +24,10 @@ import itertools import logging +from typing import Generic +from typing import Iterator +from typing import Tuple +from typing import TypeVar import numpy as np from tqdm import tqdm @@ -39,52 +43,27 @@ log = logging.getLogger(__name__) +T = TypeVar('T') + + # ========================================================================= # # Utils Trainer # # ========================================================================= # -def _check_population(population: Population, required_size: int) -> Population: +def _check_population(population: Population[T], required_size: int) -> Population[T]: assert len(population) > 0, 'population must not be empty' assert len(population) == required_size, 'population size is invalid' assert all(isinstance(member, Member) for member in population), 'items in population are not members' return population -# def _get_batch_size(total: int) -> int: -# resources = ray.available_resources() -# if 'CPU' not in resources: -# return total -# else: -# cpus = int(resources['CPU']) -# batch_size = (total + cpus - 1) // cpus -# return batch_size - - -# ========================================================================= # -# Evaluate Helper # -# ========================================================================= # - - -# def _eval_sequential(population: 
PopulationHint, eval_fn): -# return [eval_fn(member.value) for member in population] - - -# _evaluate_ray = ray.remote(_eval_sequential) - - -# def _eval_multiproc(population: PopulationHint, eval_fn): -# member_batches = iter_chunks(population, chunk_size=_get_batch_size(len(population))) -# score_batches = ray.get([_evaluate_ray.remote(members, eval_fn=eval_fn) for members in member_batches]) -# return [score for score_batch in score_batches for score in score_batch] - - # ========================================================================= # # Evaluate Invalid # # ========================================================================= # -def _evaluate_unevaluated(module: EaModule, members: Population) -> int: +def _evaluate_unevaluated(module: EaModule[T], members: Population[T]) -> int: # get unevaluated members unevaluated = [m for m in members if not m.is_evaluated] # get fitness values @@ -102,7 +81,7 @@ def _evaluate_unevaluated(module: EaModule, members: Population) -> int: # ========================================================================= # -def yield_population_steps(module: EaModule): +def yield_population_steps(module: EaModule[T]) -> Iterator[Tuple[int, Population[T], Population[T], int]]: # 1. 
create population population = module.gen_starting_population() population_size = len(population) @@ -133,7 +112,7 @@ def yield_population_steps(module: EaModule): # ========================================================================= # -class Trainer(object): +class Trainer(Generic[T]): def __init__( self, @@ -148,7 +127,7 @@ def __init__( self._offspring_generator = offspring_generator assert self._history_n_best > 0 - def fit(self, module: EaModule): + def fit(self, module: EaModule[T]) -> Tuple[Population[T], Logbook[T], HallOfFame[T]]: assert isinstance(module, EaModule) # history trackers logbook, halloffame = self._create_default_trackers(module) @@ -164,7 +143,7 @@ def fit(self, module: EaModule): # done return population, logbook, halloffame - def _create_default_trackers(self, module: EaModule): + def _create_default_trackers(self, module: EaModule[T]) -> Tuple[Logbook[T], HallOfFame[T]]: halloffame = HallOfFame( n_best=self._history_n_best, maximize=True, diff --git a/ruck/functional/_select.py b/ruck/functional/_select.py index b9b8804..c07847e 100644 --- a/ruck/functional/_select.py +++ b/ruck/functional/_select.py @@ -26,7 +26,7 @@ from typing import Callable from ruck._member import Population -from ruck._util.random import random_choice_prng +from ruck.util._random import random_choice_prng # ========================================================================= # diff --git a/ruck/_util/__init__.py b/ruck/util/__init__.py similarity index 87% rename from ruck/_util/__init__.py rename to ruck/util/__init__.py index 9a05a47..4e01d72 100644 --- a/ruck/_util/__init__.py +++ b/ruck/util/__init__.py @@ -21,3 +21,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + + +from ruck.util._timer import Timer + +from ruck.util._ray import splits +from ruck.util._ray import chained +from ruck.util._ray import ray_map +from ruck.util._ray import ray_map_chunks diff --git a/ruck/_util/args.py b/ruck/util/_args.py similarity index 100% rename from ruck/_util/args.py rename to ruck/util/_args.py diff --git a/ruck/_util/random.py b/ruck/util/_random.py similarity index 100% rename from ruck/_util/random.py rename to ruck/util/_random.py From d40b10aaacdaed77de33f19a9f570ca871a04767 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Fri, 24 Sep 2021 12:32:26 +0200 Subject: [PATCH 07/18] timer utils --- examples/onemax.py | 7 +------ ruck/util/_timer.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 ruck/util/_timer.py diff --git a/examples/onemax.py b/examples/onemax.py index 4ead9d0..3eb35c1 100644 --- a/examples/onemax.py +++ b/examples/onemax.py @@ -31,6 +31,7 @@ from ruck import * from ruck import EaModule from ruck import Population +from ruck.util import Timer # ========================================================================= # @@ -90,12 +91,6 @@ def select_population(self, population: Population, offspring: Population) -> Po logging.basicConfig(level=logging.INFO) - @contextlib.contextmanager - def Timer(name: str): - t = time.time() - yield - print(name, time.time() - t, 'seconds') - with Timer('ruck:trainer'): module = OneMaxModule(population_size=300, member_size=100) pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) diff --git a/ruck/util/_timer.py b/ruck/util/_timer.py new file mode 100644 index 0000000..16cc089 --- /dev/null +++ b/ruck/util/_timer.py @@ -0,0 +1,43 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby 
granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +import contextlib +import time + + +# ========================================================================= # +# Timer # +# ========================================================================= # + + +@contextlib.contextmanager +def Timer(name: str): + t = time.time() + yield + print(name, time.time() - t, 'seconds') + + +# ========================================================================= # +# lists # +# ========================================================================= # From c2f98e8a8ea67e77e6fc75a6d9b2cf0762a22b97 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Fri, 24 Sep 2021 12:32:37 +0200 Subject: [PATCH 08/18] ray utils --- examples/onemax_ray.py | 181 +++++++++++++++++++++++++++++++++++++++++ ruck/util/_ray.py | 91 +++++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 examples/onemax_ray.py create mode 100644 ruck/util/_ray.py diff --git a/examples/onemax_ray.py b/examples/onemax_ray.py new file mode 100644 index 0000000..7d6d4e7 --- /dev/null +++ b/examples/onemax_ray.py @@ -0,0 +1,181 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +import logging +import random +from typing import Any +from typing import List +from typing import Tuple + +import numpy as np +import ray +from ray import ObjectRef + +from ruck import * +from ruck import EaModule +from ruck import Population +from ruck.util import chained +from ruck.util import ray_map +from ruck.util import Timer + + +# ========================================================================= # +# Module # +# ========================================================================= # + + +class OneMaxModule(EaModule): + + def __init__( + self, + population_size: int = 300, + member_size: int = 100, + p_mate: float = 0.5, + p_mutate: float = 0.5, + ): + super().__init__() + self.save_hyperparameters() + + def gen_starting_population(self) -> Population: + # 2.0317113399505615 + return [ + Member(ray.put(np.random.random(self.hparams.member_size) < 0.5)) + for _ in range(self.hparams.population_size) + ] + + def generate_offspring(self, population: Population) -> Population: + # HACK + # 0.027140140533447266 + # population = [Member(ray.get(m.value), m.fitness) for m in population] + # Same as deap.algorithms.eaSimple which uses deap.algorithms.varAnd + # 0.0007593631744384766 + offspring = R.select_tournament(population, len(population), k=3) # tools.selNSGA2 + # vary population + # 0.7187347412109375 + + @ray.remote + def mate_crossover_1d(a, b) -> Tuple[ObjectRef, ObjectRef]: + a, b = 
R.mate_crossover_1d(a, b) + return ray.put(a), ray.put(b) + + @ray.remote + def mutate_flip_bits(a) -> ObjectRef: + a = R.mutate_flip_bits(a, p=0.05) + return ray.put(a) + + with Timer('vary'): + # mate + random.shuffle(offspring) + futures, positions = [], [] + for i, (a, b) in enumerate(zip(offspring[0::2], offspring[1::2])): + if random.random() < self.hparams.p_mate: + futures.append(mate_crossover_1d.remote(a.value, b.value)) + positions.append(i) + for i, (a, b) in zip(positions, ray.get(futures)): + offspring[i*2+0] = Member(a) # why does this step slow things down so much? + offspring[i*2+1] = Member(b) # why does this step slow things down so much? + + # mutate + futures, positions = [], [] + for i, a in enumerate(offspring): + if random.random() < self.hparams.p_mutate: + futures.append(mutate_flip_bits.remote(a.value)) + for i, a in zip(positions, ray.get(futures)): + print(a) + offspring[i] = Member(a) # why does this step slow things down so much? + + # offspring = R.apply_mate_and_mutate( + # population=offspring, + # mate_fn=lambda a, b: R.mate_crossover_1d, + # mutate_fn=lambda a: ray.put(R.mutate_flip_bits(ray.get(a), p=0.05)), + # p_mate=self.hparams.p_mate, + # p_mutate=self.hparams.p_mutate, + # ) + # HACK + # 0.13915061950683594 + # offspring = [Member(ray.put(m.value), m.fitness_unsafe) for m in offspring] + # done + return offspring + + def select_population(self, population: Population, offspring: Population) -> Population: + # Same as deap.algorithms.eaSimple + return offspring + + def evaluate_values(self, values: List[Any]) -> List[float]: + # 0.1165781021118164 + return ray_map(np.mean, values) + + +# @ray.remote +# def evaluate(value): +# return value.std() + # return [ray.get(value_id).std() for value_id in values] + +# @ray.remote +# class RayWorker(object): +# +# def gen_starting_population(self) -> Population: +# pass +# +# def generate_offspring(self, population: Population) -> Population: +# pass +# +# def select_population(self, 
population: Population, offspring: Population) -> Population: +# pass +# +# def evaluate_values(self, values: List[Any]) -> List[float]: +# pass + + +# class RayManager(): +# +# def __init__(self, num_workers: int = None): +# if num_workers is None: +# num_workers = ray.available_resources().get('CPU', 1) + + +# ========================================================================= # +# Main # +# ========================================================================= # + + +if __name__ == '__main__': + # about 18x faster than deap's numpy onemax example (0.145s vs 2.6s) + # -- https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py + + logging.basicConfig(level=logging.INFO) + + ray.init(num_cpus=128) + + with Timer('ruck:trainer'): + module = OneMaxModule(population_size=512, member_size=1_000_000) + pop, logbook, halloffame = Trainer(generations=1000, progress=True).fit(module) + + print('initial stats:', logbook[0]) + print('final stats:', logbook[-1]) + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/util/_ray.py b/ruck/util/_ray.py new file mode 100644 index 0000000..8619178 --- /dev/null +++ b/ruck/util/_ray.py @@ -0,0 +1,91 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies 
or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +import functools +from typing import Any +from typing import List +from typing import Protocol +from typing import Sequence + +import numpy as np +import ray +from ray.remote_function import RemoteFunction + + +# ========================================================================= # +# lists # +# ========================================================================= # + + +def chained(list_of_lists: List[List[Any]]) -> List[Any]: + return [item for items in list_of_lists for item in items] + + +def splits(items: List[Any], num_chunks: int = None, keep_empty: bool = False) -> List[List[Any]]: + if num_chunks is None: + num_chunks = _cpus() + if not keep_empty: + num_chunks = min(num_chunks, len(items)) + return [list(items) for items in np.array_split(items, num_chunks)] + + +# ========================================================================= # +# ray # +# ========================================================================= # + + +class _RayFnHint(Protocol): + def remote(self, *args, **kwargs) -> Any: + pass + def __call__(self, *args, **kwargs) -> Any: + pass + + +@functools.lru_cache(maxsize=16) +def _to_remote_ray_fn(fn): + if not isinstance(fn, RemoteFunction): + fn = ray.remote(fn) + return fn + + +@functools.lru_cache() +def _cpus(): + return ray.available_resources().get('CPU', 1) + + +def ray_map(ray_fn: _RayFnHint, items: Sequence[Any]) 
-> List[Any]: + # make sure the function is a remote function + ray_fn = _to_remote_ray_fn(ray_fn) + # pass each item to ray and wait for the result + return ray.get(list(map(ray_fn.remote, items))) + + +def ray_map_chunks(ray_fn: _RayFnHint, items: List[Any], num_chunks: int = None) -> List[Any]: + # split items into chunks, and pass each chunk to function, then chain results back together + return chained(ray_map(ray_fn, splits(items, num_chunks=num_chunks))) + + +# ========================================================================= # +# END # +# ========================================================================= # From bc3fb6ad01bb75b7c582d7cf21d7e5860b843fff Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Fri, 24 Sep 2021 17:34:36 +0200 Subject: [PATCH 09/18] function checkers + api change + --- examples/onemax.py | 13 ++++----- ruck/_module.py | 2 +- ruck/_train.py | 4 +-- ruck/functional/__init__.py | 2 +- ruck/functional/{_helper.py => _algorithm.py} | 0 ruck/functional/_mate.py | 19 ++++++++++++- ruck/functional/_mutate.py | 19 ++++++++++++- ruck/functional/_select.py | 25 +++++++++++++++-- ruck/util/__init__.py | 4 +-- ruck/util/_args.py | 10 +++++++ ruck/util/{_random.py => _iter.py} | 28 +++++++++++-------- ruck/util/_ray.py | 19 ++----------- 12 files changed, 98 insertions(+), 47 deletions(-) rename ruck/functional/{_helper.py => _algorithm.py} (100%) rename ruck/util/{_random.py => _iter.py} (71%) diff --git a/examples/onemax.py b/examples/onemax.py index 3eb35c1..dac83af 100644 --- a/examples/onemax.py +++ b/examples/onemax.py @@ -41,7 +41,6 @@ class OneMaxModule(EaModule): - def __init__( self, population_size: int = 300, @@ -52,16 +51,14 @@ def __init__( super().__init__() self.save_hyperparameters() - def evaluate_values(self, values: List[Any]) -> List[float]: + def evaluate_values(self, values: List[Any]): # this is a large reason why the deap version is slow, # it does not make use of numpy operations - return [value.sum() for 
value in values] + return map(np.sum, values) - def gen_starting_population(self) -> Population: - return [ - Member(np.random.random(self.hparams.member_size) < 0.5) - for _ in range(self.hparams.population_size) - ] + def gen_starting_values(self) -> Population: + for _ in range(self.hparams.population_size): + yield np.random.random(self.hparams.member_size) < 0.5 def generate_offspring(self, population: Population) -> Population: # Same as deap.algorithms.eaSimple which uses deap.algorithms.varAnd diff --git a/ruck/_module.py b/ruck/_module.py index 34c4f06..8ae8c64 100644 --- a/ruck/_module.py +++ b/ruck/_module.py @@ -56,7 +56,7 @@ def get_progress_stats(self) -> Sequence[str]: # REQUIRED - def gen_starting_population(self) -> Population[T]: + def gen_starting_values(self) -> List[T]: raise NotImplementedError def generate_offspring(self, population: Population[T]) -> Population[T]: diff --git a/ruck/_train.py b/ruck/_train.py index 4eb327b..4bd611e 100644 --- a/ruck/_train.py +++ b/ruck/_train.py @@ -67,7 +67,7 @@ def _evaluate_unevaluated(module: EaModule[T], members: Population[T]) -> int: # get unevaluated members unevaluated = [m for m in members if not m.is_evaluated] # get fitness values - fitnesses = module.evaluate_values([m.value for m in unevaluated]) + fitnesses = list(module.evaluate_values([m.value for m in unevaluated])) # save fitness values assert len(unevaluated) == len(fitnesses) for m, f in zip(unevaluated, fitnesses): @@ -83,7 +83,7 @@ def _evaluate_unevaluated(module: EaModule[T], members: Population[T]) -> int: def yield_population_steps(module: EaModule[T]) -> Iterator[Tuple[int, Population[T], Population[T], int]]: # 1. 
create population - population = module.gen_starting_population() + population = [Member(m) for m in module.gen_starting_values()] population_size = len(population) population = _check_population(population, required_size=population_size) diff --git a/ruck/functional/__init__.py b/ruck/functional/__init__.py index be45628..f18cc3d 100644 --- a/ruck/functional/__init__.py +++ b/ruck/functional/__init__.py @@ -27,4 +27,4 @@ from ruck.functional._select import * # helper -- should be replaced -from ruck.functional._helper import * +from ruck.functional._algorithm import * diff --git a/ruck/functional/_helper.py b/ruck/functional/_algorithm.py similarity index 100% rename from ruck/functional/_helper.py rename to ruck/functional/_algorithm.py diff --git a/ruck/functional/_mate.py b/ruck/functional/_mate.py index 64de482..9d37726 100644 --- a/ruck/functional/_mate.py +++ b/ruck/functional/_mate.py @@ -22,6 +22,7 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +from functools import wraps from typing import Any from typing import Callable from typing import Tuple @@ -29,13 +30,29 @@ # ========================================================================= # -# Mate # +# Mate Helper # # ========================================================================= # MateFnHint = Callable[[Any, Any], Tuple[Any, Any]] +def check_mating(fn): + @wraps(fn) + def wrapper(value_a, value_b, *args, **kwargs): + mated_a, mated_b = fn(value_a, value_b, *args, **kwargs) + assert mated_a not in (value_a, value_b), f'Mate function: {fn} should return new values' + assert mated_b not in (value_a, value_b), f'Mate function: {fn} should return new values' + return mated_a, mated_b + return wrapper + + +# ========================================================================= # +# Mate # +# ========================================================================= # + + +@check_mating def mate_crossover_1d(a: np.ndarray, b: np.ndarray) -> 
Tuple[np.ndarray, np.ndarray]: assert a.ndim == 1 assert a.shape == b.shape diff --git a/ruck/functional/_mutate.py b/ruck/functional/_mutate.py index 5fffd19..648a110 100644 --- a/ruck/functional/_mutate.py +++ b/ruck/functional/_mutate.py @@ -22,6 +22,7 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +from functools import wraps from typing import Any from typing import Callable @@ -29,17 +30,33 @@ # ========================================================================= # -# Mutate # +# Mutate Helper # # ========================================================================= # MutateFnHint = Callable[[Any], Any] +def check_mutation(fn): + @wraps(fn) + def wrapper(value, *args, **kwargs): + mutated = fn(value, *args, **kwargs) + assert mutated is not value, f'Mutate function: {fn} should return a new value' + return mutated + return wrapper + + +# ========================================================================= # +# Mutate # +# ========================================================================= # + + +@check_mutation def mutate_flip_bits(a: np.ndarray, p: float = 0.05): return a ^ (np.random.random(a.shape) < p) +@check_mutation def mutate_flip_bit_types(a: np.ndarray, p: float = 0.05): if np.random.random() < 0.5: # flip set bits diff --git a/ruck/functional/_select.py b/ruck/functional/_select.py index c07847e..d19c710 100644 --- a/ruck/functional/_select.py +++ b/ruck/functional/_select.py @@ -23,32 +23,51 @@ # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ import random +from functools import wraps from typing import Callable from ruck._member import Population -from ruck.util._random import random_choice_prng # ========================================================================= # -# Select # +# Select Helper # # ========================================================================= # SelectFnHint = Callable[[Population, int], Population] +def 
check_selection(fn): + @wraps(fn) + def wrapper(population: Population, num: int): + selected = fn(population, num) + assert selected is not population, f'Select function: {fn} should return a new list' + assert len(selected) == num, f'Select function: {fn} returned an incorrect number of elements, got: {len(selected)}, should be: {num}' + return selected + return wrapper + + +# ========================================================================= # +# Select # +# ========================================================================= # + + +@check_selection def select_best(population: Population, num: int) -> Population: return sorted(population, key=lambda m: m.fitness, reverse=True)[:num] +@check_selection def select_worst(population: Population, num: int) -> Population: return sorted(population, key=lambda m: m.fitness, reverse=False)[:num] +@check_selection def select_random(population: Population, num: int) -> Population: - return random_choice_prng(population, size=num, replace=False) + return random.sample(population, k=num) +@check_selection def select_tournament(population: Population, num: int, k: int = 3) -> Population: key = lambda m: m.fitness return [ diff --git a/ruck/util/__init__.py b/ruck/util/__init__.py index 4e01d72..b37cac7 100644 --- a/ruck/util/__init__.py +++ b/ruck/util/__init__.py @@ -25,7 +25,7 @@ from ruck.util._timer import Timer -from ruck.util._ray import splits -from ruck.util._ray import chained +from ruck.util._iter import splits +from ruck.util._iter import chained from ruck.util._ray import ray_map from ruck.util._ray import ray_map_chunks diff --git a/ruck/util/_args.py b/ruck/util/_args.py index 4d25765..c74ec6d 100644 --- a/ruck/util/_args.py +++ b/ruck/util/_args.py @@ -27,6 +27,11 @@ from typing import Sequence +# ========================================================================= # +# Hyper Parameters # +# ========================================================================= # + + class 
HParamsMixin(object): __hparams = None @@ -65,3 +70,8 @@ def save_hyperparameters(self, ignore: Optional[Sequence[str]] = None, include: @property def hparams(self): return self.__hparams + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/util/_random.py b/ruck/util/_iter.py similarity index 71% rename from ruck/util/_random.py rename to ruck/util/_iter.py index 5fca20f..91135d3 100644 --- a/ruck/util/_random.py +++ b/ruck/util/_iter.py @@ -22,25 +22,31 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +import itertools +import random +from typing import Any +from typing import Callable +from typing import List +from typing import Sequence +from typing import Tuple +from typing import TypeVar import numpy as np # ========================================================================= # -# From https://github.com/nmichlo/disent +# lists # # ========================================================================= # -def random_choice_prng(a, size=None, replace=True, p=None): - # create seeded pseudo random number generator - # - built in np.random.choice cannot handle large values: https://github.com/numpy/numpy/issues/5299#issuecomment-497915672 - # - PCG64 is the default: https://numpy.org/doc/stable/reference/random/bit_generators/index.html - # - PCG64 has good statistical properties and is fast: https://numpy.org/doc/stable/reference/random/performance.html - g = np.random.Generator(np.random.PCG64(seed=np.random.randint(0, 2**32))) - # sample indices - choices = g.choice(a, size=size, replace=replace, p=p) - # done! 
- return choices +def chained(list_of_lists: List[List[Any]]) -> List[Any]: + return [item for items in list_of_lists for item in items] + + +def splits(items: List[Any], num_chunks: int, keep_empty: bool = False) -> List[List[Any]]: + if not keep_empty: + num_chunks = min(num_chunks, len(items)) + return [list(items) for items in np.array_split(items, num_chunks)] # ========================================================================= # diff --git a/ruck/util/_ray.py b/ruck/util/_ray.py index 8619178..73a145e 100644 --- a/ruck/util/_ray.py +++ b/ruck/util/_ray.py @@ -28,26 +28,11 @@ from typing import Protocol from typing import Sequence -import numpy as np import ray from ray.remote_function import RemoteFunction - -# ========================================================================= # -# lists # -# ========================================================================= # - - -def chained(list_of_lists: List[List[Any]]) -> List[Any]: - return [item for items in list_of_lists for item in items] - - -def splits(items: List[Any], num_chunks: int = None, keep_empty: bool = False) -> List[List[Any]]: - if num_chunks is None: - num_chunks = _cpus() - if not keep_empty: - num_chunks = min(num_chunks, len(items)) - return [list(items) for items in np.array_split(items, num_chunks)] +from ruck.util._iter import chained +from ruck.util._iter import splits # ========================================================================= # From 8d0a1991c02cf0edb44933af085465529b8e18d4 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Fri, 24 Sep 2021 17:46:17 +0200 Subject: [PATCH 10/18] fixes --- ruck/functional/_mate.py | 6 ++++-- ruck/functional/_select.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ruck/functional/_mate.py b/ruck/functional/_mate.py index 9d37726..c07aa5c 100644 --- a/ruck/functional/_mate.py +++ b/ruck/functional/_mate.py @@ -41,8 +41,10 @@ def check_mating(fn): @wraps(fn) def wrapper(value_a, value_b, *args, 
**kwargs): mated_a, mated_b = fn(value_a, value_b, *args, **kwargs) - assert mated_a not in (value_a, value_b), f'Mate function: {fn} should return new values' - assert mated_b not in (value_a, value_b), f'Mate function: {fn} should return new values' + assert mated_a is not value_a, f'Mate function: {fn} should return new values' + assert mated_a is not value_b, f'Mate function: {fn} should return new values' + assert mated_b is not value_a, f'Mate function: {fn} should return new values' + assert mated_b is not value_b, f'Mate function: {fn} should return new values' return mated_a, mated_b return wrapper diff --git a/ruck/functional/_select.py b/ruck/functional/_select.py index d19c710..a7e1592 100644 --- a/ruck/functional/_select.py +++ b/ruck/functional/_select.py @@ -39,8 +39,8 @@ def check_selection(fn): @wraps(fn) - def wrapper(population: Population, num: int): - selected = fn(population, num) + def wrapper(population: Population, num: int, *args, **kwargs): + selected = fn(population, num, *args, **kwargs) assert selected is not population, f'Select function: {fn} should return a new list' assert len(selected) == num, f'Select function: {fn} returned an incorrect number of elements, got: {len(selected)}, should be: {num}' return selected From ce4f0a85eba4c7f883626e656f1b629c36fbea7b Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Fri, 24 Sep 2021 17:47:06 +0200 Subject: [PATCH 11/18] working ray, nicer api --- examples/onemax_ray.py | 163 ++++++++++++++++++----------------------- ruck/util/_iter.py | 96 ++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 91 deletions(-) diff --git a/examples/onemax_ray.py b/examples/onemax_ray.py index 7d6d4e7..4f93593 100644 --- a/examples/onemax_ray.py +++ b/examples/onemax_ray.py @@ -23,14 +23,12 @@ # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ import logging -import random +from functools import wraps from typing import Any from typing import List -from typing import Tuple 
import numpy as np import ray -from ray import ObjectRef from ruck import * from ruck import EaModule @@ -38,6 +36,13 @@ from ruck.util import chained from ruck.util import ray_map from ruck.util import Timer +from ruck.util._iter import ipairs + +from ruck.util._iter import itake_random +from ruck.util._iter import random_map +from ruck.util._iter import random_map_pairs +from ruck.util._iter import replaced +from ruck.util._iter import transposed # ========================================================================= # @@ -45,6 +50,60 @@ # ========================================================================= # +def ray_store(get: bool = True, put: bool = True, iter_results: bool = False): + def wrapper(fn): + @wraps(fn) + def inner(*args): + # get values from object store + if get: + args = (ray.get(v) for v in args) + # call function + result = fn(*args) + # store values in the object store + if put: + if iter_results: + result = tuple(ray.put(v) for v in result) + else: + result = ray.put(result) + # done! + return result + return inner + return wrapper + + +def member_values(unwrap: bool = True, wrap: bool = True, iter_results: bool = False): + def wrapper(fn): + @wraps(fn) + def inner(*args): + # unwrap member values + if unwrap: + args = (m.value for m in args) + # call function + result = fn(*args) + # wrap values withing members again + if wrap: + if iter_results: + result = tuple(Member(v) for v in result) + else: + result = Member(result) + # done! 
+ return result + return inner + return wrapper + + +@member_values(iter_results=True) +@ray_store(iter_results=True) +def mate(a, b): + return R.mate_crossover_1d(a, b) + + +@member_values() +@ray_store() +def mutate(v): + return R.mutate_flip_bit_types(v, p=0.05) + + class OneMaxModule(EaModule): def __init__( @@ -57,104 +116,26 @@ def __init__( super().__init__() self.save_hyperparameters() - def gen_starting_population(self) -> Population: - # 2.0317113399505615 - return [ - Member(ray.put(np.random.random(self.hparams.member_size) < 0.5)) - for _ in range(self.hparams.population_size) - ] + def gen_starting_values(self): + for _ in range(self.hparams.population_size): + yield ray.put(np.random.random(self.hparams.member_size) < 0.5) def generate_offspring(self, population: Population) -> Population: - # HACK - # 0.027140140533447266 - # population = [Member(ray.get(m.value), m.fitness) for m in population] # Same as deap.algorithms.eaSimple which uses deap.algorithms.varAnd - # 0.0007593631744384766 - offspring = R.select_tournament(population, len(population), k=3) # tools.selNSGA2 - # vary population - # 0.7187347412109375 - - @ray.remote - def mate_crossover_1d(a, b) -> Tuple[ObjectRef, ObjectRef]: - a, b = R.mate_crossover_1d(a, b) - return ray.put(a), ray.put(b) - - @ray.remote - def mutate_flip_bits(a) -> ObjectRef: - a = R.mutate_flip_bits(a, p=0.05) - return ray.put(a) - - with Timer('vary'): - # mate - random.shuffle(offspring) - futures, positions = [], [] - for i, (a, b) in enumerate(zip(offspring[0::2], offspring[1::2])): - if random.random() < self.hparams.p_mate: - futures.append(mate_crossover_1d.remote(a.value, b.value)) - positions.append(i) - for i, (a, b) in zip(positions, ray.get(futures)): - offspring[i*2+0] = Member(a) # why does this step slow things down so much? - offspring[i*2+1] = Member(b) # why does this step slow things down so much? 
- - # mutate - futures, positions = [], [] - for i, a in enumerate(offspring): - if random.random() < self.hparams.p_mutate: - futures.append(mutate_flip_bits.remote(a.value)) - for i, a in zip(positions, ray.get(futures)): - print(a) - offspring[i] = Member(a) # why does this step slow things down so much? - - # offspring = R.apply_mate_and_mutate( - # population=offspring, - # mate_fn=lambda a, b: R.mate_crossover_1d, - # mutate_fn=lambda a: ray.put(R.mutate_flip_bits(ray.get(a), p=0.05)), - # p_mate=self.hparams.p_mate, - # p_mutate=self.hparams.p_mutate, - # ) - # HACK - # 0.13915061950683594 - # offspring = [Member(ray.put(m.value), m.fitness_unsafe) for m in offspring] - # done + offspring = list(population) + np.random.shuffle(offspring) + offspring = random_map_pairs(mate, offspring, p=self.hparams.p_mate, map_fn=ray_map) + offspring = random_map(mutate, offspring, p=self.hparams.p_mutate, map_fn=ray_map) + # Done! return offspring def select_population(self, population: Population, offspring: Population) -> Population: - # Same as deap.algorithms.eaSimple - return offspring + return R.select_tournament(population + offspring, len(population), k=3) # TODO: tools.selNSGA2 def evaluate_values(self, values: List[Any]) -> List[float]: - # 0.1165781021118164 return ray_map(np.mean, values) -# @ray.remote -# def evaluate(value): -# return value.std() - # return [ray.get(value_id).std() for value_id in values] - -# @ray.remote -# class RayWorker(object): -# -# def gen_starting_population(self) -> Population: -# pass -# -# def generate_offspring(self, population: Population) -> Population: -# pass -# -# def select_population(self, population: Population, offspring: Population) -> Population: -# pass -# -# def evaluate_values(self, values: List[Any]) -> List[float]: -# pass - - -# class RayManager(): -# -# def __init__(self, num_workers: int = None): -# if num_workers is None: -# num_workers = ray.available_resources().get('CPU', 1) - - # 
========================================================================= # # Main # # ========================================================================= # @@ -166,7 +147,7 @@ def evaluate_values(self, values: List[Any]) -> List[float]: logging.basicConfig(level=logging.INFO) - ray.init(num_cpus=128) + ray.init(num_cpus=64) with Timer('ruck:trainer'): module = OneMaxModule(population_size=512, member_size=1_000_000) diff --git a/ruck/util/_iter.py b/ruck/util/_iter.py index 91135d3..60eef2c 100644 --- a/ruck/util/_iter.py +++ b/ruck/util/_iter.py @@ -49,6 +49,102 @@ def splits(items: List[Any], num_chunks: int, keep_empty: bool = False) -> List[ return [list(items) for items in np.array_split(items, num_chunks)] +def replaced(targets: List[Any], idxs: Sequence[int], items: Sequence[int]): + targets = list(targets) + for i, v in zip(idxs, items): + targets[i] = v + return targets + + +def replaced_pairs(targets: List[Any], idx_item_pairs: Sequence[Tuple[int, Any]]): + targets = list(targets) + for i, v in idx_item_pairs: + targets[i] = v + return targets + + +def transposed(items, results: int) -> Tuple[List[Any], ...]: + """ + Like `zip(*items)` but not an iterators + and returns a tuple of lists instead + """ + lists = [[] for i in range(results)] + # get items + for item in items: + for l, v in zip(lists, item): + l.append(v) + # done + return tuple(lists) + + +# ========================================================================= # +# random # +# ========================================================================= # + + +T = TypeVar('T') + + +def random_map_pairs(fn: Callable[[T, T], Tuple[T, T]], items: Sequence[T], p: float, map_fn=map) -> List[T]: + return chained(random_map(lambda v: fn(v[0], v[1]), ipairs(items), p, map_fn)) + + +def random_map(fn: Callable[[T], T], items: Sequence[T], p: float, map_fn=map) -> List[T]: + items = list(items) + idxs, sel = transposed(itake_random(enumerate(items), p=p), results=2) + sel = map_fn(fn, sel) + 
return replaced(items, idxs, sel) + + +# ========================================================================= # +# iter # +# ========================================================================= # + + +def itake_random(items, p: float): + assert 0 <= p <= 1.0 + # exit early + if p == 0: + return + # take items + for item in items: + if random.random() < p: + yield item + + +def ipairs(items): + itr_a, itr_b = itertools.tee(items) + itr_a = itertools.islice(itr_a, 0, None, 2) + itr_b = itertools.islice(itr_b, 1, None, 2) + return zip(itr_a, itr_b) + + # equivalent slower alternative: + # itr = iter(items) + # while True: + # try: + # a = next(itr) + # b = next(itr) + # except StopIteration: + # return + # yield a, b + + +def imap_random(fn, items, p): + for i, item in itake_random(enumerate(items), p=p): + yield i, fn(item) + + +def imap_multi(*fns_last_is_items): + """ + Example: + >>> list(imap_multi(None, lambda x: x + 10, [[1, 2], [3, 4]])) + >>> [(1, 12), (3, 14)] + """ + *fns, items = fns_last_is_items + for item in items: + yield tuple((v if (fn is None) else fn(v)) for fn, v in zip(fns, item)) + + # ========================================================================= # # END # # ========================================================================= # From 518a01cb018f26f2970e13c757b2c67f577e27f7 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Sat, 25 Sep 2021 00:49:10 +0200 Subject: [PATCH 12/18] cleanup --- examples/onemax.py | 45 +++---- examples/onemax_ray.py | 131 ++++++-------------- ruck/_member.py | 12 +- ruck/functional/_algorithm.py | 227 +++++++++++++++++++++++++--------- ruck/functional/_mate.py | 8 +- ruck/functional/_mutate.py | 11 +- ruck/functional/_select.py | 14 ++- ruck/util/__init__.py | 2 +- ruck/util/_iter.py | 149 ++++++++++------------ ruck/util/_ray.py | 75 ++++++++--- 10 files changed, 380 insertions(+), 294 deletions(-) diff --git a/examples/onemax.py b/examples/onemax.py index dac83af..3e3c891 100644 --- 
a/examples/onemax.py +++ b/examples/onemax.py @@ -22,15 +22,17 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +import functools import logging from typing import Any from typing import List import numpy as np -from ruck import * from ruck import EaModule from ruck import Population +from ruck import R +from ruck import Trainer from ruck.util import Timer @@ -41,6 +43,11 @@ class OneMaxModule(EaModule): + # trick pycharm overrides error checking against `EaModule` + # it doesn't like that we set the values in the constructor! + generate_offspring = None + select_population = None + def __init__( self, population_size: int = 300, @@ -48,8 +55,17 @@ def __init__( p_mate: float = 0.5, p_mutate: float = 0.5, ): - super().__init__() + # save the arguments to the .hparams property. values are taken from the + # local scope so modifications can be captured if the call to this is delayed. self.save_hyperparameters() + # implement the required functions for `EaModule` + self.generate_offspring, self.select_population = R.factory_simple_ea( + mate_fn=R.mate_crossover_1d, + mutate_fn=functools.partial(R.mutate_flip_bits, p=0.05), + select_fn=functools.partial(R.select_tournament, k=3), # tools.selNSGA2 + p_mate=self.hparams.p_mate, + p_mutate=self.hparams.p_mutate, + ) def evaluate_values(self, values: List[Any]): # this is a large reason why the deap version is slow, @@ -57,25 +73,10 @@ def evaluate_values(self, values: List[Any]): return map(np.sum, values) def gen_starting_values(self) -> Population: - for _ in range(self.hparams.population_size): - yield np.random.random(self.hparams.member_size) < 0.5 - - def generate_offspring(self, population: Population) -> Population: - # Same as deap.algorithms.eaSimple which uses deap.algorithms.varAnd - offspring = R.select_tournament(population, len(population), k=3) # tools.selNSGA2 - # vary population - return R.apply_mate_and_mutate( - population=offspring, - 
mate_fn=R.mate_crossover_1d, - mutate_fn=lambda a: R.mutate_flip_bits(a, p=0.05), - p_mate=self.hparams.p_mate, - p_mutate=self.hparams.p_mutate, - ) - - def select_population(self, population: Population, offspring: Population) -> Population: - # Same as deap.algorithms.eaSimple - return offspring - + return [ + np.random.random(self.hparams.member_size) < 0.5 + for i in range(self.hparams.population_size) + ] # ========================================================================= # # Main # @@ -83,7 +84,7 @@ def select_population(self, population: Population, offspring: Population) -> Po if __name__ == '__main__': - # about 18x faster than deap's numpy onemax example (0.145s vs 2.6s) + # about 15x faster than deap's numpy onemax example (0.17s vs 2.6s) # -- https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py logging.basicConfig(level=logging.INFO) diff --git a/examples/onemax_ray.py b/examples/onemax_ray.py index 4f93593..7671335 100644 --- a/examples/onemax_ray.py +++ b/examples/onemax_ray.py @@ -22,27 +22,23 @@ # SOFTWARE. 
# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +import functools import logging -from functools import wraps -from typing import Any from typing import List import numpy as np +import psutil import ray +from ray import ObjectRef -from ruck import * from ruck import EaModule from ruck import Population -from ruck.util import chained -from ruck.util import ray_map +from ruck import R +from ruck import Trainer +from ruck.util import ray_mapped from ruck.util import Timer -from ruck.util._iter import ipairs - -from ruck.util._iter import itake_random -from ruck.util._iter import random_map -from ruck.util._iter import random_map_pairs -from ruck.util._iter import replaced -from ruck.util._iter import transposed +from ruck.util._ray import ray_refs_handler # ========================================================================= # @@ -50,61 +46,12 @@ # ========================================================================= # -def ray_store(get: bool = True, put: bool = True, iter_results: bool = False): - def wrapper(fn): - @wraps(fn) - def inner(*args): - # get values from object store - if get: - args = (ray.get(v) for v in args) - # call function - result = fn(*args) - # store values in the object store - if put: - if iter_results: - result = tuple(ray.put(v) for v in result) - else: - result = ray.put(result) - # done! - return result - return inner - return wrapper - - -def member_values(unwrap: bool = True, wrap: bool = True, iter_results: bool = False): - def wrapper(fn): - @wraps(fn) - def inner(*args): - # unwrap member values - if unwrap: - args = (m.value for m in args) - # call function - result = fn(*args) - # wrap values withing members again - if wrap: - if iter_results: - result = tuple(Member(v) for v in result) - else: - result = Member(result) - # done! 
- return result - return inner - return wrapper - - -@member_values(iter_results=True) -@ray_store(iter_results=True) -def mate(a, b): - return R.mate_crossover_1d(a, b) - - -@member_values() -@ray_store() -def mutate(v): - return R.mutate_flip_bit_types(v, p=0.05) - - -class OneMaxModule(EaModule): +class OneMaxRayModule(EaModule[ObjectRef]): + + # trick pycharm overrides error checking against `EaModule` + # it doesn't like that we set the values in the constructor! + generate_offspring = None + select_population = None def __init__( self, @@ -113,27 +60,29 @@ def __init__( p_mate: float = 0.5, p_mutate: float = 0.5, ): - super().__init__() + # save the arguments to the .hparams property. values are taken from the + # local scope so modifications can be captured if the call to this is delayed. self.save_hyperparameters() - - def gen_starting_values(self): - for _ in range(self.hparams.population_size): - yield ray.put(np.random.random(self.hparams.member_size) < 0.5) - - def generate_offspring(self, population: Population) -> Population: - # Same as deap.algorithms.eaSimple which uses deap.algorithms.varAnd - offspring = list(population) - np.random.shuffle(offspring) - offspring = random_map_pairs(mate, offspring, p=self.hparams.p_mate, map_fn=ray_map) - offspring = random_map(mutate, offspring, p=self.hparams.p_mutate, map_fn=ray_map) - # Done! 
- return offspring - - def select_population(self, population: Population, offspring: Population) -> Population: - return R.select_tournament(population + offspring, len(population), k=3) # TODO: tools.selNSGA2 - - def evaluate_values(self, values: List[Any]) -> List[float]: - return ray_map(np.mean, values) + # implement the required functions for `EaModule` + self.generate_offspring, self.select_population = R.factory_simple_ea( + mate_fn=ray_refs_handler(R.mate_crossover_1d, iter_results=True), + mutate_fn=ray_refs_handler(functools.partial(R.mutate_flip_bits, p=0.05)), + select_fn=functools.partial(R.select_tournament, k=3), # tools.selNSGA2 + p_mate=self.hparams.p_mate, + p_mutate=self.hparams.p_mutate, + map_fn=ray_mapped, + ) + + def evaluate_values(self, values: List[ObjectRef]) -> List[float]: + # this is a large reason why the deap version is slow, + # it does not make use of numpy operations + return ray_mapped(np.sum, values) + + def gen_starting_values(self) -> Population[ObjectRef]: + return [ + ray.put(np.random.random(self.hparams.member_size) < 0.5) + for i in range(self.hparams.population_size) + ] # ========================================================================= # @@ -142,16 +91,16 @@ def evaluate_values(self, values: List[Any]) -> List[float]: if __name__ == '__main__': - # about 18x faster than deap's numpy onemax example (0.145s vs 2.6s) + # about 15x faster than deap's numpy onemax example (0.17s vs 2.6s) # -- https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py logging.basicConfig(level=logging.INFO) - ray.init(num_cpus=64) + ray.init(num_cpus=min(psutil.cpu_count(), 16)) with Timer('ruck:trainer'): - module = OneMaxModule(population_size=512, member_size=1_000_000) - pop, logbook, halloffame = Trainer(generations=1000, progress=True).fit(module) + module = OneMaxRayModule(population_size=512, member_size=1_000_000) + pop, logbook, halloffame = Trainer(generations=100, progress=False).fit(module) print('initial 
stats:', logbook[0]) print('final stats:', logbook[-1]) diff --git a/ruck/_member.py b/ruck/_member.py index 9864e3a..9f55fc0 100644 --- a/ruck/_member.py +++ b/ruck/_member.py @@ -89,10 +89,14 @@ def __str__(self): return repr(self) def __repr__(self): - if self.is_evaluated: - return f'{self.__class__.__name__}<{self.fitness}>' - else: - return f'{self.__class__.__name__}<>' + value_str = repr(self.value) + # cut short + if len(value_str) > 20: + value_str = f'{value_str[:20]} ...' + # get fitness + fitness_str = f', {self.fitness}' if self.is_evaluated else '' + # combine + return f'{self.__class__.__name__}({value_str}{fitness_str})' # ========================================================================= # diff --git a/ruck/functional/_algorithm.py b/ruck/functional/_algorithm.py index 8e4c386..2dd52e7 100644 --- a/ruck/functional/_algorithm.py +++ b/ruck/functional/_algorithm.py @@ -22,35 +22,85 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +from typing import Callable +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import TypeVar + from ruck._member import Member from ruck._member import Population +from ruck.functional import SelectFnHint from ruck.functional._mate import MateFnHint from ruck.functional._mutate import MutateFnHint +from ruck.util._iter import replaced_random_taken_pairs +from ruck.util._iter import replaced_random_taken_elems import random import numpy as np +# ========================================================================= # +# Helper # +# ========================================================================= # + + +T = TypeVar('T') + + # ========================================================================= # # Crossover & Mutate Helpers # # ========================================================================= # +def _mate_wrap_unwrap_values(mate_fn: MateFnHint[T]): + def wrapper(ma: Member[T], 
mb: Member[T]) -> Tuple[Member[T], Member[T]]: + va, vb = mate_fn(ma.value, mb.value) + return Member(va), Member(vb) + return wrapper + + +def _mutate_wrap_unwrap_values(mutate_fn: MutateFnHint[T]): + def wrapper(m: Member[T]) -> Member[T]: + v = mutate_fn(m.value) + return Member(v) + return wrapper + + +# ========================================================================= # +# Function Wrappers # +# ========================================================================= # + + def apply_mate( - population: Population, - mate_fn: MateFnHint, + population: Population[T], + mate_fn: MateFnHint[T], p: float = 0.5, -) -> Population: + keep_order: bool = True, + map_fn=map, +) -> Population[T]: # randomize order so we have randomized pairs - offspring = list(population) - np.random.shuffle(offspring) - # apply mating to population -- why is this faster than pre-generating the boolean mask? - for i in range(1, len(population), 2): - if random.random() < p: - v0, v1 = mate_fn(offspring[i-1].value, offspring[i].value) - offspring[i-1], offspring[i] = Member(v0), Member(v1) + if keep_order: + indices = np.arange(len(population)) + np.random.shuffle(indices) + offspring = [population[i] for i in indices] + else: + offspring = list(population) + np.random.shuffle(offspring) + # apply mating to population + offspring = replaced_random_taken_pairs( + fn=_mate_wrap_unwrap_values(mate_fn), + items=offspring, + p=p, + map_fn=map_fn, + ) + # undo random order + if keep_order: + offspring = [offspring[i] for i in np.argsort(indices)] # done! 
+ assert len(offspring) == len(population) return offspring @@ -58,22 +108,28 @@ def apply_mutate( population: Population, mutate_fn: MutateFnHint, p: float = 0.5, + map_fn=map, ) -> Population: - elem_mask = np.random.random(size=len(population)) < p - # apply mutate to population - return [ - Member(mutate_fn(m.value)) if do_mutate else m - for m, do_mutate in zip(population, elem_mask) - ] + # apply mutations to population + offspring = replaced_random_taken_elems( + fn=_mutate_wrap_unwrap_values(mutate_fn), + items=population, + p=p, + map_fn=map_fn, + ) + # done! + assert len(offspring) == len(population) + return offspring def apply_mate_and_mutate( - population: Population, - mate_fn: MateFnHint, - mutate_fn: MutateFnHint, + population: Population[T], + mate_fn: MateFnHint[T], + mutate_fn: MutateFnHint[T], p_mate: float = 0.5, p_mutate: float = 0.5, -) -> Population: + map_fn=map, +) -> Population[T]: """ Apply crossover AND mutation @@ -83,19 +139,34 @@ def apply_mate_and_mutate( ** Should be equivalent to varAnd from DEAP ** """ - population = apply_mate(population, mate_fn, p=p_mate) - population = apply_mutate(population, mutate_fn, p=p_mutate) - return population + offspring = apply_mate(population, mate_fn, p=p_mate, keep_order=True, map_fn=map_fn) + offspring = apply_mutate(offspring, mutate_fn, p=p_mutate, map_fn=map_fn) + return offspring + + +def _get_generate_member_fn( + mate_fn: MateFnHint[T], + mutate_fn: MutateFnHint[T], + p_mate: float = 0.5, + p_mutate: float = 0.5, +): + def _generate_member(a_b_r: Tuple[Member[T], Optional[Member[T]], float]) -> Member[T]: + ma, mb, r = a_b_r + if r < p_mate: return Member(mate_fn(ma.value, mb.value)[0]) # Apply crossover | only take first item | mb is only defined for this case + elif r < p_mate + p_mutate: return Member(mutate_fn(ma.value)) # Apply mutation + else: return ma # Apply reproduction + return _generate_member def apply_mate_or_mutate_or_reproduce( - population: Population, + population: 
Population[T], num_offspring: int, # lambda_ - mate_fn: MateFnHint, - mutate_fn: MutateFnHint, + mate_fn: MateFnHint[T], + mutate_fn: MutateFnHint[T], p_mate: float = 0.5, p_mutate: float = 0.5, -) -> Population: + map_fn=map, +) -> Population[T]: """ Apply crossover OR mutation OR reproduction @@ -107,16 +178,22 @@ def apply_mate_or_mutate_or_reproduce( """ assert (p_mate + p_mutate) <= 1.0, 'The sum of the crossover and mutation probabilities must be smaller or equal to 1.0.' - pairs = np.random.randint(0, len(population), size=[2, num_offspring]) - rand = np.random.random(len(population)) - - def _fn(a: int, b: int, r: float): - if r < p_mate: return Member(mate_fn(population[a].value, population[b].value)[0]) # Apply crossover - elif r < p_mate + p_mutate: return Member(mutate_fn(population[a].value)) # Apply mutation - else: return population[a] # Apply reproduction - - # np.vectorize can help, but only about 10% faster for large populations, and 3x slower for tiny populations - return [_fn(a, b, r) for a, b, r in zip(pairs[0], pairs[1], rand)] + # choose which action should be taken for each element + probabilities = np.random.random(num_offspring) + # select offspring + choices_a = [random.choice(population) for p in probabilities] + choices_b = [random.choice(population) if (p < p_mate) else None for p in probabilities] # these are only needed for crossover, when (p < p_mate) + # get function to generate offspring + # - we create the function so that we don't accidentally pickle anything else + fn = _get_generate_member_fn(mate_fn=mate_fn, mutate_fn=mutate_fn, p_mate=p_mate, p_mutate=p_mutate) + # generate offspring + # - TODO: this is actually not optimal! we should only pass mate and + # mutate operations to the map function, we could distribute + # work unevenly between processes if map_fn is replaced + offspring = list(map_fn(fn, zip(choices_a, choices_b, probabilities))) + # done! 
+ assert len(offspring) == num_offspring + return offspring # ========================================================================= # @@ -124,28 +201,60 @@ def _fn(a: int, b: int, r: float): # ========================================================================= # -# def factory_ea_alg( -# mate_fn, -# mutate_fn, -# select_fn, -# mode: str = 'simple', -# p_mate: float = 0.5, -# p_mutate: float = 0.5, -# offspring_num: int = 128, -# population_num: int = 128, -# ): -# if mode == 'simple': -# def _generate(population): return apply_mate_and_mutate(population=select_fn(population, len(population)), p_mate=p_mate, mate=mate_fn, p_mutate=p_mutate, mutate=mutate_fn) -# def _select(population, offspring): return offspring -# elif mode == 'mu_plus_lambda': -# def _generate(population): return apply_mate_or_mutate_or_reproduce(population, num_offspring=offspring_num, p_mate=p_mate, mate=mate_fn, p_mutate=p_mutate, mutate=mutate_fn) -# def _select(population, offspring): return select_fn(population + offspring, population_num) -# elif mode == 'mu_comma_lambda': -# def _generate(population): return apply_mate_or_mutate_or_reproduce(population, num_offspring=offspring_num, p_mate=p_mate, mate=mate_fn, p_mutate=p_mutate, mutate=mutate_fn) -# def _select(population, offspring): return select_fn(offspring, population_num) -# else: -# raise KeyError(f'invalid mode: {repr(mode)}') -# return _generate, _select +def factory_simple_ea( + mate_fn: MateFnHint[T], + mutate_fn: MutateFnHint[T], + select_fn: SelectFnHint[T], + p_mate: float = 0.5, + p_mutate: float = 0.5, + map_fn=map, +): + def generate(population): + return apply_mate_and_mutate(population=select_fn(population, len(population)), p_mate=p_mate, mate_fn=mate_fn, p_mutate=p_mutate, mutate_fn=mutate_fn, map_fn=map_fn) + + def select(population, offspring): + return offspring + + return generate, select + + +def factory_mu_and_lambda( + mate_fn: MateFnHint[T], + mutate_fn: MutateFnHint[T], + select_fn: SelectFnHint[T], 
+ offspring_num: int, # lambda + p_mate: float = 0.5, + p_mutate: float = 0.5, + map_fn=map, +): + def generate(population): + num = len(population) if (offspring_num is None) else offspring_num + return apply_mate_or_mutate_or_reproduce(population, num, mate_fn=mate_fn, mutate_fn=mutate_fn, p_mate=p_mate, p_mutate=p_mutate, map_fn=map_fn) + + def select(population: Population[T], offspring: Population[T]): + return select_fn(population + offspring, len(population)) + + return generate, select + + +def factory_mu_comma_lambda( + mate_fn: MateFnHint[T], + mutate_fn: MutateFnHint[T], + select_fn: SelectFnHint[T], + offspring_num: Optional[int] = None, # lambda + p_mate: float = 0.5, + p_mutate: float = 0.5, + map_fn=map, +): + def generate(population): + num = len(population) if (offspring_num is None) else offspring_num + return apply_mate_or_mutate_or_reproduce(population, num, mate_fn=mate_fn, mutate_fn=mutate_fn, p_mate=p_mate, p_mutate=p_mutate, map_fn=map_fn) + + def select(population, offspring): + assert len(offspring) >= len(population), f'invalid arguments, the number of offspring: {len(offspring)} (lambda) must be greater than or equal to the size of the population: {len(population)} (mu)' + return select_fn(offspring, len(population)) + + return generate, select # # ========================================================================= # diff --git a/ruck/functional/_mate.py b/ruck/functional/_mate.py index c07aa5c..ebc7d49 100644 --- a/ruck/functional/_mate.py +++ b/ruck/functional/_mate.py @@ -23,9 +23,10 @@ # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ from functools import wraps -from typing import Any from typing import Callable from typing import Tuple +from typing import TypeVar + import numpy as np @@ -34,12 +35,13 @@ # ========================================================================= # -MateFnHint = Callable[[Any, Any], Tuple[Any, Any]] +T = TypeVar('T') +MateFnHint = Callable[[T, T], Tuple[T, T]] def 
check_mating(fn): @wraps(fn) - def wrapper(value_a, value_b, *args, **kwargs): + def wrapper(value_a: T, value_b: T, *args, **kwargs) -> Tuple[T, T]: mated_a, mated_b = fn(value_a, value_b, *args, **kwargs) assert mated_a is not value_a, f'Mate function: {fn} should return new values' assert mated_a is not value_b, f'Mate function: {fn} should return new values' diff --git a/ruck/functional/_mutate.py b/ruck/functional/_mutate.py index 648a110..ea8b8b6 100644 --- a/ruck/functional/_mutate.py +++ b/ruck/functional/_mutate.py @@ -23,8 +23,8 @@ # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ from functools import wraps -from typing import Any from typing import Callable +from typing import TypeVar import numpy as np @@ -34,12 +34,13 @@ # ========================================================================= # -MutateFnHint = Callable[[Any], Any] +T = TypeVar('T') +MutateFnHint = Callable[[T], T] def check_mutation(fn): @wraps(fn) - def wrapper(value, *args, **kwargs): + def wrapper(value: T, *args, **kwargs): mutated = fn(value, *args, **kwargs) assert mutated is not value, f'Mutate function: {fn} should return a new value' return mutated @@ -52,12 +53,12 @@ def wrapper(value, *args, **kwargs): @check_mutation -def mutate_flip_bits(a: np.ndarray, p: float = 0.05): +def mutate_flip_bits(a: np.ndarray, p: float = 0.05) -> np.ndarray: return a ^ (np.random.random(a.shape) < p) @check_mutation -def mutate_flip_bit_types(a: np.ndarray, p: float = 0.05): +def mutate_flip_bit_types(a: np.ndarray, p: float = 0.05) -> np.ndarray: if np.random.random() < 0.5: # flip set bits return a ^ ((np.random.random(a.shape) < p) & a) diff --git a/ruck/functional/_select.py b/ruck/functional/_select.py index a7e1592..1dfbe70 100644 --- a/ruck/functional/_select.py +++ b/ruck/functional/_select.py @@ -25,6 +25,7 @@ import random from functools import wraps from typing import Callable +from typing import TypeVar from ruck._member import Population @@ -34,12 
+35,13 @@ # ========================================================================= # -SelectFnHint = Callable[[Population, int], Population] +T = TypeVar('T') +SelectFnHint = Callable[[Population[T], int], Population[T]] def check_selection(fn): @wraps(fn) - def wrapper(population: Population, num: int, *args, **kwargs): + def wrapper(population: Population[T], num: int, *args, **kwargs) -> Population[T]: selected = fn(population, num, *args, **kwargs) assert selected is not population, f'Select function: {fn} should return a new list' assert len(selected) == num, f'Select function: {fn} returned an incorrect number of elements, got: {len(selected)}, should be: {num}' @@ -53,22 +55,22 @@ def wrapper(population: Population, num: int, *args, **kwargs): @check_selection -def select_best(population: Population, num: int) -> Population: +def select_best(population: Population[T], num: int) -> Population[T]: return sorted(population, key=lambda m: m.fitness, reverse=True)[:num] @check_selection -def select_worst(population: Population, num: int) -> Population: +def select_worst(population: Population[T], num: int) -> Population[T]: return sorted(population, key=lambda m: m.fitness, reverse=False)[:num] @check_selection -def select_random(population: Population, num: int) -> Population: +def select_random(population: Population[T], num: int) -> Population[T]: return random.sample(population, k=num) @check_selection -def select_tournament(population: Population, num: int, k: int = 3) -> Population: +def select_tournament(population: Population[T], num: int, k: int = 3) -> Population[T]: key = lambda m: m.fitness return [ max(random.sample(population, k=k), key=key) diff --git a/ruck/util/__init__.py b/ruck/util/__init__.py index b37cac7..6f42217 100644 --- a/ruck/util/__init__.py +++ b/ruck/util/__init__.py @@ -27,5 +27,5 @@ from ruck.util._iter import splits from ruck.util._iter import chained -from ruck.util._ray import ray_map +from ruck.util._ray import ray_mapped 
from ruck.util._ray import ray_map_chunks diff --git a/ruck/util/_iter.py b/ruck/util/_iter.py index 60eef2c..20acc2a 100644 --- a/ruck/util/_iter.py +++ b/ruck/util/_iter.py @@ -22,10 +22,13 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + import itertools import random from typing import Any from typing import Callable +from typing import Iterable +from typing import Iterator from typing import List from typing import Sequence from typing import Tuple @@ -35,114 +38,92 @@ # ========================================================================= # -# lists # +# Helper # # ========================================================================= # -def chained(list_of_lists: List[List[Any]]) -> List[Any]: - return [item for items in list_of_lists for item in items] - - -def splits(items: List[Any], num_chunks: int, keep_empty: bool = False) -> List[List[Any]]: - if not keep_empty: - num_chunks = min(num_chunks, len(items)) - return [list(items) for items in np.array_split(items, num_chunks)] +T = TypeVar('T') -def replaced(targets: List[Any], idxs: Sequence[int], items: Sequence[int]): - targets = list(targets) - for i, v in zip(idxs, items): - targets[i] = v - return targets +# ========================================================================= # +# iter # +# ========================================================================= # -def replaced_pairs(targets: List[Any], idx_item_pairs: Sequence[Tuple[int, Any]]): - targets = list(targets) - for i, v in idx_item_pairs: - targets[i] = v - return targets +# NOTE: +# Iterable: objects that return Iterators when passed to `iter()` +# Iterator: return the next item when used with `next()` +# every Iterator is ALSO an Iterable -def transposed(items, results: int) -> Tuple[List[Any], ...]: - """ - Like `zip(*items)` but not an iterators - and returns a tuple of lists instead - """ - lists = [[] for i in range(results)] - # get items - for item in items: - for l, v 
in zip(lists, item): - l.append(v) - # done - return tuple(lists) +def ipairs(items: Iterable[T]) -> Iterator[Tuple[T, T]]: + itr_a, itr_b = itertools.tee(items) + itr_a = itertools.islice(itr_a, 0, None, 2) + itr_b = itertools.islice(itr_b, 1, None, 2) + return zip(itr_a, itr_b) # ========================================================================= # -# random # +# lists # # ========================================================================= # -T = TypeVar('T') - - -def random_map_pairs(fn: Callable[[T, T], Tuple[T, T]], items: Sequence[T], p: float, map_fn=map) -> List[T]: - return chained(random_map(lambda v: fn(v[0], v[1]), ipairs(items), p, map_fn)) +def chained(list_of_lists: Iterable[Iterable[T]]) -> List[T]: + return list(itertools.chain(*list_of_lists)) -def random_map(fn: Callable[[T], T], items: Sequence[T], p: float, map_fn=map) -> List[T]: - items = list(items) - idxs, sel = transposed(itake_random(enumerate(items), p=p), results=2) - sel = map_fn(fn, sel) - return replaced(items, idxs, sel) +def splits(items: Sequence[Any], num_chunks: int, keep_empty: bool = False) -> List[List[Any]]: + # np.array_split will return empty elements if required + if not keep_empty: + num_chunks = min(num_chunks, len(items)) + # we return a lists of lists, not a list of + # tuples so that it is compatible with ray.get + return [list(items) for items in np.array_split(items, num_chunks)] # ========================================================================= # -# iter # +# random -- used for ruck.functional._algorithm # # ========================================================================= # -def itake_random(items, p: float): - assert 0 <= p <= 1.0 - # exit early - if p == 0: - return - # take items - for item in items: +def replaced_random_taken_pairs(fn: Callable[[T, T], Tuple[T, T]], items: Iterable[T], p: float, map_fn=map) -> List[T]: + # shallow copy because we want to update elements in this list + # - we need to take care to handle the special 
case where the length + # of items is odd, thus we cannot just call random_map with modified + # args using pairs and chaining the output + items = list(items) + # select random items + idxs, vals = [], [] + for i, pair in enumerate(zip(items[0::2], items[1::2])): if random.random() < p: - yield item - - -def ipairs(items): - itr_a, itr_b = itertools.tee(items) - itr_a = itertools.islice(itr_a, 0, None, 2) - itr_b = itertools.islice(itr_b, 1, None, 2) - return zip(itr_a, itr_b) - - # equivalent slower alternative: - # itr = iter(items) - # while True: - # try: - # a = next(itr) - # b = next(itr) - # except StopIteration: - # return - # yield a, b - - -def imap_random(fn, items, p): - for i, item in itake_random(enumerate(items), p=p): - yield i, fn(item) - - -def imap_multi(*fns_last_is_items): - """ - Example: - >>> list(imap_multi(None, lambda x: x + 10, [[1, 2], [3, 4]])) - >>> [(1, 12), (3, 14)] - """ - *fns, items = fns_last_is_items - for item in items: - yield tuple((v if (fn is None) else fn(v)) for fn, v in zip(fns, item)) + vals.append(pair) + idxs.append(i) + # map selected values + vals = map_fn(lambda pair: fn(pair[0], pair[1]), vals) + # update values + for i, (v0, v1) in zip(idxs, vals): + items[i*2+0] = v0 + items[i*2+1] = v1 + # done! + return items + + +def replaced_random_taken_elems(fn: Callable[[T], T], items: Iterable[T], p: float, map_fn=map) -> List[T]: + # shallow copy because we want to update elements in this list + items = list(items) + # select random items + idxs, vals = [], [] + for i, v in enumerate(items): + if random.random() < p: + vals.append(v) + idxs.append(i) + # map selected values + vals = map_fn(fn, vals) + # update values + for i, v in zip(idxs, vals): + items[i] = v + # done! 
+ return items # ========================================================================= # diff --git a/ruck/util/_ray.py b/ruck/util/_ray.py index 73a145e..52467f4 100644 --- a/ruck/util/_ray.py +++ b/ruck/util/_ray.py @@ -25,28 +25,17 @@ import functools from typing import Any from typing import List -from typing import Protocol from typing import Sequence import ray from ray.remote_function import RemoteFunction -from ruck.util._iter import chained -from ruck.util._iter import splits - # ========================================================================= # # ray # # ========================================================================= # -class _RayFnHint(Protocol): - def remote(self, *args, **kwargs) -> Any: - pass - def __call__(self, *args, **kwargs) -> Any: - pass - - @functools.lru_cache(maxsize=16) def _to_remote_ray_fn(fn): if not isinstance(fn, RemoteFunction): @@ -54,21 +43,69 @@ def _to_remote_ray_fn(fn): return fn -@functools.lru_cache() -def _cpus(): - return ray.available_resources().get('CPU', 1) - +def ray_mapped(ray_fn, items: Sequence[Any]) -> List[Any]: + """ + A more convenient alternative to `ray.util.multiprocessing.Pool`s `map` function! + Using a similar API to python `map`, except returning a list of mapped values + instead of an iterable. -def ray_map(ray_fn: _RayFnHint, items: Sequence[Any]) -> List[Any]: + The advantage of this functions it that we automatically wrap passed functions to + ray.remote functions, also enabling automatic getting of ObjectRef values. 
+ """ # make sure the function is a remote function ray_fn = _to_remote_ray_fn(ray_fn) # pass each item to ray and wait for the result return ray.get(list(map(ray_fn.remote, items))) -def ray_map_chunks(ray_fn: _RayFnHint, items: List[Any], num_chunks: int = None) -> List[Any]: - # split items into chunks, and pass each chunk to function, then chain results back together - return chained(ray_map(ray_fn, splits(items, num_chunks=num_chunks))) +# ========================================================================= # +# ray - object store # +# ========================================================================= # + + +def ray_refs_handler(fn = None, get: bool = True, put: bool = True, iter_results: bool = False): + """ + Wrap a function so that we automatically ray.get + all the arguments and ray.put the result. + + iter_results=True instead treats the result as an + iterable and applies ray.put to each result item + + for example: + >>> def mate(a, b): + >>> a, b = ray.get(a), ray.get(b) + >>> a, b = R.mate_crossover_1d(a, b) + >>> return ray.put(a), ray.put(b) + + becomes: + >>> @ray_refs_handler(iter_results=True) + >>> def mate(a, b): + >>> return R.mate_crossover_1d(a, b) + """ + + def wrapper(fn): + @functools.wraps(fn) + def inner(*args): + # get values from object store + if get: + args = (ray.get(v) for v in args) + # call function + result = fn(*args) + # store values in the object store + if put: + if iter_results: + result = tuple(ray.put(v) for v in result) + else: + result = ray.put(result) + # done! 
+ return result + return inner + + # handle correct case + if fn is None: + return wrapper + else: + return wrapper(fn) # ========================================================================= # From 66afad97638fc7c29be66ff8547104f1e4809c8c Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Sat, 25 Sep 2021 02:51:13 +0200 Subject: [PATCH 13/18] fixes + halloffame freezing --- ruck/_history.py | 56 ++++++++++++++++++++++++++++++----- ruck/_member.py | 11 ++++--- ruck/_train.py | 4 +-- ruck/functional/_algorithm.py | 5 +--- ruck/functional/_mate.py | 3 +- ruck/functional/_mutate.py | 5 ++-- ruck/functional/_select.py | 3 +- ruck/util/__init__.py | 8 ++--- ruck/util/_ray.py | 6 ++-- 9 files changed, 72 insertions(+), 29 deletions(-) diff --git a/ruck/_history.py b/ruck/_history.py index 723b48e..8b1796f 100644 --- a/ruck/_history.py +++ b/ruck/_history.py @@ -158,16 +158,33 @@ class HallOfFameItem: member: Any = dataclasses.field(compare=False) +class HallOfFameFrozenError(Exception): + pass + + +class HallOfFameNotFrozenError(Exception): + pass + + class HallOfFame(Generic[T]): def __init__(self, n_best: int = 5, maximize: bool = True): self._maximize = maximize assert maximize self._n_best = n_best + # update values self._heap = [] # element 0 is always the smallest self._scores = {} + # frozen values + self._frozen = False + self._frozen_members = None + self._frozen_values = None + self._frozen_scores = None def update(self, population: Population[T]): + if self.is_frozen: + raise HallOfFameFrozenError('The hall of fame has been frozen, no more members can be added!') + # get potential best in population best = sorted(population, key=lambda m: m.fitness, reverse=True)[:self._n_best] # add the best for member in best: @@ -185,17 +202,42 @@ def update(self, population: Population[T]): removed = heapq.heappushpop(self._heap, item) del self._scores[removed.fitness] - @property - def members(self) -> Population[T]: - return [m.member for m in sorted(self._heap, 
reverse=True)] + def freeze(self) -> 'HallOfFame': + if self.is_frozen: + raise HallOfFameFrozenError('The hall of fame has already been frozen, cannot freeze again!') + # freeze + self._frozen = True + self._frozen_members = [m.member for m in sorted(self._heap, reverse=True)] # 0 is best, -1 is worst + # reset values + self._scores = None + self._heap = None + return self @property - def values(self) -> List[T]: - return [m.value for m in self.members] + def is_frozen(self) -> bool: + return self._frozen @property - def scores(self) -> List[float]: - return [m.fitness for m in self.members] + def members(self) -> Population[T]: + return list(self._frozen_members) + + def __getitem__(self, idx: int): + if not self.is_frozen: + raise HallOfFameNotFrozenError('The hall of fame has not yet been frozen by a completed training run, cannot access members!') + assert isinstance(idx, int) + return self._frozen_members[idx] + + def __len__(self): + if not self.is_frozen: + raise HallOfFameNotFrozenError('The hall of fame has not yet been frozen by a completed training run, cannot access length!') + return len(self._frozen_members) + + def __iter__(self): + if not self.is_frozen: + raise HallOfFameNotFrozenError('The hall of fame has not yet been frozen by a completed training run, cannot access members!') + for i in range(len(self)): + yield self[i] + # ========================================================================= # diff --git a/ruck/_member.py b/ruck/_member.py index 9f55fc0..e1a7c38 100644 --- a/ruck/_member.py +++ b/ruck/_member.py @@ -21,7 +21,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ - +import re from typing import Generic from typing import List from typing import Optional @@ -46,6 +46,9 @@ class MemberAlreadyEvaluatedError(Exception): T = TypeVar('T') +_RE_WHITESPACE = re.compile(r'\s\s+') + + class Member(Generic[T]): def __init__(self, value: T, fitness: float = None): @@ -89,10 +92,10 @@ def __str__(self): return repr(self) def __repr__(self): - value_str = repr(self.value) + value_str = _RE_WHITESPACE.sub(' ', repr(self.value)) # cut short - if len(value_str) > 20: - value_str = f'{value_str[:20]} ...' + if len(value_str) > 33: + value_str = f'{value_str[:14]} ... {value_str[-14:]}' # get fitness fitness_str = f', {self.fitness}' if self.is_evaluated else '' # combine diff --git a/ruck/_train.py b/ruck/_train.py index 4bd611e..01f90d0 100644 --- a/ruck/_train.py +++ b/ruck/_train.py @@ -132,7 +132,7 @@ def fit(self, module: EaModule[T]) -> Tuple[Population[T], Logbook[T], HallOfFam # history trackers logbook, halloffame = self._create_default_trackers(module) # progress bar and training loop - with tqdm(total=self._generations+1, desc='generation', disable=not self._progress, ncols=120) as p: + with tqdm(total=self._generations, desc='generation', disable=not self._progress, ncols=120) as p: for gen, population, offspring, evals in itertools.islice(self._offspring_generator(module), self._generations): # update statistics with new population halloffame.update(offspring) @@ -141,7 +141,7 @@ def fit(self, module: EaModule[T]) -> Tuple[Population[T], Logbook[T], HallOfFam p.update() p.set_postfix({k: stats[k] for k in module.get_progress_stats()}) # done - return population, logbook, halloffame + return population, logbook, halloffame.freeze() def _create_default_trackers(self, module: EaModule[T]) -> Tuple[Logbook[T], HallOfFame[T]]: halloffame = HallOfFame( diff --git a/ruck/functional/_algorithm.py b/ruck/functional/_algorithm.py index 2dd52e7..4519e78 100644 
--- a/ruck/functional/_algorithm.py +++ b/ruck/functional/_algorithm.py @@ -22,10 +22,7 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -from typing import Callable -from typing import List from typing import Optional -from typing import Sequence from typing import Tuple from typing import TypeVar @@ -218,7 +215,7 @@ def select(population, offspring): return generate, select -def factory_mu_and_lambda( +def factory_mu_plus_lambda( mate_fn: MateFnHint[T], mutate_fn: MutateFnHint[T], select_fn: SelectFnHint[T], diff --git a/ruck/functional/_mate.py b/ruck/functional/_mate.py index ebc7d49..efdb718 100644 --- a/ruck/functional/_mate.py +++ b/ruck/functional/_mate.py @@ -35,11 +35,12 @@ # ========================================================================= # +F = TypeVar('F') T = TypeVar('T') MateFnHint = Callable[[T, T], Tuple[T, T]] -def check_mating(fn): +def check_mating(fn: F) -> F: @wraps(fn) def wrapper(value_a: T, value_b: T, *args, **kwargs) -> Tuple[T, T]: mated_a, mated_b = fn(value_a, value_b, *args, **kwargs) diff --git a/ruck/functional/_mutate.py b/ruck/functional/_mutate.py index ea8b8b6..f493982 100644 --- a/ruck/functional/_mutate.py +++ b/ruck/functional/_mutate.py @@ -34,11 +34,12 @@ # ========================================================================= # +F = TypeVar('F') T = TypeVar('T') MutateFnHint = Callable[[T], T] -def check_mutation(fn): +def check_mutation(fn: F) -> F: @wraps(fn) def wrapper(value: T, *args, **kwargs): mutated = fn(value, *args, **kwargs) @@ -58,7 +59,7 @@ def mutate_flip_bits(a: np.ndarray, p: float = 0.05) -> np.ndarray: @check_mutation -def mutate_flip_bit_types(a: np.ndarray, p: float = 0.05) -> np.ndarray: +def mutate_flip_bit_groups(a: np.ndarray, p: float = 0.05) -> np.ndarray: if np.random.random() < 0.5: # flip set bits return a ^ ((np.random.random(a.shape) < p) & a) diff --git a/ruck/functional/_select.py b/ruck/functional/_select.py index 1dfbe70..392560f 
100644 --- a/ruck/functional/_select.py +++ b/ruck/functional/_select.py @@ -35,11 +35,12 @@ # ========================================================================= # +F = TypeVar('F') T = TypeVar('T') SelectFnHint = Callable[[Population[T], int], Population[T]] -def check_selection(fn): +def check_selection(fn: F) -> F: @wraps(fn) def wrapper(population: Population[T], num: int, *args, **kwargs) -> Population[T]: selected = fn(population, num, *args, **kwargs) diff --git a/ruck/util/__init__.py b/ruck/util/__init__.py index 6f42217..1a85507 100644 --- a/ruck/util/__init__.py +++ b/ruck/util/__init__.py @@ -23,9 +23,7 @@ # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -from ruck.util._timer import Timer +from ruck.util._ray import ray_map +from ruck.util._ray import ray_refs_wrapper -from ruck.util._iter import splits -from ruck.util._iter import chained -from ruck.util._ray import ray_mapped -from ruck.util._ray import ray_map_chunks +from ruck.util._timer import Timer diff --git a/ruck/util/_ray.py b/ruck/util/_ray.py index 52467f4..6c5dea2 100644 --- a/ruck/util/_ray.py +++ b/ruck/util/_ray.py @@ -43,7 +43,7 @@ def _to_remote_ray_fn(fn): return fn -def ray_mapped(ray_fn, items: Sequence[Any]) -> List[Any]: +def ray_map(ray_fn, items: Sequence[Any]) -> List[Any]: """ A more convenient alternative to `ray.util.multiprocessing.Pool`s `map` function! Using a similar API to python `map`, except returning a list of mapped values @@ -63,7 +63,7 @@ def ray_mapped(ray_fn, items: Sequence[Any]) -> List[Any]: # ========================================================================= # -def ray_refs_handler(fn = None, get: bool = True, put: bool = True, iter_results: bool = False): +def ray_refs_wrapper(fn = None, get: bool = True, put: bool = True, iter_results: bool = False): """ Wrap a function so that we automatically ray.get all the arguments and ray.put the result. 
@@ -78,7 +78,7 @@ def ray_refs_handler(fn = None, get: bool = True, put: bool = True, iter_results >>> return ray.put(a), ray.put(b) becomes: - >>> @ray_refs_handler(iter_results=True) + >>> @ray_refs_wrapper(iter_results=True) >>> def mate(a, b): >>> return R.mate_crossover_1d(a, b) """ From 4ff73f653f913f59ce9d245d7e1773fa3ff37400 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Sat, 25 Sep 2021 02:51:21 +0200 Subject: [PATCH 14/18] readme additions --- README.md | 225 +++++++++++++++++++++++++++++++++++++ examples/onemax.py | 55 +++------ examples/onemax_minimal.py | 63 +++++++++++ examples/onemax_ray.py | 83 +++++--------- 4 files changed, 330 insertions(+), 96 deletions(-) create mode 100644 examples/onemax_minimal.py diff --git a/README.md b/README.md index 4e08483..8d3baa6 100644 --- a/README.md +++ b/README.md @@ -53,3 +53,228 @@ Please use the following citation if you use Ruck in your research: url = {https://github.com/nmichlo/ruck} } ``` + +## Overview + +Ruck takes inspiration from PyTorch Lightning's module system. The population creation, +offspring, evaluation and selection steps are all contained within a single module inheriting +from `EaModule`, while the training logic and components are separated into their own class. + +`Members` of a `Population` (A list of Members) are intended to be read-only. Modifications should not +be made to members; instead, new members should be created with the modified values. This enables us to +easily implement efficient multi-threading, see below! + +The trainer automatically constructs `HallOfFame` and `Logbook` objects which keep track of your +population and offspring. `EaModule` provides defaults for `get_stats_groups` that can be overridden +if you wish to customize the tracked statistics. 
+ + +### Minimal OneMax Example + +```python +import random +import numpy as np +from ruck import * + + +class OneMaxModule(EaModule): + """ + Minimal onemax example + - The goal is to flip all the bits of a boolean array to True + - Offspring are generated as bit flipped versions of the previous population + - Selection tournament is performed between the previous population and the offspring + """ + + # evaluate unevaluated members according to their values + def evaluate_values(self, values): + return [v.sum() for v in values] + + # generate 300 random members of size 100 with 50% bits flipped + def gen_starting_values(self): + return [np.random.random(100) < 0.5 for _ in range(300)] + + # randomly flip 5% of the bits of each member in the population + # the previous population members should never be modified + def generate_offspring(self, population): + return [Member(m.value ^ (np.random.random(m.value.shape) < 0.05)) for m in population] + + # selection tournament between population and offspring + def select_population(self, population, offspring): + combined = population + offspring + return [max(random.sample(combined, k=3), key=lambda m: m.fitness) for _ in range(len(population))] + + +if __name__ == '__main__': + # create and train the population + module = OneMaxModule() + pop, logbook, halloffame = Trainer(generations=100, progress=True).fit(module) + + print('initial stats:', logbook[0]) + print('final stats:', logbook[-1]) + print('best member:', halloffame.members[0]) +``` + +### Advanced OneMax Example + +Ruck provides various helper functions and implementations of evolutionary algorithms for convenience. +The following example makes use of these additional features so that components and behaviour can +easily be swapped out. + +The three basic evolutionary algorithms provided are based around [deap's](http://www.github.com/deap/deap) +default algorithms from `deap.algorithms`: `eaSimple`, `eaMuPlusLambda`, and `eaMuCommaLambda`. 
These +algorithms can be accessed from `ruck.functional` which has the alias `R`: `R.factory_simple_ea`, +`R.factory_mu_plus_lambda` and `R.factory_mu_comma_lambda`. + + +
Code Example +

+ +```python +""" +OneMax serial example based on: +https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py +""" + +import functools +import numpy as np +from ruck import * + + +class OneMaxModule(EaModule): + + def __init__( + self, + population_size: int = 300, + member_size: int = 100, + p_mate: float = 0.5, + p_mutate: float = 0.5, + ): + # save the arguments to the .hparams property. values are taken from the + # local scope so modifications can be captured if the call to this is delayed. + self.save_hyperparameters() + # implement the required functions for `EaModule` + self.generate_offspring, self.select_population = R.factory_simple_ea( + mate_fn=R.mate_crossover_1d, + mutate_fn=functools.partial(R.mutate_flip_bit_groups, p=0.05), + select_fn=functools.partial(R.select_tournament, k=3), + p_mate=self.hparams.p_mate, + p_mutate=self.hparams.p_mutate, + ) + + def evaluate_values(self, values): + return map(np.sum, values) + + def gen_starting_values(self) -> Population: + return [ + np.random.random(self.hparams.member_size) < 0.5 + for i in range(self.hparams.population_size) + ] + + +if __name__ == '__main__': + # create and train the population + module = OneMaxModule(population_size=300, member_size=100) + pop, logbook, halloffame = Trainer(generations=40, progress=True).fit(module) + + print('initial stats:', logbook[0]) + print('final stats:', logbook[-1]) + print('best member:', halloffame.members[0]) +``` + +

+
+ +### Multithreading OneMax Example (Ray) + +If we need to scale up the computational requirements, for example requiring increased +member and population sizes, the above serial implementations will soon run into performance problems. + +The basic Ruck implementations of various evolutionary algorithms are designed around a `map` +function that can be swapped out to add multi-threading support. We can easily do this using +[ray](https://github.com/ray-project/ray) and we even provide various helper functions that +enhance ray support. + +1. We begin by placing members' values into shared memory using ray's read-only object store +and the `ray.put` function. These [ObjectRef's](https://docs.ray.io/en/latest/memory-management.html) +values point to the original `np.ndarray` values. When retrieved with `ray.get` they obtain the original +arrays using an efficient zero-copy procedure. This is advantageous over something like Python's multiprocessing module which uses +expensive pickle operations to pass data around. + +2. The second step is to swap out the aforementioned `map` function in the previous example to a +multiprocessing equivalent. We provide the `ray_map` function that can be used instead, which +automatically wraps functions using `ray.remote`, and provides additional benefits when using `ObjectRef`s. + +3. Finally we need to update our `mate` and `mutate` functions to handle `ObjectRef`s. We provide a convenient +wrapper to automatically call `ray.get` on function arguments and `ray.put` on function results so that +you can re-use your existing code. + +
Code Example +

+ +```python +""" +OneMax parallel example using ray's object store. + +8 bytes * 1_000_000 * 128 members ~= 128 MB of memory to store this population. +This is quite a bit of processing that needs to happen! But using ray +and its object store we can do this efficiently! +""" + +from functools import partial +import numpy as np +import ray +from ruck import * +from ruck.util import * + + +class OneMaxRayModule(EaModule): + + def __init__( + self, + population_size: int = 300, + member_size: int = 100, + p_mate: float = 0.5, + p_mutate: float = 0.5, + ): + self.save_hyperparameters() + # implement the required functions for `EaModule` + # - decorate the functions with `ray_refs_wrapper` which + # automatically `ray.get` arguments and `ray.put` returned results + self.generate_offspring, self.select_population = R.factory_simple_ea( + mate_fn=ray_refs_wrapper(R.mate_crossover_1d, iter_results=True), + mutate_fn=ray_refs_wrapper(partial(R.mutate_flip_bit_groups, p=0.05)), + select_fn=partial(R.select_tournament, k=3), # OK to compute locally, because we only look at the fitness + p_mate=self.hparams.p_mate, + p_mutate=self.hparams.p_mutate, + map_fn=ray_map, # specify the map function to enable multiprocessing + ) + + def evaluate_values(self, values): + # values is a list of `ray.ObjectRef`s not `np.ndarray`s + # ray_map automatically converts np.sum to a `ray.remote` function which + # automatically handles `ray.get`ing of `ray.ObjectRef`s passed as arguments + return ray_map(np.sum, values) + + def gen_starting_values(self): + # generate objects and place in ray's object store + return [ + ray.put(np.random.random(self.hparams.member_size) < 0.5) + for i in range(self.hparams.population_size) + ] + + +if __name__ == '__main__': + # initialize ray to use the specified system resources + ray.init() + + # create and train the population + module = OneMaxRayModule(population_size=128, member_size=1_000_000) + pop, logbook, halloffame = Trainer(generations=100, 
progress=True).fit(module) + + print('initial stats:', logbook[0]) + print('final stats:', logbook[-1]) + print('best member:', halloffame.members[0]) +``` + +

+
diff --git a/examples/onemax.py b/examples/onemax.py index 3e3c891..20321c1 100644 --- a/examples/onemax.py +++ b/examples/onemax.py @@ -22,32 +22,18 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -import functools -import logging -from typing import Any -from typing import List +""" +OneMax serial example based on: +https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py +""" +import functools import numpy as np - -from ruck import EaModule -from ruck import Population -from ruck import R -from ruck import Trainer -from ruck.util import Timer - - -# ========================================================================= # -# Module # -# ========================================================================= # +from ruck import * class OneMaxModule(EaModule): - # trick pycharm overrides error checking against `EaModule` - # it doesn't like that we set the values in the constructor! - generate_offspring = None - select_population = None - def __init__( self, population_size: int = 300, @@ -61,15 +47,13 @@ def __init__( # implement the required functions for `EaModule` self.generate_offspring, self.select_population = R.factory_simple_ea( mate_fn=R.mate_crossover_1d, - mutate_fn=functools.partial(R.mutate_flip_bits, p=0.05), - select_fn=functools.partial(R.select_tournament, k=3), # tools.selNSGA2 + mutate_fn=functools.partial(R.mutate_flip_bit_groups, p=0.05), + select_fn=functools.partial(R.select_tournament, k=3), p_mate=self.hparams.p_mate, p_mutate=self.hparams.p_mutate, ) - def evaluate_values(self, values: List[Any]): - # this is a large reason why the deap version is slow, - # it does not make use of numpy operations + def evaluate_values(self, values): return map(np.sum, values) def gen_starting_values(self) -> Population: @@ -78,25 +62,12 @@ def gen_starting_values(self) -> Population: for i in range(self.hparams.population_size) ] -# 
========================================================================= # -# Main # -# ========================================================================= # - if __name__ == '__main__': - # about 15x faster than deap's numpy onemax example (0.17s vs 2.6s) - # -- https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py - - logging.basicConfig(level=logging.INFO) - - with Timer('ruck:trainer'): - module = OneMaxModule(population_size=300, member_size=100) - pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) + # create and train the population + module = OneMaxModule(population_size=300, member_size=100) + pop, logbook, halloffame = Trainer(generations=40, progress=True).fit(module) print('initial stats:', logbook[0]) print('final stats:', logbook[-1]) - - -# ========================================================================= # -# END # -# ========================================================================= # + print('best member:', halloffame.members[0]) diff --git a/examples/onemax_minimal.py b/examples/onemax_minimal.py new file mode 100644 index 0000000..7a21372 --- /dev/null +++ b/examples/onemax_minimal.py @@ -0,0 +1,63 @@ +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + + +import random +import numpy as np +from ruck import * + + +class OneMaxModule(EaModule): + """ + Minimal onemax example + - The goal is to flip all the bits of a boolean array to True + - Offspring are generated as bit flipped versions of the previous population + - Selection tournament is performed between the previous population and the offspring + """ + + # evaluate unevaluated members according to their values + def evaluate_values(self, values): + return [v.sum() for v in values] + + # generate 300 random members of size 100 with 50% bits flipped + def gen_starting_values(self): + return [np.random.random(100) < 0.5 for _ in range(300)] + + # randomly flip 5% of the bits of each member in the population + # the previous population members should never be modified + def generate_offspring(self, population): + return [Member(m.value ^ (np.random.random(m.value.shape) < 0.05)) for m in population] + + # selection tournament between population and offspring + def select_population(self, population, offspring): + combined = population + offspring + return [max(random.sample(combined, k=3), key=lambda m: m.fitness) for _ in range(len(population))] + + +if __name__ == '__main__': + # create and train the population + module = OneMaxModule() + pop, logbook, halloffame = Trainer(generations=100, progress=True).fit(module) + + print('initial stats:', logbook[0]) + print('final stats:', logbook[-1]) + 
print('best member:', halloffame.members[0]) diff --git a/examples/onemax_ray.py b/examples/onemax_ray.py index 7671335..d2691fd 100644 --- a/examples/onemax_ray.py +++ b/examples/onemax_ray.py @@ -22,36 +22,22 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +""" +OneMax parallel example using ray's object store. -import functools -import logging -from typing import List +1 byte * 1_000_000 * 128 members ~= 128 MB of memory to store this population. +This is quite a bit of processing that needs to happen! But using ray +and its object store we can do this efficiently! +""" +from functools import partial import numpy as np -import psutil import ray -from ray import ObjectRef +from ruck import * +from ruck.util import * -from ruck import EaModule -from ruck import Population -from ruck import R -from ruck import Trainer -from ruck.util import ray_mapped -from ruck.util import Timer -from ruck.util._ray import ray_refs_handler - -# ========================================================================= # -# Module # -# ========================================================================= # - - -class OneMaxRayModule(EaModule[ObjectRef]): - - # trick pycharm overrides error checking against `EaModule` - # it doesn't like that we set the values in the constructor! - generate_offspring = None - select_population = None +class OneMaxRayModule(EaModule): def __init__( self, @@ -60,52 +46,41 @@ def __init__( p_mate: float = 0.5, p_mutate: float = 0.5, ): - # save the arguments to the .hparams property. values are taken from the - # local scope so modifications can be captured if the call to this is delayed. 
self.save_hyperparameters() # implement the required functions for `EaModule` + # - decorate the functions with `ray_refs_wrapper` which + # automatically `ray.get` arguments and `ray.put` returned results self.generate_offspring, self.select_population = R.factory_simple_ea( - mate_fn=ray_refs_handler(R.mate_crossover_1d, iter_results=True), - mutate_fn=ray_refs_handler(functools.partial(R.mutate_flip_bits, p=0.05)), - select_fn=functools.partial(R.select_tournament, k=3), # tools.selNSGA2 + mate_fn=ray_refs_wrapper(R.mate_crossover_1d, iter_results=True), + mutate_fn=ray_refs_wrapper(partial(R.mutate_flip_bit_groups, p=0.05)), + select_fn=partial(R.select_tournament, k=3), # OK to compute locally, because we only look at the fitness p_mate=self.hparams.p_mate, p_mutate=self.hparams.p_mutate, - map_fn=ray_mapped, + map_fn=ray_map, # specify the map function to enable multiprocessing ) - def evaluate_values(self, values: List[ObjectRef]) -> List[float]: - # this is a large reason why the deap version is slow, - # it does not make use of numpy operations - return ray_mapped(np.sum, values) + def evaluate_values(self, values): + # values is a list of `ray.ObjectRef`s not `np.ndarray`s + # ray_map automatically converts np.sum to a `ray.remote` function which + # automatically handles `ray.get`ing of `ray.ObjectRef`s passed as arguments + return ray_map(np.sum, values) - def gen_starting_values(self) -> Population[ObjectRef]: + def gen_starting_values(self): + # generate objects and place in ray's object store return [ ray.put(np.random.random(self.hparams.member_size) < 0.5) for i in range(self.hparams.population_size) ] -# ========================================================================= # -# Main # -# ========================================================================= # - - if __name__ == '__main__': - # about 15x faster than deap's numpy onemax example (0.17s vs 2.6s) - # -- https://github.com/DEAP/deap/blob/master/examples/ga/onemax_numpy.py - - 
logging.basicConfig(level=logging.INFO) - - ray.init(num_cpus=min(psutil.cpu_count(), 16)) + # initialize ray to use the specified system resources + ray.init() - with Timer('ruck:trainer'): - module = OneMaxRayModule(population_size=512, member_size=1_000_000) - pop, logbook, halloffame = Trainer(generations=100, progress=False).fit(module) + # create and train the population + module = OneMaxRayModule(population_size=128, member_size=1_000_000) + pop, logbook, halloffame = Trainer(generations=100, progress=True).fit(module) print('initial stats:', logbook[0]) print('final stats:', logbook[-1]) - - -# ========================================================================= # -# END # -# ========================================================================= # + print('best member:', halloffame.members[0]) From 37ed8e833f53b1a8b7c66b07aa054766b9b7bf6a Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Sat, 25 Sep 2021 02:51:38 +0200 Subject: [PATCH 15/18] initial tests --- tests/test.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/tests/test.py b/tests/test.py index 51cacab..5eeb446 100644 --- a/tests/test.py +++ b/tests/test.py @@ -23,13 +23,43 @@ # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +import random +import numpy as np +from ruck import Member +from ruck.functional import apply_mate + + # ========================================================================= # # TESTS # # ========================================================================= # -def test_tests(): - assert True +def test_mate_keep_order(): + random.seed(77) + np.random.seed(77) + # checks + offspring = apply_mate( + population=[Member(c) for c in 'abcde'], + mate_fn=lambda a, b: (a.upper(), b.upper()), + p=0.5, + keep_order=True, + ) + # done + assert ''.join(m.value for m in offspring) == 'ABcde' + + +def test_mate_random_order(): + random.seed(77) + np.random.seed(77) + # checks + offspring 
= apply_mate( + population=[Member(c) for c in 'abcde'], + mate_fn=lambda a, b: (a.upper(), b.upper()), + p=0.5, + keep_order=False, + ) + # done + assert ''.join(m.value for m in offspring) == 'cdBAe' # ========================================================================= # From eb46d24a488727d4351011e6925c4b6fc6e3a818 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Sat, 25 Sep 2021 03:12:08 +0200 Subject: [PATCH 16/18] delete mkdocs --- .readthedocs.yml | 19 ------------------- docs/index.md | 25 ------------------------- docs/requirements.txt | 7 ------- mkdocs.yml | 33 --------------------------------- 4 files changed, 84 deletions(-) delete mode 100644 .readthedocs.yml delete mode 100644 docs/index.md delete mode 100644 docs/requirements.txt delete mode 100644 mkdocs.yml diff --git a/.readthedocs.yml b/.readthedocs.yml deleted file mode 100644 index cc39a88..0000000 --- a/.readthedocs.yml +++ /dev/null @@ -1,19 +0,0 @@ -# .readthedocs.yml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -mkdocs: - configuration: mkdocs.yml - fail_on_warning: false - -# Optionally build your docs in additional formats such as PDF -formats: all - -# Optionally set the version of Python and requirements required to build your docs -python: - version: 3.8 - install: - - requirements: docs/requirements.txt diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 65a1d35..0000000 --- a/docs/index.md +++ /dev/null @@ -1,25 +0,0 @@ -# Ruck - -Performant evolutionary algorithms for Python. - -## Goals - -Ruck aims to fill the following criteria: - -1. Provide **high quality**, **readable** implementations of algorithms. -2. Be easily **extensible** and **debuggable**. -3. Performant while maintaining its simplicity. 
- -## Citing Ruck - -Please use the following citation if you use Ruck in your research: - -```bibtex -@Misc{Michlo2021Ruck, - author = {Nathan Juraj Michlo}, - title = {Ruck - Performant evolutionary algorithms for Python}, - howpublished = {Github}, - year = {2021}, - url = {https://github.com/nmichlo/ruck} -} -``` diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index b86d1a4..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ - -mkdocs == 1.1.2 -mkdocstrings == 0.14.0 -mkdocs-material == 6.2.5 -mkdocs-git-revision-date-localized-plugin == 0.8 -# pygments == 2.7.4 -# pymdown-extensions == 8.1 diff --git a/mkdocs.yml b/mkdocs.yml deleted file mode 100644 index f6eef57..0000000 --- a/mkdocs.yml +++ /dev/null @@ -1,33 +0,0 @@ -site_name: 🧬 Ruck Docs -repo_url: https://github.com/nmichlo/ruck -repo_name: nmichlo/ruck -theme: - name: material - palette: - scheme: default - primary: green - icon: - repo: fontawesome/brands/github - logo: material/library - favicon: images/favicon.png -plugins: - - search - - mkdocstrings # reference functions and code in markdown `::: module.class.func` - - git-revision-date-localized: # visible last edit date on each page - type: date - fallback_to_build_date: false -markdown_extensions: - - admonition - - pymdownx.details - - pymdownx.highlight - - pymdownx.inlinehilite - - pymdownx.superfences - - pymdownx.snippets - - pymdownx.tabbed - - pymdownx.arithmatex: - generic: true -# THE !! 
CURRENTLY BREAKS READTHEDOCS -# https://github.com/readthedocs/readthedocs.org/issues/7865 -# - pymdownx.emoji: -# emoji_index: !!python/name:materialx.emoji.twemoji -# emoji_generator: !!python/name:materialx.emoji.to_svg From 41c58272a0aad8405c2ab4ce5836b341748d6031 Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Sat, 25 Sep 2021 03:12:31 +0200 Subject: [PATCH 17/18] proper tests 86% cov --- README.md | 4 +- examples/onemax_minimal.py | 4 +- ruck/_member.py | 2 +- tests/test.py | 99 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 101 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8d3baa6..4009b92 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ import numpy as np from ruck import * -class OneMaxModule(EaModule): +class OneMaxMinimalModule(EaModule): """ Minimal onemax example - The goal is to flip all the bits of a boolean array to True @@ -106,7 +106,7 @@ class OneMaxModule(EaModule): if __name__ == '__main__': # create and train the population - module = OneMaxModule() + module = OneMaxMinimalModule() pop, logbook, halloffame = Trainer(generations=100, progress=True).fit(module) print('initial stats:', logbook[0]) diff --git a/examples/onemax_minimal.py b/examples/onemax_minimal.py index 7a21372..1d30f8e 100644 --- a/examples/onemax_minimal.py +++ b/examples/onemax_minimal.py @@ -26,7 +26,7 @@ from ruck import * -class OneMaxModule(EaModule): +class OneMaxMinimalModule(EaModule): """ Minimal onemax example - The goal is to flip all the bits of a boolean array to True @@ -55,7 +55,7 @@ def select_population(self, population, offspring): if __name__ == '__main__': # create and train the population - module = OneMaxModule() + module = OneMaxMinimalModule() pop, logbook, halloffame = Trainer(generations=100, progress=True).fit(module) print('initial stats:', logbook[0]) diff --git a/ruck/_member.py b/ruck/_member.py index e1a7c38..283af9a 100644 --- a/ruck/_member.py +++ b/ruck/_member.py @@ -95,7 +95,7 @@ def 
__repr__(self): value_str = _RE_WHITESPACE.sub(' ', repr(self.value)) # cut short if len(value_str) > 33: - value_str = f'{value_str[:14]} ... {value_str[-14:]}' + value_str = f'{value_str[:14].rstrip(" ")} ... {value_str[-14:].lstrip(" ")}' # get fitness fitness_str = f', {self.fitness}' if self.is_evaluated else '' # combine diff --git a/tests/test.py b/tests/test.py index 5eeb446..8d6f2ee 100644 --- a/tests/test.py +++ b/tests/test.py @@ -23,10 +23,16 @@ # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +import functools import random import numpy as np +import pytest + +from examples.onemax import OneMaxModule +from examples.onemax_minimal import OneMaxMinimalModule from ruck import Member -from ruck.functional import apply_mate +from ruck import Trainer +from ruck import R # ========================================================================= # @@ -38,7 +44,7 @@ def test_mate_keep_order(): random.seed(77) np.random.seed(77) # checks - offspring = apply_mate( + offspring = R.apply_mate( population=[Member(c) for c in 'abcde'], mate_fn=lambda a, b: (a.upper(), b.upper()), p=0.5, @@ -52,7 +58,7 @@ def test_mate_random_order(): random.seed(77) np.random.seed(77) # checks - offspring = apply_mate( + offspring = R.apply_mate( population=[Member(c) for c in 'abcde'], mate_fn=lambda a, b: (a.upper(), b.upper()), p=0.5, @@ -62,6 +68,93 @@ def test_mate_random_order(): assert ''.join(m.value for m in offspring) == 'cdBAe' +def test_onemax_minimal(): + module = OneMaxMinimalModule() + pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) + assert logbook[0]['fit:max'] < logbook[-1]['fit:max'] + + +def test_onemax(): + module = OneMaxModule(population_size=300, member_size=100) + pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) + assert logbook[0]['fit:max'] < logbook[-1]['fit:max'] + + +def test_onemax_ea_simple(): + module = OneMaxModule(population_size=300, member_size=100) + + # 
EA SIMPLE + module.generate_offspring, module.select_population = R.factory_simple_ea( + mate_fn=R.mate_crossover_1d, + mutate_fn=functools.partial(R.mutate_flip_bit_groups, p=0.05), + select_fn=functools.partial(R.select_tournament, k=3), + p_mate=module.hparams.p_mate, + p_mutate=module.hparams.p_mutate, + ) + + pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) + assert logbook[0]['fit:max'] < logbook[-1]['fit:max'] + + +def test_onemax_mu_plus_lambda(): + module = OneMaxModule(population_size=300, member_size=100) + + # MU PLUS LAMBDA + module.generate_offspring, module.select_population = R.factory_mu_plus_lambda( + mate_fn=R.mate_crossover_1d, + mutate_fn=functools.partial(R.mutate_flip_bit_groups, p=0.05), + select_fn=functools.partial(R.select_tournament, k=3), + offspring_num=250, + p_mate=module.hparams.p_mate, + p_mutate=module.hparams.p_mutate, + ) + + pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) + assert logbook[0]['fit:max'] < logbook[-1]['fit:max'] + + +def test_onemax_mu_comma_lambda(): + module = OneMaxModule(population_size=300, member_size=100) + + # MU COMMA LAMBDA + module.generate_offspring, module.select_population = R.factory_mu_comma_lambda( + mate_fn=R.mate_crossover_1d, + mutate_fn=functools.partial(R.mutate_flip_bit_groups, p=0.05), + select_fn=functools.partial(R.select_tournament, k=3), + offspring_num=250, # INVALID + p_mate=module.hparams.p_mate, + p_mutate=module.hparams.p_mutate, + ) + + with pytest.raises(AssertionError, match=r'invalid arguments, the number of offspring: 250 \(lambda\) must be greater than or equal to the size of the population: 300 \(mu\)'): + pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) + + # MU COMMA LAMBDA + module.generate_offspring, module.select_population = R.factory_mu_comma_lambda( + mate_fn=R.mate_crossover_1d, + mutate_fn=functools.partial(R.mutate_flip_bit_groups, p=0.05), + 
select_fn=functools.partial(R.select_tournament, k=3), + offspring_num=400, + p_mate=module.hparams.p_mate, + p_mutate=module.hparams.p_mutate, + ) + + pop, logbook, halloffame = Trainer(generations=40, progress=False).fit(module) + assert logbook[0]['fit:max'] < logbook[-1]['fit:max'] + + + +def test_member(): + m = Member('abc') + assert str(m) == "Member('abc')" + m = Member('abc', 0.5) + assert str(m) == "Member('abc', 0.5)" + m = Member('abc'*100, 0.5) + assert str(m) == "Member('abcabcabcabca ... cabcabcabcabc', 0.5)" + m = Member('abc '*100, 0.5) + assert str(m) == "Member('abc abc abc a ... abc abc abc ', 0.5)" + + # ========================================================================= # # END # # ========================================================================= # From 1916ae72eaa9d8869a0ac37765b3493ae80d084b Mon Sep 17 00:00:00 2001 From: Nathan Michlo Date: Sat, 25 Sep 2021 03:16:09 +0200 Subject: [PATCH 18/18] add ray requirement --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 9e5bf19..f1f65b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ pip>=21.0 numpy>=1.21.0 tqdm>=4.60.0 +ray>=1.6.0