Skip to content
This repository has been archived by the owner on Dec 24, 2019. It is now read-only.

Add scale-down-step functionality #45

Merged
merged 5 commits into from
Dec 16, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ The following command line options are supported:
Time to sleep between runs in seconds, defaults to 60 seconds.
``--once``
Only run once and exit (useful for debugging).
``--scale-down-step-fixed``
Scale down step in terms of node count, defaults to 1.
``--scale-down-step-percentage``
Scale down step in terms of node percentage, given as a fraction (1.0 is 100%); defaults to 0.0 (0%).


.. _"official" cluster-autoscaler: https://github.com/kubernetes/autoscaler
Expand Down
53 changes: 46 additions & 7 deletions kube_aws_autoscaler/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import collections
import itertools
import logging
import math
import os
import re
import time
Expand Down Expand Up @@ -192,7 +193,15 @@ def format_resource(value: float, resource: str):
return '{:.0f}'.format(value)


def slow_down_downscale(asg_sizes: dict, nodes_by_asg_zone: dict):
def slow_down_downscale(asg_sizes: dict, nodes_by_asg_zone: dict, scale_down_step_fixed: int, scale_down_step_percentage: float):
# validate scale-down-step-fixed, must be >= 1
if scale_down_step_fixed < 1:
raise ValueError('scale-down-step-fixed must be >= 1')

# validate scale-down-step-percentage, must be 0 <= value < 1
if scale_down_step_percentage < 0 or scale_down_step_percentage >= 1:
raise ValueError('scale-down-step-precentage value must be: 0 < value <= 1')

node_counts_by_asg = collections.defaultdict(int)
for key, nodes in sorted(nodes_by_asg_zone.items()):
asg_name, zone = key
Expand All @@ -201,7 +210,12 @@ def slow_down_downscale(asg_sizes: dict, nodes_by_asg_zone: dict):
for asg_name, desired_size in sorted(asg_sizes.items()):
amount_of_downscale = node_counts_by_asg[asg_name] - desired_size
if amount_of_downscale >= 2:
new_desired_size = node_counts_by_asg[asg_name] - 1
new_desired_size_fixed = node_counts_by_asg[asg_name] - scale_down_step_fixed
new_desired_size_percentage = max(desired_size, int(math.ceil((1.00 - scale_down_step_percentage) * node_counts_by_asg[asg_name])))
if new_desired_size_percentage == node_counts_by_asg[asg_name]:
new_desired_size_percentage = new_desired_size_fixed
new_desired_size = min(new_desired_size_fixed, new_desired_size_percentage)

logger.info('Slowing down downscale: changing desired size of ASG {} from {} to {}'.format(asg_name, desired_size, new_desired_size))
asg_sizes[asg_name] = new_desired_size

Expand Down Expand Up @@ -361,8 +375,10 @@ def start_health_endpoint():
app.run(host='0.0.0.0', port=5000)


def autoscale(buffer_percentage: dict, buffer_fixed: dict, buffer_spare_nodes: int=0,
include_master_nodes: bool=False, dry_run: bool=False, disable_scale_down: bool=False):
def autoscale(buffer_percentage: dict, buffer_fixed: dict,
scale_down_step_fixed: int, scale_down_step_percentage: float,
buffer_spare_nodes: int = 0, include_master_nodes: bool=False,
dry_run: bool=False, disable_scale_down: bool=False):
api = get_kube_api()

all_nodes = get_nodes(api, include_master_nodes)
Expand All @@ -378,7 +394,7 @@ def autoscale(buffer_percentage: dict, buffer_fixed: dict, buffer_spare_nodes: i
usage_by_asg_zone = calculate_usage_by_asg_zone(pods, nodes_by_name)
asg_size = calculate_required_auto_scaling_group_sizes(nodes_by_asg_zone, usage_by_asg_zone, buffer_percentage, buffer_fixed,
buffer_spare_nodes=buffer_spare_nodes, disable_scale_down=disable_scale_down)
asg_size = slow_down_downscale(asg_size, nodes_by_asg_zone)
asg_size = slow_down_downscale(asg_size, nodes_by_asg_zone, scale_down_step_fixed, scale_down_step_percentage)
ready_nodes_by_asg = get_ready_nodes_by_asg(nodes_by_asg_zone)
resize_auto_scaling_groups(autoscaling, asg_size, ready_nodes_by_asg, dry_run)

Expand All @@ -398,15 +414,34 @@ def main():
parser.add_argument('--enable-healthcheck-endpoint', help='Enable Healtcheck',
action='store_true')
parser.add_argument('--no-scale-down', help='Disable scaling down', action='store_true')

for resource in RESOURCES:
parser.add_argument('--buffer-{}-percentage'.format(resource), type=float,
help='{} buffer %%'.format(resource.capitalize()),
default=os.getenv('BUFFER_{}_PERCENTAGE'.format(resource.upper()), DEFAULT_BUFFER_PERCENTAGE[resource]))
parser.add_argument('--buffer-{}-fixed'.format(resource), type=str,
help='{} buffer (fixed amount)'.format(resource.capitalize()),
default=os.getenv('BUFFER_{}_FIXED'.format(resource.upper()), DEFAULT_BUFFER_FIXED[resource]))

parser.add_argument('--scale-down-step-fixed',
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add some os.getenv default (similar to the --buffer-* options) as we are configuring our downscaler via a config map (envFrom).

help='Scale down strategy expressed in terms of instances count, defaults to 1. Note: value must be >= 1',
type=int, default=os.getenv('SCALE_DOWN_STEP_FIXED', 1))
parser.add_argument('--scale-down-step-percentage',
help='Scale down strategy expressed in terms of instances count, defaults to 0.01, i.e. 1%.',
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the default is 0.0, but the help says 0.01 😏

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah... nice catch.

type=float, default=os.getenv('SCALE_DOWN_STEP_PRECENTAGE', 0.0))

args = parser.parse_args()

# validate scale-down-step values
if args.scale_down_step_fixed < 1:
msg = 'Invalid scale-down-step-fixed value: {}'.format(args.scale_down_step_fixed)
logger.exception(msg)
raise ValueError(msg)
if args.scale_down_step_percentage < 0 or args.scale_down_step_percentage > 1:
msg = 'Invalid scale-down-step-percentage value: {}'.format(args.scale_down_step_percentage)
logger.exception(msg)
raise ValueError(msg)

logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.DEBUG if args.debug else logging.INFO)
logging.getLogger('botocore').setLevel(logging.WARN)

Expand All @@ -425,8 +460,12 @@ def main():

while True:
try:
autoscale(buffer_percentage, buffer_fixed, buffer_spare_nodes=args.buffer_spare_nodes,
include_master_nodes=args.include_master_nodes, dry_run=args.dry_run, disable_scale_down=args.no_scale_down)
autoscale(buffer_percentage, buffer_fixed,
scale_down_step_fixed=args.scale_down_step_fixed,
scale_down_step_percentage=args.scale_down_step_percentage,
buffer_spare_nodes=args.buffer_spare_nodes,
include_master_nodes=args.include_master_nodes, dry_run=args.dry_run,
disable_scale_down=args.no_scale_down)
except Exception:
global Healthy
Healthy = False
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
import io

from setuptools import find_packages, setup
from setuptools.command.test import test as TestCommand
Expand Down Expand Up @@ -40,7 +41,7 @@ def run_tests(self):


def readme():
    """Return the full text of ``README.rst``.

    The path is relative, so the file is looked up in the current working
    directory. It is decoded as UTF-8.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with io.open('README.rst', encoding='utf-8') as readme_file:
        return readme_file.read()


tests_require = [
Expand Down
78 changes: 67 additions & 11 deletions tests/test_autoscaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def test_autoscale(monkeypatch):

buffer_percentage = {}
buffer_fixed = {}
autoscale(buffer_percentage, buffer_fixed, False)
autoscale(buffer_percentage, buffer_fixed, 1, 0.0, False)
boto3_client.return_value.set_desired_capacity.assert_called_with(AutoScalingGroupName='a1', DesiredCapacity=2)


Expand Down Expand Up @@ -394,7 +394,7 @@ def test_autoscale_node_without_asg(monkeypatch):

buffer_percentage = {}
buffer_fixed = {}
autoscale(buffer_percentage, buffer_fixed, False)
autoscale(buffer_percentage, buffer_fixed, 1, 0.0, False)


def test_main(monkeypatch):
Expand All @@ -405,7 +405,9 @@ def test_main(monkeypatch):
autoscale.assert_called_once_with(
{'memory': 10, 'pods': 10, 'cpu': 10},
{'memory': 209715200, 'pods': 10, 'cpu': 0.2},
buffer_spare_nodes=1, include_master_nodes=False, dry_run=True, disable_scale_down=False)
buffer_spare_nodes=1, include_master_nodes=False, dry_run=True, disable_scale_down=False,
scale_down_step_fixed=1, scale_down_step_percentage=0.0
)

autoscale.side_effect = ValueError

Expand All @@ -414,6 +416,27 @@ def test_main(monkeypatch):
with pytest.raises(Exception):
main()

def test_main_step_down(monkeypatch):
    """main() raises ValueError for out-of-range scale-down-step options."""
    # Each entry pairs a rejected command line option with the text that is
    # expected to appear in the raised ValueError.
    rejected_options = (
        ('--scale-down-step-fixed=0', 'Invalid scale-down-step-fixed value: 0'),
        ('--scale-down-step-fixed=-1', 'Invalid scale-down-step-fixed value: -1'),
        ('--scale-down-step-percentage=-0.0001', 'Invalid scale-down-step-percentage value: -0.0001'),
        ('--scale-down-step-percentage=1.0001', 'Invalid scale-down-step-percentage value: 1.0001'),
    )

    for option, expected_message in rejected_options:
        monkeypatch.setattr('sys.argv', ['foo', '--once', '--dry-run', option])
        with pytest.raises(ValueError) as err:
            main()
        assert expected_message in str(err.value)


def test_format_resource():
assert format_resource(1, 'cpu') == '1.0'
Expand All @@ -423,14 +446,47 @@ def test_format_resource():


def test_slow_down_downscale():
    """Exercise slow_down_downscale() argument validation and step sizes.

    Every call passes ``scale_down_step_fixed`` and
    ``scale_down_step_percentage`` explicitly, because the function takes
    them as required positional parameters.
    """
    # assert invalid scale-down-step-fixed value
    with pytest.raises(ValueError) as err:
        slow_down_downscale({}, {}, 0, 0.0)
    assert 'scale-down-step-fixed must be >= 1' in str(err.value)

    # assert invalid scale-down-step-percentage (negative)
    with pytest.raises(ValueError) as err:
        slow_down_downscale({}, {}, 1, -1)
    assert 'scale-down-step-precentage value must be: 0 < value <= 1' in str(err.value)

    # assert invalid scale-down-step-percentage (greater than 100%)
    with pytest.raises(ValueError) as err:
        slow_down_downscale({}, {}, 1, 1.1)
    assert 'scale-down-step-precentage value must be: 0 < value <= 1' in str(err.value)

    # nothing to do for empty input
    assert slow_down_downscale({}, {}, 1, 0.0) == {}

    # scale up: the desired size must be passed through unchanged
    assert slow_down_downscale({'a1': 2}, {('a1', 'z1'): [{}]}, 1, 0.0) == {'a1': 2}
    assert slow_down_downscale({'a1': 10}, {('a1', 'z1'): [{}]}, 1, 0.0) == {'a1': 10}

    # test with 1 step fixed and 0 step percentage
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}]}, 1, 0.0) == {'a1': 1}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}]}, 1, 0.0) == {'a1': 1}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}]}, 1, 0.0) == {'a1': 2}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}], ('a1', 'z2'): [{}]}, 1, 0.0) == {'a1': 2}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}], ('a1', 'z2'): [{}, {}]}, 1, 0.0) == {'a1': 3}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}], ('a1', 'z2'): [{}, {}]}, 1, 0.0) == {'a1': 4}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}], ('a1', 'z2'): [{}, {}, {}]}, 1, 0.0) == {'a1': 5}

    # test with 1 step fixed and 1% step percentage (same results as above:
    # for these node counts a 1% step rounds back up to the current size, so
    # the fixed step is the one that applies)
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}]}, 1, 0.01) == {'a1': 1}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}]}, 1, 0.01) == {'a1': 1}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}]}, 1, 0.01) == {'a1': 2}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}], ('a1', 'z2'): [{}]}, 1, 0.01) == {'a1': 2}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}], ('a1', 'z2'): [{}, {}]}, 1, 0.01) == {'a1': 3}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}], ('a1', 'z2'): [{}, {}]}, 1, 0.01) == {'a1': 4}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}], ('a1', 'z2'): [{}, {}, {}]}, 1, 0.01) == {'a1': 5}

    # test with 1 step fixed and 50% step percentage
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}]}, 1, 0.5) == {'a1': 1}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}]}, 1, 0.5) == {'a1': 1}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}]}, 1, 0.5) == {'a1': 2}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}], ('a1', 'z2'): [{}]}, 1, 0.5) == {'a1': 2}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}], ('a1', 'z2'): [{}, {}]}, 1, 0.5) == {'a1': 2}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}], ('a1', 'z2'): [{}, {}]}, 1, 0.5) == {'a1': 3}
    assert slow_down_downscale({'a1': 1}, {('a1', 'z1'): [{}, {}, {}], ('a1', 'z2'): [{}, {}, {}]}, 1, 0.5) == {'a1': 3}


def test_is_node_ready():
Expand Down