CRDB working set tests #61

Open · wants to merge 1 commit into base: main
4 changes: 2 additions & 2 deletions service_capacity_modeling/hardware/profiles/shapes/aws.json
@@ -254,7 +254,7 @@
"ram_gib": 15.48,
"net_mbps": 781,
"drive": {
"name": "ephem", "size_gib": 436.5,
"name": "ephem", "size_gib": 436,
"read_io_latency_ms": {
"minimum_value":0.05,
"low":0.10, "mid":0.125, "high":0.17,
@@ -271,7 +271,7 @@
"ram_gib": 30.955,
"net_mbps": 1875,
"drive": {
"name": "ephem", "size_gib": 873.0,
"name": "ephem", "size_gib": 873,
"read_io_latency_ms": {
"minimum_value": 0.05,
"low": 0.10, "mid": 0.125, "high": 0.17,
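One plausible motivation for trimming these sizes to whole GiB: pydantic v1 coerces floats assigned to int-typed fields by truncation, so a fractional `size_gib` would be silently rounded down at validation time anyway. A minimal sketch of that behavior, using a hypothetical stand-in for the real Drive schema:

```python
# Hypothetical stand-in model, not the repo's actual Drive schema.
from pydantic import BaseModel


class Drive(BaseModel):
    name: str
    size_gib: int  # an int-typed field truncates any float it receives


d = Drive(name="ephem", size_gib=436.5)
print(d.size_gib)  # -> 436 under pydantic v1's lenient coercion
```

Keeping the catalog values integral means the JSON matches what validation would produce.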
4 changes: 2 additions & 2 deletions service_capacity_modeling/interface.py
@@ -555,11 +555,11 @@ class DataShape(ExcludeUnsetModel):

    # How much fixed memory must be provisioned per instance for the
    # application (e.g. for process heap memory)
-    reserved_instance_app_mem_gib: int = 2
+    reserved_instance_app_mem_gib: float = 2

    # How much fixed memory must be provisioned per instance for the
    # system (e.g. for kernel and other system processes)
-    reserved_instance_system_mem_gib: int = 1
+    reserved_instance_system_mem_gib: float = 1

    # How durable does this dataset need to be. We want to provision
    # sufficient replication and backups of data to achieve the target
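The int-to-float widening above guards against the mirror-image problem: under pydantic v1, an `int` annotation truncates any fractional reservation a caller passes in. A minimal before/after sketch (illustrative classes, not the real DataShape):

```python
from pydantic import BaseModel


class Before(BaseModel):
    reserved_instance_app_mem_gib: int = 2


class After(BaseModel):
    reserved_instance_app_mem_gib: float = 2


# pydantic v1 coerces 1.5 -> 1 on the int field, silently dropping half
# a GiB of reserved headroom; the float field keeps the value intact.
print(Before(reserved_instance_app_mem_gib=1.5).reserved_instance_app_mem_gib)  # 1
print(After(reserved_instance_app_mem_gib=1.5).reserved_instance_app_mem_gib)   # 1.5
```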
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@
    description="Contains utilities for modeling database capacity on a cloud",
    packages=setuptools.find_packages(exclude=("tests*", "notebooks*")),
    install_requires=[
-        "pydantic",
+        "pydantic>=1.0,<2.0",
        "scipy",
        "numpy",
        'importlib_resources; python_version < "3.7"',
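Pinning pydantic below 2.0 is defensive: the models and the tests below lean on v1-era APIs that v2 renamed or removed. For example, the new working-set test copies and mutates a drive via `.copy(deep=True)`, which pydantic v2 spells `model_copy(deep=True)`. A sketch under v1 semantics, with an illustrative stand-in class:

```python
from pydantic import BaseModel


class Drive(BaseModel):  # illustrative stand-in, not the repo's Drive model
    name: str
    read_io_latency_ms: float = 0.125


gp3 = Drive(name="gp3")
slow = gp3.copy(deep=True)  # v1 API; replaced by model_copy() in v2
slow.name = "slow"          # v1 models are mutable by default
print(gp3.name, slow.name)  # -> gp3 slow
```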
69 changes: 52 additions & 17 deletions tests/netflix/test_crdb.py
@@ -1,32 +1,35 @@
from service_capacity_modeling.capacity_planner import planner
+from service_capacity_modeling.hardware import shapes
from service_capacity_modeling.interface import CapacityDesires
from service_capacity_modeling.interface import DataShape
+from service_capacity_modeling.interface import FixedInterval
from service_capacity_modeling.interface import Interval
from service_capacity_modeling.interface import QueryPattern
+from service_capacity_modeling.models.common import working_set_from_drive_and_slo
+from service_capacity_modeling.models.org.netflix import nflx_cockroachdb_capacity_model
+from service_capacity_modeling.stats import dist_for_interval


-def test_crdb_basic():
-    basic = CapacityDesires(
-        service_tier=1,
-        query_pattern=QueryPattern(
-            estimated_read_per_second=Interval(
-                low=100, mid=1000, high=10000, confidence=0.98
-            ),
-            estimated_write_per_second=Interval(
-                low=100, mid=1000, high=10000, confidence=0.98
-            ),
-        ),
-        data_shape=DataShape(
-            estimated_state_size_gib=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
-            ),
-        ),
-    )
+simple_desire = CapacityDesires(
+    service_tier=1,
+    query_pattern=QueryPattern(
+        estimated_read_per_second=Interval(
+            low=100, mid=1000, high=10000, confidence=0.98
+        ),
+        estimated_write_per_second=Interval(
+            low=100, mid=1000, high=10000, confidence=0.98
+        ),
+    ),
+    data_shape=DataShape(
+        estimated_state_size_gib=Interval(low=10, mid=100, high=1000, confidence=0.98),
+    ),
+)
+
+
+def test_crdb_simple():
    plan = planner.plan(
        model_name="org.netflix.cockroachdb",
        region="us-east-1",
-        desires=basic,
+        desires=simple_desire,
    )

lr = plan.least_regret[0]
@@ -44,6 +47,38 @@ def test_crdb_basic():
assert lr_cluster.count * lr_cluster.instance.cpu >= 4


+def test_crdb_working_set():
+    ephem = shapes.region("us-east-1").instances["i4i.xlarge"].drive
+    ebs = shapes.region("us-east-1").drives["gp3"]
+    super_slow_drive = ebs.copy(deep=True)
+    # Simulate a very slow drive
+    super_slow_drive.name = "slow"
+    super_slow_drive.read_io_latency_ms = FixedInterval(
+        low=5, mid=8, high=20, confidence=0.9
+    )
+
+    latency_sensitive = nflx_cockroachdb_capacity_model.default_desires(
+        simple_desire, {}
+    )
+    results = {}
+    for drive in (ephem, ebs, super_slow_drive):
+        working_set = working_set_from_drive_and_slo(
+            drive_read_latency_dist=dist_for_interval(drive.read_io_latency_ms),
+            read_slo_latency_dist=dist_for_interval(
+                latency_sensitive.query_pattern.read_latency_slo_ms
+            ),
+            estimated_working_set=None,
+            # CRDB has looser latency SLOs but we still want a lot of the data
+            # hot in cache. Target the 95th percentile of disk latency to
+            # keep in RAM.
+            target_percentile=0.95,
+        ).mid
+        results[drive.name] = working_set
+    assert results["ephem"] < 0.05
+    assert results["gp3"] < 0.05
+    assert results["slow"] > 0.5


def test_crdb_footprint():
    space = CapacityDesires(
        service_tier=1,
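For reviewers, the intuition behind `working_set_from_drive_and_slo` as exercised above: compare the drive's read-latency distribution against the read SLO at the target percentile, and treat the share of reads the drive cannot serve within the SLO as the fraction of data that must stay resident in RAM. The sketch below illustrates that idea with assumed lognormal distributions; it is not the library's exact implementation:

```python
# Illustrative only: assumed distributions and a simplified formula,
# not the actual working_set_from_drive_and_slo implementation.
from scipy import stats


def working_set_estimate(drive_latency, slo_latency, target_percentile=0.95):
    # Latency the drive actually delivers at the target percentile (p95).
    drive_p = drive_latency.ppf(target_percentile)
    # Probability that the SLO demands better latency than the drive
    # provides at p95 -- those reads have to be served from memory.
    return slo_latency.cdf(drive_p)


fast = stats.lognorm(s=0.5, scale=0.125)  # ~ephemeral NVMe read latency, ms
slow = stats.lognorm(s=0.5, scale=8.0)    # ~the "slow" drive above, ms
slo = stats.lognorm(s=0.5, scale=2.0)     # hypothetical read SLO distribution, ms

print(working_set_estimate(fast, slo))  # near 0: almost nothing must be in RAM
print(working_set_estimate(slow, slo))  # near 1: most data must be in RAM
```

This matches the assertions above: fast ephemeral and gp3 drives keep the working set under 5%, while the artificially slow drive pushes it past 50%.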