remove EBS from the suggestions
ayushis committed Nov 5, 2024
1 parent 5734f51 · commit c17944e
Showing 3 changed files with 35 additions and 26 deletions.
service_capacity_modeling/models/org/netflix/cassandra.py (18 changes: 9 additions & 9 deletions)
@@ -211,12 +211,12 @@ def _estimate_cassandra_cluster_zonal(
     desires: CapacityDesires,
     zones_per_region: int = 3,
     copies_per_region: int = 3,
-    require_local_disks: bool = False,
+    require_local_disks: bool = True,
     require_attached_disks: bool = False,
     required_cluster_size: Optional[int] = None,
     max_rps_to_disk: int = 500,
-    max_local_disk_gib: int = 2048,
-    max_regional_size: int = 96,
+    max_local_disk_gib: int = 5120,
+    max_regional_size: int = 192,
     max_write_buffer_percent: float = 0.25,
     max_table_buffer_percent: float = 0.11,
 ) -> Optional[CapacityPlan]:
@@ -462,7 +462,7 @@ class NflxCassandraArguments(BaseModel):
         " this will be deduced from durability and consistency desires",
     )
     require_local_disks: bool = Field(
-        default=False,
+        default=True,
         description="If local (ephemeral) drives are required",
     )
     require_attached_disks: bool = Field(
@@ -478,11 +478,11 @@
         description="How many disk IOs should be allowed to hit disk per instance",
     )
     max_regional_size: int = Field(
-        default=96,
+        default=192,
         description="What is the maximum size of a cluster in this region",
     )
     max_local_disk_gib: int = Field(
-        default=2048,
+        default=5120,
         description="The maximum amount of data we store per machine",
     )
     max_write_buffer_percent: float = Field(
@@ -513,7 +513,7 @@ def capacity_plan(
             desires, extra_model_arguments.get("copies_per_region", None)
         )
         require_local_disks: bool = extra_model_arguments.get(
-            "require_local_disks", False
+            "require_local_disks", True
        )
         require_attached_disks: bool = extra_model_arguments.get(
             "require_attached_disks", False
@@ -522,8 +522,8 @@
             "required_cluster_size", None
         )
         max_rps_to_disk: int = extra_model_arguments.get("max_rps_to_disk", 500)
-        max_regional_size: int = extra_model_arguments.get("max_regional_size", 96)
-        max_local_disk_gib: int = extra_model_arguments.get("max_local_disk_gib", 2048)
+        max_regional_size: int = extra_model_arguments.get("max_regional_size", 192)
+        max_local_disk_gib: int = extra_model_arguments.get("max_local_disk_gib", 5120)
         max_write_buffer_percent: float = min(
             0.5, extra_model_arguments.get("max_write_buffer_percent", 0.25)
         )
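Net effect of the model changes above: the Cassandra planner now requires local (ephemeral) disks by default, allows up to 5120 GiB of data per node instead of 2048, and caps regional clusters at 192 nodes instead of 96, so EBS-backed shapes drop out of the default suggestions. A caller who still wants attached EBS volumes must opt out of local disks explicitly, exactly as the updated tests below do. A minimal usage sketch, assuming the planner entry point planner.plan_certain and the interface types the tests import; the desires values are illustrative:

# Hypothetical usage sketch, not part of this commit. The overrides mirror
# the updated tests: require_local_disks must now be disabled explicitly
# because its default flipped to True here.
from service_capacity_modeling.capacity_planner import planner
from service_capacity_modeling.interface import (
    CapacityDesires,
    DataShape,
    QueryPattern,
    certain_int,
)

desires = CapacityDesires(
    service_tier=1,
    query_pattern=QueryPattern(
        estimated_read_per_second=certain_int(100_000),
        estimated_write_per_second=certain_int(10_000),
    ),
    data_shape=DataShape(estimated_state_size_gib=certain_int(1_000)),
)

cap_plan = planner.plan_certain(
    model_name="org.netflix.cassandra",
    region="us-east-1",
    desires=desires,
    extra_model_arguments={
        "require_attached_disks": True,  # ask for EBS-backed candidates
        "require_local_disks": False,    # opt out of the new default
    },
)[0]
result = cap_plan.candidate_clusters.zonal[0]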
tests/netflix/test_cassandra.py (32 changes: 20 additions & 12 deletions)

@@ -13,6 +13,9 @@
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import QueryPattern
 
+# from service_capacity_modeling.interface import CurrentClusters
+# from service_capacity_modeling.interface import CurrentZoneClusterCapacity
+
 small_but_high_qps = CapacityDesires(
     service_tier=1,
     query_pattern=QueryPattern(
@@ -92,7 +95,10 @@ def test_ebs_high_reads():
                 estimated_state_size_gib=certain_int(1_000),
             ),
         ),
-        extra_model_arguments={"require_attached_disks": True},
+        extra_model_arguments={
+            "require_attached_disks": True,
+            "require_local_disks": False,
+        },
     )[0]
     result = cap_plan.candidate_clusters.zonal[0]
@@ -103,8 +109,8 @@
     # 1TiB / ~32 nodes
     assert result.attached_drives[0].read_io_per_s is not None
     ios = result.attached_drives[0].read_io_per_s * result.count
-    # Each zone is handling ~33k reads per second, so total disk ios should be < 3x that
-    # 3 from each level
+    # Each zone is handling ~33k reads per second, so total disk ios should be < 3x
+    # that 3 from each level
     assert 100_000 < ios < 400_000
@@ -123,7 +129,10 @@ def test_ebs_high_writes():
                 estimated_state_size_gib=certain_int(10_000),
             ),
         ),
-        extra_model_arguments={"require_attached_disks": True},
+        extra_model_arguments={
+            "require_attached_disks": True,
+            "require_local_disks": False,
+        },
     )[0]
     result = cap_plan.candidate_clusters.zonal[0]
@@ -192,15 +201,14 @@ def test_high_write_throughput():
         extra_model_arguments={"max_regional_size": 96 * 2},
     )[0]
     high_writes_result = cap_plan.candidate_clusters.zonal[0]
-    assert high_writes_result.instance.family in ("m5", "r5")
+    assert high_writes_result.instance.family not in ("m5", "r5")
     assert high_writes_result.count > 16
 
-    assert high_writes_result.attached_drives[0].size_gib >= 400
-    assert (
-        300_000
-        > high_writes_result.count * high_writes_result.attached_drives[0].size_gib
-        >= 100_000
-    )
+    # assert high_writes_result.instance.drive.size_gib >= 400
+    # assert (
+    #     300_000
+    #     > high_writes_result.count * high_writes_result.instance.drive.size_gib
+    #     >= 100_000
+    # )
 
     cluster_cost = cap_plan.candidate_clusters.annual_costs["cassandra.zonal-clusters"]
     assert 125_000 < cluster_cost < 900_000
tests/netflix/test_cassandra_uncertain.py (11 changes: 6 additions & 5 deletions)
@@ -158,13 +158,14 @@ def test_worn_dataset():
         <= lr.candidate_clusters.annual_costs["cassandra.zonal-clusters"]
         < 1_000_000
     )
-    assert lr_cluster.instance.name.startswith(
+    assert not lr_cluster.instance.name.startswith(
         "m5."
     ) or lr_cluster.instance.name.startswith("r5.")
-    assert lr_cluster.attached_drives[0].name == "gp3"
-    # gp2 should not provision massive drives, prefer to upcolor
-    assert lr_cluster.attached_drives[0].size_gib < 9000
-    assert lr_cluster.attached_drives[0].size_gib * lr_cluster.count * 3 > 204800
+    assert len(lr_cluster.attached_drives) == 0
+    # assert lr_cluster.attached_drives[0].name == "gp3"
+    # # gp2 should not provision massive drives, prefer to upcolor
+    # assert lr_cluster.instance.drive.size_gib < 9000
+    # assert lr_cluster.instance.drive.size_gib * lr_cluster.count * 3 > 204800
     # We should have S3 backup cost
     assert lr.candidate_clusters.services[0].annual_cost > 5_000

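Read together with the test_cassandra.py changes, the updated assertions all encode the same invariant: suggested clusters should now keep data on the instance's own local (ephemeral) drive, with no attached EBS volumes and no EBS-oriented instance families. A hedged restatement of that invariant, reusing attribute names from the diffs above (illustrative only, not part of the commit):

# Illustrative restatement of what the updated tests check. `plan` stands
# for any returned plan (cap_plan above, or a least-regret entry in the
# uncertain tests); instance.drive is assumed to describe the node's local
# disk, as in the commented-out assertions.
cluster = plan.candidate_clusters.zonal[0]
assert cluster.instance.family not in ("m5", "r5")  # EBS-oriented families excluded
assert len(cluster.attached_drives) == 0            # no EBS volumes attached
assert cluster.instance.drive is not None           # data lives on the local drive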