sigstore · mihaimaruseac · Jan 16, 2025 · Jan 15, 2025 · Jan 16, 2025
@@ -0,0 +1,70 @@
+# Copyright 2025 The Sigstore Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""Script for running a benchmark to pick a chunk parameter."""
+
+import argparse
+import timeit
+
+import serialize
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Builds the command line parser for the chunk experiment."""
+    parser = argparse.ArgumentParser(
+        description="chunk size benchmark data for model signing"
+    )
+
+    parser.add_argument("path", help="path to model")
+
+    parser.add_argument(
+        "--repeat",
+        help="how many times to repeat each chunk size",
+        type=int,
+        default=5,
+    )
+
+    parser.add_argument(
+        "--sizes", help="chunk sizes to benchmark", nargs="+", type=int
+    )
+
+    return parser
+
+
+def _default_sizes() -> list[int]:
+    # 0 is a special value to (attempt to) read whole files into RAM
+    # then powers of 2 between 1KB and 1GB
+
+    # https://github.com/google/pytype/issues/795
+    # pytype: disable=bad-return-type
+    return [0] + [2**i for i in range(10, 31)]
+    # pytype: enable=bad-return-type
+
+
+if __name__ == "__main__":
+    chunk_args = build_parser().parse_args()
+
+    chunk_sizes = chunk_args.sizes or _default_sizes()
+    padding = len(f"{max(chunk_sizes)}: ")
+    for chunk_size in chunk_sizes:
+        args = serialize.build_parser().parse_args(
+            [chunk_args.path, f"--chunk={chunk_size}"]
+        )
+        times = timeit.repeat(
+            lambda args=args: serialize.run(args),
+            number=1,
+            repeat=chunk_args.repeat,
+        )
+        print(f"{f'{chunk_size}: ':<{padding}}{min(times):10.4f}")
@@ -71,6 +71,8 @@ python = ["3.9", "3.10", "3.11", "3.12", "3.13"]
 description = """Custom environment for running benchmarks.
 Use `hatch run +py=3... bench:generate ${args}` to generate test models.
 Use `hatch run +py=3... bench:serialize ${args}` to benchmark serialization code.
+Use `hatch run +py=3... bench:hash ${args}` to benchmark hashing code.
+Use `hatch run +py=3... bench:chunk ${args}` to benchmark the chunk size parameter.
 """
 extra-dependencies = [
   "numpy",
@@ -83,6 +85,7 @@ python = ["3.9", "3.10", "3.11", "3.12", "3.13"]
 generate = "python benchmarks/generate.py {args}"
 serialize = "python benchmarks/serialize.py {args}"
 hash = "python benchmarks/exp_hash.py {args}"
+chunk = "python benchmarks/exp_chunk.py {args}"
 
 [tool.hatch.envs.docs]
 description = """Custom environment for pdoc.