diff --git a/.gitignore b/.gitignore
index a8e86c83..37cf191c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,6 +169,5 @@
 version.txt
 actions-runner/
 experiments/
-examples/
 .engine/
 amdsmi
\ No newline at end of file
diff --git a/examples/api_launch.py b/examples/api_launch.py
index 987ec8c9..734dfd3c 100644
--- a/examples/api_launch.py
+++ b/examples/api_launch.py
@@ -4,7 +4,6 @@
 from optimum_benchmark.launchers.torchrun.config import TorchrunConfig
 from optimum_benchmark.logging_utils import setup_logging
 
-
 if __name__ == "__main__":
     setup_logging(level="INFO")
     launcher_config = TorchrunConfig(nproc_per_node=2)
diff --git a/examples/fast-mteb/README.md b/examples/fast-mteb/README.md
deleted file mode 100644
index fee12ad6..00000000
--- a/examples/fast-mteb/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# Optimum-Benchmark x MTEB
-
-A set of performance benchmarks using [`BAAI/bge-base-en-v1.5`](https://huggingface.co/BAAI/bge-base-en-v1.5), the number one embedding model on the [`Massive Text Embedding Benchmark (MTEB) Leaderboard`](https://huggingface.co/spaces/mteb/leaderboard).
-
-For these benchmarks in particular, we recommend using a Docker image with TensorRT installed, for example [`nvcr.io/nvidia/tensorrt:22.12-py3`](../../docker/gpu.dockerfile), or removing the `TensorrtExecutionProvider` experiments from `configs/`.
-
-Just run `script.sh` from this directory:
-
-```bash
-sh script.sh
-```
-
-This script will run sweeps over batch sizes and sequence lengths for each backend using the config files in `configs/`. The results of the sweeps will be saved in `experiments/`. Then run the reporting script `report.py`:
-
-```bash
-python report.py -e experiments/
-```
-
-This will generate the plots and CSV files in `artifacts/`.
-
-## Results
-
-### Latency
-
-For latency, we consider the case of a server processing requests one at a time (i.e. a batch size of 1).
-We achieve a latency of 1 to 2 milliseconds for the forward pass of the embedding model using either `CUDAExecutionProvider` with the `O4` optimization level or `TensorrtExecutionProvider` with `fp16` precision. This amounts to a 5x to 7x speedup over the baseline PyTorch model.
-
-<img src='artifacts/forward_latency_plot.png'>
- -
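For context, the `CUDAExecutionProvider` + `O4` setup measured above can be reproduced with `optimum`'s ONNX Runtime integration. Below is a minimal sketch, not the benchmark's own backend code: it assumes a CUDA-capable machine with `optimum[onnxruntime-gpu]` installed, and `bge-o4-onnx` is just a placeholder save directory.

```python
# Minimal sketch: export BAAI/bge-base-en-v1.5 to ONNX, apply the O4 graph
# optimizations (fp16, GPU-only fusions), and run one forward pass on CUDA.
from optimum.onnxruntime import (
    AutoOptimizationConfig,
    ORTModelForFeatureExtraction,
    ORTOptimizer,
)
from transformers import AutoTokenizer

# Export the model to ONNX and load it on the CUDA execution provider.
model = ORTModelForFeatureExtraction.from_pretrained(
    "BAAI/bge-base-en-v1.5", export=True, provider="CUDAExecutionProvider"
)

# O4 is the most aggressive auto-optimization level and requires a GPU.
optimizer = ORTOptimizer.from_pretrained(model)
optimizer.optimize(
    save_dir="bge-o4-onnx",  # placeholder directory, not a benchmark path
    optimization_config=AutoOptimizationConfig.O4(),
)

# Reload the optimized model and embed a sentence.
model = ORTModelForFeatureExtraction.from_pretrained(
    "bge-o4-onnx", provider="CUDAExecutionProvider"
)
tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-base-en-v1.5")
inputs = tokenizer("An example sentence to embed.", return_tensors="pt").to("cuda")
embedding = model(**inputs).last_hidden_state[:, 0]  # CLS-token embedding
```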
-
-### Throughput
-
-For throughput, we consider the case of a server processing requests of average length (i.e. a sequence length of 256).
-We achieve a throughput of over 2000 samples per second for the forward pass of the embedding model at the optimal batch size of 128, using either `CUDAExecutionProvider` with the `O4` optimization level or `TensorrtExecutionProvider` with `fp16` precision. This amounts to a 7.5x increase over the baseline PyTorch model.
-
-<img src='artifacts/forward_throughput_plot.png'>
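The TensorRT experiments configure the execution provider the same way the `configs/` files do, through `provider_options`. Here is a rough sketch under the same assumptions; it mirrors the `trt_fp16_enable` and `trt_engine_cache_*` values recorded in the full report, but is not the benchmark's backend code.

```python
# Rough sketch: load the model on the TensorRT execution provider with fp16
# enabled and engine caching on, so built engines are reused across runs.
from optimum.onnxruntime import ORTModelForFeatureExtraction

model = ORTModelForFeatureExtraction.from_pretrained(
    "BAAI/bge-base-en-v1.5",
    export=True,
    provider="TensorrtExecutionProvider",
    provider_options={
        "trt_fp16_enable": True,          # run the TensorRT engine in fp16
        "trt_engine_cache_enable": True,  # cache built engines on disk
        "trt_engine_cache_path": "tmp/trt_cache",
    },
)
```

As the notes below point out, the cache only spares rebuilds for input shapes that have already been compiled; a new sequence length still triggers engine building.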
-
-### Notes
-
-- The `TensorrtExecutionProvider` requires engine building, which can take a few minutes during model loading and the first forward pass. It also rebuilds the engine every time the sequence length changes. This is why I think `CUDAExecutionProvider` with the `O4` optimization level is the sweet spot for GPU inference.
-- Some other cases, such as processing big batches of short sequences, can demonstrate even higher speedups (~15x). We don't study them here.
diff --git a/examples/fast-mteb/artifacts/forward_latency_plot.png b/examples/fast-mteb/artifacts/forward_latency_plot.png
deleted file mode 100644
index c4aecc6d..00000000
Binary files a/examples/fast-mteb/artifacts/forward_latency_plot.png and /dev/null differ
diff --git a/examples/fast-mteb/artifacts/forward_throughput_plot.png b/examples/fast-mteb/artifacts/forward_throughput_plot.png
deleted file mode 100644
index 4be441a3..00000000
Binary files a/examples/fast-mteb/artifacts/forward_throughput_plot.png and /dev/null differ
diff --git a/examples/fast-mteb/artifacts/full_report.csv b/examples/fast-mteb/artifacts/full_report.csv
deleted file mode 100644
index 8a7e23cf..00000000
--- a/examples/fast-mteb/artifacts/full_report.csv
+++ /dev/null
@@ -1,52 +0,0 @@
-experiment_name,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.initial_isolation_check,backend.continous_isolation_check,backend.delete_cache,backend.no_weights,backend.export,backend.use_cache,backend.use_merged,backend.torch_dtype,backend.provider,backend.provider_options.trt_engine_cache_enable,backend.provider_options.trt_engine_cache_path,backend.provider_options.device_id,backend.provider_options.trt_fp16_enable,backend.use_io_binding,backend.session_options.enable_profiling,backend.optimization,backend.quantization,backend.calibration,backend.auto_optimization,backend.auto_quantization,backend.use_inference_session,backend.use_ddp,backend.peft_strategy,benchmark.name,benchmark._target_,benchmark.duration,benchmark.warmup_runs,benchmark.memory,benchmark.energy,benchmark.input_shapes.batch_size,benchmark.input_shapes.sequence_length,benchmark.input_shapes.num_choices,benchmark.input_shapes.feature_size,benchmark.input_shapes.nb_max_frames,benchmark.input_shapes.audio_sequence_length,benchmark.new_tokens,benchmark.can_diffuse,benchmark.can_generate,model,device,task,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.transformers_version,environment.accelerate_version,environment.diffusers_version,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,Unnamed: 0,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),backend.auto_optimization_config.disable_shape_inference,backend.device_map,backend.disable_grad,backend.eval_mode,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.bettertransformer,backend.quantization_scheme 
-bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,128,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0584,2190.0,4612,,,,,,,,, -bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0152,2110.0,4016,,,,,,,,, -bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,64,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0303,2110.0,4150,,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,128,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0639,2000.0,5583,False,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,64,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0324,1980.0,4304,False,,,,,,,, 
-bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00811,1970.0,3943,,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0167,1920.0,4304,False,,,,,,,, -bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,256,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.137,1870.0,5514,,,,,,,,, -bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00434,1840.0,3899,,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00883,1810.0,3767,False,,,,,,,, 
-bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,256,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.146,1750.0,8152,False,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,512,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.294,1740.0,13263,False,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1024,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.595,1720.0,23700,False,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2048,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,1.2,1710.0,44135,False,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00486,1650.0,3767,False,,,,,,,, 
-bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00259,1540.0,3871,,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00307,1300.0,3765,False,,,,,,,, -bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00176,1140.0,3851,,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00195,1030.0,3500,False,,,,,,,, -bge_seq_len_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,16,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.000995,1010.0,3832,,,,,,,,, 
-bge_seq_len_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,32,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00105,952.0,3844,,,,,,,,, -bge_seq_len_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,16,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0011,909.0,3532,False,,,,,,,, -bge_seq_len_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,64,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0011,909.0,3857,,,,,,,,, -bge_seq_len_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,64,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00113,885.0,3496,False,,,,,,,, -bge_seq_len_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,32,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00114,877.0,3536,False,,,,,,,, 
-bge_seq_len_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,128,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00124,806.0,3869,,,,,,,,, -bge_seq_len_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,128,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00132,758.0,3498,False,,,,,,,, -bge_batch_size_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00145,690.0,3498,False,,,,,,,, -bge_batch_size_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00146,685.0,3838,,,,,,,,, -bge_seq_len_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00148,676.0,3888,,,,,,,,, 
-bge_seq_len_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0015,667.0,3502,False,,,,,,,, -bge_seq_len_sweep_ort_trt_fp16,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,TensorrtExecutionProvider,True,tmp/trt_cache,0,True,False,False,False,False,False,,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,512,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00197,508.0,3920,,,,,,,,, -bge_seq_len_sweep_ort_cuda_o4,onnxruntime,ort-gpu:1.15.1,optimum_benchmark.backends.onnxruntime.backend.ORTBackend,42,,,False,False,False,False,True,True,False,,CUDAExecutionProvider,,,0,,False,False,False,False,False,O4,,True,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,512,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00201,498.0,3500,False,,,,,,,, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,512,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,1.58,324.0,8328,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2048,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,6.34,323.0,29283,,,True,True,False,,False,False, 
-bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,256,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.792,323.0,5107,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1024,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,3.17,323.0,15576,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,128,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.403,318.0,3494,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,64,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.204,314.0,2689,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0537,298.0,2083,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA 
A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.109,294.0,2286,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0302,265.0,1995,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0155,258.0,1936,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00901,222.0,1902,,,True,True,False,,False,False, -bge_batch_size_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00649,154.0,1890,,,True,True,False,,False,False, -bge_seq_len_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,32,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00669,149.0,1883,,,True,True,False,,False,False, -bge_seq_len_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core 
Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00687,146.0,1890,,,True,True,False,,False,False, -bge_seq_len_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,64,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00689,145.0,1885,,,True,True,False,,False,False, -bge_seq_len_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,128,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00689,145.0,1888,,,True,True,False,,False,False, -bge_seq_len_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,16,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.00729,137.0,1881,,,True,True,False,,False,False, -bge_seq_len_sweep_baseline,pytorch,2.0.1,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,,,,,,,,,,,,,,,,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,512,1,80,3000,16000,,False,False,BAAI/bge-base-en-v1.5,cuda,feature-extraction,main,,False,False,1.13.0,4.34.0.dev0,0.23.0.dev0,0.21.0.dev0,3.8.10,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0,0.0096,104.0,1906,,,True,True,False,,False,False, diff --git a/examples/fast-mteb/artifacts/rich_table.svg b/examples/fast-mteb/artifacts/rich_table.svg deleted file mode 100644 index a43cab78..00000000 --- a/examples/fast-mteb/artifacts/rich_table.svg +++ /dev/null @@ -1,479 +0,0 @@ - diff --git a/examples/fast-mteb/artifacts/short_report.csv b/examples/fast-mteb/artifacts/short_report.csv deleted file mode 100644 index c6255ea7..00000000 --- a/examples/fast-mteb/artifacts/short_report.csv +++ /dev/null @@ -1,52 +0,0 @@ -experiment_name,Backend,Provider,Batch Size,Sequence Length,Forward Latency (s),Forward Throughput (samples/s) -bge_seq_len_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,1,16,0.000995,1010.0 -bge_seq_len_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,1,16,0.0011,909.0 -bge_seq_len_sweep_baseline,PyTorch,,1,16,0.00729,137.0 -bge_seq_len_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,1,32,0.00105,952.0 
-bge_seq_len_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,1,32,0.00114,877.0 -bge_seq_len_sweep_baseline,PyTorch,,1,32,0.00669,149.0 -bge_seq_len_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,1,64,0.0011,909.0 -bge_seq_len_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,1,64,0.00113,885.0 -bge_seq_len_sweep_baseline,PyTorch,,1,64,0.00689,145.0 -bge_seq_len_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,1,128,0.00124,806.0 -bge_seq_len_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,1,128,0.00132,758.0 -bge_seq_len_sweep_baseline,PyTorch,,1,128,0.00689,145.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,1,256,0.00145,690.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,1,256,0.00146,685.0 -bge_seq_len_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,1,256,0.00148,676.0 -bge_seq_len_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,1,256,0.0015,667.0 -bge_batch_size_sweep_baseline,PyTorch,,1,256,0.00649,154.0 -bge_seq_len_sweep_baseline,PyTorch,,1,256,0.00687,146.0 -bge_seq_len_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,1,512,0.00197,508.0 -bge_seq_len_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,1,512,0.00201,498.0 -bge_seq_len_sweep_baseline,PyTorch,,1,512,0.0096,104.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,2,256,0.00176,1140.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,2,256,0.00195,1030.0 -bge_batch_size_sweep_baseline,PyTorch,,2,256,0.00901,222.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,4,256,0.00259,1540.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,4,256,0.00307,1300.0 -bge_batch_size_sweep_baseline,PyTorch,,4,256,0.0155,258.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,8,256,0.00434,1840.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,8,256,0.00486,1650.0 -bge_batch_size_sweep_baseline,PyTorch,,8,256,0.0302,265.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,16,256,0.00811,1970.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,16,256,0.00883,1810.0 -bge_batch_size_sweep_baseline,PyTorch,,16,256,0.0537,298.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,32,256,0.0152,2110.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,32,256,0.0167,1920.0 -bge_batch_size_sweep_baseline,PyTorch,,32,256,0.109,294.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,64,256,0.0303,2110.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,64,256,0.0324,1980.0 -bge_batch_size_sweep_baseline,PyTorch,,64,256,0.204,314.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,128,256,0.0584,2190.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,128,256,0.0639,2000.0 -bge_batch_size_sweep_baseline,PyTorch,,128,256,0.403,318.0 -bge_batch_size_sweep_ort_trt_fp16,OnnxRuntime,TensorrtExecutionProvider,256,256,0.137,1870.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,256,256,0.146,1750.0 -bge_batch_size_sweep_baseline,PyTorch,,256,256,0.792,323.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,512,256,0.294,1740.0 -bge_batch_size_sweep_baseline,PyTorch,,512,256,1.58,324.0 -bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,1024,256,0.595,1720.0 -bge_batch_size_sweep_baseline,PyTorch,,1024,256,3.17,323.0 
-bge_batch_size_sweep_ort_cuda_o4,OnnxRuntime,CUDAExecutionProvider,2048,256,1.2,1710.0 -bge_batch_size_sweep_baseline,PyTorch,,2048,256,6.34,323.0 diff --git a/examples/fast-mteb/configs/bge_base_config.yaml b/examples/fast-mteb/configs/bge_base_config.yaml deleted file mode 100644 index 96b60263..00000000 --- a/examples/fast-mteb/configs/bge_base_config.yaml +++ /dev/null @@ -1,31 +0,0 @@ -defaults: - - benchmark: inference # default benchmark - - experiment # inheriting from experiment config - - _self_ # for hydra 1.1 compatibility - - override hydra/job_logging: colorlog # colorful logging - - override hydra/hydra_logging: colorlog # colorful logging - -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - job: - chdir: true - env_set: - CUDA_VISIBLE_DEVICES: 0 - -experiment_name: bge_seq_len_sweep_pytorch -model: BAAI/bge-base-en-v1.5 -task: feature-extraction -device: cuda - -backend: - initial_isolation_check: false - continous_isolation_check: false - -benchmark: - memory: true - input_shapes: - batch_size: null - sequence_length: null diff --git a/examples/fast-mteb/configs/bge_batch_size_sweep_baseline.yaml b/examples/fast-mteb/configs/bge_batch_size_sweep_baseline.yaml deleted file mode 100644 index 6fd462aa..00000000 --- a/examples/fast-mteb/configs/bge_batch_size_sweep_baseline.yaml +++ /dev/null @@ -1,12 +0,0 @@ -defaults: - - backend: pytorch - - bge_base_config - - _self_ - -hydra: - sweeper: - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: 256 - -experiment_name: bge_batch_size_sweep_baseline diff --git a/examples/fast-mteb/configs/bge_batch_size_sweep_ort_cuda_o4.yaml b/examples/fast-mteb/configs/bge_batch_size_sweep_ort_cuda_o4.yaml deleted file mode 100644 index 8d496384..00000000 --- a/examples/fast-mteb/configs/bge_batch_size_sweep_ort_cuda_o4.yaml +++ /dev/null @@ -1,10 +0,0 @@ -defaults: - - bge_batch_size_sweep_baseline - - _self_ - - override backend: onnxruntime - -experiment_name: bge_batch_size_sweep_ort_cuda_o4 - -backend: - provider: CUDAExecutionProvider - auto_optimization: O4 diff --git a/examples/fast-mteb/configs/bge_batch_size_sweep_ort_trt_fp16.yaml b/examples/fast-mteb/configs/bge_batch_size_sweep_ort_trt_fp16.yaml deleted file mode 100644 index 743daa4d..00000000 --- a/examples/fast-mteb/configs/bge_batch_size_sweep_ort_trt_fp16.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: - - bge_batch_size_sweep_baseline - - _self_ - - override backend: onnxruntime - -experiment_name: bge_batch_size_sweep_ort_trt_fp16 - -backend: - provider: TensorrtExecutionProvider - provider_options: - trt_fp16_enable: true diff --git a/examples/fast-mteb/configs/bge_seq_len_sweep_baseline.yaml b/examples/fast-mteb/configs/bge_seq_len_sweep_baseline.yaml deleted file mode 100644 index 3c0efe2d..00000000 --- a/examples/fast-mteb/configs/bge_seq_len_sweep_baseline.yaml +++ /dev/null @@ -1,12 +0,0 @@ -defaults: - - backend: pytorch - - bge_base_config - - _self_ - -hydra: - sweeper: - params: - benchmark.input_shapes.batch_size: 1 - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - -experiment_name: bge_seq_len_sweep_baseline diff --git a/examples/fast-mteb/configs/bge_seq_len_sweep_ort_cuda_o4.yaml 
b/examples/fast-mteb/configs/bge_seq_len_sweep_ort_cuda_o4.yaml deleted file mode 100644 index d9a053b2..00000000 --- a/examples/fast-mteb/configs/bge_seq_len_sweep_ort_cuda_o4.yaml +++ /dev/null @@ -1,10 +0,0 @@ -defaults: - - bge_seq_len_sweep_baseline - - _self_ - - override backend: onnxruntime - -experiment_name: bge_seq_len_sweep_ort_cuda_o4 - -backend: - provider: CUDAExecutionProvider - auto_optimization: O4 diff --git a/examples/fast-mteb/configs/bge_seq_len_sweep_ort_trt_fp16.yaml b/examples/fast-mteb/configs/bge_seq_len_sweep_ort_trt_fp16.yaml deleted file mode 100644 index 191d6614..00000000 --- a/examples/fast-mteb/configs/bge_seq_len_sweep_ort_trt_fp16.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: - - bge_seq_len_sweep_baseline - - _self_ - - override backend: onnxruntime - -experiment_name: bge_seq_len_sweep_ort_trt_fp16 - -backend: - provider: TensorrtExecutionProvider - provider_options: - trt_fp16_enable: true diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml deleted file mode 100644 index 4a0d3af7..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml deleted file mode 100644 index 7a1a42b2..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml 
+++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=256 - id: '0' - num: 0 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: 
command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml deleted file mode 100644 index 68453b03..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/hydra_config.yaml deleted file mode 100644 index a8374b0b..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/inference_results.csv deleted file mode 100644 index 64e5148b..00000000 --- 
a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1)_sequence_length(256)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00649,154.0,1890 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/config.yaml deleted file mode 100644 index dfa93908..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1024 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/hydra.yaml deleted file mode 100644 index 6245d526..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - 
benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1024 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1024,benchmark.input_shapes.sequence_length=256 - id: '10' - num: 10 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git 
a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/overrides.yaml deleted file mode 100644 index 874d4c51..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1024 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/hydra_config.yaml deleted file mode 100644 index 176cbe80..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1024 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/inference_results.csv deleted file mode 100644 index 13937065..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(1024)_sequence_length(256)/10/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,3.17,323.0,15576 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml deleted file mode 100644 index dcf20c08..00000000 --- 
a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml deleted file mode 100644 index 0af85235..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. 
- - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=128 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=128,benchmark.input_shapes.sequence_length=256 - id: '7' - num: 7 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml deleted file 
mode 100644 index 459be207..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=128 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/hydra_config.yaml deleted file mode 100644 index 967d4978..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/inference_results.csv deleted file mode 100644 index f1aea2af..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(128)_sequence_length(256)/7/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.403,318.0,3494 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml deleted file mode 100644 index d1415b57..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null 
- intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml deleted file mode 100644 index ee269ee2..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
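
Every `hydra.yaml` deleted in this patch records the same multirun sweep: `BasicSweeper` with `benchmark.input_shapes.batch_size: 1,2,4,...,2048` and a fixed sequence length of 256. As a minimal sketch (not Hydra's actual code), the sweeper takes the cartesian product of the swept values and assigns each combination a job number, which is why the run dirs above carry `hydra.job.num` values 0 through 11:

```python
# Sketch of BasicSweeper's expansion of the params block above:
# cartesian product of swept values, one job per combination.
from itertools import product

batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]
sequence_lengths = [256]

for num, (bs, sl) in enumerate(product(batch_sizes, sequence_lengths)):
    overrides = [
        f"benchmark.input_shapes.batch_size={bs}",
        f"benchmark.input_shapes.sequence_length={sl}",
    ]
    # Matches the .hydra/overrides.yaml recorded in each run dir.
    print(num, overrides)
```
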
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16,benchmark.input_shapes.sequence_length=256 - id: '4' - num: 4 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml deleted file mode 100644 index 9e742a69..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=16 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/hydra_config.yaml deleted file mode 100644 index c77066b1..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 
2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/inference_results.csv deleted file mode 100644 index d5d39469..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(16)_sequence_length(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0537,298.0,2083 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml deleted file mode 100644 index e7e7cc9d..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - 
feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml deleted file mode 100644 index 03514e6a..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
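
Each run dir being deleted holds a two-line `inference_results.csv` with the forward-pass latency, throughput, and peak memory for one batch size. A minimal aggregation sketch (a stand-in for the example's `report.py`, whose exact logic isn't shown in this diff) would glob those files under `experiments/` and concatenate them into one table; the path and column names below are taken from the files above:

```python
# Collect every per-run inference_results.csv under experiments/ into one
# dataframe. Requires pandas; paths mirror the run dirs in this patch.
from pathlib import Path

import pandas as pd

frames = []
for csv_path in sorted(Path("experiments").glob("**/inference_results.csv")):
    frame = pd.read_csv(csv_path, index_col=0)
    frame["run_dir"] = str(csv_path.parent)
    frames.append(frame)

report = pd.concat(frames, ignore_index=True)
print(report[["run_dir", "forward.latency(s)",
              "forward.throughput(samples/s)", "forward.peak_memory(MB)"]])
```
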
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=256 - id: '1' - num: 1 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml deleted file mode 100644 index bf46a09f..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=2 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/hydra_config.yaml deleted file mode 100644 index 6526e0c2..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - 
_target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/inference_results.csv deleted file mode 100644 index 1326276e..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2)_sequence_length(256)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00901,222.0,1902 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/config.yaml deleted file mode 100644 index 5d10c040..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2048 - sequence_length: 256 - num_choices: 1 - 
feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/hydra.yaml deleted file mode 100644 index eaa83b80..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
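
Note the difference between the two config snapshots per run: `.hydra/config.yaml` stores unresolved interpolations such as `${pytorch_version:}` and `${is_inference:${benchmark.name}}`, while `hydra_config.yaml` stores the resolved literals (`2.0.1`, `true`). That is standard OmegaConf behavior: custom resolvers are evaluated only when the config is resolved. The sketch below registers plausible resolvers under the names used in these configs; the registration itself is an illustrative assumption, not the repo's exact code:

```python
# Illustrative resolver registration for the interpolations seen above.
import torch
from omegaconf import OmegaConf

OmegaConf.register_new_resolver("pytorch_version", lambda *_: torch.__version__)
OmegaConf.register_new_resolver("is_inference", lambda name: name == "inference")

cfg = OmegaConf.create(
    {
        "benchmark": {"name": "inference"},
        "backend": {
            "version": "${pytorch_version:}",
            "eval_mode": "${is_inference:${benchmark.name}}",
        },
    }
)
# Resolving turns the interpolations into the literals recorded in hydra_config.yaml.
print(OmegaConf.to_container(cfg, resolve=True))
```
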
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2048 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2048,benchmark.input_shapes.sequence_length=256 - id: '11' - num: 11 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/overrides.yaml deleted file mode 100644 index c0074884..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=2048 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/hydra_config.yaml deleted file mode 100644 index a0d85fbe..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: 
- name: pytorch - version: 2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2048 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/inference_results.csv deleted file mode 100644 index 12f5a323..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(2048)_sequence_length(256)/11/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,6.34,323.0,29283 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml deleted file mode 100644 index 6df59894..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 256 - 
sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml deleted file mode 100644 index 047bec6d..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
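
The throughput column in these CSVs is simply `batch_size / forward.latency(s)`, rounded, and the baseline numbers recorded in this patch show it saturating around 323 samples/s from batch size 128 upward. A quick arithmetic check against the values deleted above:

```python
# Throughput sanity check; latencies are copied from the baseline
# inference_results.csv files in this diff.
results = {1: 0.00649, 2: 0.00901, 16: 0.0537, 128: 0.403, 1024: 3.17, 2048: 6.34}

for batch_size, latency_s in results.items():
    # Reproduces 154, 222, 298, 318, 323, 323 samples/s respectively.
    print(f"batch {batch_size}: {batch_size / latency_s:.0f} samples/s")
```
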
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=256 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=256,benchmark.input_shapes.sequence_length=256 - id: '8' - num: 8 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml deleted file mode 100644 index 827e72de..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=256 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/hydra_config.yaml deleted file mode 100644 index 1480d48d..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - 
version: 2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 256 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/inference_results.csv deleted file mode 100644 index 91898ab3..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(256)_sequence_length(256)/8/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.792,323.0,5107 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml deleted file mode 100644 index 64249ed0..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 
1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml deleted file mode 100644 index 8fc2114f..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=32 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=32,benchmark.input_shapes.sequence_length=256 - id: '5' - num: 5 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml deleted file mode 100644 index 3e2c3635..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=32 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/hydra_config.yaml deleted file mode 100644 index d0d51aaf..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 
2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/inference_results.csv deleted file mode 100644 index a0c77f7c..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(32)_sequence_length(256)/5/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.109,294.0,2286 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml deleted file mode 100644 index 4a4c4aa5..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 256 - num_choices: 1 - feature_size: 
80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml deleted file mode 100644 index a43fdac6..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4,benchmark.input_shapes.sequence_length=256 - id: '2' - num: 2 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml deleted file mode 100644 index 3c675c88..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=4 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/hydra_config.yaml deleted file mode 100644 index b2ccf7d5..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - 
_target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/inference_results.csv deleted file mode 100644 index 54f6d8d9..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(4)_sequence_length(256)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0155,258.0,1936 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml deleted file mode 100644 index 6e4fd05a..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 512 - sequence_length: 256 - num_choices: 1 - feature_size: 80 
- nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml deleted file mode 100644 index 456b45d7..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=512 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=512,benchmark.input_shapes.sequence_length=256 - id: '9' - num: 9 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml deleted file mode 100644 index 830422b4..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=512 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/hydra_config.yaml deleted file mode 100644 index d0416dc0..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - 
version: 2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 512 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/inference_results.csv deleted file mode 100644 index e0ede21a..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(512)_sequence_length(256)/9/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,1.58,324.0,8328 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml deleted file mode 100644 index 078ebee2..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 64 - sequence_length: 256 - num_choices: 1 
- feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml deleted file mode 100644 index 4d33e000..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=64 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=64,benchmark.input_shapes.sequence_length=256 - id: '6' - num: 6 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml deleted file mode 100644 index 31aa8a55..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=64 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/hydra_config.yaml deleted file mode 100644 index 78e0093d..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 
2.0.1 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 64 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/inference_results.csv deleted file mode 100644 index af286bcb..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(64)_sequence_length(256)/6/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.204,314.0,2689 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml deleted file mode 100644 index 812a7865..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 256 - num_choices: 1 - feature_size: 
80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml deleted file mode 100644 index 783acfbc..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8,benchmark.input_shapes.sequence_length=256 - id: '3' - num: 3 - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml deleted file mode 100644 index 854c6355..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=8 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/hydra_config.yaml deleted file mode 100644 index 511e3988..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - 
_target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/inference_results.csv deleted file mode 100644 index 202cc59b..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(8)_sequence_length(256)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0302,265.0,1995 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(None)_sequence_length(None)/multirun.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(None)_sequence_length(None)/multirun.yaml deleted file mode 100644 index 8ff2f6cb..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_baseline_batch_size(None)_sequence_length(None)/multirun.yaml +++ /dev/null @@ -1,242 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. 
- - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: [] - job: - name: experiment - chdir: true - override_dirname: '' - id: ??? - num: ??? - config_name: bge_batch_size_sweep_baseline.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: ??? 
- choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: null - sequence_length: null - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml deleted file mode 100644 index 07de7b09..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml deleted file mode 100644 index 77b4d0d4..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
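
The sweep recorded in these files comes from Hydra's basic sweeper expanding the comma-separated `params` into one job per combination; the job numbers match the run subdirectories (`0` for batch size 1, `7` for 128, `10` for 1024, `11` for 2048). An illustrative pure-Python sketch of that expansion:

```python
# Illustrative expansion of the sweeper params above into jobs 0..11,
# mirroring Hydra's basic sweeper ordering.
from itertools import product

batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]
sequence_lengths = [256]

for num, (bs, seq) in enumerate(product(batch_sizes, sequence_lengths)):
    print(
        f"job {num}: benchmark.input_shapes.batch_size={bs},"
        f"benchmark.input_shapes.sequence_length={seq}"
    )
```
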
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=256 - id: '0' - num: 0 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml deleted file mode 100644 index 68453b03..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/hydra_config.yaml deleted file mode 100644 index 8a12a256..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: 
onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/inference_results.csv deleted file mode 100644 index 095db4b2..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00145,690.0,3498 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/config.yaml deleted file mode 100644 index 7d772929..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - 
optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1024 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/hydra.yaml deleted file mode 100644 index c836f4da..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
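
The unresolved `config.yaml` files above use custom OmegaConf resolvers such as `${onnxruntime_version:}`, `${is_inference:${benchmark.name}}`, and `${infer_device_id:${device}}`, which appear fully evaluated in the corresponding `hydra_config.yaml` files. The real resolver implementations live in `optimum_benchmark`; the following is only a sketch of the OmegaConf mechanism, with an illustrative `is_inference`:

```python
# Sketch of the resolver mechanism these configs rely on. The is_inference
# body here is illustrative, not optimum_benchmark's implementation.
from omegaconf import OmegaConf

OmegaConf.register_new_resolver(
    "is_inference", lambda benchmark_name: benchmark_name == "inference"
)

cfg = OmegaConf.create(
    {
        "benchmark": {"name": "inference"},
        "use_inference_session": "${is_inference:${benchmark.name}}",
    }
)
print(cfg.use_inference_session)  # True, as in the resolved hydra_config.yaml above
```
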
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1024 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1024,benchmark.input_shapes.sequence_length=256 - id: '10' - num: 10 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/overrides.yaml deleted file mode 100644 index 874d4c51..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1024 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/hydra_config.yaml deleted file mode 100644 index 1c9acd28..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/hydra_config.yaml +++ /dev/null 
@@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1024 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/inference_results.csv deleted file mode 100644 index 7bdbdf81..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(1024)_sequence_length(256)/10/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.595,1720.0,23700 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml deleted file mode 100644 index 58a6fccd..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: 
${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml deleted file mode 100644 index 34efa3ac..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=128 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=128,benchmark.input_shapes.sequence_length=256 - id: '7' - num: 7 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml deleted file mode 100644 index 459be207..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=128 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/hydra_config.yaml deleted file mode 100644 index b7c8696e..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ 
-backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/inference_results.csv deleted file mode 100644 index 52458f7e..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(128)_sequence_length(256)/7/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0639,2000.0,5583 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml deleted file mode 100644 index 6d8ae445..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: 
${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml deleted file mode 100644 index c91dd995..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
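
The `inference_results.csv` files above record forward latency, throughput, and peak memory for each run; the reported throughput is consistent with `batch_size / latency`, as a quick check on the values deleted so far shows:

```python
# Sanity check on the per-run CSVs above: reported throughput is roughly
# batch_size / forward latency.
results = {  # batch_size: (forward.latency(s), forward.throughput(samples/s))
    1: (0.00145, 690.0),
    128: (0.0639, 2000.0),
    1024: (0.595, 1720.0),
}
for bs, (latency, reported) in results.items():
    print(f"bs={bs}: computed {bs / latency:.0f} samples/s, reported {reported:.0f}")
```
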
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16,benchmark.input_shapes.sequence_length=256 - id: '4' - num: 4 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml deleted file mode 100644 index 9e742a69..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=16 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/hydra_config.yaml deleted file mode 100644 index d346a163..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - 
name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/inference_results.csv deleted file mode 100644 index 1324094c..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(16)_sequence_length(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00883,1810.0,3767 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml deleted file mode 100644 index 60bc9d90..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: 
false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml deleted file mode 100644 index 2f2b897e..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=256 - id: '1' - num: 1 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml deleted file mode 100644 index bf46a09f..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=2 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/hydra_config.yaml deleted file mode 100644 index 5e598330..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: 
onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/inference_results.csv deleted file mode 100644 index 56c0418f..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2)_sequence_length(256)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00195,1030.0,3500 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/config.yaml deleted file mode 100644 index 427d25f3..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - 
optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2048 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/hydra.yaml deleted file mode 100644 index 3376a3de..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
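
Since each job writes its own two-line `inference_results.csv`, comparing runs means collecting them from the `experiments/` tree. A hypothetical aggregation sketch (assuming pandas is available; this is not the repo's own reporting script), with the glob pattern matching the `experiments/<run_name>/<job_num>/` layout of these deletions:

```python
# Hypothetical sketch: gather the per-run inference_results.csv files above
# into a single table for comparison across batch sizes.
import glob

import pandas as pd

frames = []
for path in sorted(glob.glob("experiments/*/*/inference_results.csv")):
    df = pd.read_csv(path, index_col=0)  # first CSV column is an unnamed index
    df["run_dir"] = path
    frames.append(df)

report = pd.concat(frames, ignore_index=True)
print(report.sort_values("forward.throughput(samples/s)", ascending=False))
```
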
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2048 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2048,benchmark.input_shapes.sequence_length=256 - id: '11' - num: 11 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/overrides.yaml deleted file mode 100644 index c0074884..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=2048 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/hydra_config.yaml deleted file mode 100644 index ec0027cf..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/hydra_config.yaml +++ /dev/null 
@@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2048 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/inference_results.csv deleted file mode 100644 index 3eed0c11..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(2048)_sequence_length(256)/11/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,1.2,1710.0,44135 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml deleted file mode 100644 index 1be484cb..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: 
${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 256 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml deleted file mode 100644 index 357b41c3..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=256 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=256,benchmark.input_shapes.sequence_length=256 - id: '8' - num: 8 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml deleted file mode 100644 index 827e72de..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=256 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/hydra_config.yaml deleted file mode 100644 index a0a13d98..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ 
-backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 256 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/inference_results.csv deleted file mode 100644 index b90bccca..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(256)_sequence_length(256)/8/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.146,1750.0,8152 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml deleted file mode 100644 index 5244ec01..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: 
${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml deleted file mode 100644 index 69faa12f..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=32 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=32,benchmark.input_shapes.sequence_length=256 - id: '5' - num: 5 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml deleted file mode 100644 index 3e2c3635..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=32 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/hydra_config.yaml deleted file mode 100644 index c7c3b724..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - 
name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/inference_results.csv deleted file mode 100644 index 516c144b..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(32)_sequence_length(256)/5/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0167,1920.0,4304 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml deleted file mode 100644 index bc7a3904..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: 
false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml deleted file mode 100644 index 5d8bbff3..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4,benchmark.input_shapes.sequence_length=256 - id: '2' - num: 2 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml deleted file mode 100644 index 3c675c88..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=4 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/hydra_config.yaml deleted file mode 100644 index 8865a904..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: 
onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/inference_results.csv deleted file mode 100644 index 433d02f7..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(4)_sequence_length(256)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00307,1300.0,3765 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml deleted file mode 100644 index edbe07af..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: 
false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 512 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml deleted file mode 100644 index 0ed92d8f..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=512 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=512,benchmark.input_shapes.sequence_length=256 - id: '9' - num: 9 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml deleted file mode 100644 index 830422b4..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=512 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/hydra_config.yaml deleted file mode 100644 index 794322d8..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ 
-backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 512 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/inference_results.csv deleted file mode 100644 index ab8286c9..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(512)_sequence_length(256)/9/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.294,1740.0,13263 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml deleted file mode 100644 index 422a64d4..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: 
${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 64 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml deleted file mode 100644 index b9bdeb5e..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=64 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=64,benchmark.input_shapes.sequence_length=256 - id: '6' - num: 6 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml deleted file mode 100644 index 31aa8a55..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=64 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/hydra_config.yaml deleted file mode 100644 index 79d08b36..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - 
name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 64 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/inference_results.csv deleted file mode 100644 index 2d63eb62..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(64)_sequence_length(256)/6/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0324,1980.0,4304 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml deleted file mode 100644 index a20b60b7..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: 
false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml deleted file mode 100644 index 5a81e9e5..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8,benchmark.input_shapes.sequence_length=256 - id: '3' - num: 3 - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml deleted file mode 100644 index 854c6355..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=8 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/hydra_config.yaml deleted file mode 100644 index 28e33900..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: 
onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/inference_results.csv deleted file mode 100644 index 82681477..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(8)_sequence_length(256)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00486,1650.0,3767 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(None)_sequence_length(None)/multirun.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(None)_sequence_length(None)/multirun.yaml deleted file mode 100644 index 62776c29..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_cuda_o4_batch_size(None)_sequence_length(None)/multirun.yaml +++ /dev/null @@ -1,252 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - 
benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: [] - job: - name: experiment - chdir: true - override_dirname: '' - id: ??? - num: ??? - config_name: bge_batch_size_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: ??? 
- choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: null - sequence_length: null - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml deleted file mode 100644 index 7bfc99bc..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - 
calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml deleted file mode 100644 index 988960a4..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=256 - id: '0' - num: 0 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml deleted file mode 100644 index 68453b03..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/hydra_config.yaml deleted file mode 100644 index a61f1d86..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - 
name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/inference_results.csv deleted file mode 100644 index 1eb6aa3d..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00146,685.0,3838 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml deleted file mode 100644 index e8b256a7..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: 
${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml deleted file mode 100644 index 103379e5..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=128 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=128,benchmark.input_shapes.sequence_length=256 - id: '7' - num: 7 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml deleted file mode 100644 index 459be207..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=128 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/hydra_config.yaml deleted file mode 100644 index 45419696..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/hydra_config.yaml +++ /dev/null @@ -1,82 
+0,0 @@ -backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/inference_results.csv deleted file mode 100644 index 9bf5e5ed..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(128)_sequence_length(256)/7/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0584,2190.0,4612 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml deleted file mode 100644 index 3d72d184..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: 
true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml deleted file mode 100644 index a316954a..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16,benchmark.input_shapes.sequence_length=256 - id: '4' - num: 4 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml deleted file mode 100644 index 9e742a69..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=16 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/hydra_config.yaml deleted file mode 100644 index 7db739c3..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ 
-backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/inference_results.csv deleted file mode 100644 index 25531fcb..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(16)_sequence_length(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00811,1970.0,3943 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml deleted file mode 100644 index 89eddd9d..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - 
use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml deleted file mode 100644 index 7835eaef..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=256 - id: '1' - num: 1 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml deleted file mode 100644 index bf46a09f..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=2 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/hydra_config.yaml deleted file mode 100644 index ee4705e6..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - 
name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/inference_results.csv deleted file mode 100644 index 66368e46..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(2)_sequence_length(256)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00176,1140.0,3851 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml deleted file mode 100644 index 9ae4cf91..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: 
${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 256 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml deleted file mode 100644 index a8e2e54d..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=256 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=256,benchmark.input_shapes.sequence_length=256 - id: '8' - num: 8 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml deleted file mode 100644 index 827e72de..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=256 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/hydra_config.yaml deleted file mode 100644 index 06530c91..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/hydra_config.yaml +++ /dev/null @@ -1,82 
+0,0 @@ -backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 256 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/inference_results.csv deleted file mode 100644 index e0c9802a..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(256)_sequence_length(256)/8/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.137,1870.0,5514 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml deleted file mode 100644 index 9bbdc30e..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: 
true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml deleted file mode 100644 index a5555aae..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
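Editor's note: the `inference_results.csv` rows deleted in this patch are internally consistent; the reported throughput is simply batch size divided by mean forward latency, rounded to three significant figures. A quick check against the two results files seen so far (batch sizes 2 and 256):

```python
# Editorial sanity check, not part of the diff: throughput = batch / latency.
for batch_size, latency_s in [(2, 0.00176), (256, 0.137)]:
    print(f"batch {batch_size:>3}: {batch_size / latency_s:,.0f} samples/s")
# batch   2: 1,136 samples/s -> recorded as 1140.0
# batch 256: 1,869 samples/s -> recorded as 1870.0
```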
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=32 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=32,benchmark.input_shapes.sequence_length=256 - id: '5' - num: 5 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml deleted file mode 100644 index 3e2c3635..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=32 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/hydra_config.yaml deleted file mode 100644 index 7d8e18de..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ 
-backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/inference_results.csv deleted file mode 100644 index e20af000..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(32)_sequence_length(256)/5/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0152,2110.0,4016 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml deleted file mode 100644 index 7e63f163..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - 
use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml deleted file mode 100644 index 3c839db0..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
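Editor's note: fields like `${is_inference:${benchmark.name}}` in the `.hydra/config.yaml` files are custom OmegaConf resolvers, and the sibling `hydra_config.yaml` files store the same config after resolution. A sketch of that behaviour; only the resolver names appear in the configs, the bodies below are assumptions chosen to reproduce the resolved values:

```python
from omegaconf import OmegaConf

# Resolver names come from the deleted configs; the bodies are guesses that
# reproduce the resolved values (use_inference_session: true,
# enable_profiling: false) seen in the hydra_config.yaml files.
OmegaConf.register_new_resolver("is_inference", lambda name: name == "inference")
OmegaConf.register_new_resolver("is_profiling", lambda name: name == "profiling")

cfg = OmegaConf.create(
    {
        "benchmark": {"name": "inference"},
        "use_inference_session": "${is_inference:${benchmark.name}}",
        "enable_profiling": "${is_profiling:${benchmark.name}}",
    }
)
print(cfg.use_inference_session, cfg.enable_profiling)  # True False
```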
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4,benchmark.input_shapes.sequence_length=256 - id: '2' - num: 2 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml deleted file mode 100644 index 3c675c88..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=4 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/hydra_config.yaml deleted file mode 100644 index a64e8e87..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - 
name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/inference_results.csv deleted file mode 100644 index 0cd91e11..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(4)_sequence_length(256)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00259,1540.0,3871 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml deleted file mode 100644 index ccee80c1..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: 
${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 512 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml deleted file mode 100644 index f2b834d4..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
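Editor's note: the numeric `${hydra.job.num}` subdirectories in the deleted paths follow from the sweeper params; Hydra's BasicSweeper expands the comma-separated batch sizes into one job per value, numbered by position in the list. That is why the batch-512 run above sits in subdir `9`:

```python
# The sweep list from the deleted hydra.yaml files; job numbers are positions.
batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]
for num, batch_size in enumerate(batch_sizes):
    print(f"job {num:>2} -> batch_size={batch_size}")

assert batch_sizes.index(512) == 9  # matches .../batch_size(512).../9/
assert batch_sizes.index(256) == 8  # matches .../batch_size(256).../8/
```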
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=512 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=512,benchmark.input_shapes.sequence_length=256 - id: '9' - num: 9 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml deleted file mode 100644 index 830422b4..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=512 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/hydra_config.yaml deleted file mode 100644 index 909ded7f..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(512)_sequence_length(256)/9/hydra_config.yaml +++ /dev/null @@ -1,82 
+0,0 @@ -backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 512 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml deleted file mode 100644 index 1d37b256..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 64 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml deleted file mode 100644 index bd1eac69..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
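Editor's note: aggregating these per-run CSVs into a single report is straightforward given the directory layout recorded above. A sketch under that layout, with column names taken from the deleted files:

```python
# Editorial sketch: collect every inference_results.csv from the sweep into
# one frame, keyed by the batch size encoded in the experiment directory name.
import re
from pathlib import Path

import pandas as pd

frames = []
for csv_path in Path("experiments").glob("*/[0-9]*/inference_results.csv"):
    match = re.search(r"batch_size\((\d+)\)", str(csv_path))
    if match is None:
        continue  # skip dirs without a concrete batch size, e.g. batch_size(None)
    frame = pd.read_csv(csv_path, index_col=0)
    frame["batch_size"] = int(match.group(1))
    frames.append(frame)

report = pd.concat(frames).sort_values("batch_size")
print(report[["batch_size", "forward.latency(s)", "forward.throughput(samples/s)"]])
```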
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=64 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=64,benchmark.input_shapes.sequence_length=256 - id: '6' - num: 6 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml deleted file mode 100644 index 31aa8a55..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=64 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/hydra_config.yaml deleted file mode 100644 index 2618f579..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ 
-backend: - name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 64 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/inference_results.csv deleted file mode 100644 index 54978095..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(64)_sequence_length(256)/6/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0303,2110.0,4150 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml deleted file mode 100644 index b85ddcb6..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - 
use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml deleted file mode 100644 index d0b65270..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
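Editor's note: every job in these hydra.yaml files sets `CUDA_VISIBLE_DEVICES: '3'`, so only the fourth A100 is visible to the process and it is renumbered to ordinal 0; that is how `device_id: 0` in the resolved configs still targets physical GPU 3. A rough illustration, assuming torch is available:

```python
import os

# Must be set before CUDA is initialised, as Hydra's env_set does per job.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

import torch

if torch.cuda.is_available():
    print(torch.cuda.device_count())    # 1: only physical GPU 3 is visible
    print(torch.cuda.current_device())  # 0: the visible GPU is ordinal 0
```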
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8,benchmark.input_shapes.sequence_length=256 - id: '3' - num: 3 - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml deleted file mode 100644 index 854c6355..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=8 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/hydra_config.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/hydra_config.yaml deleted file mode 100644 index 9b73cf99..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - 
name: onnxruntime - version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/inference_results.csv b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/inference_results.csv deleted file mode 100644 index 286e93b3..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(8)_sequence_length(256)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00434,1840.0,3899 diff --git a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(None)_sequence_length(None)/multirun.yaml b/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(None)_sequence_length(None)/multirun.yaml deleted file mode 100644 index 7ead4d1b..00000000 --- a/examples/fast-mteb/experiments/bge_batch_size_sweep_ort_trt_fp16_batch_size(None)_sequence_length(None)/multirun.yaml +++ /dev/null @@ -1,253 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - 
benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128,256,512,1024,2048 - benchmark.input_shapes.sequence_length: '256' - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: [] - job: - name: experiment - chdir: true - override_dirname: '' - id: ??? - num: ??? - config_name: bge_batch_size_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: ??? 
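> The `provider_options` recorded in the deleted `hydra_config.yaml` above (`trt_fp16_enable`, `trt_engine_cache_enable`, `trt_engine_cache_path`, `device_id`) map directly onto onnxruntime's TensorRT execution provider. A minimal sketch in plain `onnxruntime`, assuming an `onnxruntime-gpu` build with TensorRT support and a hypothetical already-exported `model.onnx`:

```python
import onnxruntime as ort

# Option names copied from the deleted config; the model path is a placeholder.
session = ort.InferenceSession(
    "model.onnx",
    providers=[
        (
            "TensorrtExecutionProvider",
            {
                "device_id": 0,
                "trt_fp16_enable": True,          # build fp16 engines
                "trt_engine_cache_enable": True,  # cache built engines across runs
                "trt_engine_cache_path": "tmp/trt_cache",
            },
        ),
        "CUDAExecutionProvider",  # conventional fallback for unsupported subgraphs
    ],
)
```

> Listing `CUDAExecutionProvider` after the TensorRT tuple is the usual pattern, since TensorRT declines graphs it cannot fully compile.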
- choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: null - sequence_length: null - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_batch_size_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml deleted file mode 100644 index 0ccfe4de..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml deleted file mode 100644 index 161f2c19..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
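> The sweeper block above (`batch_size: '1'`, `sequence_length: 16,32,64,128,256,512`) fully determines the baseline sweep, so the deleted runs can be regenerated instead of versioned. A hedged sketch, assuming the package's Hydra entrypoint is the `optimum-benchmark` console script and that `configs/` still holds `bge_seq_len_sweep_baseline.yaml` locally:

```python
import subprocess

# Reproduce the sweep via Hydra multirun; the entrypoint name and config
# location are assumptions, not taken from this diff.
subprocess.run(
    [
        "optimum-benchmark",
        "--config-dir", "configs",
        "--config-name", "bge_seq_len_sweep_baseline",
        "--multirun",
        "benchmark.input_shapes.batch_size=1",
        "benchmark.input_shapes.sequence_length=16,32,64,128,256,512",
    ],
    check=True,
)
```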
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=128 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=128 - id: '3' - num: 3 - config_name: bge_seq_len_sweep_baseline - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedder - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedder/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedder/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml deleted file mode 100644 index 1c55c407..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=128 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/hydra_config.yaml deleted file mode 100644 index 5f2a4904..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/inference_results.csv deleted file mode 100644 index 732c490f..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(128)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00689,145.0,1888 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml deleted file mode 100644 index 478a8d82..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 16 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - 
audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml deleted file mode 100644 index f6f55836..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
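> Values such as `${pytorch_version:}` and `${is_inference:${benchmark.name}}` in the deleted `.hydra/config.yaml` files are OmegaConf custom resolvers, which is why those copies keep the interpolation while the matching `hydra_config.yaml` stores the resolved value (`version: 2.0.1`, `eval_mode: true`). A self-contained sketch of the mechanism; the resolver bodies here are assumptions, not the package's actual implementations:

```python
import torch
from omegaconf import OmegaConf

# `*_` tolerates the empty argument that the `${pytorch_version:}` spelling
# can pass through to the resolver.
OmegaConf.register_new_resolver("pytorch_version", lambda *_: torch.__version__)
OmegaConf.register_new_resolver("is_inference", lambda name: name == "inference")

cfg = OmegaConf.create(
    {
        "benchmark": {"name": "inference"},
        "backend": {
            "version": "${pytorch_version:}",
            "eval_mode": "${is_inference:${benchmark.name}}",
        },
    }
)
print(OmegaConf.to_container(cfg, resolve=True))
# e.g. {'benchmark': {'name': 'inference'},
#       'backend': {'version': '2.0.1', 'eval_mode': True}}
```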
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=16 - id: '0' - num: 0 - config_name: bge_seq_len_sweep_baseline - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedder - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedder/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedder/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml deleted file mode 100644 index 7d89e6a3..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=16 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/hydra_config.yaml deleted file mode 100644 index 4562d4f4..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 16 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/inference_results.csv deleted file mode 100644 index 9057696a..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(16)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00729,137.0,1881 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml deleted file mode 100644 index 6a6fc649..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - 
audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml deleted file mode 100644 index 8995d1aa..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
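> The `job_logging` section repeated in each deleted `hydra.yaml` is an ordinary logging dictConfig built around `colorlog.ColoredFormatter`. An equivalent hand-rolled setup, with the format string and `log_colors` copied verbatim from the config and everything else standard library:

```python
import logging
import sys

import colorlog  # pip install colorlog

handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(
    colorlog.ColoredFormatter(
        "[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s]"
        "[%(log_color)s%(levelname)s%(reset)s] - %(message)s",
        log_colors={
            "DEBUG": "purple",
            "INFO": "green",
            "WARNING": "yellow",
            "ERROR": "red",
            "CRITICAL": "red",
        },
    )
)
logging.basicConfig(level=logging.INFO, handlers=[handler])
logging.getLogger("experiment").info("colored like the deleted config")
```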
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=256 - id: '4' - num: 4 - config_name: bge_seq_len_sweep_baseline - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedder - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedder/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedder/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml deleted file mode 100644 index 68453b03..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/hydra_config.yaml deleted file mode 100644 index c14f63a6..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/inference_results.csv deleted file mode 100644 index b2d73b86..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00687,146.0,1890 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml deleted file mode 100644 index 05f11981..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 32 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - 
audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml deleted file mode 100644 index 3b60c1e3..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
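> In the resolved `hydra_config.yaml` files, `disable_grad: true` and `eval_mode: true` describe the PyTorch baseline's inference setup. A minimal sketch of what those two flags amount to; this is not the benchmark's actual measurement loop, and it requires a CUDA device plus Hub access:

```python
import torch
from transformers import AutoModel, AutoTokenizer

model_id = "BAAI/bge-base-en-v1.5"
model = AutoModel.from_pretrained(model_id).to("cuda").eval()  # eval_mode: true
tokenizer = AutoTokenizer.from_pretrained(model_id)

# batch_size=1, sequence_length=32, mirroring one sweep point above
inputs = tokenizer(
    ["x"], padding="max_length", max_length=32, truncation=True,
    return_tensors="pt",
).to("cuda")

with torch.no_grad():  # disable_grad: true
    embeddings = model(**inputs).last_hidden_state
print(embeddings.shape)  # torch.Size([1, 32, 768])
```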
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=32 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=32 - id: '1' - num: 1 - config_name: bge_seq_len_sweep_baseline - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedder - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedder/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedder/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml deleted file mode 100644 index f47df8c7..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=32 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/hydra_config.yaml deleted file mode 100644 index 7c4a5cfb..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 32 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/inference_results.csv deleted file mode 100644 index 209fabad..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(32)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00669,149.0,1883 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml deleted file mode 100644 index b0dbc122..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - 
audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml deleted file mode 100644 index 31e76c7e..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
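> Each run directory ends in a two-row `inference_results.csv` like the ones deleted above, so building a report is a glob-and-concat over `experiments/<sweep>/<job_num>/`. A hedged sketch with pandas, assuming at least one run directory still exists on disk (column names are copied from the deleted files):

```python
from pathlib import Path

import pandas as pd

frames = []
for csv_path in Path("experiments").glob("*/*/inference_results.csv"):
    frame = pd.read_csv(csv_path, index_col=0)
    frame["run_dir"] = str(csv_path.parent)  # keep sweep-point provenance
    frames.append(frame)

report = pd.concat(frames, ignore_index=True)
print(
    report[
        [
            "run_dir",
            "forward.latency(s)",
            "forward.throughput(samples/s)",
            "forward.peak_memory(MB)",
        ]
    ]
)
```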
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=512 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=512 - id: '5' - num: 5 - config_name: bge_seq_len_sweep_baseline - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedder - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedder/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedder/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml deleted file mode 100644 index a262b1b5..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=512 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/hydra_config.yaml deleted file mode 100644 index fad0e265..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/inference_results.csv deleted file mode 100644 index c4cd6554..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(512)/5/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0096,104.0,1906 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml deleted file mode 100644 index e700bc5f..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 64 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - 
audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml deleted file mode 100644 index 1d8e5207..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
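> Every deleted `hydra.yaml` pins the job with `env_set: CUDA_VISIBLE_DEVICES: '3'`, which is why the backend can use `device_id: 0` on a four-GPU box: the remapping happens at the driver level before any framework enumerates devices. A sketch of the same effect outside Hydra:

```python
import os

# Must be set before torch (or onnxruntime) initializes CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

import torch  # noqa: E402  (imported deliberately after the env var is set)

print(torch.cuda.device_count())      # 1: only physical GPU 3 is visible
print(torch.cuda.get_device_name(0))  # that GPU now answers to index 0
```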
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=64 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=64 - id: '2' - num: 2 - config_name: bge_seq_len_sweep_baseline - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedder - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedder/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedder/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml deleted file mode 100644 index cba39030..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=64 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/hydra_config.yaml deleted file mode 100644 index 99404dfd..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/hydra_config.yaml +++ /dev/null @@ -1,69 +0,0 @@ -backend: - name: pytorch - version: 2.0.1 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 64 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/inference_results.csv deleted file mode 100644 index 3ed7fd6d..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(1)_sequence_length(64)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00689,145.0,1885 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(None)_sequence_length(None)/multirun.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(None)_sequence_length(None)/multirun.yaml deleted file mode 100644 index d4c2c139..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_baseline_batch_size(None)_sequence_length(None)/multirun.yaml +++ /dev/null @@ -1,242 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. 
- - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: [] - job: - name: experiment - chdir: true - override_dirname: '' - id: ??? - num: ??? - config_name: bge_seq_len_sweep_baseline - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedder - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedder/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: ??? 
- choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: null - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: null - sequence_length: null - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_baseline -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml deleted file mode 100644 index f8432893..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml deleted file mode 100644 index c7a9d347..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=128 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=128 - id: '3' - num: 3 - config_name: bge_seq_len_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml deleted file mode 100644 index 1c55c407..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=128 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/hydra_config.yaml deleted file mode 100644 index 63bbae2b..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: 
ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/inference_results.csv deleted file mode 100644 index dcc94eaa..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(128)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00132,758.0,3498 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml deleted file mode 100644 index 64afa779..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - 
quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 16 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml deleted file mode 100644 index 7726101f..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=16 - id: '0' - num: 0 - config_name: bge_seq_len_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml deleted file mode 100644 index 7d89e6a3..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=16 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/hydra_config.yaml deleted file mode 100644 index 407f267d..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ort-gpu:1.15.1 
- _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 16 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/inference_results.csv deleted file mode 100644 index 74272d3d..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(16)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0011,909.0,3532 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml deleted file mode 100644 index acfac450..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - 
quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml deleted file mode 100644 index c8780806..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=256 - id: '4' - num: 4 - config_name: bge_seq_len_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml deleted file mode 100644 index 68453b03..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/hydra_config.yaml deleted file mode 100644 index adaffc10..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: 
ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/inference_results.csv deleted file mode 100644 index 3ef35c01..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0015,667.0,3502 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml deleted file mode 100644 index 645481ab..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - 
quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 32 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml deleted file mode 100644 index 5f21f5a0..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=32 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=32 - id: '1' - num: 1 - config_name: bge_seq_len_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml deleted file mode 100644 index f47df8c7..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=32 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/hydra_config.yaml deleted file mode 100644 index 2cbedd4a..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ort-gpu:1.15.1 
- _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 32 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/inference_results.csv deleted file mode 100644 index 15a2e38c..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(32)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00114,877.0,3536 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml deleted file mode 100644 index a247a6a7..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - 
quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml deleted file mode 100644 index 5ff5e201..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=512 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=512 - id: '5' - num: 5 - config_name: bge_seq_len_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml deleted file mode 100644 index a262b1b5..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=512 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/hydra_config.yaml deleted file mode 100644 index 05f00cb8..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: 
ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/inference_results.csv deleted file mode 100644 index 120e5f03..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(512)/5/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00201,498.0,3500 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml deleted file mode 100644 index 3a245689..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - 
quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 64 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml deleted file mode 100644 index 3025fc1d..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=64 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=64 - id: '2' - num: 2 - config_name: bge_seq_len_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml deleted file mode 100644 index cba39030..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=64 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/hydra_config.yaml deleted file mode 100644 index 606f06c6..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ort-gpu:1.15.1 
- _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: 0 - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: - disable_shape_inference: false - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 64 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/inference_results.csv deleted file mode 100644 index e8e30549..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(1)_sequence_length(64)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00113,885.0,3496 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(None)_sequence_length(None)/multirun.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(None)_sequence_length(None)/multirun.yaml deleted file mode 100644 index a1ea59cd..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_cuda_o4_batch_size(None)_sequence_length(None)/multirun.yaml +++ /dev/null @@ -1,252 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: 
'${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: [] - job: - name: experiment - chdir: true - override_dirname: '' - id: ??? - num: ??? - config_name: bge_seq_len_sweep_ort_cuda_o4.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: ??? 
- choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: CUDAExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: O4 - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: null - sequence_length: null - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_cuda_o4 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml deleted file mode 100644 index d4b6b5d4..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - 
calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml deleted file mode 100644 index c5333f4d..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=128 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=128 - id: '3' - num: 3 - config_name: bge_seq_len_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml deleted file mode 100644 index 1c55c407..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=128 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/hydra_config.yaml deleted file mode 100644 index e40d334f..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - name: onnxruntime - 
version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/inference_results.csv deleted file mode 100644 index 6cff36d1..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(128)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00124,806.0,3869 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml deleted file mode 100644 index 4134efb9..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - 
enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 16 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml deleted file mode 100644 index e6886a78..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=16 - id: '0' - num: 0 - config_name: bge_seq_len_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml deleted file mode 100644 index 7d89e6a3..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=16 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/hydra_config.yaml deleted file mode 100644 index 01b97ae8..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - name: onnxruntime - version: 
ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 16 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/inference_results.csv deleted file mode 100644 index 003987a6..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(16)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.000995,1010.0,3832 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml deleted file mode 100644 index b35d8efc..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: 
${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml deleted file mode 100644 index 515c308b..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=256 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=256 - id: '4' - num: 4 - config_name: bge_seq_len_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml deleted file mode 100644 index 68453b03..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=256 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/hydra_config.yaml deleted file mode 100644 index fa81af8c..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - name: onnxruntime - 
version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/inference_results.csv deleted file mode 100644 index 06f60d6f..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00148,676.0,3888 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml deleted file mode 100644 index 430baf34..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - 
enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 32 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml deleted file mode 100644 index b2a9e143..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=32 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=32 - id: '1' - num: 1 - config_name: bge_seq_len_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml deleted file mode 100644 index f47df8c7..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=32 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/hydra_config.yaml deleted file mode 100644 index 42dbfbb1..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - name: onnxruntime - version: 
ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 32 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/inference_results.csv deleted file mode 100644 index 57b1a868..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(32)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00105,952.0,3844 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml deleted file mode 100644 index adbde47a..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: 
${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml deleted file mode 100644 index cb6168be..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=512 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=512 - id: '5' - num: 5 - config_name: bge_seq_len_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml deleted file mode 100644 index a262b1b5..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=512 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/hydra_config.yaml deleted file mode 100644 index 2a5170bd..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - name: onnxruntime - 
version: ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/inference_results.csv deleted file mode 100644 index cc6a0421..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(512)/5/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.00197,508.0,3920 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml deleted file mode 100644 index 1cfd0f78..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - 
enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 64 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml deleted file mode 100644 index 7d5c355b..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,175 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - benchmark.input_shapes.sequence_length=64 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=64 - id: '2' - num: 2 - config_name: bge_seq_len_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/fast-embedders/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2 - choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml deleted file mode 100644 index cba39030..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- benchmark.input_shapes.sequence_length=64 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/hydra_config.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/hydra_config.yaml deleted file mode 100644 index ad3cfaeb..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/hydra_config.yaml +++ /dev/null @@ -1,82 +0,0 @@ -backend: - name: onnxruntime - version: 
ort-gpu:1.15.1 - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - trt_engine_cache_enable: true - trt_engine_cache_path: tmp/trt_cache - device_id: 0 - trt_fp16_enable: true - use_io_binding: false - session_options: - enable_profiling: false - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: true - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 64 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: false - can_generate: false - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/inference_results.csv b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/inference_results.csv deleted file mode 100644 index 26698cd4..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(1)_sequence_length(64)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB) -0,0.0011,909.0,3857 diff --git a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(None)_sequence_length(None)/multirun.yaml b/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(None)_sequence_length(None)/multirun.yaml deleted file mode 100644 index f58dc5f7..00000000 --- a/examples/fast-mteb/experiments/bge_seq_len_sweep_ort_trt_fp16_batch_size(None)_sequence_length(None)/multirun.yaml +++ /dev/null @@ -1,253 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - sweep: - dir: experiments/${experiment_name}_batch_size(${benchmark.input_shapes.batch_size})_sequence_length(${benchmark.input_shapes.sequence_length}) - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: '1' - 
benchmark.input_shapes.sequence_length: 16,32,64,128,256,512 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: [] - job: - name: experiment - chdir: true - override_dirname: '' - id: ??? - num: ??? - config_name: bge_seq_len_sweep_ort_trt_fp16.yaml - env_set: - CUDA_VISIBLE_DEVICES: '3' - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/fast-embedders - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/fast-embedders/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: ??? 
- choices: - benchmark: inference - backend: onnxruntime - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false -backend: - name: onnxruntime - version: ${onnxruntime_version:} - _target_: optimum_benchmark.backends.onnxruntime.backend.ORTBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - export: true - use_cache: true - use_merged: false - torch_dtype: null - provider: TensorrtExecutionProvider - provider_options: - device_id: ${infer_device_id:${device}} - trt_fp16_enable: true - use_io_binding: ${io_bind:${device}} - session_options: - enable_profiling: ${is_profiling:${benchmark.name}} - optimization: false - optimization_config: {} - quantization: false - quantization_config: {} - calibration: false - calibration_config: {} - auto_optimization: null - auto_optimization_config: {} - auto_quantization: null - auto_quantization_config: {} - use_inference_session: ${is_inference:${benchmark.name}} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: null - sequence_length: null - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: null - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bge_seq_len_sweep_ort_trt_fp16 -model: BAAI/bge-base-en-v1.5 -device: cuda -task: feature-extraction -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.0 - transformers_version: 4.34.0.dev0 - accelerate_version: 0.23.0.dev0 - diffusers_version: 0.21.0.dev0 - python_version: 3.8.10 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/fast-mteb/report.py b/examples/fast-mteb/report.py deleted file mode 100644 index 750e824f..00000000 --- a/examples/fast-mteb/report.py +++ /dev/null @@ -1,222 +0,0 @@ -from argparse import ArgumentParser -from pathlib import Path - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from flatten_dict import flatten -from omegaconf import OmegaConf -from pandas import DataFrame -from rich.console import Console -from rich.table import Table -from rich.terminal_theme import MONOKAI - - -def gather_inference_report(root_folder: Path) -> DataFrame: - # key is path to inference file as string, value is dataframe - inference_dfs = { - f.parent.absolute().as_posix(): pd.read_csv(f) for f in root_folder.glob("**/inference_results.csv") - } - - # key is path to config file as string, value is flattened dict - config_dfs = { - f.parent.absolute() - .as_posix(): pd.DataFrame.from_dict(flatten(OmegaConf.load(f), reducer="dot"), orient="index") - .T - for f in root_folder.glob("**/hydra_config.yaml") - if f.parent.absolute().as_posix() in inference_dfs.keys() - } - - if len(inference_dfs) == 0 or len(config_dfs) == 0: - raise 
ValueError(f"No results found in {root_folder}") - - # Merge inference and config dataframes - inference_reports = [ - config_dfs[name].merge(inference_dfs[name], left_index=True, right_index=True) for name in inference_dfs.keys() - ] - - # Concatenate all reports - inference_report = pd.concat(inference_reports, axis=0, ignore_index=True) - inference_report.set_index("experiment_name", inplace=True) - return inference_report - - -def style_element(element, style=""): - if style: - return f"[{style}]{element}[/{style}]" - else: - return element - - -def format_element(element, style=""): - if isinstance(element, float): - if element != element: # nan - formated_element = "" - elif abs(element) >= 1: - formated_element = f"{element:.2f}" - elif abs(element) > 1e-6: - formated_element = f"{element:.2e}" - else: - formated_element = f"{element}" - elif element is None: - formated_element = "" - elif isinstance(element, bool): - if element: - formated_element = style_element("✔", style="green") - else: - formated_element = style_element("✘", style="red") - else: - formated_element = str(element) - - return style_element(formated_element, style=style) - - -def format_row(row, style=""): - formated_row = [] - for element in row: - formated_row.append(format_element(element, style=style)) - return formated_row - - -def get_short_report(inference_report): - short_columns = { - "backend.name": "Backend", - "backend.provider": "Provider", - "benchmark.input_shapes.batch_size": "Batch Size", - "benchmark.input_shapes.sequence_length": "Sequence Length", - "forward.latency(s)": "Forward Latency (s)", - "forward.throughput(samples/s)": "Forward Throughput (samples/s)", - } - short_report = ( - inference_report[list(short_columns.keys())] - .rename(columns=short_columns) - .sort_values(by=["Batch Size", "Sequence Length"], ascending=True) - ) - - short_report["Backend"] = short_report["Backend"].str.replace("pytorch", "PyTorch") - short_report["Backend"] = short_report["Backend"].str.replace("onnxruntime", "OnnxRuntime") - - return short_report - - -def get_rich_table(short_report): - # create rich table - rich_table = Table(show_header=True, show_lines=True) - # we add a column for the index - rich_table.add_column("Experiment Name", justify="left", header_style="") - # we populate the table with values - for column in short_report.columns: - rich_table.add_column(column, justify="right", header_style="bold") - # we add rows - for index, row in short_report.iterrows(): - rich_table.add_row(index, *format_row(row.values, style="")) - - return rich_table - - -def get_throughput_plot(short_report): - fig1, ax1 = plt.subplots() - fig2, ax2 = plt.subplots() - - short_report["Forward Latency (ms)"] = short_report["Forward Latency (s)"] * 1000 - short_report["Backend"] = short_report[["Backend", "Provider"]].apply( - lambda x: f"{x.iloc[0]}+{x.iloc[1]}" if x.iloc[1] == x.iloc[1] else f"{x.iloc[0]}", axis=1 - ) - - width = 0.3 - n_backends = len(short_report["Backend"].unique()) - for i, backend in enumerate(short_report["Backend"].unique(), start=-n_backends // 2): - # for latency, we study the case of batch size 1 across all sequence lengths - backend_report = ( - short_report[(short_report["Backend"] == backend) & (short_report["Batch Size"] == 1)] - .drop_duplicates(subset=["Sequence Length"]) - .sort_values(by="Sequence Length", ascending=True) - ) - seq_lens_axis = np.arange(backend_report["Sequence Length"].nunique()) + width * i - ax1.bar( - seq_lens_axis, - backend_report["Forward Latency (ms)"], - 
-            width=width,
-            label=backend,
-        )
-
-        # for throughput, we study the case of sequence length 256 across all batch sizes
-        backend_report = (
-            short_report[
-                (short_report["Backend"] == backend)
-                & (short_report["Sequence Length"] == 256)
-                & (short_report["Batch Size"] <= 256)
-            ]
-            .drop_duplicates(subset=["Batch Size"])
-            .sort_values(by="Batch Size", ascending=True)
-        )
-        ax2.plot(
-            backend_report["Batch Size"],
-            backend_report["Forward Throughput (samples/s)"],
-            label=backend,
-            marker="o",
-        )
-
-    ax1.legend()
-    ax1.set_xlabel("Sequence Length")
-    ax1.set_ylabel("Forward Latency (ms)")
-    ax1.set_title("Forward Latency per Sequence Length")
-    ax1.set_xticks(np.arange(len(short_report["Sequence Length"].unique())))
-    ax1.set_xticklabels(short_report["Sequence Length"].unique())
-    ax1.axhline(y=1, color="black", linestyle="--")
-    ax1.axhline(y=2, color="red", linestyle="--")
-
-    ax2.legend()
-    ax2.set_xlabel("Batch Size")
-    ax2.set_ylabel("Forward Throughput (samples/s)")
-    ax2.set_title("Forward Throughput per Batch Size")
-
-    return fig1, fig2
-
-
-def generate_report():
-    parser = ArgumentParser()
-    parser.add_argument(
-        "--experiments",
-        "-e",
-        type=Path,
-        required=True,
-        help="The folder containing the results of experiments.",
-    )
-    parser.add_argument(
-        "--report-name",
-        "-r",
-        type=str,
-        required=False,
-        help="The name of the report.",
-    )
-
-    args = parser.parse_args()
-    experiments_folders = args.experiments
-
-    if args.report_name:
-        report_folder = f"artifacts/{args.report_name}"
-    else:
-        report_folder = "artifacts"
-    Path(report_folder).mkdir(parents=True, exist_ok=True)
-
-    # gather experiments results
-    inference_report = gather_inference_report(experiments_folders)
-    inference_report.sort_values(by="forward.throughput(samples/s)", ascending=False, inplace=True)
-    inference_report.to_csv(f"{report_folder}/full_report.csv")
-
-    short_report = get_short_report(inference_report)
-    short_report.to_csv(f"{report_folder}/short_report.csv")
-
-    rich_table = get_rich_table(short_report)
-    console = Console(record=True)
-    console.print(rich_table, justify="center")
-    console.save_svg(f"{report_folder}/rich_table.svg", theme=MONOKAI, title="Inference Report")
-
-    forward_latency_plot, forward_throughput_plot = get_throughput_plot(short_report)
-    forward_latency_plot.savefig(f"{report_folder}/forward_latency_plot.png")
-    forward_throughput_plot.savefig(f"{report_folder}/forward_throughput_plot.png")
-
-
-if __name__ == "__main__":
-    generate_report()
diff --git a/examples/fast-mteb/script.sh b/examples/fast-mteb/script.sh
deleted file mode 100644
index ef7bf418..00000000
--- a/examples/fast-mteb/script.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-for f in configs/*.yaml; do
-    if [ "$f" = "configs/bge_base_config.yaml" ]; then
-        # skip the shared base config
-        continue
-    fi
-    optimum-benchmark --config-dir configs --config-name $(basename "$f" .yaml) -m
-done
diff --git a/examples/openvino_diffusion.yaml b/examples/openvino_diffusion.yaml
index f9f62e64..b7b325f0 100644
--- a/examples/openvino_diffusion.yaml
+++ b/examples/openvino_diffusion.yaml
@@ -7,10 +7,9 @@ defaults:
   - override hydra/job_logging: colorlog # colorful logging
   - override hydra/hydra_logging: colorlog # colorful logging
 
-model: stabilityai/stable-diffusion-2-1
-
 backend:
   device: cpu
+  model: stabilityai/stable-diffusion-2-1
 
 experiment_name: openvino_diffusion
 reshape: true
 export: true
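For reference, the reporting pipeline removed above can also be driven programmatically rather than through its CLI. A minimal sketch, assuming the deleted `report.py` is still importable and that an `experiments/` tree produced by `script.sh` exists:

```python
# Minimal sketch (hypothetical usage of the deleted report.py helpers):
# gather_inference_report merges each hydra_config.yaml with its
# inference_results.csv; get_short_report keeps the headline columns.
from pathlib import Path

from report import gather_inference_report, get_short_report

full_report = gather_inference_report(Path("experiments/"))
short_report = get_short_report(full_report)
print(short_report.to_string())
```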
diff --git a/examples/running-llamas/README.md b/examples/running-llamas/README.md
deleted file mode 100644
index f4c9036c..00000000
--- a/examples/running-llamas/README.md
+++ /dev/null
@@ -1,66 +0,0 @@
-# Optimum-Benchmark x LLaMA
-
-A set of inference benchmarks for Meta's LLaMA2 models.
-
-## Setup
-
-You will need to install any necessary third-party libraries like `deepspeed` or `auto-gptq`, depending on the hardware and benchmarks you want to run.
-
-For example, running FlashAttentionV2 on two devices with Tensor Parallelism (i.e. `fp16+fa2+tp=2`) requires both `deepspeed` and `flash-attn`.
-
-## Running
-
-Then run the benchmarks from this directory with:
-
-```bash
-optimum-benchmark --config-dir configs/ --config-name fp16 --multirun
-optimum-benchmark --config-dir configs/ --config-name fp16+fa2+tp=2 --multirun
-[...]
-```
-
-This will create a folder called `experiments` containing the results of the benchmarks, with an inference `batch_size` ranging from 1 to 128 and an input `sequence_length` (prompt size) of 256.
-
-## Reporting
-
-To create reports for the 7B, 13B, and 65B models on A100-80GB, run:
-
-```bash
-python report.py -e experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/ experiments/hf-dgx-01/TheBloke/LLaMa-7B-GPTQ/ -r artifacts/Llama-7b/
-python report.py -e experiments/hf-dgx-01/NousResearch/Llama-2-13b-hf/ experiments/hf-dgx-01/TheBloke/LLaMa-13B-GPTQ/ -r artifacts/Llama-13b/
-python report.py -e experiments/hf-dgx-01/NousResearch/Llama-2-65b-hf/ experiments/hf-dgx-01/TheBloke/LLaMa-65B-GPTQ/ -r artifacts/Llama-65b/
-```
-
-This will create quick reporting artifacts such as a `full_report.csv`, a `short_report.csv`, and some analysis plots.
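-The three commands differ only in their experiment and artifact paths; if you prefer to drive them from Python, here is a small sketch (hypothetical helper, same flags as above; it assumes the reporting script accepts multiple `-e` paths, as the commands imply):
-
-```python
-# Sketch: replay the three reporting commands above from Python.
-import subprocess
-import sys
-
-REPORTS = {
-    "artifacts/Llama-7b/": [
-        "experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/",
-        "experiments/hf-dgx-01/TheBloke/LLaMa-7B-GPTQ/",
-    ],
-    "artifacts/Llama-13b/": [
-        "experiments/hf-dgx-01/NousResearch/Llama-2-13b-hf/",
-        "experiments/hf-dgx-01/TheBloke/LLaMa-13B-GPTQ/",
-    ],
-    "artifacts/Llama-65b/": [
-        "experiments/hf-dgx-01/NousResearch/Llama-2-65b-hf/",
-        "experiments/hf-dgx-01/TheBloke/LLaMa-65B-GPTQ/",
-    ],
-}
-
-for report_dir, experiment_dirs in REPORTS.items():
-    # Same invocation as the CLI commands above.
-    subprocess.run([sys.executable, "report.py", "-e", *experiment_dirs, "-r", report_dir], check=True)
-```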
-## Results
-
-### LLaMA-7B on A100-80GB
-
-[figures: decode throughput and memory plots, saved under artifacts/Llama-7b/]
-
-### LLaMA-13B on A100-80GB
-
-[figures: decode throughput and memory plots, saved under artifacts/Llama-13b/]
-
-### LLaMA-65B on A100-80GB
-
-[figures: decode throughput and memory plots, saved under artifacts/Llama-65b/]
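-The `full_report.csv` written for each model family is a flat table (one row per run, with config and metrics flattened into columns), so it can be sliced directly with pandas. A minimal sketch, assuming the artifact paths above and the column names emitted by the reporting script:
-
-```python
-# Sketch: best decode/generate throughput per experiment from a saved report (paths assumed).
-import pandas as pd
-
-df = pd.read_csv("artifacts/Llama-13b/full_report.csv", index_col=0)
-best = (
-    df.groupby("experiment_name")[["decode.throughput(tokens/s)", "generate.throughput(tokens/s)"]]
-    .max()
-    .sort_values("decode.throughput(tokens/s)", ascending=False)
-)
-print(best)
-```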
\ No newline at end of file diff --git a/examples/running-llamas/artifacts/Llama-13b/decode_throughput_bar_plot.png b/examples/running-llamas/artifacts/Llama-13b/decode_throughput_bar_plot.png deleted file mode 100644 index 9c8f6b34..00000000 Binary files a/examples/running-llamas/artifacts/Llama-13b/decode_throughput_bar_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-13b/decode_throughput_line_plot.png b/examples/running-llamas/artifacts/Llama-13b/decode_throughput_line_plot.png deleted file mode 100644 index fa374c90..00000000 Binary files a/examples/running-llamas/artifacts/Llama-13b/decode_throughput_line_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-13b/full_report.csv b/examples/running-llamas/artifacts/Llama-13b/full_report.csv deleted file mode 100644 index 313d3b63..00000000 --- a/examples/running-llamas/artifacts/Llama-13b/full_report.csv +++ /dev/null @@ -1,31 +0,0 @@ -,launcher.name,launcher._target_,launcher.start_method,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.continuous_isolation,backend.isolation_check_interval,backend.delete_cache,backend.no_weights,backend.device_map,backend.torch_dtype,backend.eval_mode,backend.disable_grad,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.to_bettertransformer,backend.use_flash_attention_2,backend.quantization_scheme,backend.data_parallel,backend.deepspeed_inference,backend.peft_strategy,benchmark.name,benchmark._target_,benchmark.duration,benchmark.warmup_runs,benchmark.memory,benchmark.energy,benchmark.input_shapes.batch_size,benchmark.input_shapes.sequence_length,benchmark.input_shapes.num_choices,benchmark.input_shapes.feature_size,benchmark.input_shapes.nb_max_frames,benchmark.input_shapes.audio_sequence_length,benchmark.new_tokens,benchmark.can_diffuse,benchmark.can_generate,benchmark.generate_kwargs.num_return_sequences,benchmark.generate_kwargs.max_new_tokens,benchmark.generate_kwargs.min_new_tokens,benchmark.generate_kwargs.do_sample,benchmark.generate_kwargs.use_cache,benchmark.generate_kwargs.pad_token_id,benchmark.generate_kwargs.num_beams,experiment_name,model,task,device,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.optimum_commit,environment.transformers_version,environment.transformers_commit,environment.accelerate_version,environment.accelerate_commit,environment.diffusers_version,environment.diffusers_commit,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB),backend.quantization_config.exllama_config.version 
-0,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0543,18.4,28034,28034,26512,26562,17.1,29.9,17.0,30.1,29420,29420,27506,27946, -1,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.206,19.4,29265,29265,27297,27793,17.8,115.0,17.6,116.0,53638,53638,31284,52164, -2,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.52,21.1,39721,39721,34617,38249,42.6,385.0,41.1,398.0,68242,68242,66527,84414, -3,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.363,22.0,30728,30728,28342,29257,20.3,202.0,19.9,205.0,79217,79217,36318,84401, -4,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.707,22.6,33732,33732,30433,32260,28.2,290.0,27.5,297.0,67049,67049,46388,84422, 
-5,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.106,18.9,28497,28497,26773,27025,17.1,59.9,17.0,60.1,32882,32882,28762,31409, -6,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0508,19.7,28023,28023,26512,26552,15.4,33.2,15.3,33.4,29409,29409,27506,27936, -7,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.194,20.6,29248,29248,27297,27776,16.7,123.0,16.5,124.0,53583,53583,31284,52110, -8,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.43,22.4,39562,39562,34617,38090,42.6,385.0,41.2,397.0,68075,68075,66524,84422, -9,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.348,23.0,30689,30689,28342,29217,20.3,202.0,20.0,204.0,79317,79317,36318,84418, 
-10,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.678,23.6,33652,33652,30433,32180,28.1,292.0,27.4,298.0,67211,67211,46387,84408, -11,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.101,19.8,28493,28493,26773,27021,16.2,63.2,16.1,63.5,32857,32857,28762,31383, -12,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0518,19.3,28023,28023,26512,26552,15.8,32.4,15.7,32.5,29409,29409,27506,27936, -13,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.194,20.6,29246,29246,27297,27774,16.6,123.0,16.4,125.0,53583,53583,31284,52110, -14,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.44,22.2,39583,39583,34617,38111,42.6,385.0,41.2,397.0,68473,68473,66524,84418, 
-15,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.353,22.7,30684,30684,28342,29213,20.2,203.0,19.8,206.0,79320,79320,36318,84416, -16,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.697,23.0,33650,33650,30433,32178,28.0,293.0,27.3,299.0,67523,67523,46387,84412, -17,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-13b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.101,19.8,28493,28493,26773,27021,16.2,63.2,16.1,63.5,32857,32857,28762,31383, -18,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0772,13.0,10300,10300,8714,8824,18.4,27.8,18.3,27.9,11684,11684,9708,10208,2 -19,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.208,19.2,11542,11542,9499,10066,18.8,109.0,18.6,110.0,35894,35894,13487,34418,2 
-20,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.49,21.5,21998,21998,16820,20522,59.1,277.0,57.6,284.0,50767,50767,48729,84418,2 -21,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.382,20.9,13005,13005,10545,11530,20.3,202.0,19.9,205.0,42821,42821,18521,84370,2 -22,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.749,21.4,16120,16120,12636,14644,32.6,251.0,31.9,256.0,30456,30456,28590,84420,2 -23,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.122,16.4,10772,10772,8976,9296,18.6,55.1,18.5,55.2,15145,15145,10965,13669,2 -24,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0683,14.6,9392,9392,7802,7916,19.2,26.7,19.1,26.8,10776,10776,8796,9300,1 
-25,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.177,22.6,10621,10621,8586,9145,25.5,80.3,25.3,80.8,34992,34992,12574,33516,1 -26,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.21,26.4,21079,21079,15907,19604,54.3,302.0,53.1,308.0,67565,67565,47817,84418,1 -27,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.328,24.4,12085,12085,9632,10609,32.8,125.0,32.5,126.0,41900,41900,17608,84389,1 -28,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.733,21.8,15088,15088,11723,13612,40.0,205.0,39.3,208.0,85422,85422,27678,84418,1 -29,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-13B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.106,18.9,9851,9851,8063,8376,18.4,55.7,18.3,55.8,14197,14197,10052,12721,1 diff --git a/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_allocated_bar_plot.png 
b/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_allocated_bar_plot.png
deleted file mode 100644
index e93e3e4d..00000000
Binary files a/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_allocated_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_allocated_line_plot.png b/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_allocated_line_plot.png
deleted file mode 100644
index 8b1327ad..00000000
Binary files a/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_allocated_line_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_reserved_bar_plot.png b/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_reserved_bar_plot.png
deleted file mode 100644
index 80b9dbd6..00000000
Binary files a/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_reserved_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_reserved_line_plot.png b/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_reserved_line_plot.png
deleted file mode 100644
index 12a57525..00000000
Binary files a/examples/running-llamas/artifacts/Llama-13b/generate_max_memory_reserved_line_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-13b/peak_decode_throughput_bar_plot.png b/examples/running-llamas/artifacts/Llama-13b/peak_decode_throughput_bar_plot.png
deleted file mode 100644
index 65a4ae32..00000000
Binary files a/examples/running-llamas/artifacts/Llama-13b/peak_decode_throughput_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-13b/prefill_latency_bar_plot.png b/examples/running-llamas/artifacts/Llama-13b/prefill_latency_bar_plot.png
deleted file mode 100644
index 903dc1b6..00000000
Binary files a/examples/running-llamas/artifacts/Llama-13b/prefill_latency_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-13b/prefill_latency_line_plot.png b/examples/running-llamas/artifacts/Llama-13b/prefill_latency_line_plot.png
deleted file mode 100644
index 4d47c480..00000000
Binary files a/examples/running-llamas/artifacts/Llama-13b/prefill_latency_line_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-13b/short_report.csv b/examples/running-llamas/artifacts/Llama-13b/short_report.csv
deleted file mode 100644
index 27e5a935..00000000
--- a/examples/running-llamas/artifacts/Llama-13b/short_report.csv
+++ /dev/null
@@ -1,31 +0,0 @@
-,Model,GPUs,Experiment Name,Per Process Batch Size,Sequence Length,Decode Latency (s),Prefill Latency (s),Decode Throughput (tokens/s),Prefill Throughput (samples/s),Generate Max Memory Allocated (MB),Generate Max Memory Reserved (MB),GPU Name,Num GPUs,Effective Batch Size,Group
-0,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16,1,256,17.0,0.0543,30.1,18.4,27506,27946,1xA100,1,1,1xA100-fp16
-1,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16,4,256,17.6,0.206,116.0,19.4,31284,52164,1xA100,1,4,1xA100-fp16
-2,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16,32,256,41.1,1.52,398.0,21.1,66527,84414,1xA100,1,32,1xA100-fp16
-3,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16,8,256,19.9,0.363,205.0,22.0,36318,84401,1xA100,1,8,1xA100-fp16
-4,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16,16,256,27.5,0.707,297.0,22.6,46388,84422,1xA100,1,16,1xA100-fp16
-5,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16,2,256,17.0,0.106,60.1,18.9,28762,31409,1xA100,1,2,1xA100-fp16
-6,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,1,256,15.3,0.0508,33.4,19.7,27506,27936,1xA100,1,1,1xA100-fp16+bt
-7,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,4,256,16.5,0.194,124.0,20.6,31284,52110,1xA100,1,4,1xA100-fp16+bt
-8,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,32,256,41.2,1.43,397.0,22.4,66524,84422,1xA100,1,32,1xA100-fp16+bt
-9,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,8,256,20.0,0.348,204.0,23.0,36318,84418,1xA100,1,8,1xA100-fp16+bt
-10,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,16,256,27.4,0.678,298.0,23.6,46387,84408,1xA100,1,16,1xA100-fp16+bt
-11,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,2,256,16.1,0.101,63.5,19.8,28762,31383,1xA100,1,2,1xA100-fp16+bt
-12,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,1,256,15.7,0.0518,32.5,19.3,27506,27936,1xA100,1,1,1xA100-fp16+fa2
-13,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,4,256,16.4,0.194,125.0,20.6,31284,52110,1xA100,1,4,1xA100-fp16+fa2
-14,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,32,256,41.2,1.44,397.0,22.2,66524,84418,1xA100,1,32,1xA100-fp16+fa2
-15,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,8,256,19.8,0.353,206.0,22.7,36318,84416,1xA100,1,8,1xA100-fp16+fa2
-16,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,16,256,27.3,0.697,299.0,23.0,46387,84412,1xA100,1,16,1xA100-fp16+fa2
-17,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,2,256,16.1,0.101,63.5,19.8,28762,31383,1xA100,1,2,1xA100-fp16+fa2
-18,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,1,256,18.3,0.0772,27.9,13.0,9708,10208,1xA100,1,1,1xA100-fp16+gptq+exllamav2
-19,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,4,256,18.6,0.208,110.0,19.2,13487,34418,1xA100,1,4,1xA100-fp16+gptq+exllamav2
-20,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,32,256,57.6,1.49,284.0,21.5,48729,84418,1xA100,1,32,1xA100-fp16+gptq+exllamav2
-21,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,8,256,19.9,0.382,205.0,20.9,18521,84370,1xA100,1,8,1xA100-fp16+gptq+exllamav2
-22,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,16,256,31.9,0.749,256.0,21.4,28590,84420,1xA100,1,16,1xA100-fp16+gptq+exllamav2
-23,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,2,256,18.5,0.122,55.2,16.4,10965,13669,1xA100,1,2,1xA100-fp16+gptq+exllamav2
-24,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,1,256,19.1,0.0683,26.8,14.6,8796,9300,1xA100,1,1,1xA100-fp16+gptq+exllamav1
-25,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,4,256,25.3,0.177,80.8,22.6,12574,33516,1xA100,1,4,1xA100-fp16+gptq+exllamav1
-26,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,32,256,53.1,1.21,308.0,26.4,47817,84418,1xA100,1,32,1xA100-fp16+gptq+exllamav1
-27,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,8,256,32.5,0.328,126.0,24.4,17608,84389,1xA100,1,8,1xA100-fp16+gptq+exllamav1
-28,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,16,256,39.3,0.733,208.0,21.8,27678,84418,1xA100,1,16,1xA100-fp16+gptq+exllamav1
-29,TheBloke/LLaMa-13B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,2,256,18.3,0.106,55.8,18.9,10052,12721,1xA100,1,2,1xA100-fp16+gptq+exllamav1
diff --git
a/examples/running-llamas/artifacts/Llama-65b/decode_throughput_bar_plot.png b/examples/running-llamas/artifacts/Llama-65b/decode_throughput_bar_plot.png deleted file mode 100644 index 2f0547b3..00000000 Binary files a/examples/running-llamas/artifacts/Llama-65b/decode_throughput_bar_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-65b/decode_throughput_line_plot.png b/examples/running-llamas/artifacts/Llama-65b/decode_throughput_line_plot.png deleted file mode 100644 index 912bb8f1..00000000 Binary files a/examples/running-llamas/artifacts/Llama-65b/decode_throughput_line_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-65b/full_report.csv b/examples/running-llamas/artifacts/Llama-65b/full_report.csv deleted file mode 100644 index 7ebf49b1..00000000 --- a/examples/running-llamas/artifacts/Llama-65b/full_report.csv +++ /dev/null @@ -1,9 +0,0 @@ -,launcher.name,launcher._target_,launcher.start_method,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.continuous_isolation,backend.isolation_check_interval,backend.delete_cache,backend.no_weights,backend.device_map,backend.torch_dtype,backend.eval_mode,backend.disable_grad,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.to_bettertransformer,backend.use_flash_attention_2,backend.quantization_scheme,backend.quantization_config.exllama_config.version,backend.data_parallel,backend.deepspeed_inference,backend.peft_strategy,benchmark.name,benchmark._target_,benchmark.duration,benchmark.warmup_runs,benchmark.memory,benchmark.energy,benchmark.input_shapes.batch_size,benchmark.input_shapes.sequence_length,benchmark.input_shapes.num_choices,benchmark.input_shapes.feature_size,benchmark.input_shapes.nb_max_frames,benchmark.input_shapes.audio_sequence_length,benchmark.new_tokens,benchmark.can_diffuse,benchmark.can_generate,benchmark.generate_kwargs.num_return_sequences,benchmark.generate_kwargs.max_new_tokens,benchmark.generate_kwargs.min_new_tokens,benchmark.generate_kwargs.do_sample,benchmark.generate_kwargs.use_cache,benchmark.generate_kwargs.pad_token_id,benchmark.generate_kwargs.num_beams,experiment_name,model,task,device,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.optimum_commit,environment.transformers_version,environment.transformers_commit,environment.accelerate_version,environment.accelerate_commit,environment.diffusers_version,environment.diffusers_commit,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) 
-0,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,2,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-65B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.348,2.87,37657,37657,36106,36182,36.3,14.1,36.0,14.2,42020,42020,39404,40544 -1,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,2,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-65B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.957,4.18,40956,40956,38279,39480,41.4,49.5,40.4,50.6,60776,60776,51488,84401 -2,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,2,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-65B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.77,4.52,44979,44979,41176,43503,67.0,61.1,65.2,62.7,84835,84835,67597,84408 -3,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,2,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-65B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.56,3.57,38916,38916,36830,37440,39.3,26.1,38.7,26.4,67076,67076,43434,65601 -4,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,1,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-65B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.355,2.82,36315,36315,34753,34819,49.1,10.4,48.7,10.5,40694,40694,38052,39197 
-5,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,1,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-65B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.967,4.14,39623,39623,36926,38126,136.0,15.1,135.0,15.1,51725,51725,50136,84397 -6,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,1,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-65B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.78,4.49,43645,43645,39823,42148,139.0,29.5,137.0,29.8,83501,83501,66244,84391 -7,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,1,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-65B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.574,3.48,37582,37582,35477,36085,77.0,13.3,76.4,13.4,65743,65743,42082,64246 diff --git a/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_allocated_bar_plot.png b/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_allocated_bar_plot.png deleted file mode 100644 index 8258f208..00000000 Binary files a/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_allocated_bar_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_allocated_line_plot.png b/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_allocated_line_plot.png deleted file mode 100644 index dccd8188..00000000 Binary files a/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_allocated_line_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_reserved_bar_plot.png b/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_reserved_bar_plot.png deleted file mode 100644 index 762ea0f7..00000000 Binary files a/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_reserved_bar_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_reserved_line_plot.png b/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_reserved_line_plot.png deleted file mode 100644 index 716aeeef..00000000 Binary files a/examples/running-llamas/artifacts/Llama-65b/generate_max_memory_reserved_line_plot.png and /dev/null differ diff --git 
a/examples/running-llamas/artifacts/Llama-65b/peak_decode_throughput_bar_plot.png b/examples/running-llamas/artifacts/Llama-65b/peak_decode_throughput_bar_plot.png
deleted file mode 100644
index af19789f..00000000
Binary files a/examples/running-llamas/artifacts/Llama-65b/peak_decode_throughput_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-65b/prefill_latency_bar_plot.png b/examples/running-llamas/artifacts/Llama-65b/prefill_latency_bar_plot.png
deleted file mode 100644
index 81d1ac85..00000000
Binary files a/examples/running-llamas/artifacts/Llama-65b/prefill_latency_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-65b/prefill_latency_line_plot.png b/examples/running-llamas/artifacts/Llama-65b/prefill_latency_line_plot.png
deleted file mode 100644
index de56dbba..00000000
Binary files a/examples/running-llamas/artifacts/Llama-65b/prefill_latency_line_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-65b/short_report.csv b/examples/running-llamas/artifacts/Llama-65b/short_report.csv
deleted file mode 100644
index 943f7f5b..00000000
--- a/examples/running-llamas/artifacts/Llama-65b/short_report.csv
+++ /dev/null
@@ -1,9 +0,0 @@
-,Model,GPUs,Experiment Name,Per Process Batch Size,Sequence Length,Decode Latency (s),Prefill Latency (s),Decode Throughput (tokens/s),Prefill Throughput (samples/s),Generate Max Memory Allocated (MB),Generate Max Memory Reserved (MB),GPU Name,Num GPUs,Effective Batch Size,Group
-0,TheBloke/LLaMa-65B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,1,256,36.0,0.348,14.2,2.87,39404,40544,1xA100,1,1,1xA100-fp16+gptq+exllamav2
-1,TheBloke/LLaMa-65B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,4,256,40.4,0.957,50.6,4.18,51488,84401,1xA100,1,4,1xA100-fp16+gptq+exllamav2
-2,TheBloke/LLaMa-65B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,8,256,65.2,1.77,62.7,4.52,67597,84408,1xA100,1,8,1xA100-fp16+gptq+exllamav2
-3,TheBloke/LLaMa-65B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,2,256,38.7,0.56,26.4,3.57,43434,65601,1xA100,1,2,1xA100-fp16+gptq+exllamav2
-4,TheBloke/LLaMa-65B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,1,256,48.7,0.355,10.5,2.82,38052,39197,1xA100,1,1,1xA100-fp16+gptq+exllamav1
-5,TheBloke/LLaMa-65B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,4,256,135.0,0.967,15.1,4.14,50136,84397,1xA100,1,4,1xA100-fp16+gptq+exllamav1
-6,TheBloke/LLaMa-65B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,8,256,137.0,1.78,29.8,4.49,66244,84391,1xA100,1,8,1xA100-fp16+gptq+exllamav1
-7,TheBloke/LLaMa-65B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,2,256,76.4,0.574,13.4,3.48,42082,64246,1xA100,1,2,1xA100-fp16+gptq+exllamav1
diff --git a/examples/running-llamas/artifacts/Llama-7b/decode_throughput_bar_plot.png b/examples/running-llamas/artifacts/Llama-7b/decode_throughput_bar_plot.png
deleted file mode 100644
index ccff3652..00000000
Binary files a/examples/running-llamas/artifacts/Llama-7b/decode_throughput_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-7b/decode_throughput_line_plot.png b/examples/running-llamas/artifacts/Llama-7b/decode_throughput_line_plot.png
deleted file mode 100644
index 378f1494..00000000
Binary files a/examples/running-llamas/artifacts/Llama-7b/decode_throughput_line_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-7b/full_report.csv b/examples/running-llamas/artifacts/Llama-7b/full_report.csv
deleted file mode 100644
index
abbe8b67..00000000 --- a/examples/running-llamas/artifacts/Llama-7b/full_report.csv +++ /dev/null @@ -1,36 +0,0 @@ -,launcher.name,launcher._target_,launcher.start_method,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.continuous_isolation,backend.isolation_check_interval,backend.delete_cache,backend.no_weights,backend.device_map,backend.torch_dtype,backend.eval_mode,backend.disable_grad,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.to_bettertransformer,backend.use_flash_attention_2,backend.quantization_scheme,backend.data_parallel,backend.deepspeed_inference,backend.peft_strategy,benchmark.name,benchmark._target_,benchmark.duration,benchmark.warmup_runs,benchmark.memory,benchmark.energy,benchmark.input_shapes.batch_size,benchmark.input_shapes.sequence_length,benchmark.input_shapes.num_choices,benchmark.input_shapes.feature_size,benchmark.input_shapes.nb_max_frames,benchmark.input_shapes.audio_sequence_length,benchmark.new_tokens,benchmark.can_diffuse,benchmark.can_generate,benchmark.generate_kwargs.num_return_sequences,benchmark.generate_kwargs.max_new_tokens,benchmark.generate_kwargs.min_new_tokens,benchmark.generate_kwargs.do_sample,benchmark.generate_kwargs.use_cache,benchmark.generate_kwargs.pad_token_id,benchmark.generate_kwargs.num_beams,experiment_name,model,task,device,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.optimum_commit,environment.transformers_version,environment.transformers_commit,environment.accelerate_version,environment.accelerate_commit,environment.diffusers_version,environment.diffusers_commit,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB),backend.quantization_config.exllama_config.version -0,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0336,29.8,15239,15239,13738,13767,13.2,38.8,13.2,38.7,15954,15954,14356,14480, -1,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA 
A100-SXM4-80GB'],0.109,36.7,16107,16107,14295,14636,13.8,148.0,13.7,149.0,26434,26434,16774,24960, -2,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.826,38.7,23464,23464,19487,21992,27.2,602.0,26.4,619.0,53264,53264,39331,84422, -3,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,64,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.65,38.8,31911,31911,25422,30440,45.9,714.0,44.2,740.0,67584,67584,65112,84420, -4,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.21,38.1,17172,17172,15037,15701,14.1,290.0,13.9,294.0,64977,64977,19997,63503, -5,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.412,38.8,19251,19251,16520,17779,17.5,468.0,17.1,478.0,28076,28076,26442,84420, -6,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.059,33.9,15478,15478,13924,14006,13.5,75.9,13.4,76.3,17091,17091,15162,15617, 
-7,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0318,31.4,15239,15239,13738,13767,12.2,42.0,12.2,41.9,15954,15954,14356,14480, -8,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.104,38.5,16093,16093,14295,14621,13.1,156.0,13.0,157.0,26457,26457,16774,24983, -9,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.804,39.8,23334,23334,19487,21862,27.3,600.0,26.5,617.0,53531,53531,39331,84408, -10,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,64,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.65,38.8,31651,31651,25422,30180,46.7,702.0,45.1,725.0,66915,66915,65111,84420, -11,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.2,40.0,17143,17143,15037,15672,13.3,308.0,13.1,312.0,65229,65229,19997,63755, 
-12,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.4,40.0,19186,19186,16520,17714,17.4,471.0,17.0,481.0,28109,28109,26441,84410, -13,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,True,False,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+bt,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0558,35.8,15470,15470,13924,13998,12.7,80.6,12.6,81.1,17087,17087,15162,15613, -14,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0321,31.2,15239,15239,13738,13767,12.2,42.0,12.2,41.9,15954,15954,14356,14480, -15,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.104,38.5,16097,16097,14295,14625,12.7,161.0,12.6,162.0,26268,26268,16774,24794, -16,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.786,40.7,23347,23347,19487,21875,27.2,602.0,26.4,619.0,54047,54047,39331,84412, 
-17,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,64,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.56,41.0,31676,31676,25422,30205,46.3,708.0,44.7,732.0,67563,67563,65110,84422, -18,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.197,40.6,17139,17139,15037,15667,13.1,313.0,12.9,317.0,65229,65229,19997,63755, -19,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.392,40.8,19182,19182,16520,17710,17.2,476.0,16.8,487.0,28256,28256,26442,84404, -20,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,True,,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+fa2,NousResearch/Llama-2-7b-hf,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0548,36.5,15470,15470,13924,13998,13.0,78.8,12.9,79.2,17087,17087,15162,15613, -21,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0457,21.9,6620,6620,4991,5144,14.5,35.3,14.5,35.2,7479,7479,5609,6004,2 
-22,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.122,32.8,7635,7635,5548,6159,15.1,136.0,15.0,136.0,17959,17959,8027,16483,2 -23,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.833,38.4,15184,15184,10740,13709,35.8,458.0,35.0,467.0,45304,45304,30585,84418,2 -24,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,64,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.61,39.8,23439,23439,16675,21963,53.4,614.0,51.8,631.0,59111,59111,56364,84418,2 -25,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.223,35.9,8700,8700,6289,7224,15.3,268.0,15.1,271.0,56502,56502,11250,55027,2 -26,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.425,37.6,10866,10866,7772,9391,20.5,400.0,20.1,407.0,65550,65550,17694,84385,2 
-27,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,False,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav2,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0708,28.2,6745,6745,5176,5270,14.7,69.7,14.6,70.0,8696,8696,6414,7220,2 -28,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0416,24.0,5892,5892,4265,4416,14.6,35.1,14.6,35.0,6752,6752,4883,5276,1 -29,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.106,37.7,6907,6907,4822,5431,15.5,132.0,15.4,133.0,17231,17231,7301,15755,1 -30,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,32,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.688,46.5,14457,14457,10014,12981,33.0,496.0,32.3,506.0,32224,32224,29858,84420,1 -31,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,64,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],1.32,48.5,22711,22711,15949,21235,51.1,641.0,49.8,657.0,83617,83617,55638,84418,1 
-32,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.191,41.9,7972,7972,5563,6496,19.6,209.0,19.4,211.0,55775,55775,10524,54299,1 -33,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.356,44.9,10137,10137,7046,8661,24.6,333.0,24.2,338.0,59378,59378,16969,84406,1 -34,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu121,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,True,True,False,,False,False,False,gptq,False,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,1,512,512,False,True,0,1,fp16+gptq+exllamav1,TheBloke/LLaMa-7B-GPTQ,text-generation,cuda,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],0.0621,32.2,6018,6018,4450,4542,14.8,69.2,14.7,69.5,7968,7968,5688,6492,1 diff --git a/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_allocated_bar_plot.png b/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_allocated_bar_plot.png deleted file mode 100644 index 1df43ed8..00000000 Binary files a/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_allocated_bar_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_allocated_line_plot.png b/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_allocated_line_plot.png deleted file mode 100644 index 1a10388a..00000000 Binary files a/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_allocated_line_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_reserved_bar_plot.png b/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_reserved_bar_plot.png deleted file mode 100644 index 7681b998..00000000 Binary files a/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_reserved_bar_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_reserved_line_plot.png b/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_reserved_line_plot.png deleted file mode 100644 index cbb732e0..00000000 Binary files a/examples/running-llamas/artifacts/Llama-7b/generate_max_memory_reserved_line_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/Llama-7b/peak_decode_throughput_bar_plot.png 
b/examples/running-llamas/artifacts/Llama-7b/peak_decode_throughput_bar_plot.png
deleted file mode 100644
index 6afda2bf..00000000
Binary files a/examples/running-llamas/artifacts/Llama-7b/peak_decode_throughput_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-7b/prefill_latency_bar_plot.png b/examples/running-llamas/artifacts/Llama-7b/prefill_latency_bar_plot.png
deleted file mode 100644
index 305a84de..00000000
Binary files a/examples/running-llamas/artifacts/Llama-7b/prefill_latency_bar_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-7b/prefill_latency_line_plot.png b/examples/running-llamas/artifacts/Llama-7b/prefill_latency_line_plot.png
deleted file mode 100644
index 5399864e..00000000
Binary files a/examples/running-llamas/artifacts/Llama-7b/prefill_latency_line_plot.png and /dev/null differ
diff --git a/examples/running-llamas/artifacts/Llama-7b/short_report.csv b/examples/running-llamas/artifacts/Llama-7b/short_report.csv
deleted file mode 100644
index bda8c65e..00000000
--- a/examples/running-llamas/artifacts/Llama-7b/short_report.csv
+++ /dev/null
@@ -1,36 +0,0 @@
-,Model,GPUs,Experiment Name,Per Process Batch Size,Sequence Length,Decode Latency (s),Prefill Latency (s),Decode Throughput (tokens/s),Prefill Throughput (samples/s),Generate Max Memory Allocated (MB),Generate Max Memory Reserved (MB),GPU Name,Num GPUs,Effective Batch Size,Group
-0,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,1,256,13.2,0.0336,38.7,29.8,14356,14480,1xA100,1,1,1xA100-fp16
-1,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,4,256,13.7,0.109,149.0,36.7,16774,24960,1xA100,1,4,1xA100-fp16
-2,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,32,256,26.4,0.826,619.0,38.7,39331,84422,1xA100,1,32,1xA100-fp16
-3,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,64,256,44.2,1.65,740.0,38.8,65112,84420,1xA100,1,64,1xA100-fp16
-4,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,8,256,13.9,0.21,294.0,38.1,19997,63503,1xA100,1,8,1xA100-fp16
-5,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,16,256,17.1,0.412,478.0,38.8,26442,84420,1xA100,1,16,1xA100-fp16
-6,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,2,256,13.4,0.059,76.3,33.9,15162,15617,1xA100,1,2,1xA100-fp16
-7,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,1,256,12.2,0.0318,41.9,31.4,14356,14480,1xA100,1,1,1xA100-fp16+bt
-8,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,4,256,13.0,0.104,157.0,38.5,16774,24983,1xA100,1,4,1xA100-fp16+bt
-9,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,32,256,26.5,0.804,617.0,39.8,39331,84408,1xA100,1,32,1xA100-fp16+bt
-10,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,64,256,45.1,1.65,725.0,38.8,65111,84420,1xA100,1,64,1xA100-fp16+bt
-11,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,8,256,13.1,0.2,312.0,40.0,19997,63755,1xA100,1,8,1xA100-fp16+bt
-12,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,16,256,17.0,0.4,481.0,40.0,26441,84410,1xA100,1,16,1xA100-fp16+bt
-13,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+bt,2,256,12.6,0.0558,81.1,35.8,15162,15613,1xA100,1,2,1xA100-fp16+bt
-14,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,1,256,12.2,0.0321,41.9,31.2,14356,14480,1xA100,1,1,1xA100-fp16+fa2
-15,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,4,256,12.6,0.104,162.0,38.5,16774,24794,1xA100,1,4,1xA100-fp16+fa2
A100-SXM4-80GB'],fp16+fa2,32,256,26.4,0.786,619.0,40.7,39331,84412,1xA100,1,32,1xA100-fp16+fa2 -17,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,64,256,44.7,1.56,732.0,41.0,65110,84422,1xA100,1,64,1xA100-fp16+fa2 -18,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,8,256,12.9,0.197,317.0,40.6,19997,63755,1xA100,1,8,1xA100-fp16+fa2 -19,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,16,256,16.8,0.392,487.0,40.8,26442,84404,1xA100,1,16,1xA100-fp16+fa2 -20,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+fa2,2,256,12.9,0.0548,79.2,36.5,15162,15613,1xA100,1,2,1xA100-fp16+fa2 -21,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,1,256,14.5,0.0457,35.2,21.9,5609,6004,1xA100,1,1,1xA100-fp16+gptq+exllamav2 -22,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,4,256,15.0,0.122,136.0,32.8,8027,16483,1xA100,1,4,1xA100-fp16+gptq+exllamav2 -23,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,32,256,35.0,0.833,467.0,38.4,30585,84418,1xA100,1,32,1xA100-fp16+gptq+exllamav2 -24,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,64,256,51.8,1.61,631.0,39.8,56364,84418,1xA100,1,64,1xA100-fp16+gptq+exllamav2 -25,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,8,256,15.1,0.223,271.0,35.9,11250,55027,1xA100,1,8,1xA100-fp16+gptq+exllamav2 -26,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,16,256,20.1,0.425,407.0,37.6,17694,84385,1xA100,1,16,1xA100-fp16+gptq+exllamav2 -27,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav2,2,256,14.6,0.0708,70.0,28.2,6414,7220,1xA100,1,2,1xA100-fp16+gptq+exllamav2 -28,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,1,256,14.6,0.0416,35.0,24.0,4883,5276,1xA100,1,1,1xA100-fp16+gptq+exllamav1 -29,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,4,256,15.4,0.106,133.0,37.7,7301,15755,1xA100,1,4,1xA100-fp16+gptq+exllamav1 -30,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,32,256,32.3,0.688,506.0,46.5,29858,84420,1xA100,1,32,1xA100-fp16+gptq+exllamav1 -31,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,64,256,49.8,1.32,657.0,48.5,55638,84418,1xA100,1,64,1xA100-fp16+gptq+exllamav1 -32,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,8,256,19.4,0.191,211.0,41.9,10524,54299,1xA100,1,8,1xA100-fp16+gptq+exllamav1 -33,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,16,256,24.2,0.356,338.0,44.9,16969,84406,1xA100,1,16,1xA100-fp16+gptq+exllamav1 -34,TheBloke/LLaMa-7B-GPTQ,['NVIDIA A100-SXM4-80GB'],fp16+gptq+exllamav1,2,256,14.7,0.0621,69.5,32.2,5688,6492,1xA100,1,2,1xA100-fp16+gptq+exllamav1 diff --git a/examples/running-llamas/configs/_base_.yaml b/examples/running-llamas/configs/_base_.yaml deleted file mode 100644 index 5df2bd58..00000000 --- a/examples/running-llamas/configs/_base_.yaml +++ /dev/null @@ -1,40 +0,0 @@ -defaults: - - backend: pytorch # default backend - - launcher: inline # default launcher - - benchmark: inference # default benchmark - - experiment # inheriting from experiment config - - _self_ # for hydra 1.1 compatibility - - override hydra/job_logging: colorlog # colorful logging - - override hydra/hydra_logging: colorlog # colorful logging - -experiment_name: llama-experiment -model: llama-2-model -device: cuda - -backend: - no_weights: true - torch_dtype: float16 - -benchmark: - memory: true - warmup_runs: 10 - new_tokens: 512 - input_shapes: - batch_size: 1 - 
sequence_length: 256 - -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - job: - chdir: true - env_set: - CUDA_VISIBLE_DEVICES: 0 - CUDA_DEVICE_ORDER: PCI_BUS_ID - sweeper: - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf diff --git a/examples/running-llamas/configs/fp16+bt+tp=2.yaml b/examples/running-llamas/configs/fp16+bt+tp=2.yaml deleted file mode 100644 index efaddd22..00000000 --- a/examples/running-llamas/configs/fp16+bt+tp=2.yaml +++ /dev/null @@ -1,22 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+bt+tp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29500 - -backend: - to_bettertransformer: true - deepspeed_inference: true - deepspeed_inference_config: - tensor_parallel: - tp_size: 2 - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git a/examples/running-llamas/configs/fp16+bt.yaml b/examples/running-llamas/configs/fp16+bt.yaml deleted file mode 100644 index bee86deb..00000000 --- a/examples/running-llamas/configs/fp16+bt.yaml +++ /dev/null @@ -1,9 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: process - -experiment_name: fp16+bt - -backend: - to_bettertransformer: true diff --git a/examples/running-llamas/configs/fp16+dp=2.yaml b/examples/running-llamas/configs/fp16+dp=2.yaml deleted file mode 100644 index 80adde9d..00000000 --- a/examples/running-llamas/configs/fp16+dp=2.yaml +++ /dev/null @@ -1,15 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+dp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29511 - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git a/examples/running-llamas/configs/fp16+fa2+tp=2.yaml b/examples/running-llamas/configs/fp16+fa2+tp=2.yaml deleted file mode 100644 index a239e767..00000000 --- a/examples/running-llamas/configs/fp16+fa2+tp=2.yaml +++ /dev/null @@ -1,22 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+fa2+tp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29544 - -backend: - use_flash_attention_2: true - deepspeed_inference: true - deepspeed_inference_config: - tensor_parallel: - tp_size: 2 - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git a/examples/running-llamas/configs/fp16+fa2.yaml b/examples/running-llamas/configs/fp16+fa2.yaml deleted file mode 100644 index b045ebd1..00000000 --- a/examples/running-llamas/configs/fp16+fa2.yaml +++ /dev/null @@ -1,9 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: process - -experiment_name: fp16+fa2 - -backend: - use_flash_attention_2: true diff --git a/examples/running-llamas/configs/fp16+gptq+exllamav1+dp=2.yaml b/examples/running-llamas/configs/fp16+gptq+exllamav1+dp=2.yaml deleted file mode 100644 index f9a58bd4..00000000 --- a/examples/running-llamas/configs/fp16+gptq+exllamav1+dp=2.yaml +++ /dev/null @@ -1,27 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+gptq+exllamav1+dp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29522 - -backend: - # for some reason core gets dumped - # with dummy weights + exllamav1 for 65B - no_weights: false - quantization_scheme: gptq - quantization_config: - exllama_config: - version: 1 - -hydra: - job: - 
env_set: - CUDA_VISIBLE_DEVICES: 0,1 - sweeper: - params: - model: TheBloke/LLaMa-7B-GPTQ,TheBloke/LLaMa-13B-GPTQ,TheBloke/LLaMa-65B-GPTQ diff --git a/examples/running-llamas/configs/fp16+gptq+exllamav1.yaml b/examples/running-llamas/configs/fp16+gptq+exllamav1.yaml deleted file mode 100644 index c7555a8b..00000000 --- a/examples/running-llamas/configs/fp16+gptq+exllamav1.yaml +++ /dev/null @@ -1,20 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: process - -experiment_name: fp16+gptq+exllamav1 - -backend: - # for some reason core gets dumped - # with 65B + exllamav1 - no_weights: false - quantization_scheme: gptq - quantization_config: - exllama_config: - version: 1 - -hydra: - sweeper: - params: - model: TheBloke/LLaMa-7B-GPTQ,TheBloke/LLaMa-13B-GPTQ,TheBloke/LLaMa-65B-GPTQ diff --git a/examples/running-llamas/configs/fp16+gptq+exllamav2+dp=2.yaml b/examples/running-llamas/configs/fp16+gptq+exllamav2+dp=2.yaml deleted file mode 100644 index fdb85109..00000000 --- a/examples/running-llamas/configs/fp16+gptq+exllamav2+dp=2.yaml +++ /dev/null @@ -1,27 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+gptq+exllamav2+dp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29533 - -backend: - # for some reason core gets dumped - # with dummy weights + exllamav2 - no_weights: false - quantization_scheme: gptq - quantization_config: - exllama_config: - version: 2 - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 - sweeper: - params: - model: TheBloke/LLaMa-7B-GPTQ,TheBloke/LLaMa-13B-GPTQ,TheBloke/LLaMa-65B-GPTQ diff --git a/examples/running-llamas/configs/fp16+gptq+exllamav2.yaml b/examples/running-llamas/configs/fp16+gptq+exllamav2.yaml deleted file mode 100644 index dd314d5d..00000000 --- a/examples/running-llamas/configs/fp16+gptq+exllamav2.yaml +++ /dev/null @@ -1,20 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: process - -experiment_name: fp16+gptq+exllamav2 - -backend: - # for some reason core gets dumped - # with dummy weights + exllamav2 - no_weights: false - quantization_scheme: gptq - quantization_config: - exllama_config: - version: 2 - -hydra: - sweeper: - params: - model: TheBloke/LLaMa-7B-GPTQ,TheBloke/LLaMa-13B-GPTQ,TheBloke/LLaMa-65B-GPTQ diff --git a/examples/running-llamas/configs/fp16+tp=2.yaml b/examples/running-llamas/configs/fp16+tp=2.yaml deleted file mode 100644 index 712149df..00000000 --- a/examples/running-llamas/configs/fp16+tp=2.yaml +++ /dev/null @@ -1,21 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+tp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29544 - -backend: - deepspeed_inference: true - deepspeed_inference_config: - tensor_parallel: - tp_size: 2 - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git a/examples/running-llamas/configs/fp16.yaml b/examples/running-llamas/configs/fp16.yaml deleted file mode 100644 index 432d986f..00000000 --- a/examples/running-llamas/configs/fp16.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: process - -experiment_name: fp16 diff --git a/examples/running-llamas/report.py b/examples/running-llamas/report.py deleted file mode 100644 index 316f5bc1..00000000 --- a/examples/running-llamas/report.py +++ /dev/null @@ -1,289 +0,0 @@ -from argparse import ArgumentParser -from pathlib import Path -from typing import List - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from flatten_dict 
import flatten -from omegaconf import OmegaConf -from pandas import DataFrame - - -def gather_full_report(root_folders: List[Path], report_folder: str = "artifacts") -> DataFrame: - # key is path to inference file as string, value is dataframe - - config_dfs = {} - inference_dfs = {} - - for root_folder in root_folders: - inference_dfs.update( - {f.parent.absolute().as_posix(): pd.read_csv(f) for f in root_folder.glob("**/inference_results.csv")} - ) - config_dfs.update( - { - f.parent.absolute() - .as_posix(): pd.DataFrame.from_dict(flatten(OmegaConf.load(f), reducer="dot"), orient="index") - .T - for f in root_folder.glob("**/hydra_config.yaml") - if f.parent.absolute().as_posix() in inference_dfs.keys() - } - ) - - if len(inference_dfs) == 0 or len(config_dfs) == 0: - raise ValueError(f"No results found in {root_folder}") - - # Merge inference and config dataframes - inference_reports = [ - config_dfs[name].merge(inference_dfs[name], left_index=True, right_index=True) for name in inference_dfs.keys() - ] - - # Concatenate all reports - inference_report = pd.concat(inference_reports, axis=0, ignore_index=True) - inference_report.to_csv(f"{report_folder}/full_report.csv") - - return inference_report - - -def get_short_report(full_report, report_folder: str = "artifacts"): - short_columns = { - "model": "Model", - "environment.gpus": "GPUs", - "experiment_name": "Experiment Name", - "benchmark.input_shapes.batch_size": "Per Process Batch Size", - "benchmark.input_shapes.sequence_length": "Sequence Length", - # - "decode.latency(s)": "Decode Latency (s)", - "forward.latency(s)": "Prefill Latency (s)", - # - "decode.throughput(tokens/s)": "Decode Throughput (tokens/s)", - "forward.throughput(samples/s)": "Prefill Throughput (samples/s)", - # - "generate.max_memory_allocated(MB)": "Generate Max Memory Allocated (MB)", - "generate.max_memory_reserved(MB)": "Generate Max Memory Reserved (MB)", - } - short_report = full_report[list(short_columns.keys())].rename(columns=short_columns) - - short_report["GPU Name"] = short_report["GPUs"].str[0] - short_report["Num GPUs"] = short_report["GPUs"].str.len() - short_report["GPU Name"].replace("NVIDIA A100-SXM4-80GB", "1xA100", inplace=True) - short_report["GPU Name"].replace("AMD INSTINCT MI250 (MCM) OAM AC MBA", "1xMI250", inplace=True) - short_report["Effective Batch Size"] = short_report["Per Process Batch Size"] * short_report["Num GPUs"] - short_report["Group"] = short_report["GPU Name"] + "-" + short_report["Experiment Name"] - short_report.to_csv(f"{report_folder}/short_report.csv") - - return short_report - - -def get_batch_plots(short_report, report_folder, plot="bar", memory=True): - fig1, ax1 = plt.subplots() - fig2, ax2 = plt.subplots() - fig3, ax3 = plt.subplots() - fig4, ax4 = plt.subplots() - - batch_column = "Effective Batch Size" - short_report = short_report.sort_values(by="Group", ascending=True) - groups = short_report["Group"].unique().tolist() - x = np.arange(len(short_report[batch_column].unique())) - width = 0.8 / len(short_report["Group"].unique().tolist()) - offset = -(width * (len(groups) - 1) / 2) - - for group in groups: - mask = short_report["Group"] == group - group_report = short_report[mask].sort_values(by=batch_column) - x_ = np.arange( - group_report[batch_column].min() - 1, - len(group_report[batch_column].unique()) + (group_report[batch_column].min() - 1), - ) - if plot == "bar": - ax1.bar( - x_ + offset, - group_report["Prefill Latency (s)"], - label=group, - width=width, - ) - ax2.bar( - x_ + offset, - 
group_report["Decode Throughput (tokens/s)"], - label=group, - width=width, - ) - ax3.bar( - x_ + offset, - group_report["Generate Max Memory Allocated (MB)"], - label=group, - width=width, - ) - ax4.bar( - x_ + offset, - group_report["Generate Max Memory Reserved (MB)"], - label=group, - width=width, - ) - offset += width - elif plot == "line": - ax1.plot( - x_, - group_report["Prefill Latency (s)"], - label=group, - marker="o", - ) - ax2.plot( - x_, - group_report["Decode Throughput (tokens/s)"], - label=group, - marker="o", - ) - ax3.plot( - x_, - group_report["Generate Max Memory Allocated (MB)"], - label=group, - marker="o", - ) - ax4.plot( - x_, - group_report["Generate Max Memory Reserved (MB)"], - label=group, - marker="o", - ) - - ax1.set_xticks(x) - ax1.set_ylim(bottom=0) - ax1.set_xticklabels(short_report[batch_column].sort_values().unique().tolist()) - ax1.set_xlabel(batch_column) - ax1.set_ylabel("Prefill Latency (s)") - ax1.set_title(f"Prefill Latency per Batch Size ({short_report['Model'].unique()[0]})") - ax1.legend(fancybox=True, shadow=True) - - ax2.set_xticks(x) - ax2.set_ylim(bottom=0) - ax2.set_xticklabels(short_report[batch_column].sort_values().unique().tolist()) - ax2.set_xlabel(batch_column) - ax2.set_ylabel("Effective Decode Throughput (tokens/s)") - ax2.set_title(f"Decode Throughput per Batch Size ({short_report['Model'].unique()[0]})") - ax2.legend(fancybox=True, shadow=True) - - ax3.set_xticks(x) - ax3.set_ylim(bottom=0) - ax3.set_xticklabels(short_report[batch_column].sort_values().unique().tolist()) - ax3.set_xlabel(batch_column) - ax3.set_ylabel("Generate Max Memory Allocated (MB)") - ax3.set_title(f"Generate Max Memory Allocated per Batch Size ({short_report['Model'].unique()[0]})") - ax3.legend(fancybox=True, shadow=True) - - ax4.set_xticks(x) - ax4.set_ylim(bottom=0) - ax4.set_xticklabels(short_report[batch_column].sort_values().unique().tolist()) - ax4.set_xlabel(batch_column) - ax4.set_ylabel("Generate Max Memory Reserved (MB)") - ax4.set_title(f"Generate Max Memory Reserved per Batch Size ({short_report['Model'].unique()[0]})") - ax4.legend(fancybox=True, shadow=True) - - legend = plt.legend(loc="upper center") - legend.get_frame().set_facecolor((0, 0, 1, 0.1)) - legend.get_frame().set_alpha(None) - plt.tight_layout() - - fig1.savefig(f"{report_folder}/prefill_latency_{plot}_plot.png") - fig2.savefig(f"{report_folder}/decode_throughput_{plot}_plot.png") - - if memory: - fig3.savefig(f"{report_folder}/generate_max_memory_allocated_{plot}_plot.png") - fig4.savefig(f"{report_folder}/generate_max_memory_reserved_{plot}_plot.png") - return fig1, fig2, fig3, fig4 - - return fig1, fig2 - - -def get_peak_decode_throughput_plot(short_report, report_folder): - # a bar plot with one bar per group, representing the max attainable throughput in tokens/s - fig, ax = plt.subplots() - - # - max_decode_throughput = short_report.groupby("Group")["Decode Throughput (tokens/s)"].max().reset_index() - max_decode_throughput = ( - short_report.merge(max_decode_throughput, on=["Group", "Decode Throughput (tokens/s)"]) - .sort_values(by="Decode Throughput (tokens/s)", ascending=True) - .reset_index() - ) - - ax.bar( - max_decode_throughput["Group"], - max_decode_throughput["Decode Throughput (tokens/s)"], - color=plt.cm.Paired(np.arange(len(max_decode_throughput))), - ) - - # add batch size on top of each bar - for i, v in enumerate(max_decode_throughput["Effective Batch Size"]): - ax.text( - i, - max_decode_throughput["Decode Throughput (tokens/s)"].iloc[i], - f"bs={v}", - 
ha="center", - va="bottom", - ) - - ax.set_xlabel("Group") - ax.set_ylabel("Peak Decode Throughput (tokens/s)") - ax.set_title(f"Peak Decode Throughput ({short_report['Model'].unique()[0]})") - ax.set_ylim(top=max_decode_throughput["Decode Throughput (tokens/s)"].max() * 1.1) - - plt.xticks(rotation=45, ha="right") - plt.tight_layout() - - fig.savefig(f"{report_folder}/peak_decode_throughput_bar_plot.png") - - return fig - - -def generate_report(): - parser = ArgumentParser() - parser.add_argument( - "--experiments-folders", - "-e", - type=Path, - nargs="+", - required=True, - help="The folder containing the results of experiments.", - ) - parser.add_argument( - "--report-name", - "-r", - type=str, - required=False, - default="artifacts", - help="The name of the report.", - ) - - args = parser.parse_args() - report_folder = args.report_name - experiments_folders = args.experiments_folders - - Path(report_folder).mkdir(parents=True, exist_ok=True) - - # gather experiments results - full_report = gather_full_report( - root_folders=experiments_folders, - report_folder=report_folder, - ) - short_report = get_short_report( - full_report, - report_folder=report_folder, - ) - for plot in ["bar", "line"]: - _ = get_batch_plots( - short_report, - report_folder, - plot=plot, - memory=True, - ) - - _ = get_peak_decode_throughput_plot( - short_report, - report_folder, - ) - print("Report generated successfully!") - - -if __name__ == "__main__": - generate_report() diff --git a/examples/running-mistrals/README.md b/examples/running-mistrals/README.md deleted file mode 100644 index 8d122cf8..00000000 --- a/examples/running-mistrals/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# Optimum-Benchmark x Mistral x BnB & GPTQ & AWQ - -A set of benchmarks on quantizing Misral AI's model. - -## Setup - -You will need to install these quantization packages: - -```bash -pip install bitsandbytes -pip install auto-gptq -pip install autoawq -``` - -## Running - -Then run these commands from this directory: - -```bash -optimum-benchmark --config-dir configs/ --config-name _base_ --multirun -optimum-benchmark --config-dir configs/ --config-name bnb --multirun -optimum-benchmark --config-dir configs/ --config-name gptq --multirun -optimum-benchmark --config-dir configs/ --config-name awq --multirun -``` - -This will create a folder called `experiments` with the results of the benchmarks with an inference `batch_size` ranging from 1 to 16 and an input `sequence_length` (prompt size) of 512. - -## Reporting - -To create a report run: - -```bash -python report.py -e experiments -``` - -Which will create some quick reporting artifacts like a `full_report.csv`, `short_report.csv`, some plots and a `rich_table.svg`. - -## Results - -### On A100-80GB - -- -
- -- -
- -- -
- -- -
- -- -
diff --git a/examples/running-mistrals/artifacts/A100-80GB/forward_latency_plot.png b/examples/running-mistrals/artifacts/A100-80GB/forward_latency_plot.png deleted file mode 100644 index 8a5c64b1..00000000 Binary files a/examples/running-mistrals/artifacts/A100-80GB/forward_latency_plot.png and /dev/null differ diff --git a/examples/running-mistrals/artifacts/A100-80GB/forward_memory_plot.png b/examples/running-mistrals/artifacts/A100-80GB/forward_memory_plot.png deleted file mode 100644 index 0143c4de..00000000 Binary files a/examples/running-mistrals/artifacts/A100-80GB/forward_memory_plot.png and /dev/null differ diff --git a/examples/running-mistrals/artifacts/A100-80GB/full_report.csv b/examples/running-mistrals/artifacts/A100-80GB/full_report.csv deleted file mode 100644 index 01e5b19f..00000000 --- a/examples/running-mistrals/artifacts/A100-80GB/full_report.csv +++ /dev/null @@ -1,21 +0,0 @@ -experiment_name,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.initial_isolation_check,backend.continous_isolation_check,backend.delete_cache,backend.no_weights,backend.device_map,backend.torch_dtype,backend.disable_grad,backend.eval_mode,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.bettertransformer,backend.quantization_scheme,backend.use_ddp,backend.peft_strategy,benchmark.name,benchmark._target_,benchmark.duration,benchmark.warmup_runs,benchmark.memory,benchmark.energy,benchmark.input_shapes.batch_size,benchmark.input_shapes.sequence_length,benchmark.input_shapes.num_choices,benchmark.input_shapes.feature_size,benchmark.input_shapes.nb_max_frames,benchmark.input_shapes.audio_sequence_length,benchmark.new_tokens,benchmark.can_diffuse,benchmark.can_generate,benchmark.generate_kwargs.max_new_tokens,benchmark.generate_kwargs.min_new_tokens,benchmark.generate_kwargs.do_sample,benchmark.generate_kwargs.use_cache,benchmark.generate_kwargs.pad_token_id,benchmark.generate_kwargs.num_beams,model,device,task,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.optimum_commit,environment.transformers_version,environment.transformers_commit,environment.accelerate_version,environment.accelerate_commit,environment.diffusers_version,environment.diffusers_commit,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB),backend.quantization_config.llm_int8_threshold,backend.quantization_config.load_in_4bit,backend.quantization_config.bnb_4bit_compute_dtype -fp16-batch_size(4)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.233,17.2,17865,29.2,137.0,18509,,, 
-fp16-batch_size(8)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.467,17.1,19463,33.9,236.0,20524,,, -fp16-batch_size(16)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.937,17.1,22458,50.1,319.0,24393,,, -bnb-batch_size(16)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.94,17.0,13155,76.0,211.0,15254,0.0,True,float16 -fp16-batch_size(2)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.118,16.9,17158,29.4,68.0,17523,,, -gptq-batch_size(16)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,TheBloke/Mistral-7B-v0.1-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.963,16.6,13314,66.1,242.0,15235,,, -bnb-batch_size(8)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, 
AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.483,16.6,10261,59.9,134.0,11330,0.0,True,float16 -gptq-batch_size(8)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,TheBloke/Mistral-7B-v0.1-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.486,16.5,10303,48.9,164.0,11280,,, -fp16-batch_size(1)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.0633,15.8,16907,28.3,35.3,17066,,, -bnb-batch_size(4)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.254,15.7,8797,58.4,68.5,9317,0.0,True,float16 -gptq-batch_size(4)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,TheBloke/Mistral-7B-v0.1-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.261,15.3,8747,36.0,111.0,9239,,, -bnb-batch_size(2)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.141,14.2,8166,57.2,35.0,8401,0.0,True,float16 
-gptq-batch_size(2)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,TheBloke/Mistral-7B-v0.1-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.142,14.1,7918,31.8,62.9,8279,,, -gptq-batch_size(1)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,TheBloke/Mistral-7B-v0.1-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.0828,12.1,7669,31.5,31.7,7824,,, -bnb-batch_size(1)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,mistralai/Mistral-7B-v0.1,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.0865,11.6,7822,43.4,23.0,7960,0.0,True,float16 -awq-batch_size(16)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,ybelkada/test-mistral-7b-v0.1-awq,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",2.52,6.35,14486,51.3,312.0,15080,,, -awq-batch_size(8)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,ybelkada/test-mistral-7b-v0.1-awq,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",1.27,6.3,10787,34.9,229.0,11175,,, 
-awq-batch_size(4)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,ybelkada/test-mistral-7b-v0.1-awq,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.638,6.27,8958,32.8,122.0,9101,,, -awq-batch_size(2)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,ybelkada/test-mistral-7b-v0.1-awq,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.321,6.23,7824,31.8,62.9,8136,,, -awq-batch_size(1)-sequence_length(512)-new_tokens(1000),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,512,1,80,3000,16000,1000,False,True,1000,1000,False,True,0,1,ybelkada/test-mistral-7b-v0.1-awq,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.164,6.1,7589,32.1,31.2,7660,,, diff --git a/examples/running-mistrals/artifacts/A100-80GB/generate_memory_plot.png b/examples/running-mistrals/artifacts/A100-80GB/generate_memory_plot.png deleted file mode 100644 index 74a91ef8..00000000 Binary files a/examples/running-mistrals/artifacts/A100-80GB/generate_memory_plot.png and /dev/null differ diff --git a/examples/running-mistrals/artifacts/A100-80GB/generate_throughput_plot.png b/examples/running-mistrals/artifacts/A100-80GB/generate_throughput_plot.png deleted file mode 100644 index 27784503..00000000 Binary files a/examples/running-mistrals/artifacts/A100-80GB/generate_throughput_plot.png and /dev/null differ diff --git a/examples/running-mistrals/artifacts/A100-80GB/rich_table.svg b/examples/running-mistrals/artifacts/A100-80GB/rich_table.svg deleted file mode 100644 index 2c839c7a..00000000 --- a/examples/running-mistrals/artifacts/A100-80GB/rich_table.svg +++ /dev/null @@ -1,235 +0,0 @@ - diff --git a/examples/running-mistrals/artifacts/A100-80GB/short_report.csv b/examples/running-mistrals/artifacts/A100-80GB/short_report.csv deleted file mode 100644 index 197e1c68..00000000 --- a/examples/running-mistrals/artifacts/A100-80GB/short_report.csv +++ /dev/null @@ -1,21 +0,0 @@ -experiment_name,Batch Size,Forward Latency (s),Forward Throughput (samples/s),Forward Peak Memory (MB),Generate Throughput (tokens/s),Generate Peak Memory (MB),Quantization Scheme -fp16-batch_size(4)-sequence_length(512)-new_tokens(1000),4,0.233,17.2,17865,137.0,18509,fp16 -fp16-batch_size(8)-sequence_length(512)-new_tokens(1000),8,0.467,17.1,19463,236.0,20524,fp16 -fp16-batch_size(16)-sequence_length(512)-new_tokens(1000),16,0.937,17.1,22458,319.0,24393,fp16 
-bnb-batch_size(16)-sequence_length(512)-new_tokens(1000),16,0.94,17.0,13155,211.0,15254,bnb -fp16-batch_size(2)-sequence_length(512)-new_tokens(1000),2,0.118,16.9,17158,68.0,17523,fp16 -gptq-batch_size(16)-sequence_length(512)-new_tokens(1000),16,0.963,16.6,13314,242.0,15235,gptq -bnb-batch_size(8)-sequence_length(512)-new_tokens(1000),8,0.483,16.6,10261,134.0,11330,bnb -gptq-batch_size(8)-sequence_length(512)-new_tokens(1000),8,0.486,16.5,10303,164.0,11280,gptq -fp16-batch_size(1)-sequence_length(512)-new_tokens(1000),1,0.0633,15.8,16907,35.3,17066,fp16 -bnb-batch_size(4)-sequence_length(512)-new_tokens(1000),4,0.254,15.7,8797,68.5,9317,bnb -gptq-batch_size(4)-sequence_length(512)-new_tokens(1000),4,0.261,15.3,8747,111.0,9239,gptq -bnb-batch_size(2)-sequence_length(512)-new_tokens(1000),2,0.141,14.2,8166,35.0,8401,bnb -gptq-batch_size(2)-sequence_length(512)-new_tokens(1000),2,0.142,14.1,7918,62.9,8279,gptq -gptq-batch_size(1)-sequence_length(512)-new_tokens(1000),1,0.0828,12.1,7669,31.7,7824,gptq -bnb-batch_size(1)-sequence_length(512)-new_tokens(1000),1,0.0865,11.6,7822,23.0,7960,bnb -awq-batch_size(16)-sequence_length(512)-new_tokens(1000),16,2.52,6.35,14486,312.0,15080,awq -awq-batch_size(8)-sequence_length(512)-new_tokens(1000),8,1.27,6.3,10787,229.0,11175,awq -awq-batch_size(4)-sequence_length(512)-new_tokens(1000),4,0.638,6.27,8958,122.0,9101,awq -awq-batch_size(2)-sequence_length(512)-new_tokens(1000),2,0.321,6.23,7824,62.9,8136,awq -awq-batch_size(1)-sequence_length(512)-new_tokens(1000),1,0.164,6.1,7589,31.2,7660,awq diff --git a/examples/running-mistrals/configs/_base_.yaml b/examples/running-mistrals/configs/_base_.yaml deleted file mode 100644 index b02223bb..00000000 --- a/examples/running-mistrals/configs/_base_.yaml +++ /dev/null @@ -1,36 +0,0 @@ -defaults: - - backend: pytorch # default backend - - benchmark: inference # default benchmark - - experiment # inheriting from experiment config - - _self_ # for hydra 1.1 compatibility - - override hydra/job_logging: colorlog # colorful logging - - override hydra/hydra_logging: colorlog # colorful logging - -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - job: - chdir: true - env_set: - CUDA_VISIBLE_DEVICES: 0 - CUDA_DEVICE_ORDER: PCI_BUS_ID - sweeper: - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda - -backend: - torch_dtype: float16 - -benchmark: - memory: true - warmup_runs: 10 - - new_tokens: 1000 - input_shapes: - sequence_length: 512 diff --git a/examples/running-mistrals/configs/awq.yaml b/examples/running-mistrals/configs/awq.yaml deleted file mode 100644 index 88f22cb5..00000000 --- a/examples/running-mistrals/configs/awq.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: awq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: ybelkada/test-mistral-7b-v0.1-awq diff --git a/examples/running-mistrals/configs/bnb.yaml b/examples/running-mistrals/configs/bnb.yaml deleted file mode 100644 index 61cf1ebd..00000000 --- a/examples/running-mistrals/configs/bnb.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: 
bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) - -backend: - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 diff --git a/examples/running-mistrals/configs/gptq.yaml b/examples/running-mistrals/configs/gptq.yaml deleted file mode 100644 index 927a172e..00000000 --- a/examples/running-mistrals/configs/gptq.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/Mistral-7B-v0.1-GPTQ diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml deleted file mode 100644 index dd8a2808..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml deleted file mode 100644 index bcfd32be..00000000 --- 
a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: awq - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0 - choices: - benchmark: inference - backend: pytorch 
- hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml deleted file mode 100644 index 8b6686c5..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq-batch_size(1)-sequence_length(512)-new_tokens(1000) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv deleted file mode 100644 index 5b69d36b..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ 
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.164,6.1,7589,32.1,31.2,7660 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml deleted file mode 100644 index 23deebd5..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml deleted file mode 100644 index 472713b3..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. 
- - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: awq - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- 
a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml deleted file mode 100644 index 56f693fb..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq-batch_size(16)-sequence_length(512)-new_tokens(1000) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv deleted file mode 100644 index 709cdc10..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -2.52,6.35,14486,51.3,312.0,15080 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml deleted file mode 100644 index 15ca6c32..00000000 --- 
a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml deleted file mode 100644 index ab5e3ddb..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. 
- - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: awq - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml deleted file mode 100644 index 52724f0c..00000000 --- 
a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq-batch_size(2)-sequence_length(512)-new_tokens(1000) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv deleted file mode 100644 index 44139984..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.321,6.23,7824,31.8,62.9,8136 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml deleted file mode 100644 index c5659988..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: 
${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml deleted file mode 100644 index 676eb33b..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4 - id: '2' - num: 2 - config_name: awq - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml deleted file mode 100644 index eef8c9ca..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=4 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml deleted file mode 100644 index 528e56a1..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq-batch_size(4)-sequence_length(512)-new_tokens(1000) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv deleted file mode 100644 index 8aafc741..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.638,6.27,8958,32.8,122.0,9101 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml deleted file mode 100644 index 37a3bdb1..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - 
_target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml deleted file mode 100644 index 6403b0cb..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8 - id: '3' - num: 3 - config_name: awq - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml deleted file mode 100644 index 8cd14374..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=8 diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml deleted file mode 100644 index c830c5f3..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq-batch_size(8)-sequence_length(512)-new_tokens(1000) -model: ybelkada/test-mistral-7b-v0.1-awq -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv deleted file mode 100644 index 7f765da2..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/awq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -1.27,6.3,10787,34.9,229.0,11175 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml deleted file mode 100644 index 5aa1ffa3..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: 
null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml deleted file mode 100644 index 19bb99b5..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml deleted file mode 100644 index 4c3edbf9..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(1)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv deleted file mode 100644 index ce9e3c78..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0865,11.6,7822,43.4,23.0,7960 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml deleted file mode 100644 index 18995afe..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - 
bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml deleted file mode 100644 index 9e9c5d39..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml deleted file mode 100644 index 0ba30c00..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(16)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv deleted file mode 100644 index ea01621e..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.94,17.0,13155,76.0,211.0,15254 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml deleted file mode 100644 index f1495501..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - 
bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml deleted file mode 100644 index f841c4ac..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml deleted file mode 100644 index 53535c91..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
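The `hydra.yaml` above records a MULTIRUN sweep over `benchmark.input_shapes.batch_size: 1,2,4,8,16`, with each job landing in `experiments/${experiment_name}/${hydra.job.num}`. A small sketch that mirrors how the basic sweeper's job numbering maps onto the directory paths deleted in this diff (paths are illustrative):

```python
# Reproduces the sweep-output layout: job N -> experiments/<experiment_name>/<N>.
from pathlib import Path

name_tpl = "bnb-batch_size({bs})-sequence_length(512)-new_tokens(1000)"
for num, bs in enumerate([1, 2, 4, 8, 16]):
    out_dir = Path("experiments") / name_tpl.format(bs=bs) / str(num)
    print(num, out_dir)
# num=1 -> experiments/bnb-batch_size(2)-.../1, matching the paths in this diff
```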
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(2)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv deleted file mode 100644 index b2ba03ea..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.141,14.2,8166,57.2,35.0,8401 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml deleted file mode 100644 index 8e5f788e..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - 
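The resolved `generate_kwargs` above pin the decode length: `min_new_tokens == max_new_tokens == 1000` with greedy decoding, so `generate.latency` measures exactly 1000 generated tokens per sequence and is comparable across runs. A simplified sketch of the same call on the model from this config (loading details reduced to the essentials):

```python
# Minimal sketch of the decode-length pinning recorded in generate_kwargs.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

inputs = tokenizer("Hello", return_tensors="pt").to("cuda")
out = model.generate(**inputs,
                     max_new_tokens=1000, min_new_tokens=1000,  # exactly 1000 tokens
                     do_sample=False, use_cache=True,           # greedy, with KV cache
                     pad_token_id=0, num_beams=1)
```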
bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml deleted file mode 100644 index f998b572..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4 - id: '2' - num: 2 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml deleted file mode 100644 index eef8c9ca..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=4 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml deleted file mode 100644 index 57923542..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(4)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv deleted file mode 100644 index 332ef801..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.254,15.7,8797,58.4,68.5,9317 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml deleted file mode 100644 index a53fdf3f..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - 
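On the transformers side, the backend's bnb `quantization_config` above (`load_in_4bit: true`, `bnb_4bit_compute_dtype: float16`, `llm_int8_threshold: 0.0`) corresponds to a `BitsAndBytesConfig`. A hedged sketch of that mapping, as an illustration of the recorded settings rather than optimum-benchmark's own loading code:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantization_config.load_in_4bit
    bnb_4bit_compute_dtype=torch.float16,  # quantization_config.bnb_4bit_compute_dtype
    llm_int8_threshold=0.0,                # quantization_config.llm_int8_threshold
)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=bnb_config,
    device_map="cuda:0",  # quantized loading goes through accelerate's device_map
)
```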
bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml deleted file mode 100644 index e54389aa..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8 - id: '3' - num: 3 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml deleted file mode 100644 index 8cd14374..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=8 diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml deleted file mode 100644 index 730f327e..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(8)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv deleted file mode 100644 index 454db2cb..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.483,16.6,10261,59.9,134.0,11330 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml deleted file mode 100644 index 76483dd5..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: 
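Each sweep job leaves one `inference_results.csv` in its numbered subdirectory, as seen throughout this diff. A sketch of collecting them into a single table; the glob pattern mirrors the `experiments/A100-80GB/<experiment>/<job>/` layout above, and the root path is an assumption to adjust:

```python
# Aggregate per-job CSVs into one report-style DataFrame.
from pathlib import Path
import pandas as pd

rows = []
for csv_path in Path("experiments/A100-80GB").glob("*/*/inference_results.csv"):
    df = pd.read_csv(csv_path)
    df["experiment"] = csv_path.parts[-3]  # e.g. bnb-batch_size(8)-...
    rows.append(df)

report = pd.concat(rows, ignore_index=True)
print(report[["experiment", "generate.throughput(tokens/s)", "generate.peak_memory(MB)"]])
```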
{} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml deleted file mode 100644 index b6c71a98..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/quantized-mistrals - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/quantized-mistrals/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/quantized-mistrals/experiments/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml deleted file mode 100644 index 3387d1ab..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - 
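The `env_set` block above (`CUDA_VISIBLE_DEVICES: '1'`, `CUDA_DEVICE_ORDER: PCI_BUS_ID`) isolates each sweep on one physical GPU: fixing the device order to PCI bus IDs makes the visible-device indices stable across processes. A minimal sketch of the same pinning done by hand; the variables must be set before anything initializes CUDA:

```python
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # stable, bus-ordered indexing
os.environ["CUDA_VISIBLE_DEVICES"] = "1"         # second A100 in this machine

import torch  # imported after the env vars so they take effect
print(torch.cuda.device_count())  # -> 1
```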
seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(1)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv deleted file mode 100644 index d6eb5712..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0633,15.8,16907,28.3,35.3,17066 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml deleted file mode 100644 index 82aec418..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - 
name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml deleted file mode 100644 index 385d4305..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/quantized-mistrals - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/quantized-mistrals/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/quantized-mistrals/experiments/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml deleted file mode 100644 index e8f7d723..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(16)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv deleted file mode 100644 index c5489483..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.937,17.1,22458,50.1,319.0,24393 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml deleted file mode 100644 index 9431c98f..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - 
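Putting the two batch_size(16) rows from this diff side by side, the 4-bit bnb run (earlier: 15254 MB peak, 211 tokens/s) against the fp16 run just above (24393 MB peak, 319 tokens/s) gives the memory/throughput trade-off directly. The numbers below are copied from those CSVs, not recomputed:

```python
# Trade-off at batch_size=16, sequence_length=512, new_tokens=1000 (A100-80GB).
peak_mb = {"bnb-4bit": 15254, "fp16": 24393}
tokens_per_s = {"bnb-4bit": 211.0, "fp16": 319.0}

saving = 1 - peak_mb["bnb-4bit"] / peak_mb["fp16"]
slowdown = 1 - tokens_per_s["bnb-4bit"] / tokens_per_s["fp16"]
print(f"4-bit: ~{saving:.0%} less peak memory, ~{slowdown:.0%} fewer tokens/s")
# -> roughly 37% less memory for roughly 34% lower decode throughput
```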
ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml deleted file mode 100644 index 52ce2bdf..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/quantized-mistrals - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/quantized-mistrals/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/quantized-mistrals/experiments/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml deleted file mode 100644 index efdcc561..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - 
seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(2)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv deleted file mode 100644 index caa1d8d5..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.118,16.9,17158,29.4,68.0,17523 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml deleted file mode 100644 index be5a83f3..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: 
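The benchmark section in these configs (`warmup_runs: 10`, `duration: 10`) implies a warmup-then-measure scheme: discard the first iterations, then time calls for a fixed budget and report the mean. A simplified sketch of that scheme under those assumptions; the real `InferenceBenchmark` also tracks memory and times `generate` separately:

```python
import time

def benchmark_forward(run_once, warmup_runs=10, duration_s=10.0):
    for _ in range(warmup_runs):   # warmup iterations: executed but not measured
        run_once()
    latencies = []
    start = time.perf_counter()
    while time.perf_counter() - start < duration_s:  # measure for ~duration_s
        t0 = time.perf_counter()
        run_once()
        latencies.append(time.perf_counter() - t0)
    return sum(latencies) / len(latencies)  # mean forward latency in seconds
```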
inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml deleted file mode 100644 index 565f1bd3..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4 - id: '2' - num: 2 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '1' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/quantized-mistrals - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/quantized-mistrals/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/quantized-mistrals/experiments/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml deleted file mode 100644 index eef8c9ca..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=4 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml deleted file mode 100644 index a6c8cec3..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - 
seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(4)-sequence_length(512)-new_tokens(1000) -model: mistralai/Mistral-7B-v0.1 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv deleted file mode 100644 index 6073280e..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.233,17.2,17865,29.2,137.0,18509 diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml deleted file mode 100644 index 3cbb0127..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: 
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: mistralai/Mistral-7B-v0.1
-device: cuda
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.35.0.dev0
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml
deleted file mode 100644
index 0f896a1f..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=8
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=8
-    id: '3'
-    num: 3
-    config_name: _base_
-    env_set:
-      CUDA_VISIBLE_DEVICES: '1'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/quantized-mistrals
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/quantized-mistrals/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/quantized-mistrals/experiments/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3
-  choices:
-    benchmark: inference
-    backend: pytorch
-    hydra/env: default
-    hydra/callbacks: null
-    hydra/job_logging: colorlog
-    hydra/hydra_logging: colorlog
-    hydra/hydra_help: default
-    hydra/help: default
-    hydra/sweeper: basic
-    hydra/launcher: basic
-    hydra/output: default
-  verbose: false
diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml
deleted file mode 100644
index 8cd14374..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=8
diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml
deleted file mode 100644
index f5a98c47..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 1000
-    min_new_tokens: 1000
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)
-model: mistralai/Mistral-7B-v0.1
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.35.0.dev0
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv
deleted file mode 100644
index 8a714ff6..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB)
-0.467,17.1,19463,33.9,236.0,20524
diff --git a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(None)-sequence_length(512)-new_tokens(1000)/multirun.yaml b/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(None)-sequence_length(512)-new_tokens(1000)/multirun.yaml
deleted file mode 100644
index 1dd373a8..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/fp16-batch_size(None)-sequence_length(512)-new_tokens(1000)/multirun.yaml
+++ /dev/null
@@ -1,243 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task: []
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: ''
-    id: ???
-    num: ???
-    config_name: _base_
-    env_set:
-      CUDA_VISIBLE_DEVICES: '1'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/quantized-mistrals
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/quantized-mistrals/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: ???
-  choices:
-    benchmark: inference
-    backend: pytorch
-    hydra/env: default
-    hydra/callbacks: null
-    hydra/job_logging: colorlog
-    hydra/hydra_logging: colorlog
-    hydra/hydra_help: default
-    hydra/help: default
-    hydra/sweeper: basic
-    hydra/launcher: basic
-    hydra/output: default
-  verbose: false
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: null
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: mistralai/Mistral-7B-v0.1
-device: cuda
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.35.0.dev0
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml
deleted file mode 100644
index 41b761b2..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/config.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 1
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Mistral-7B-v0.1-GPTQ
-device: cuda
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml
deleted file mode 100644
index 6f61abfc..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=1
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=1
-    id: '0'
-    num: 0
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-mistral
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-mistral/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0
-  choices:
-    benchmark: inference
-    backend: pytorch
-    hydra/env: default
-    hydra/callbacks: null
-    hydra/job_logging: colorlog
-    hydra/hydra_logging: colorlog
-    hydra/hydra_help: default
-    hydra/help: default
-    hydra/sweeper: basic
-    hydra/launcher: basic
-    hydra/output: default
-  verbose: false
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml
deleted file mode 100644
index 989520ff..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=1
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml
deleted file mode 100644
index 94c0b06a..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/hydra_config.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 1
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 1000
-    min_new_tokens: 1000
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)
-model: TheBloke/Mistral-7B-v0.1-GPTQ
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv
deleted file mode 100644
index 56fdbc88..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(512)-new_tokens(1000)/0/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB)
-0.0828,12.1,7669,31.5,31.7,7824
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml
deleted file mode 100644
index 7779b85a..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/config.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 16
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Mistral-7B-v0.1-GPTQ
-device: cuda
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml
deleted file mode 100644
index 83e7db7c..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=16
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=16
-    id: '4'
-    num: 4
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-mistral
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-mistral/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4
-  choices:
-    benchmark: inference
-    backend: pytorch
-    hydra/env: default
-    hydra/callbacks: null
-    hydra/job_logging: colorlog
-    hydra/hydra_logging: colorlog
-    hydra/hydra_help: default
-    hydra/help: default
-    hydra/sweeper: basic
-    hydra/launcher: basic
-    hydra/output: default
-  verbose: false
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml
deleted file mode 100644
index fdb7f01d..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=16
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml
deleted file mode 100644
index c6285181..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/hydra_config.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 16
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 1000
-    min_new_tokens: 1000
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)
-model: TheBloke/Mistral-7B-v0.1-GPTQ
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv
deleted file mode 100644
index ebe0ae57..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(512)-new_tokens(1000)/4/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB)
-0.963,16.6,13314,66.1,242.0,15235
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml
deleted file mode 100644
index c431c470..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/config.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 2
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Mistral-7B-v0.1-GPTQ
-device: cuda
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml
deleted file mode 100644
index ab240c24..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=2
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=2
-    id: '1'
-    num: 1
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-mistral
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-mistral/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1
-  choices:
-    benchmark: inference
-    backend: pytorch
-    hydra/env: default
-    hydra/callbacks: null
-    hydra/job_logging: colorlog
-    hydra/hydra_logging: colorlog
-    hydra/hydra_help: default
-    hydra/help: default
-    hydra/sweeper: basic
-    hydra/launcher: basic
-    hydra/output: default
-  verbose: false
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml
deleted file mode 100644
index 8211b85f..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=2
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml
deleted file mode 100644
index 3cb39040..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/hydra_config.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 2
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 1000
-    min_new_tokens: 1000
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)
-model: TheBloke/Mistral-7B-v0.1-GPTQ
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv
deleted file mode 100644
index e8493eb9..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(512)-new_tokens(1000)/1/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB)
-0.142,14.1,7918,31.8,62.9,8279
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml
deleted file mode 100644
index e2d6f89f..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/config.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 4
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Mistral-7B-v0.1-GPTQ
-device: cuda
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml
deleted file mode 100644
index 4506944a..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=4
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=4
-    id: '2'
-    num: 2
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-mistral
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-mistral/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2
-  choices:
-    benchmark: inference
-    backend: pytorch
-    hydra/env: default
-    hydra/callbacks: null
-    hydra/job_logging: colorlog
-    hydra/hydra_logging: colorlog
-    hydra/hydra_help: default
-    hydra/help: default
-    hydra/sweeper: basic
-    hydra/launcher: basic
-    hydra/output: default
-  verbose: false
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml
deleted file mode 100644
index eef8c9ca..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=4
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml
deleted file mode 100644
index 5ac36e22..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/hydra_config.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 4
-    sequence_length: 512
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 1000
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 1000
-    min_new_tokens: 1000
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)
-model: TheBloke/Mistral-7B-v0.1-GPTQ
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.0
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv
deleted file mode 100644
index 3ebc2394..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(512)-new_tokens(1000)/2/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB)
-0.261,15.3,8747,36.0,111.0,9239
diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml
deleted file mode 100644
index b7e4ef0d..00000000
--- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/config.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: true
-  continous_isolation_check: true
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-
_target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/Mistral-7B-v0.1-GPTQ -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml deleted file mode 100644 index 237d4fbf..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8 - id: '3' - num: 3 - config_name: gptq - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-mistral - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-mistral/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-mistral/experiments/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml deleted file mode 100644 index 8cd14374..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=8 diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml deleted file mode 100644 index a8d78a9e..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/hydra_config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 512 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 1000 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 1000 - min_new_tokens: 1000 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: gptq-batch_size(8)-sequence_length(512)-new_tokens(1000) -model: TheBloke/Mistral-7B-v0.1-GPTQ -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv b/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv deleted file mode 100644 index 4c6fecd9..00000000 --- a/examples/running-mistrals/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(512)-new_tokens(1000)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.486,16.5,10303,48.9,164.0,11280 diff --git a/examples/running-mistrals/report.py b/examples/running-mistrals/report.py deleted file mode 100644 index 559ea695..00000000 --- a/examples/running-mistrals/report.py +++ /dev/null @@ -1,229 +0,0 @@ -from argparse import ArgumentParser -from pathlib import Path - -import matplotlib.pyplot as plt -import pandas as pd -from flatten_dict import flatten -from omegaconf import OmegaConf -from pandas import DataFrame -from rich.console import Console -from rich.table import Table -from rich.terminal_theme import MONOKAI - - -def gather_inference_report(root_folder: Path) -> DataFrame: - # key is path to inference file as string, value is dataframe - inference_dfs = { - f.parent.absolute().as_posix(): pd.read_csv(f) for f in root_folder.glob("**/inference_results.csv") - } - - # key is path to config file as string, value is flattened dict - config_dfs = { - f.parent.absolute() - .as_posix(): pd.DataFrame.from_dict(flatten(OmegaConf.load(f), reducer="dot"), orient="index") - .T - for f in root_folder.glob("**/hydra_config.yaml") - if f.parent.absolute().as_posix() in inference_dfs.keys() - } - - if len(inference_dfs) == 0 or len(config_dfs) == 0: - raise ValueError(f"No 
results found in {root_folder}")
-
-    # Merge inference and config dataframes
-    inference_reports = [
-        config_dfs[name].merge(inference_dfs[name], left_index=True, right_index=True) for name in inference_dfs.keys()
-    ]
-
-    # Concatenate all reports
-    inference_report = pd.concat(inference_reports, axis=0, ignore_index=True)
-    inference_report.set_index("experiment_name", inplace=True)
-    return inference_report
-
-
-def style_element(element, style=""):
-    if style:
-        return f"[{style}]{element}[/{style}]"
-    else:
-        return element
-
-
-def format_element(element, style=""):
-    if isinstance(element, float):
-        if element != element:  # nan
-            formatted_element = ""
-        elif abs(element) >= 1:
-            formatted_element = f"{element:.2f}"
-        elif abs(element) > 1e-6:
-            formatted_element = f"{element:.2e}"
-        else:
-            formatted_element = f"{element}"
-    elif element is None:
-        formatted_element = ""
-    elif isinstance(element, bool):
-        if element:
-            formatted_element = style_element("✔", style="green")
-        else:
-            formatted_element = style_element("✘", style="red")
-    else:
-        formatted_element = str(element)
-
-    return style_element(formatted_element, style=style)
-
-
-def format_row(row, style=""):
-    formatted_row = []
-    for element in row:
-        formatted_row.append(format_element(element, style=style))
-    return formatted_row
-
-
-def get_short_report(inference_report):
-    short_columns = {
-        "benchmark.input_shapes.batch_size": "Batch Size",
-        "forward.latency(s)": "Forward Latency (s)",
-        "forward.throughput(samples/s)": "Forward Throughput (samples/s)",
-        "forward.peak_memory(MB)": "Forward Peak Memory (MB)",
-        "generate.throughput(tokens/s)": "Generate Throughput (tokens/s)",
-        "generate.peak_memory(MB)": "Generate Peak Memory (MB)",
-    }
-    short_report = inference_report[list(short_columns.keys())].rename(columns=short_columns)
-    short_report["Quantization Scheme"] = inference_report.index.str.split("-").str[0]
-
-    return short_report
-
-
-def get_rich_table(short_report):
-    # create rich table
-    rich_table = Table(show_header=True, show_lines=True)
-    # we add a column for the index
-    rich_table.add_column("Experiment Name", justify="left", header_style="")
-    # we add the report columns
-    for column in short_report.columns:
-        rich_table.add_column(column, justify="right", header_style="bold")
-    # we add rows
-    for index, row in short_report.iterrows():
-        rich_table.add_row(index, *format_row(row.values, style=""))
-
-    return rich_table
-
-
-def get_throughput_plot(short_report):
-    # for each quantization scheme we plot latency, throughput and peak memory vs batch size
-    fig1, ax1 = plt.subplots()
-    fig2, ax2 = plt.subplots()
-    fig3, ax3 = plt.subplots()
-    fig4, ax4 = plt.subplots()
-
-    short_report["Quantization Scheme"].fillna("unquantized", inplace=True)
-    short_report["Quantization Scheme"].replace("bnb", "BnB", inplace=True)
-    short_report["Quantization Scheme"].replace("awq", "AWQ", inplace=True)
-    short_report["Quantization Scheme"].replace("gptq", "GPTQ", inplace=True)
-
-    for quantization_scheme in short_report["Quantization Scheme"].unique():
-        mask = short_report["Quantization Scheme"] == quantization_scheme
-
-        forward_latency = short_report[mask][["Batch Size", "Forward Latency (s)"]].sort_values(by="Batch Size")
-        generate_throughput = short_report[mask][["Batch Size", "Generate Throughput (tokens/s)"]].sort_values(
-            by="Batch Size"
-        )
-        forward_memory = short_report[mask][["Batch Size", "Forward Peak Memory (MB)"]].sort_values(by="Batch Size")
-        generate_memory = short_report[mask][["Batch Size", "Generate Peak Memory (MB)"]].sort_values(by="Batch Size")
-        ax1.plot(
-            forward_latency["Batch Size"],
-            forward_latency["Forward Latency (s)"],
-            label=quantization_scheme,
-            marker="o",
-        )
-        ax2.plot(
-            generate_throughput["Batch Size"],
-            generate_throughput["Generate Throughput (tokens/s)"],
-            label=quantization_scheme,
-            marker="o",
-        )
-        ax3.plot(
-            forward_memory["Batch Size"],
-            forward_memory["Forward Peak Memory (MB)"],
-            label=quantization_scheme,
-            marker="*",
-        )
-        ax4.plot(
-            generate_memory["Batch Size"],
-            generate_memory["Generate Peak Memory (MB)"],
-            label=quantization_scheme,
-            marker="*",
-        )
-
-    ax1.set_xlabel("Batch Size")
-    ax1.set_ylabel("Forward Latency (s)")
-    ax1.set_title("Forward Latency per Batch Size")
-
-    ax2.set_xlabel("Batch Size")
-    ax2.set_ylabel("Generate Throughput (tokens/s)")
-    ax2.set_title("Generate Throughput per Batch Size")
-
-    ax3.set_xlabel("Batch Size")
-    ax3.set_ylabel("Forward Peak Memory (MB)")
-    ax3.set_title("Forward Peak Memory per Batch Size")
-
-    ax4.set_xlabel("Batch Size")
-    ax4.set_ylabel("Generate Peak Memory (MB)")
-    ax4.set_title("Generate Peak Memory per Batch Size")
-
-    ax1.legend()
-    ax2.legend()
-    ax3.legend()
-    ax4.legend()
-
-    return fig1, fig2, fig3, fig4
-
-
-def generate_report():
-    parser = ArgumentParser()
-    parser.add_argument(
-        "--experiments",
-        "-e",
-        type=Path,
-        required=True,
-        help="The folder containing the experiment results.",
-    )
-    parser.add_argument(
-        "--report-name",
-        "-r",
-        type=str,
-        required=False,
-        help="The name of the report.",
-    )
-
-    args = parser.parse_args()
-    experiments_folders = args.experiments
-
-    if args.report_name:
-        report_folder = f"artifacts/{args.report_name}"
-    else:
-        report_folder = "artifacts"
-    Path(report_folder).mkdir(parents=True, exist_ok=True)
-
-    # gather experiment results
-    inference_report = gather_inference_report(experiments_folders)
-    inference_report.sort_values(by="forward.throughput(samples/s)", ascending=False, inplace=True)
-    inference_report.to_csv(f"{report_folder}/full_report.csv")
-
-    short_report = get_short_report(inference_report)
-    short_report.to_csv(f"{report_folder}/short_report.csv")
-
-    forward_latency_plot, generate_throughput_plot, forward_memory_plot, generate_memory_plot = get_throughput_plot(
-        short_report
-    )
-    forward_latency_plot.savefig(f"{report_folder}/forward_latency_plot.png")
-    generate_throughput_plot.savefig(f"{report_folder}/generate_throughput_plot.png")
-    forward_memory_plot.savefig(f"{report_folder}/forward_memory_plot.png")
-    generate_memory_plot.savefig(f"{report_folder}/generate_memory_plot.png")
-
-    rich_table = get_rich_table(short_report)
-    console = Console(record=True)
-    console.print(rich_table, justify="center")
-    console.save_svg(f"{report_folder}/rich_table.svg", theme=MONOKAI, title="Inference Report")
-
-
-if __name__ == "__main__":
-    generate_report()
diff --git a/examples/running-vicunas/README.md b/examples/running-vicunas/README.md
deleted file mode 100644
index 755172b8..00000000
--- a/examples/running-vicunas/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# Optimum-Benchmark x Vicuna x BnB & GPTQ & (AWQ+GEMM vs AWQ+GEMV)
-
-A set of benchmarks on quantizing Vicuna.
-
-## Setup
-
-You will need to install these quantization packages:
-
-```bash
-pip install autoawq
-pip install auto-gptq
-pip install bitsandbytes
-```
-
-## Running
-
-Then run these commands from this directory:
-
-```bash
-optimum-benchmark --config-dir configs/ --config-name _base_ --multirun
-optimum-benchmark --config-dir configs/ --config-name bnb --multirun
-optimum-benchmark --config-dir configs/ --config-name gptq --multirun
-optimum-benchmark --config-dir configs/ --config-name awq+gemm --multirun
-optimum-benchmark --config-dir configs/ --config-name awq+gemv --multirun
-```
-
-This will create an `experiments` folder containing the benchmark results, with the inference `batch_size` swept from 1 to 16 and an input `sequence_length` (prompt size) of 128.
-
-## Reporting
-
-To create a report, run:
-
-```bash
-python report.py -e experiments
-```
-
-This will create quick reporting artifacts: a `full_report.csv`, a `short_report.csv`, several plots, and a `rich_table.svg` (see the post-processing sketch below).
-
-## Results
-
-### On A100-80GB
-
-[plots: forward latency, generate throughput, forward memory, generate memory; rich_table.svg]
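As a follow-up to the reporting step in the README above, here is a minimal post-processing sketch (not part of `report.py`): it ranks quantization schemes by generation throughput at each batch size. The `artifacts/A100-80GB/` path is an assumption based on this example's artifact layout; the column names come from the generated `short_report.csv`.

```python
# A minimal sketch (assumed layout): pick the highest generate-throughput
# quantization scheme per batch size from the short_report.csv that report.py writes.
import pandas as pd

short_report = pd.read_csv("artifacts/A100-80GB/short_report.csv")
best = (
    short_report.sort_values("Generate Throughput (tokens/s)", ascending=False)
    .drop_duplicates(subset="Batch Size")  # keep the top row per batch size
    .sort_values("Batch Size")
)
print(best[["Batch Size", "Quantization Scheme", "Generate Throughput (tokens/s)"]].to_string(index=False))
```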
diff --git a/examples/running-vicunas/artifacts/A100-80GB/forward_latency_plot.png b/examples/running-vicunas/artifacts/A100-80GB/forward_latency_plot.png deleted file mode 100644 index 2b407eb2..00000000 Binary files a/examples/running-vicunas/artifacts/A100-80GB/forward_latency_plot.png and /dev/null differ diff --git a/examples/running-vicunas/artifacts/A100-80GB/forward_memory_plot.png b/examples/running-vicunas/artifacts/A100-80GB/forward_memory_plot.png deleted file mode 100644 index a7011009..00000000 Binary files a/examples/running-vicunas/artifacts/A100-80GB/forward_memory_plot.png and /dev/null differ diff --git a/examples/running-vicunas/artifacts/A100-80GB/full_report.csv b/examples/running-vicunas/artifacts/A100-80GB/full_report.csv deleted file mode 100644 index 6244d33a..00000000 --- a/examples/running-vicunas/artifacts/A100-80GB/full_report.csv +++ /dev/null @@ -1,26 +0,0 @@ -experiment_name,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.initial_isolation_check,backend.continous_isolation_check,backend.delete_cache,backend.no_weights,backend.device_map,backend.torch_dtype,backend.disable_grad,backend.eval_mode,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.bettertransformer,backend.quantization_scheme,backend.use_ddp,backend.peft_strategy,benchmark.name,benchmark._target_,benchmark.duration,benchmark.warmup_runs,benchmark.memory,benchmark.energy,benchmark.input_shapes.batch_size,benchmark.input_shapes.sequence_length,benchmark.input_shapes.num_choices,benchmark.input_shapes.feature_size,benchmark.input_shapes.nb_max_frames,benchmark.input_shapes.audio_sequence_length,benchmark.new_tokens,benchmark.can_diffuse,benchmark.can_generate,benchmark.generate_kwargs.max_new_tokens,benchmark.generate_kwargs.min_new_tokens,benchmark.generate_kwargs.do_sample,benchmark.generate_kwargs.use_cache,benchmark.generate_kwargs.pad_token_id,benchmark.generate_kwargs.num_beams,model,device,task,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.optimum_commit,environment.transformers_version,environment.transformers_commit,environment.accelerate_version,environment.accelerate_commit,environment.diffusers_version,environment.diffusers_commit,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB),backend.quantization_config.llm_int8_threshold,backend.quantization_config.load_in_4bit,backend.quantization_config.bnb_4bit_compute_dtype -fp16-batch_size(16)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.2,80.0,18706,6.87,596.0,49664,,, 
-fp16-batch_size(8)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.105,76.2,16829,6.9,297.0,24551,,, -gptq-batch_size(16)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,TheBloke/vicuna-7B-v1.5-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.212,75.5,10441,10.1,406.0,40774,,, -bnb-batch_size(16)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.221,72.4,9917,13.9,295.0,20260,0.0,True,float16 -fp16-batch_size(4)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0577,69.3,15690,6.69,153.0,16896,,, -gptq-batch_size(8)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,TheBloke/vicuna-7B-v1.5-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.116,69.0,8568,8.88,231.0,16290,,, -bnb-batch_size(8)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.126,63.5,8323,13.4,153.0,10760,0.0,True,float16 
-fp16-batch_size(2)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0337,59.3,15313,6.68,76.6,15858,,, -gptq-batch_size(4)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,TheBloke/vicuna-7B-v1.5-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0684,58.5,7170,7.3,140.0,8717,,, -bnb-batch_size(4)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0781,51.2,7545,13.4,76.4,8539,0.0,True,float16 -gptq-batch_size(2)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,TheBloke/vicuna-7B-v1.5-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0456,43.9,6800,7.17,71.4,7597,,, -fp16-batch_size(1)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.026,38.5,15128,6.67,38.4,15548,,, -bnb-batch_size(2)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0553,36.2,7073,13.6,37.6,7704,0.0,True,float16 
-awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.47,34.0,10183,9.27,442.0,31247,,, -awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.24,33.3,8394,10.7,191.0,14176,,, -awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.124,32.3,7245,7.01,146.0,8602,,, -awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0675,29.6,6756,6.89,74.3,7511,,, -gptq-batch_size(1)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,TheBloke/vicuna-7B-v1.5-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0351,28.5,6868,7.29,35.1,7077,,, -awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA 
A100-SXM4-80GB'],0.289,27.7,8394,7.84,261.0,14176,,, -awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.58,27.6,10183,8.33,492.0,31245,,, -awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.149,26.8,7155,7.67,134.0,8642,,, -awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.039,25.6,6792,6.85,37.4,6981,,, -awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0801,25.0,6754,7.71,66.4,7509,,, -awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0437,22.9,6792,7.61,33.6,6981,,, -bnb-batch_size(1)-sequence_length(128)-new_tokens(256),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,True,False,False,,float16,True,True,False,,False,False,bnb,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,128,1,80,3000,16000,256,False,True,256,256,False,True,0,1,lmsys/vicuna-7b-v1.5,cuda,text-generation,main,,False,False,1.13.2,,4.35.0.dev0,,0.24.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core 
Processor,128,540684,['NVIDIA A100-SXM4-80GB'],0.0526,19.0,6945,10.8,23.7,7281,0.0,True,float16 diff --git a/examples/running-vicunas/artifacts/A100-80GB/generate_memory_plot.png b/examples/running-vicunas/artifacts/A100-80GB/generate_memory_plot.png deleted file mode 100644 index 134393ea..00000000 Binary files a/examples/running-vicunas/artifacts/A100-80GB/generate_memory_plot.png and /dev/null differ diff --git a/examples/running-vicunas/artifacts/A100-80GB/generate_throughput_plot.png b/examples/running-vicunas/artifacts/A100-80GB/generate_throughput_plot.png deleted file mode 100644 index ff3d6338..00000000 Binary files a/examples/running-vicunas/artifacts/A100-80GB/generate_throughput_plot.png and /dev/null differ diff --git a/examples/running-vicunas/artifacts/A100-80GB/rich_table.svg b/examples/running-vicunas/artifacts/A100-80GB/rich_table.svg deleted file mode 100644 index 9206e272..00000000 --- a/examples/running-vicunas/artifacts/A100-80GB/rich_table.svg +++ /dev/null @@ -1,275 +0,0 @@ - diff --git a/examples/running-vicunas/artifacts/A100-80GB/short_report.csv b/examples/running-vicunas/artifacts/A100-80GB/short_report.csv deleted file mode 100644 index 93e417ce..00000000 --- a/examples/running-vicunas/artifacts/A100-80GB/short_report.csv +++ /dev/null @@ -1,26 +0,0 @@ -experiment_name,Batch Size,Forward Latency (s),Forward Throughput (samples/s),Forward Peak Memory (MB),Generate Throughput (tokens/s),Generate Peak Memory (MB),Quantization Scheme -fp16-batch_size(16)-sequence_length(128)-new_tokens(256),16,0.2,80.0,18706,596.0,49664,fp16 -fp16-batch_size(8)-sequence_length(128)-new_tokens(256),8,0.105,76.2,16829,297.0,24551,fp16 -gptq-batch_size(16)-sequence_length(128)-new_tokens(256),16,0.212,75.5,10441,406.0,40774,gptq -bnb-batch_size(16)-sequence_length(128)-new_tokens(256),16,0.221,72.4,9917,295.0,20260,bnb -fp16-batch_size(4)-sequence_length(128)-new_tokens(256),4,0.0577,69.3,15690,153.0,16896,fp16 -gptq-batch_size(8)-sequence_length(128)-new_tokens(256),8,0.116,69.0,8568,231.0,16290,gptq -bnb-batch_size(8)-sequence_length(128)-new_tokens(256),8,0.126,63.5,8323,153.0,10760,bnb -fp16-batch_size(2)-sequence_length(128)-new_tokens(256),2,0.0337,59.3,15313,76.6,15858,fp16 -gptq-batch_size(4)-sequence_length(128)-new_tokens(256),4,0.0684,58.5,7170,140.0,8717,gptq -bnb-batch_size(4)-sequence_length(128)-new_tokens(256),4,0.0781,51.2,7545,76.4,8539,bnb -gptq-batch_size(2)-sequence_length(128)-new_tokens(256),2,0.0456,43.9,6800,71.4,7597,gptq -fp16-batch_size(1)-sequence_length(128)-new_tokens(256),1,0.026,38.5,15128,38.4,15548,fp16 -bnb-batch_size(2)-sequence_length(128)-new_tokens(256),2,0.0553,36.2,7073,37.6,7704,bnb -awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256),16,0.47,34.0,10183,442.0,31247,awq+gemv -awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256),8,0.24,33.3,8394,191.0,14176,awq+gemv -awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256),4,0.124,32.3,7245,146.0,8602,awq+gemv -awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256),2,0.0675,29.6,6756,74.3,7511,awq+gemv -gptq-batch_size(1)-sequence_length(128)-new_tokens(256),1,0.0351,28.5,6868,35.1,7077,gptq -awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256),8,0.289,27.7,8394,261.0,14176,awq+gemm -awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256),16,0.58,27.6,10183,492.0,31245,awq+gemm -awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256),4,0.149,26.8,7155,134.0,8642,awq+gemm 
-awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256),1,0.039,25.6,6792,37.4,6981,awq+gemv -awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256),2,0.0801,25.0,6754,66.4,7509,awq+gemm -awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256),1,0.0437,22.9,6792,33.6,6981,awq+gemm -bnb-batch_size(1)-sequence_length(128)-new_tokens(256),1,0.0526,19.0,6945,23.7,7281,bnb diff --git a/examples/running-vicunas/configs/_base_.yaml b/examples/running-vicunas/configs/_base_.yaml deleted file mode 100644 index 82507ea4..00000000 --- a/examples/running-vicunas/configs/_base_.yaml +++ /dev/null @@ -1,36 +0,0 @@ -defaults: - - backend: pytorch # default backend - - benchmark: inference # default benchmark - - experiment # inheriting from experiment config - - _self_ # for hydra 1.1 compatibility - - override hydra/job_logging: colorlog # colorful logging - - override hydra/hydra_logging: colorlog # colorful logging - -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - job: - chdir: true - env_set: - CUDA_VISIBLE_DEVICES: 0 - CUDA_DEVICE_ORDER: PCI_BUS_ID - sweeper: - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda - -backend: - torch_dtype: float16 - -benchmark: - memory: true - warmup_runs: 10 - - new_tokens: 256 - input_shapes: - sequence_length: 128 diff --git a/examples/running-vicunas/configs/awq+gemm.yaml b/examples/running-vicunas/configs/awq+gemm.yaml deleted file mode 100644 index bfea3e7a..00000000 --- a/examples/running-vicunas/configs/awq+gemm.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: awq+gemm-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm diff --git a/examples/running-vicunas/configs/awq+gemv.yaml b/examples/running-vicunas/configs/awq+gemv.yaml deleted file mode 100644 index cd6aa33a..00000000 --- a/examples/running-vicunas/configs/awq+gemv.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: awq+gemv-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv diff --git a/examples/running-vicunas/configs/bnb.yaml b/examples/running-vicunas/configs/bnb.yaml deleted file mode 100644 index 61cf1ebd..00000000 --- a/examples/running-vicunas/configs/bnb.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) - -backend: - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 diff --git a/examples/running-vicunas/configs/gptq.yaml b/examples/running-vicunas/configs/gptq.yaml deleted file mode 100644 index ff6890a7..00000000 --- a/examples/running-vicunas/configs/gptq.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/vicuna-7B-v1.5-GPTQ 
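As context for the `configs/bnb.yaml` options above, here is a minimal sketch (an illustration, not part of the benchmark code) of what that backend configuration maps to when loading the model directly with `transformers`; `device_map="auto"` is an assumption, not taken from the config.

```python
# A minimal sketch of the bnb.yaml quantization options expressed directly with
# transformers: load_in_4bit and bnb_4bit_compute_dtype mirror
# backend.quantization_config. device_map="auto" is an assumption for illustration.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    "lmsys/vicuna-7b-v1.5",  # the model set in _base_.yaml
    quantization_config=bnb_config,
    device_map="auto",
)
```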
diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml deleted file mode 100644 index 4d10ff67..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemm-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml deleted file mode 100644 index 3ad5c0b9..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. 
- - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: awq+gemm - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- 
a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml deleted file mode 100644 index a7fc331f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv deleted file mode 100644 index 27c8266d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0437,22.9,6792,7.61,33.6,6981 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml deleted file mode 100644 index 35df0e1a..00000000 --- 
a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemm-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml deleted file mode 100644 index cc8e9be9..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. 
- - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: awq+gemm - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml deleted file mode 100644 index 
9e3fb6c4..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv deleted file mode 100644 index c1055877..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.58,27.6,10183,8.33,492.0,31245 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml deleted file mode 100644 index 271b5596..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - 
eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemm-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml deleted file mode 100644 index 4c5bf363..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: awq+gemm - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml deleted file mode 100644 index cd938d98..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 
42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv deleted file mode 100644 index 38cc2ad1..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0801,25.0,6754,7.71,66.4,7509 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml deleted file mode 100644 index b28d4709..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: 
inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemm-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml deleted file mode 100644 index f986b5d8..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4 - id: '2' - num: 2 - config_name: awq+gemm - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml deleted file mode 100644 index eef8c9ca..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=4 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml deleted file mode 100644 index 0d38b107..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 
42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv deleted file mode 100644 index c4dcb12d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.149,26.8,7155,7.67,134.0,8642 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml deleted file mode 100644 index f7b17e95..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: 
inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemm-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml deleted file mode 100644 index e312b8af..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8 - id: '3' - num: 3 - config_name: awq+gemm - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml deleted file mode 100644 index 8cd14374..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=8 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml deleted file mode 100644 index 4039e79c..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 
42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv deleted file mode 100644 index 7d352638..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemm-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.289,27.7,8394,7.84,261.0,14176 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml deleted file mode 100644 index 06a5987c..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: 
inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemv-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml deleted file mode 100644 index 6f4c8be4..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: awq+gemv - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml deleted file mode 100644 index e9981adf..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 
42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv deleted file mode 100644 index b51690e0..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.039,25.6,6792,6.85,37.4,6981 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml deleted file mode 100644 index 853eb511..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: 
inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemv-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml deleted file mode 100644 index 33f6d907..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: awq+gemv - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml deleted file mode 100644 index 964c8911..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv deleted file mode 100644 index ee379e6e..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.47,34.0,10183,9.27,442.0,31247 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml deleted file mode 100644 index 57e25e56..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - 
ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemv-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml deleted file mode 100644 index b9ec1b7b..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: awq+gemv - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml deleted file mode 100644 index 2a087cb1..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 
42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv deleted file mode 100644 index 9af67a3d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0675,29.6,6756,6.89,74.3,7511 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml deleted file mode 100644 index 7b422c4d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: 
inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemv-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml deleted file mode 100644 index 8c75d0ea..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4 - id: '2' - num: 2 - config_name: awq+gemv - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml deleted file mode 100644 index eef8c9ca..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=4 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml deleted file mode 100644 index abfbfcdc..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 
42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv deleted file mode 100644 index 1ea01ae7..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.124,32.3,7245,7.01,146.0,8602 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml deleted file mode 100644 index 6b493dfb..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: 
inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: awq+gemv-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml deleted file mode 100644 index cae0cb49..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8 - id: '3' - num: 3 - config_name: awq+gemv - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml deleted file mode 100644 index 8cd14374..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=8 diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml deleted file mode 100644 index 28b611b1..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 
42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256) -model: IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemv -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv deleted file mode 100644 index 11aa5b46..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/awq+gemv-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.24,33.3,8394,10.7,191.0,14176 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml deleted file mode 100644 index 011fb2db..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml +++ /dev/null @@ -1,72 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - 
peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml deleted file mode 100644 index 82545db0..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml deleted file mode 100644 index 9ff1ad9d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - 
intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(1)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv deleted file mode 100644 index bb71d438..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0526,19.0,6945,10.8,23.7,7281 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml deleted file mode 100644 index 1b66ca17..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,72 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - 
peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml deleted file mode 100644 index e904559d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml deleted file mode 100644 index 3c85dd65..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: 
null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(16)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv deleted file mode 100644 index 8b6dfb2e..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.221,72.4,9917,13.9,295.0,20260 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml deleted file mode 100644 index a5aaa817..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml +++ /dev/null @@ -1,72 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - 
peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml deleted file mode 100644 index 2ae6349f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml deleted file mode 100644 index 28ffa32f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - 
intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(2)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv deleted file mode 100644 index 227b9932..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0553,36.2,7073,13.6,37.6,7704 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml deleted file mode 100644 index 2ce98288..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml +++ /dev/null @@ -1,72 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - 
peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml deleted file mode 100644 index d8cd9ab9..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4 - id: '2' - num: 2 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml deleted file mode 100644 index eef8c9ca..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=4 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml deleted file mode 100644 index d6d04dfa..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - 
intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(4)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv deleted file mode 100644 index 21ff66e4..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0781,51.2,7545,13.4,76.4,8539 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml deleted file mode 100644 index 15192fa9..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml +++ /dev/null @@ -1,72 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - 
peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml deleted file mode 100644 index dae7feac..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8 - id: '3' - num: 3 - config_name: bnb - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml deleted file mode 100644 index 8cd14374..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=8 diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml deleted file mode 100644 index 5c2f68dd..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - 
intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: bnb - quantization_config: - llm_int8_threshold: 0.0 - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: bnb-batch_size(8)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv deleted file mode 100644 index 694df838..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/bnb-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.126,63.5,8323,13.4,153.0,10760 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml deleted file mode 100644 index 58bc0553..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - 
_target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml deleted file mode 100644 index 4a2b3ce5..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml deleted file mode 100644 index 97782bd7..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: 
null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(1)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv deleted file mode 100644 index 531b9c5d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.026,38.5,15128,6.67,38.4,15548 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml deleted file mode 100644 index 4a397dfe..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml deleted file mode 100644 index ff9bd759..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml deleted file mode 100644 index d34ba391..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(16)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv deleted file mode 100644 index 6cd91656..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.2,80.0,18706,6.87,596.0,49664 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml deleted file mode 100644 index d222d15c..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml deleted file mode 100644 index 9b00c477..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml deleted file mode 100644 index e46da2b1..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: 
null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(2)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv deleted file mode 100644 index cd257bc1..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0337,59.3,15313,6.68,76.6,15858 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml deleted file mode 100644 index 5952fe8f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml deleted file mode 100644 index a8e88740..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4 - id: '2' - num: 2 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml deleted file mode 100644 index eef8c9ca..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=4 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml deleted file mode 100644 index 2173beeb..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: 
null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(4)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv deleted file mode 100644 index 5e96af21..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0577,69.3,15690,6.69,153.0,16896 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml deleted file mode 100644 index 50cf10c9..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml deleted file mode 100644 index 08aafc5e..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8 - id: '3' - num: 3 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml deleted file mode 100644 index 8cd14374..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=8 diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml deleted file mode 100644 index b2ff40e5..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: 
null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(8)-sequence_length(128)-new_tokens(256) -model: lmsys/vicuna-7b-v1.5 -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv deleted file mode 100644 index 742a27ac..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.105,76.2,16829,6.9,297.0,24551 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml deleted file mode 100644 index eafc68e2..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml deleted file mode 100644 index f5988a61..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: gptq - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml deleted file mode 100644 index b474c3df..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null 
- intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: gptq-batch_size(1)-sequence_length(128)-new_tokens(256) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv deleted file mode 100644 index 75f51632..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(128)-new_tokens(256)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0351,28.5,6868,7.29,35.1,7077 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml deleted file mode 100644 index 88a1cbb3..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml deleted file mode 100644 index 904f449f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: gptq - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml deleted file mode 100644 index 9f78a060..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: gptq-batch_size(16)-sequence_length(128)-new_tokens(256) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv deleted file mode 100644 index d8802536..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(128)-new_tokens(256)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.212,75.5,10441,10.1,406.0,40774 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml deleted file mode 100644 index 308bf1d2..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml deleted file mode 100644 index e2fd14be..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: gptq - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml deleted file mode 100644 index 1a41cd98..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null 
- intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: gptq-batch_size(2)-sequence_length(128)-new_tokens(256) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv deleted file mode 100644 index 8c7d08c5..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(128)-new_tokens(256)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0456,43.9,6800,7.17,71.4,7597 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml deleted file mode 100644 index 9bb84dff..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml deleted file mode 100644 index fe585a3d..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=4 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=4 - id: '2' - num: 2 - config_name: gptq - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml deleted file mode 100644 index eef8c9ca..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=4 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml deleted file mode 100644 index 8515f46c..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null 
- intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: gptq-batch_size(4)-sequence_length(128)-new_tokens(256) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv deleted file mode 100644 index 38edc30a..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(128)-new_tokens(256)/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.0684,58.5,7170,7.3,140.0,8717 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml deleted file mode 100644 index aad9d7b2..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: 
optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml deleted file mode 100644 index 516c3ed5..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=8 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=8 - id: '3' - num: 3 - config_name: gptq - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/gemm-vs-gemv - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/gemm-vs-gemv/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/gemm-vs-gemv/experiments/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml deleted file mode 100644 index 8cd14374..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=8 diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml deleted file mode 100644 index 7e234d3a..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/hydra_config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null 
- intra_op_num_threads: null - initial_isolation_check: true - continous_isolation_check: true - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 8 - sequence_length: 128 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 256 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 256 - min_new_tokens: 256 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: gptq-batch_size(8)-sequence_length(128)-new_tokens(256) -model: TheBloke/vicuna-7B-v1.5-GPTQ -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.35.0.dev0 - transformers_commit: null - accelerate_version: 0.24.0 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv b/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv deleted file mode 100644 index 544369d4..00000000 --- a/examples/running-vicunas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(128)-new_tokens(256)/3/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),generate.latency(s),generate.throughput(tokens/s),generate.peak_memory(MB) -0.116,69.0,8568,8.88,231.0,16290 diff --git a/examples/running-vicunas/report.py b/examples/running-vicunas/report.py deleted file mode 100644 index 7313c910..00000000 --- a/examples/running-vicunas/report.py +++ /dev/null @@ -1,231 +0,0 @@ -from argparse import ArgumentParser -from pathlib import Path - -import matplotlib.pyplot as plt -import pandas as pd -from flatten_dict import flatten -from omegaconf import OmegaConf -from pandas import DataFrame -from rich.console import Console -from rich.table import Table -from rich.terminal_theme import MONOKAI - - -def gather_inference_report(root_folder: Path) -> DataFrame: - # key is path to inference file as string, value is dataframe - inference_dfs = { - f.parent.absolute().as_posix(): pd.read_csv(f) for f in root_folder.glob("**/inference_results.csv") - } - - # key is path to config file as string, value is flattened dict - config_dfs = { - f.parent.absolute() - .as_posix(): pd.DataFrame.from_dict(flatten(OmegaConf.load(f), reducer="dot"), orient="index") - .T - for f in root_folder.glob("**/hydra_config.yaml") - if f.parent.absolute().as_posix() in inference_dfs.keys() - } - - if len(inference_dfs) == 0 or len(config_dfs) == 0: - raise ValueError(f"No results found in {root_folder}") - - # Merge inference and config 
dataframes - inference_reports = [ - config_dfs[name].merge(inference_dfs[name], left_index=True, right_index=True) for name in inference_dfs.keys() - ] - - # Concatenate all reports - inference_report = pd.concat(inference_reports, axis=0, ignore_index=True) - inference_report.set_index("experiment_name", inplace=True) - return inference_report - - -def style_element(element, style=""): - if style: - return f"[{style}]{element}[/{style}]" - else: - return element - - -def format_element(element, style=""): - if isinstance(element, float): - if element != element: # nan - formated_element = "" - elif abs(element) >= 1: - formated_element = f"{element:.2f}" - elif abs(element) > 1e-6: - formated_element = f"{element:.2e}" - else: - formated_element = f"{element}" - elif element is None: - formated_element = "" - elif isinstance(element, bool): - if element: - formated_element = style_element("✔", style="green") - else: - formated_element = style_element("✘", style="red") - else: - formated_element = str(element) - - return style_element(formated_element, style=style) - - -def format_row(row, style=""): - formated_row = [] - for element in row: - formated_row.append(format_element(element, style=style)) - return formated_row - - -def get_short_report(inference_report): - short_columns = { - "benchmark.input_shapes.batch_size": "Batch Size", - "forward.latency(s)": "Forward Latency (s)", - "forward.throughput(samples/s)": "Forward Throughput (samples/s)", - "forward.peak_memory(MB)": "Forward Peak Memory (MB)", - "generate.throughput(tokens/s)": "Generate Throughput (tokens/s)", - "generate.peak_memory(MB)": "Generate Peak Memory (MB)", - } - short_report = inference_report[list(short_columns.keys())].rename(columns=short_columns) - short_report["Quantization Scheme"] = inference_report.index.str.split("-").str[0] - - return short_report - - -def get_rich_table(short_report): - # create rich table - rich_table = Table(show_header=True, show_lines=True) - # we add a column for the index - rich_table.add_column("Experiment Name", justify="left", header_style="") - # we populate the table with values - for column in short_report.columns: - rich_table.add_column(column, justify="right", header_style="bold") - # we add rows - for index, row in short_report.iterrows(): - rich_table.add_row(index, *format_row(row.values, style="")) - - return rich_table - - -def get_throughput_plot(short_report): - # for each quantization scheme we plot the throughput vs batch size - fig1, ax1 = plt.subplots() - fig2, ax2 = plt.subplots() - fig3, ax3 = plt.subplots() - fig4, ax4 = plt.subplots() - - short_report["Quantization Scheme"].fillna("unquantized", inplace=True) - short_report["Quantization Scheme"].replace("bnb", "BnB", inplace=True) - short_report["Quantization Scheme"].replace("awq", "AWQ", inplace=True) - short_report["Quantization Scheme"].replace("gptq", "GPTQ", inplace=True) - short_report["Quantization Scheme"].replace("awq+gemm", "AWQ+GEMM", inplace=True) - short_report["Quantization Scheme"].replace("awq+gemv", "AWQ+GEMV", inplace=True) - - for quantization_scheme in short_report["Quantization Scheme"].unique(): - mask = short_report["Quantization Scheme"] == quantization_scheme - - forward_latency = short_report[mask][["Batch Size", "Forward Latency (s)"]].sort_values(by="Batch Size") - generate_throughput = short_report[mask][["Batch Size", "Generate Throughput (tokens/s)"]].sort_values( - by="Batch Size" - ) - forward_memory = short_report[mask][["Batch Size", "Forward Peak Memory 
(MB)"]].sort_values(by="Batch Size") - generate_memory = short_report[mask][["Batch Size", "Generate Peak Memory (MB)"]].sort_values(by="Batch Size") - ax1.plot( - forward_latency["Batch Size"], - forward_latency["Forward Latency (s)"], - label=quantization_scheme, - marker="o", - ) - ax2.plot( - generate_throughput["Batch Size"], - generate_throughput["Generate Throughput (tokens/s)"], - label=quantization_scheme, - marker="o", - ) - ax3.plot( - forward_memory["Batch Size"], - forward_memory["Forward Peak Memory (MB)"], - label=quantization_scheme, - marker="*", - ) - ax4.plot( - generate_memory["Batch Size"], - generate_memory["Generate Peak Memory (MB)"], - label=quantization_scheme, - marker="*", - ) - - ax1.set_xlabel("Batch Size") - ax1.set_ylabel("Forward Latency (s)") - ax1.set_title("Forward Latency per Batch Size") - - ax2.set_xlabel("Batch Size") - ax2.set_ylabel("Generate Throughput (tokens/s)") - ax2.set_title("Generate Throughput per Batch Size") - - ax3.set_xlabel("Batch Size") - ax3.set_ylabel("Forward Peak Memory (MB)") - ax3.set_title("Forward Peak Memory per Batch Size") - - ax4.set_xlabel("Batch Size") - ax4.set_ylabel("Generate Peak Memory (MB)") - ax4.set_title("Generate Peak Memory per Batch Size") - - ax1.legend() - ax2.legend() - ax3.legend() - ax4.legend() - - return fig1, fig2, fig3, fig4 - - -def generate_report(): - parser = ArgumentParser() - parser.add_argument( - "--experiments", - "-e", - type=Path, - required=True, - help="The folder containing the results of experiments.", - ) - parser.add_argument( - "--report-name", - "-r", - type=str, - required=False, - help="The name of the report.", - ) - - args = parser.parse_args() - experiments_folders = args.experiments - - if args.report_name: - report_folder = f"artifacts/{args.report_name}" - else: - report_folder = "artifacts" - Path(report_folder).mkdir(parents=True, exist_ok=True) - - # gather experiments results - inference_report = gather_inference_report(experiments_folders) - inference_report.sort_values(by="forward.throughput(samples/s)", ascending=False, inplace=True) - inference_report.to_csv(f"{report_folder}/full_report.csv") - - short_report = get_short_report(inference_report) - short_report.to_csv(f"{report_folder}/short_report.csv") - - forward_throughput_plot, generate_throughput_plot, forward_memory_plot, generate_memory_plot = get_throughput_plot( - short_report - ) - forward_throughput_plot.savefig(f"{report_folder}/forward_latency_plot.png") - generate_throughput_plot.savefig(f"{report_folder}/generate_throughput_plot.png") - forward_memory_plot.savefig(f"{report_folder}/forward_memory_plot.png") - generate_memory_plot.savefig(f"{report_folder}/generate_memory_plot.png") - - rich_table = get_rich_table(short_report) - console = Console(record=True) - console.print(rich_table, justify="center") - console.save_svg(f"{report_folder}/rich_table.svg", theme=MONOKAI, title="Inference Report") - - -if __name__ == "__main__": - generate_report() diff --git a/examples/training-llamas/README.md b/examples/training-llamas/README.md deleted file mode 100644 index ea8bfb12..00000000 --- a/examples/training-llamas/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Optimum-Benchmark x LLaMAs x PEFT - -A set of benchmarks on Meta's LLaMA2's training. - -## Setup - -You will need to install any necessary third-party libraries like `deepspeed` or `auto-gptq` depending on the hardware and benchmarks you want to run. - -For example running PEFT on two devices with Model Parallelism (i.e. 
`fp16+peft+dp=2+zero3`) requires `peft` and `deepspeed`.
-
-## Running
-
-Then run the benchmarks from this directory with:
-
-```bash
-optimum-benchmark --config-dir configs/ --config-name fp16 --multirun
-optimum-benchmark --config-dir configs/ --config-name fp16+peft+dp=2+zero3 --multirun
-[...]
-```
-
-This will create an `experiments` folder containing the benchmark results, with the training `batch_size` ranging from 1 to 128 and the `sequence_length` (the size of each sample) fixed at 256.
-
-## Reporting
-
-To create reports for the 7B and 13B models on A100-80GB, run:
-
-```bash
-python report.py -e experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/ -r artifacts/Llama-2-7b-hf/
-python report.py -e experiments/hf-dgx-01/NousResearch/Llama-2-13b-hf/ -r artifacts/Llama-2-13b-hf/
-```
-
-This will create some quick reporting artifacts: a `full_report.csv`, a `short_report.csv`, and several analysis plots.
-
-## Results
-
-### LLaMA-7B on A100-80GB
-
-
- -- -
- -### LLaMA-13B on A100-80GB - -- -
- -- -
diff --git a/examples/training-llamas/artifacts/Llama-2-13b-hf/full_report.csv b/examples/training-llamas/artifacts/Llama-2-13b-hf/full_report.csv deleted file mode 100644 index bb28621b..00000000 --- a/examples/training-llamas/artifacts/Llama-2-13b-hf/full_report.csv +++ /dev/null @@ -1,393 +0,0 @@ -,launcher.name,launcher._target_,launcher.start_method,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.continuous_isolation,backend.isolation_check_interval,backend.delete_cache,backend.no_weights,backend.device_map,backend.torch_dtype,backend.eval_mode,backend.disable_grad,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.to_bettertransformer,backend.use_flash_attention_2,backend.quantization_scheme,backend.data_parallel,backend.deepspeed_inference,backend.peft_strategy,backend.peft_config.base_model_name_or_path,backend.peft_config.revision,backend.peft_config.peft_type,backend.peft_config.task_type,backend.peft_config.inference_mode,backend.peft_config.auto_mapping,backend.peft_config.r,backend.peft_config.target_modules,backend.peft_config.lora_alpha,backend.peft_config.lora_dropout,backend.peft_config.fan_in_fan_out,backend.peft_config.bias,backend.peft_config.modules_to_save,backend.peft_config.init_lora_weights,backend.peft_config.layers_to_transform,backend.peft_config.layers_pattern,benchmark.name,benchmark._target_,benchmark.warmup_steps,benchmark.dataset_shapes.dataset_size,benchmark.dataset_shapes.sequence_length,benchmark.dataset_shapes.num_choices,benchmark.dataset_shapes.feature_size,benchmark.dataset_shapes.nb_max_frames,benchmark.dataset_shapes.audio_sequence_length,benchmark.training_arguments.skip_memory_metrics,benchmark.training_arguments.output_dir,benchmark.training_arguments.use_cpu,benchmark.training_arguments.ddp_find_unused_parameters,benchmark.training_arguments.do_train,benchmark.training_arguments.do_eval,benchmark.training_arguments.do_predict,benchmark.training_arguments.report_to,benchmark.training_arguments.max_steps,benchmark.training_arguments.per_device_train_batch_size,experiment_name,device,model,task,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.optimum_commit,environment.transformers_version,environment.transformers_commit,environment.accelerate_version,environment.accelerate_commit,environment.diffusers_version,environment.diffusers_commit,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,warmup.runtime(s),warmup.throughput(samples/s),training.runtime(s),training.throughput(samples/s),overall_training.runtime(s),overall_training.throughput(samples/s),hydra.run.dir,hydra.sweep.dir,hydra.sweep.subdir,hydra.launcher._target_,hydra.sweeper._target_,hydra.sweeper.max_batch_size,hydra.sweeper.params.benchmark.training_arguments.per_device_train_batch_size,hydra.sweeper.params.model,hydra.help.app_name,hydra.help.header,hydra.help.footer,hydra.help.template,hydra.hydra_help.template,hydra.hydra_help.hydra_help,hydra.hydra_logging.version,hydra.hydra_logging.formatters.colorlog.(),hydra.hydra_logging.formatters.colorlog.format,hydra.hydra_logging.handlers.console.class,hydra.hydra_logging.handlers.console.formatter,hydra.hydra_logging.handlers.console.stream,hydra.hydra_logging.root.level,hydra.hydra_logging.root.handlers,hydra.hydra_logging.disable_existing_loggers,hydra.job_logging.version,hydra.job_loggi
ng.formatters.simple.format,hydra.job_logging.formatters.colorlog.(),hydra.job_logging.formatters.colorlog.format,hydra.job_logging.formatters.colorlog.log_colors.DEBUG,hydra.job_logging.formatters.colorlog.log_colors.INFO,hydra.job_logging.formatters.colorlog.log_colors.WARNING,hydra.job_logging.formatters.colorlog.log_colors.ERROR,hydra.job_logging.formatters.colorlog.log_colors.CRITICAL,hydra.job_logging.handlers.console.class,hydra.job_logging.handlers.console.formatter,hydra.job_logging.handlers.console.stream,hydra.job_logging.handlers.file.class,hydra.job_logging.handlers.file.formatter,hydra.job_logging.handlers.file.filename,hydra.job_logging.root.level,hydra.job_logging.root.handlers,hydra.job_logging.disable_existing_loggers,hydra.mode,hydra.searchpath,hydra.output_subdir,hydra.overrides.hydra,hydra.overrides.task,hydra.job.name,hydra.job.chdir,hydra.job.override_dirname,hydra.job.id,hydra.job.num,hydra.job.config_name,hydra.job.env_set.CUDA_VISIBLE_DEVICES,hydra.job.env_set.CUDA_DEVICE_ORDER,hydra.job.env_copy,hydra.job.config.override_dirname.kv_sep,hydra.job.config.override_dirname.item_sep,hydra.job.config.override_dirname.exclude_keys,hydra.runtime.version,hydra.runtime.version_base,hydra.runtime.cwd,hydra.runtime.config_sources,hydra.runtime.output_dir,hydra.runtime.choices.benchmark,hydra.runtime.choices.launcher,hydra.runtime.choices.backend,hydra.runtime.choices.hydra/env,hydra.runtime.choices.hydra/callbacks,hydra.runtime.choices.hydra/job_logging,hydra.runtime.choices.hydra/hydra_logging,hydra.runtime.choices.hydra/hydra_help,hydra.runtime.choices.hydra/help,hydra.runtime.choices.hydra/sweeper,hydra.runtime.choices.hydra/launcher,hydra.runtime.choices.hydra/output,hydra.verbose,backend.quantization_config.llm_int8_threshold,backend.quantization_config.load_in_4bit,backend.quantization_config.bnb_4bit_compute_dtype,backend.quantization_config.bits,backend.quantization_config.disable_exllama -0,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,16,fp16+peft,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],63.59784197807312,10.06323453900614,159.16368579864502,10.0525442846563,222.7615296840668,7.182568741870341,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=16', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=16,model=NousResearch/Llama-2-13b-hf",9,9,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft/16,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -1,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,4,fp16+peft,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],18.260884523391724,8.761897584700462,44.20054507255554,9.049662155600046,62.4614315032959,6.403951852734807,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=4', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=4,model=NousResearch/Llama-2-13b-hf",5,5,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft/4,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -2,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,1,fp16+peft,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],6.025439023971558,6.6385204199833945,14.0109965801239,7.137251046214708,20.036437034606934,4.990907306886948,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=1', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=1,model=NousResearch/Llama-2-13b-hf",1,1,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft/1,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -3,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,2,fp16+peft,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],10.59528875350952,7.550525696951984,25.423195123672485,7.866831805643994,36.01848530769348,5.552704348655116,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=2', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=2,model=NousResearch/Llama-2-13b-hf",3,3,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft/2,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -4,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,8,fp16+peft,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],32.72411274909973,9.77872196118758,80.59089398384094,9.926679807775873,113.31500816345216,7.059965074052975,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=8', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=8,model=NousResearch/Llama-2-13b-hf",7,7,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft/8,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -5,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,16,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],56.82151055335999,11.263340128893413,140.7813069820404,11.365145233408818,197.6028189659119,8.097050479204011,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=16', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=16,model=NousResearch/Llama-2-13b-hf",9,9,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+bnb-4bit/16,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -6,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,4,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],18.608861923217773,8.59805401642388,45.62080240249634,8.767929955964831,64.22966575622559,6.227651900262757,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=4', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=4,model=NousResearch/Llama-2-13b-hf",5,5,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+bnb-4bit/4,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -7,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,1,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],8.35726809501648,4.78625306083604,19.78277015686035,5.054903797955798,28.140039443969727,3.553655288902927,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=1', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=1,model=NousResearch/Llama-2-13b-hf",1,1,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+bnb-4bit/1,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -8,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,2,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],12.181357860565186,6.567412345628946,29.56349492073059,6.765100017310723,41.74485445022583,4.791009637809817,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=2', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=2,model=NousResearch/Llama-2-13b-hf",3,3,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+bnb-4bit/2,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -9,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,8,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],30.924877405166622,10.34765621889054,76.05532646179199,10.518658419037845,106.98020553588869,7.4780189100648515,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=8', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=8,model=NousResearch/Llama-2-13b-hf",7,7,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+bnb-4bit/8,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -10,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,4,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],27.047818660736084,5.915449301361359,66.39001893997192,6.02500204679365,93.4378387928009,4.280920932760473,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=4', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=4,model=NousResearch/Llama-2-13b-hf",7,7,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+gptq-4bit/4,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True -11,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,1,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],16.171680450439453,2.473459707702364,39.22436714172363,2.54943565153479,55.39604926109314,1.805182884589461,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=1', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=1,model=NousResearch/Llama-2-13b-hf",1,1,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+gptq-4bit/1,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True -12,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,2,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],20.308139324188232,3.939307226670201,49.545104026794434,4.036725806284275,69.85324501991272,2.863145440744907,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=2', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=2,model=NousResearch/Llama-2-13b-hf",4,4,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+gptq-4bit/2,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True -13,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,8,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-13b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],39.81309175491333,8.037557142507247,98.25630164146423,8.141971422038537,138.06939482688904,5.794187777841986,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. 
-","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. -",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=8', 'model=NousResearch/Llama-2-13b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=8,model=NousResearch/Llama-2-13b-hf",7,7,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-13b-hf/fp16+peft+gptq-4bit/8,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True diff --git a/examples/training-llamas/artifacts/Llama-2-13b-hf/peak_training_throughput.png b/examples/training-llamas/artifacts/Llama-2-13b-hf/peak_training_throughput.png deleted file mode 100644 index c5be42f5..00000000 Binary files a/examples/training-llamas/artifacts/Llama-2-13b-hf/peak_training_throughput.png and /dev/null differ diff --git a/examples/training-llamas/artifacts/Llama-2-13b-hf/short_report.csv b/examples/training-llamas/artifacts/Llama-2-13b-hf/short_report.csv deleted file mode 100644 index eb2a3c8a..00000000 --- a/examples/training-llamas/artifacts/Llama-2-13b-hf/short_report.csv +++ /dev/null @@ -1,15 +0,0 @@ -,Model,GPUs,Experiment Name,CUDAs,Per Process Batch Size,Sequence Length,Training Throughput (samples/s),GPU Name,Num GPUs,Num CUDAs,Num Processes,Effective Batch Size,Group -0,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,16,256,10.0525442846563,1xA100,1,1,1,16,1xA100-fp16+peft -1,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,4,256,9.049662155600046,1xA100,1,1,1,4,1xA100-fp16+peft -2,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,1,256,7.137251046214708,1xA100,1,1,1,1,1xA100-fp16+peft -3,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,2,256,7.866831805643994,1xA100,1,1,1,2,1xA100-fp16+peft -4,NousResearch/Llama-2-13b-hf,['NVIDIA 
A100-SXM4-80GB'],fp16+peft,0,8,256,9.926679807775873,1xA100,1,1,1,8,1xA100-fp16+peft -5,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,16,256,11.365145233408818,1xA100,1,1,1,16,1xA100-fp16+peft+bnb-4bit -6,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,4,256,8.767929955964831,1xA100,1,1,1,4,1xA100-fp16+peft+bnb-4bit -7,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,1,256,5.054903797955798,1xA100,1,1,1,1,1xA100-fp16+peft+bnb-4bit -8,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,2,256,6.765100017310723,1xA100,1,1,1,2,1xA100-fp16+peft+bnb-4bit -9,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,8,256,10.518658419037845,1xA100,1,1,1,8,1xA100-fp16+peft+bnb-4bit -10,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,4,256,6.02500204679365,1xA100,1,1,1,4,1xA100-fp16+peft+gptq-4bit -11,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,1,256,2.54943565153479,1xA100,1,1,1,1,1xA100-fp16+peft+gptq-4bit -12,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,2,256,4.036725806284275,1xA100,1,1,1,2,1xA100-fp16+peft+gptq-4bit -13,NousResearch/Llama-2-13b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,8,256,8.141971422038537,1xA100,1,1,1,8,1xA100-fp16+peft+gptq-4bit diff --git a/examples/training-llamas/artifacts/Llama-2-13b-hf/training_throughput_bar_plot.png b/examples/training-llamas/artifacts/Llama-2-13b-hf/training_throughput_bar_plot.png deleted file mode 100644 index 76fe3f70..00000000 Binary files a/examples/training-llamas/artifacts/Llama-2-13b-hf/training_throughput_bar_plot.png and /dev/null differ diff --git a/examples/training-llamas/artifacts/Llama-2-13b-hf/training_throughput_line_plot.png b/examples/training-llamas/artifacts/Llama-2-13b-hf/training_throughput_line_plot.png deleted file mode 100644 index 967ec405..00000000 Binary files a/examples/training-llamas/artifacts/Llama-2-13b-hf/training_throughput_line_plot.png and /dev/null differ diff --git a/examples/training-llamas/artifacts/Llama-2-7b-hf/full_report.csv b/examples/training-llamas/artifacts/Llama-2-7b-hf/full_report.csv deleted file mode 100644 index 4dfdee19..00000000 --- a/examples/training-llamas/artifacts/Llama-2-7b-hf/full_report.csv +++ /dev/null @@ -1,561 +0,0 @@ 
-,launcher.name,launcher._target_,launcher.start_method,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.continuous_isolation,backend.isolation_check_interval,backend.delete_cache,backend.no_weights,backend.device_map,backend.torch_dtype,backend.eval_mode,backend.disable_grad,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.to_bettertransformer,backend.use_flash_attention_2,backend.quantization_scheme,backend.data_parallel,backend.deepspeed_inference,backend.peft_strategy,backend.peft_config.base_model_name_or_path,backend.peft_config.revision,backend.peft_config.peft_type,backend.peft_config.task_type,backend.peft_config.inference_mode,backend.peft_config.auto_mapping,backend.peft_config.r,backend.peft_config.target_modules,backend.peft_config.lora_alpha,backend.peft_config.lora_dropout,backend.peft_config.fan_in_fan_out,backend.peft_config.bias,backend.peft_config.modules_to_save,backend.peft_config.init_lora_weights,backend.peft_config.layers_to_transform,backend.peft_config.layers_pattern,benchmark.name,benchmark._target_,benchmark.warmup_steps,benchmark.dataset_shapes.dataset_size,benchmark.dataset_shapes.sequence_length,benchmark.dataset_shapes.num_choices,benchmark.dataset_shapes.feature_size,benchmark.dataset_shapes.nb_max_frames,benchmark.dataset_shapes.audio_sequence_length,benchmark.training_arguments.skip_memory_metrics,benchmark.training_arguments.output_dir,benchmark.training_arguments.use_cpu,benchmark.training_arguments.ddp_find_unused_parameters,benchmark.training_arguments.do_train,benchmark.training_arguments.do_eval,benchmark.training_arguments.do_predict,benchmark.training_arguments.report_to,benchmark.training_arguments.max_steps,benchmark.training_arguments.per_device_train_batch_size,experiment_name,device,model,task,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.optimum_commit,environment.transformers_version,environment.transformers_commit,environment.accelerate_version,environment.accelerate_commit,environment.diffusers_version,environment.diffusers_commit,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,warmup.runtime(s),warmup.throughput(samples/s),training.runtime(s),training.throughput(samples/s),overall_training.runtime(s),overall_training.throughput(samples/s),hydra.run.dir,hydra.sweep.dir,hydra.sweep.subdir,hydra.launcher._target_,hydra.sweeper._target_,hydra.sweeper.max_batch_size,hydra.sweeper.params.benchmark.training_arguments.per_device_train_batch_size,hydra.sweeper.params.model,hydra.help.app_name,hydra.help.header,hydra.help.footer,hydra.help.template,hydra.hydra_help.template,hydra.hydra_help.hydra_help,hydra.hydra_logging.version,hydra.hydra_logging.formatters.colorlog.(),hydra.hydra_logging.formatters.colorlog.format,hydra.hydra_logging.handlers.console.class,hydra.hydra_logging.handlers.console.formatter,hydra.hydra_logging.handlers.console.stream,hydra.hydra_logging.root.level,hydra.hydra_logging.root.handlers,hydra.hydra_logging.disable_existing_loggers,hydra.job_logging.version,hydra.job_logging.formatters.simple.format,hydra.job_logging.formatters.colorlog.(),hydra.job_logging.formatters.colorlog.format,hydra.job_logging.formatters.colorlog.log_colors.DEBUG,hydra.job_logging.formatters.colorlog.log_colors.INFO,hydra.job_logging.formatters.colorlog.log_colors.WARNING,hydra.job_logging.formatters.colorlog.log_colors.ERROR,hydra.job_logging.formatters.colorlog.log_colors.CRITICAL,hydra.job_logging.handlers.console.class,hydra.job_logging.handlers.console.formatter,hydra.job_logging.handlers.console.stream,hydra.job_logging.handlers.file.class,hydra.job_logging.handlers.file.formatter,hydra.job_logging.handlers.file.filename,hydra.job_logging.root.level,hydra.job_logging.root.handlers,hydra.job_logging.disable_existing_loggers,hydra.mode,hydra.searchpath,hydra.output_subdir,hydra.overrides.hydra,hydra.overrides.task,hydra.job.name,hydra.job.chdir,hydra.job.override_dirname,hydra.job.id,hydra.job.num,hydra.job.config_name,hydra.job.env_set.CUDA_VISIBLE_DEVICES,hydra.job.env_set.CUDA_DEVICE_ORDER,hydra.job.env_copy,hydra.job.config.override_dirname.kv_sep,hydra.job.config.override_dirname.item_sep,hydra.job.config.override_dirname.exclude_keys,hydra.runtime.version,hydra.runtime.version_base,hydra.runtime.cwd,hydra.runtime.config_sources,hydra.runtime.output_dir,hydra.runtime.choices.benchmark,hydra.runtime.choices.launcher,hydra.runtime.choices.backend,hydra.runtime.choices.hydra/env,hydra.runtime.choices.hydra/callbacks,hydra.runtime.choices.hydra/job_logging,hydra.runtime.choices.hydra/hydra_logging,hydra.runtime.choices.hydra/hydra_help,hydra.runtime.choices.hydra/help,hydra.runtime.choices.hydra/sweeper,hydra.runtime.choices.hydra/launcher,hydra.runtime.choices.hydra/output,hydra.verbose,backend.quantization_config.llm_int8_threshold,backend.quantization_config.load_in_4bit,backend.quantization_config.bnb_4bit_compute_dtype,backend.quantization_config.bits,backend.quantization_config.disable_exllama
-0,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,16,fp16+peft,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],36.81096434593201,17.38612425324105,90.82892441749571,17.61553393108114,127.63988995552064,12.535266213074618,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra.
-","Powered by Hydra (https://hydra.cc)
-Use --hydra-help to view Hydra specific help
-","${hydra.help.header}
-== Configuration groups ==
-Compose your configuration from those groups (group=option)
-
-$APP_CONFIG_GROUPS
-
-== Config ==
-Override anything in the config (foo.bar=value)
-
-$CONFIG
-
-${hydra.help.footer}
-","Hydra (${hydra.runtime.version})
-See https://hydra.cc for more info.
-
-== Flags ==
-$FLAGS_HELP
-
-== Configuration groups ==
-Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line)
-
-$HYDRA_CONFIG_GROUPS
-
-Use '--cfg hydra' to Show the Hydra config.
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=16', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=16,model=NousResearch/Llama-2-7b-hf",8,8,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft/16,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -1,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,4,fp16+peft,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],10.65952181816101,15.01005417779644,26.17747592926025,15.280312016365723,36.83699917793274,10.858647797772315,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=4', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=4,model=NousResearch/Llama-2-7b-hf",4,4,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft/4,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -2,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,1,fp16+peft,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],4.55856728553772,8.774686758908217,10.863666772842407,9.204995154121027,15.422235250473022,6.484144378288669,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=1', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=1,model=NousResearch/Llama-2-7b-hf",0,0,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft/1,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -3,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,2,fp16+peft,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],6.038196802139282,13.248988501278507,14.59524655342102,13.703091569434395,20.63344502449036,9.693000842206184,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=2', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=2,model=NousResearch/Llama-2-7b-hf",2,2,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft/2,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -4,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,8,fp16+peft,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],19.43790316581726,16.4626810448742,47.72488141059876,16.762744638739655,67.16278600692749,11.911358172626793,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=8', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=8,model=NousResearch/Llama-2-7b-hf",6,6,fp16+peft,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft/8,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -5,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,16,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],33.6070408821106,19.043628454080263,83.09710359573364,19.25458205840699,116.70414614677428,13.70988137805954,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=16', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=16,model=NousResearch/Llama-2-7b-hf",8,8,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+bnb-4bit/16,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -6,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,4,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],11.067278146743774,14.45703251319073,26.702176094055176,14.980052509243013,37.769455671310425,10.590568301566467,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=4', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=4,model=NousResearch/Llama-2-7b-hf",4,4,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+bnb-4bit/4,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -7,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,1,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],6.390275239944458,6.259511288335002,15.36588716506958,6.507922316865925,21.75616407394409,4.596398503896343,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=1', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=1,model=NousResearch/Llama-2-7b-hf",0,0,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+bnb-4bit/1,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -8,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,2,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],7.136318206787109,11.210262446525258,16.748042583465576,11.94169402204942,23.88436245918274,8.37367965512124,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=2', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=2,model=NousResearch/Llama-2-7b-hf",2,2,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+bnb-4bit/2,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -9,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,bnb,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,8,fp16+peft+bnb-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],18.616926193237305,17.188659216806784,45.44451022148132,17.60388650028503,64.06143808364868,12.488011882521189,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=8', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=8,model=NousResearch/Llama-2-7b-hf",6,6,fp16+peft+bnb-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+bnb-4bit/8,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,0.0,True,float16,, -10,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,,,,,,,,,,,,,,,,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,16,fp16,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.25.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],45.30532383918762,14.12637513135754,111.96182560920715,14.290585128404912,157.26715087890625,10.173771134392712,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=16', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=16,model=NousResearch/Llama-2-7b-hf",12,12,fp16,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -11,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,,,,,,,,,,,,,,,,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,4,fp16,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.25.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],19.096375942230225,8.378553107879062,46.270344257354736,8.644845989803057,65.36672186851501,6.1193217063049135,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=4', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=4,model=NousResearch/Llama-2-7b-hf",6,6,fp16,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -12,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,,,,,,,,,,,,,,,,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,1,fp16,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.25.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],12.218624591827393,3.273690888805487,29.746723175048828,3.361714815159161,41.965349197387695,2.3829183341151587,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=1', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=1,model=NousResearch/Llama-2-7b-hf",0,0,fp16,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -13,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,,,,,,,,,,,,,,,,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,2,fp16,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.25.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],14.054343223571776,5.692190572507505,34.34596490859985,5.823100341837308,48.40031003952026,4.132204934982734,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=2', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=2,model=NousResearch/Llama-2-7b-hf",2,2,fp16,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -14,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,,False,False,,,,,,,,,,,,,,,,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,8,fp16,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.25.0,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],27.57860779762268,11.603196301576345,67.7247965335846,11.812512417121573,95.30340623855592,8.394243517356594,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=8', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=8,model=NousResearch/Llama-2-7b-hf",9,9,fp16,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,, -15,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,16,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],38.72175240516663,16.528177580997216,95.55383205413818,16.744488060860643,134.27558636665344,11.91579231410715,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=16', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=16,model=NousResearch/Llama-2-7b-hf",8,8,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+gptq-4bit/16,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True -16,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,4,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],15.456857919692991,10.351392296629063,37.6063711643219,10.63649556220649,53.06323051452637,7.538176551284372,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=4', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=4,model=NousResearch/Llama-2-7b-hf",6,6,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+gptq-4bit/4,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True -17,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,1,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],9.378505945205688,4.26507166852606,22.25546908378601,4.493277568022772,31.63397645950317,3.161158070912042,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=1', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=1,model=NousResearch/Llama-2-7b-hf",0,0,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+gptq-4bit/1,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True -18,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,2,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],11.28474497795105,7.089216473771431,27.20784854888916,7.350820100333349,38.49259519577026,5.195804517279647,experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=2', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=2,model=NousResearch/Llama-2-7b-hf",3,3,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+gptq-4bit/2,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True -19,process,optimum_benchmark.launchers.process.launcher.ProcessLauncher,spawn,pytorch,2.1.1+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,True,1.0,False,True,,float16,False,False,False,,False,False,False,gptq,False,False,lora,,,,CAUSAL_LM,False,,8,,8,0,False,none,,True,,,training,optimum_benchmark.benchmarks.training.benchmark.TrainingBenchmark,40,160,256,1,80,3000,16000,True,./trainer_output,False,False,True,False,False,none,140,8,fp16+peft+gptq-4bit,cuda,NousResearch/Llama-2-7b-hf,text-generation,main,,False,False,1.14.1,,4.35.2,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540671,['NVIDIA A100-SXM4-80GB'],23.256840229034424,13.759392800080551,57.13273501396179,14.002480360943691,80.38957738876343,9.951538818660604,experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},experiments/${benchmark.name}/${oc.env:HOSTNAME}/${model}/${experiment_name},${benchmark.training_arguments.per_device_train_batch_size},hydra._internal.core_plugins.basic_launcher.BasicLauncher,hydra._internal.core_plugins.basic_sweeper.BasicSweeper,,"1,2,4,8,16,32,64,128","NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf",${hydra.job.name},"${hydra.help.app_name} is powered by Hydra. -","Powered by Hydra (https://hydra.cc) -Use --hydra-help to view Hydra specific help -","${hydra.help.header} -== Configuration groups == -Compose your configuration from those groups (group=option) - -$APP_CONFIG_GROUPS - -== Config == -Override anything in the config (foo.bar=value) - -$CONFIG - -${hydra.help.footer} -","Hydra (${hydra.runtime.version}) -See https://hydra.cc for more info. - -== Flags == -$FLAGS_HELP - -== Configuration groups == -Compose your configuration from those groups (For example, append hydra/job_logging=disabled to command line) - -$HYDRA_CONFIG_GROUPS - -Use '--cfg hydra' to Show the Hydra config. 
-",???,1,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s,logging.StreamHandler,colorlog,ext://sys.stdout,INFO,['console'],False,1,[%(asctime)s][%(name)s][%(levelname)s] - %(message)s,colorlog.ColoredFormatter,[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s,purple,green,yellow,red,red,logging.StreamHandler,colorlog,ext://sys.stdout,logging.FileHandler,simple,${hydra.job.name}.log,INFO,"['console', 'file']",False,MULTIRUN,[],.hydra,['hydra.mode=MULTIRUN'],"['benchmark.training_arguments.per_device_train_batch_size=8', 'model=NousResearch/Llama-2-7b-hf']",cli,True,"benchmark.training_arguments.per_device_train_batch_size=8,model=NousResearch/Llama-2-7b-hf",6,6,fp16+peft+gptq-4bit,0,PCI_BUS_ID,[],=,",",[],1.3.2,1.3,/workspace/optimum-benchmark/examples/training-llamas,"[{'path': 'hydra.conf', 'schema': 'pkg', 'provider': 'hydra'}, {'path': 'optimum_benchmark', 'schema': 'pkg', 'provider': 'main'}, {'path': 'hydra_plugins.hydra_colorlog.conf', 'schema': 'pkg', 'provider': 'hydra-colorlog'}, {'path': '/workspace/optimum-benchmark/examples/training-llamas/configs', 'schema': 'file', 'provider': 'command-line'}, {'path': '', 'schema': 'structured', 'provider': 'schema'}]",/workspace/optimum-benchmark/examples/training-llamas/experiments/training/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16+peft+gptq-4bit/8,training,process,pytorch,default,,colorlog,colorlog,default,default,basic,basic,default,False,,,,4,True diff --git a/examples/training-llamas/artifacts/Llama-2-7b-hf/peak_training_throughput.png b/examples/training-llamas/artifacts/Llama-2-7b-hf/peak_training_throughput.png deleted file mode 100644 index ed456949..00000000 Binary files a/examples/training-llamas/artifacts/Llama-2-7b-hf/peak_training_throughput.png and /dev/null differ diff --git a/examples/training-llamas/artifacts/Llama-2-7b-hf/short_report.csv b/examples/training-llamas/artifacts/Llama-2-7b-hf/short_report.csv deleted file mode 100644 index d987510c..00000000 --- a/examples/training-llamas/artifacts/Llama-2-7b-hf/short_report.csv +++ /dev/null @@ -1,21 +0,0 @@ -,Model,GPUs,Experiment Name,CUDAs,Per Process Batch Size,Sequence Length,Training Throughput (samples/s),GPU Name,Num GPUs,Num CUDAs,Num Processes,Effective Batch Size,Group -0,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,16,256,17.61553393108114,1xA100,1,1,1,16,1xA100-fp16+peft -1,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,4,256,15.280312016365723,1xA100,1,1,1,4,1xA100-fp16+peft -2,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,1,256,9.204995154121027,1xA100,1,1,1,1,1xA100-fp16+peft -3,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,2,256,13.703091569434395,1xA100,1,1,1,2,1xA100-fp16+peft -4,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft,0,8,256,16.762744638739655,1xA100,1,1,1,8,1xA100-fp16+peft -5,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,16,256,19.25458205840699,1xA100,1,1,1,16,1xA100-fp16+peft+bnb-4bit -6,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,4,256,14.980052509243013,1xA100,1,1,1,4,1xA100-fp16+peft+bnb-4bit -7,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,1,256,6.507922316865925,1xA100,1,1,1,1,1xA100-fp16+peft+bnb-4bit -8,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,2,256,11.94169402204942,1xA100,1,1,1,2,1xA100-fp16+peft+bnb-4bit 
-9,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+bnb-4bit,0,8,256,17.60388650028503,1xA100,1,1,1,8,1xA100-fp16+peft+bnb-4bit -10,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,0,16,256,14.290585128404912,1xA100,1,1,1,16,1xA100-fp16 -11,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,0,4,256,8.644845989803057,1xA100,1,1,1,4,1xA100-fp16 -12,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,0,1,256,3.361714815159161,1xA100,1,1,1,1,1xA100-fp16 -13,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,0,2,256,5.823100341837308,1xA100,1,1,1,2,1xA100-fp16 -14,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16,0,8,256,11.812512417121573,1xA100,1,1,1,8,1xA100-fp16 -15,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,16,256,16.744488060860643,1xA100,1,1,1,16,1xA100-fp16+peft+gptq-4bit -16,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,4,256,10.63649556220649,1xA100,1,1,1,4,1xA100-fp16+peft+gptq-4bit -17,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,1,256,4.493277568022772,1xA100,1,1,1,1,1xA100-fp16+peft+gptq-4bit -18,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,2,256,7.350820100333349,1xA100,1,1,1,2,1xA100-fp16+peft+gptq-4bit -19,NousResearch/Llama-2-7b-hf,['NVIDIA A100-SXM4-80GB'],fp16+peft+gptq-4bit,0,8,256,14.002480360943691,1xA100,1,1,1,8,1xA100-fp16+peft+gptq-4bit diff --git a/examples/training-llamas/artifacts/Llama-2-7b-hf/training_throughput_bar_plot.png b/examples/training-llamas/artifacts/Llama-2-7b-hf/training_throughput_bar_plot.png deleted file mode 100644 index e17bb8c4..00000000 Binary files a/examples/training-llamas/artifacts/Llama-2-7b-hf/training_throughput_bar_plot.png and /dev/null differ diff --git a/examples/training-llamas/artifacts/Llama-2-7b-hf/training_throughput_line_plot.png b/examples/training-llamas/artifacts/Llama-2-7b-hf/training_throughput_line_plot.png deleted file mode 100644 index 340767f1..00000000 Binary files a/examples/training-llamas/artifacts/Llama-2-7b-hf/training_throughput_line_plot.png and /dev/null differ diff --git a/examples/training-llamas/configs/_base_.yaml b/examples/training-llamas/configs/_base_.yaml deleted file mode 100644 index 36efa0f3..00000000 --- a/examples/training-llamas/configs/_base_.yaml +++ /dev/null @@ -1,41 +0,0 @@ -defaults: - - backend: pytorch # default backend - - launcher: inline # default launcher - - benchmark: training # default benchmark - - experiment # inheriting from experiment config - - _self_ # for hydra 1.1 compatibility - - override hydra/job_logging: colorlog # colorful logging - - override hydra/hydra_logging: colorlog # colorful logging - -experiment_name: llama-experiment -model: llama-2-model -device: cuda - -backend: - no_weights: true - torch_dtype: float16 - -benchmark: - warmup_steps: 40 - dataset_shapes: - dataset_size: 160 - sequence_length: 256 - training_arguments: - max_steps: 140 - per_device_train_batch_size: 1 - -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.training_arguments.per_device_train_batch_size} - job: - chdir: true - env_set: - CUDA_VISIBLE_DEVICES: 0 - CUDA_DEVICE_ORDER: PCI_BUS_ID - sweeper: - params: - benchmark.training_arguments.per_device_train_batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf diff --git 
a/examples/training-llamas/configs/fp16+dp=2.yaml b/examples/training-llamas/configs/fp16+dp=2.yaml deleted file mode 100644 index 60043450..00000000 --- a/examples/training-llamas/configs/fp16+dp=2.yaml +++ /dev/null @@ -1,15 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+dp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29500 - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git a/examples/training-llamas/configs/fp16+fsdp=2.yaml b/examples/training-llamas/configs/fp16+fsdp=2.yaml deleted file mode 100644 index bb57da37..00000000 --- a/examples/training-llamas/configs/fp16+fsdp=2.yaml +++ /dev/null @@ -1,19 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+fsdp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29522 - -benchmark: - training_arguments: - fsdp: full_shard - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git a/examples/training-llamas/configs/fp16+peft+bnb-4bit+dp=2.yaml b/examples/training-llamas/configs/fp16+peft+bnb-4bit+dp=2.yaml deleted file mode 100644 index 6e3ca8bd..00000000 --- a/examples/training-llamas/configs/fp16+peft+bnb-4bit+dp=2.yaml +++ /dev/null @@ -1,24 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+peft+bnb-4bit+dp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29511 - -backend: - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - peft_strategy: lora - peft_config: - task_type: CAUSAL_LM - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git a/examples/training-llamas/configs/fp16+peft+bnb-4bit.yaml b/examples/training-llamas/configs/fp16+peft+bnb-4bit.yaml deleted file mode 100644 index 4440d565..00000000 --- a/examples/training-llamas/configs/fp16+peft+bnb-4bit.yaml +++ /dev/null @@ -1,15 +0,0 @@ -defaults: - - _base_ # we can extend fp16+peft as a base here - - _self_ - - override launcher: process - -experiment_name: fp16+peft+bnb-4bit - -backend: - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 - peft_strategy: lora - peft_config: - task_type: CAUSAL_LM diff --git a/examples/training-llamas/configs/fp16+peft+dp=2+zero3.yaml b/examples/training-llamas/configs/fp16+peft+dp=2+zero3.yaml deleted file mode 100644 index 8182c413..00000000 --- a/examples/training-llamas/configs/fp16+peft+dp=2+zero3.yaml +++ /dev/null @@ -1,28 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+peft+dp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29511 - -backend: - peft_strategy: lora - peft_config: - task_type: CAUSAL_LM - -benchmark: - training_arguments: - deepspeed: - train_batch_size: auto - train_micro_batch_size_per_gpu: auto - zero_optimization: - stage: 3 - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git a/examples/training-llamas/configs/fp16+peft+dp=2.yaml b/examples/training-llamas/configs/fp16+peft+dp=2.yaml deleted file mode 100644 index b6c19980..00000000 --- a/examples/training-llamas/configs/fp16+peft+dp=2.yaml +++ /dev/null @@ -1,20 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: torchrun - -experiment_name: fp16+peft+dp=2 - -launcher: - nproc_per_node: 2 - rdzv_endpoint: localhost:29511 - -backend: - peft_strategy: lora - peft_config: - task_type: CAUSAL_LM - -hydra: - job: - env_set: - CUDA_VISIBLE_DEVICES: 0,1 diff --git 
a/examples/training-llamas/configs/fp16+peft+gptq-4bit+dp=2.yaml b/examples/training-llamas/configs/fp16+peft+gptq-4bit+dp=2.yaml deleted file mode 100644 index fd7c3c7e..00000000 --- a/examples/training-llamas/configs/fp16+peft+gptq-4bit+dp=2.yaml +++ /dev/null @@ -1,16 +0,0 @@ -defaults: - - _base_ # we can extend fp16+peft as a base here - - _self_ - - override launcher: process - -experiment_name: fp16+peft+gptq-4bit - -backend: - no_weights: true - quantization_scheme: gptq - quantization_config: - bits: 4 - disable_exllama: true - peft_strategy: lora - peft_config: - task_type: CAUSAL_LM diff --git a/examples/training-llamas/configs/fp16+peft+gptq-4bit.yaml b/examples/training-llamas/configs/fp16+peft+gptq-4bit.yaml deleted file mode 100644 index fd7c3c7e..00000000 --- a/examples/training-llamas/configs/fp16+peft+gptq-4bit.yaml +++ /dev/null @@ -1,16 +0,0 @@ -defaults: - - _base_ # we can extend fp16+peft as a base here - - _self_ - - override launcher: process - -experiment_name: fp16+peft+gptq-4bit - -backend: - no_weights: true - quantization_scheme: gptq - quantization_config: - bits: 4 - disable_exllama: true - peft_strategy: lora - peft_config: - task_type: CAUSAL_LM diff --git a/examples/training-llamas/configs/fp16+peft.yaml b/examples/training-llamas/configs/fp16+peft.yaml deleted file mode 100644 index 1a52b6e2..00000000 --- a/examples/training-llamas/configs/fp16+peft.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: - - _base_ # we can also extend fp16 as a base here - - _self_ - - override launcher: process - -experiment_name: fp16+peft - -backend: - peft_strategy: lora - peft_config: - task_type: CAUSAL_LM diff --git a/examples/training-llamas/configs/fp16.yaml b/examples/training-llamas/configs/fp16.yaml deleted file mode 100644 index 432d986f..00000000 --- a/examples/training-llamas/configs/fp16.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - _base_ - - _self_ - - override launcher: process - -experiment_name: fp16 diff --git a/examples/training-llamas/report.py b/examples/training-llamas/report.py deleted file mode 100644 index 39220499..00000000 --- a/examples/training-llamas/report.py +++ /dev/null @@ -1,242 +0,0 @@ -from argparse import ArgumentParser -from pathlib import Path -from typing import List - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from flatten_dict import flatten -from omegaconf import OmegaConf -from pandas import DataFrame - - -def gather_full_report(root_folders: List[Path], report_folder: str = "artifacts") -> DataFrame: - # key is path to inference file as string, value is dataframe - - hydra_dfs = {} - config_dfs = {} - inference_dfs = {} - - for root_folder in root_folders: - inference_dfs.update( - {f.parent.absolute().as_posix(): pd.read_csv(f) for f in root_folder.glob("**/training_results.csv")} - ) - config_dfs.update( - { - f.parent.absolute() - .as_posix(): pd.DataFrame.from_dict(flatten(OmegaConf.load(f), reducer="dot"), orient="index") - .T - for f in root_folder.glob("**/hydra_config.yaml") - if f.parent.absolute().as_posix() in inference_dfs.keys() - } - ) - hydra_dfs.update( - { - f.parent.parent.absolute() - .as_posix(): pd.DataFrame.from_dict( - flatten( - OmegaConf.to_container(OmegaConf.load(f), resolve=False), - reducer="dot", - ), - orient="index", - ) - .T - for f in root_folder.glob("**/.hydra/hydra.yaml") - if f.parent.parent.absolute().as_posix() in inference_dfs.keys() - } - ) - - if len(inference_dfs) == 0 or len(config_dfs) == 0: - raise ValueError(f"No results found in {root_folder}") - 
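-    # all three dicts are keyed by the experiment directory's absolute path,
-    # so the config, hydra, and results frames of each run line up for merging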
- # Merge inference and config dataframes - inference_reports = [ - config_dfs[name] - .merge(inference_dfs[name], left_index=True, right_index=True) - .merge(hydra_dfs[name], left_index=True, right_index=True) - for name in inference_dfs.keys() - ] - - # Concatenate all reports - inference_report = pd.concat(inference_reports, axis=0, ignore_index=True) - inference_report.to_csv(f"{report_folder}/full_report.csv") - - return inference_report - - -def get_short_report(full_report, report_folder: str = "artifacts"): - short_columns = { - "model": "Model", - "environment.gpus": "GPUs", - "experiment_name": "Experiment Name", - "launcher.name": "Launcher", - "launcher.nproc_per_node": "Processes per Node", - "benchmark.dataset_shapes.sequence_length": "Sequence Length", - "benchmark.training_arguments.per_device_train_batch_size": "Per Process Batch Size", - # - "training.throughput(samples/s)": "Training Throughput (samples/s)", - } - short_report = full_report[list(short_columns.keys())].rename(columns=short_columns) - - short_report["GPU Name"] = short_report["GPUs"].str[0] - short_report["Num Processes"] = short_report[["Launcher", "Processes per Node"]].apply( - lambda x: x["Processes per Node"] if x["Launcher"] == "torchrun" else 1, - axis=1, - ) - - short_report["GPU Name"].replace("NVIDIA A100-SXM4-80GB", "1xA100", inplace=True) - short_report["GPU Name"].replace("AMD INSTINCT MI250 (MCM) OAM AC MBA", "1xMI250", inplace=True) - short_report["GPU Name"] = short_report[["GPU Name", "Num Processes"]].apply( - lambda x: "1xGCD-MI250" if x["GPU Name"] == "1xMI250" and x["Num Processes"] == 1 else x["GPU Name"], - axis=1, - ) - short_report["Effective Batch Size"] = short_report["Per Process Batch Size"] * short_report["Num Processes"] - short_report["Group"] = short_report["GPU Name"] + "-" + short_report["Experiment Name"] - short_report.to_csv(f"{report_folder}/short_report.csv") - - return short_report - - -def get_batch_plots(short_report, report_folder, plot="bar"): - fig1, ax1 = plt.subplots() - - batch_column = "Effective Batch Size" - short_report = short_report.sort_values(by="Group", ascending=True) - groups = short_report["Group"].unique().tolist() - x = np.arange( - short_report[batch_column].min() - 1, - len(short_report[batch_column].unique()) + (short_report[batch_column].min() - 1), - ) - width = 0.8 / len(short_report["Group"].unique().tolist()) - offset = -(width * (len(groups) - 1) / 2) - - for group in groups: - mask = short_report["Group"] == group - group_report = short_report[mask].sort_values(by=batch_column) - x_ = np.arange( - group_report[batch_column].min() - 1, - len(group_report[batch_column].unique()) + (group_report[batch_column].min() - 1), - ) - if plot == "bar": - ax1.bar( - x_ + offset, - group_report["Training Throughput (samples/s)"], - label=group, - width=width, - ) - offset += width - elif plot == "line": - ax1.plot( - x_, - group_report["Training Throughput (samples/s)"], - label=group, - marker="o", - ) - - ax1.set_xticks(x) - ax1.set_ylim(bottom=0) - ax1.set_xticklabels(short_report[batch_column].sort_values().unique().tolist()) - ax1.set_xlabel(batch_column) - ax1.set_ylabel("Training Throughput (samples/s)") - ax1.set_title(f"Training Throughput per Batch Size ({short_report['Model'].unique()[0]})") - ax1.legend(fancybox=True, shadow=True) - - legend = plt.legend(loc="upper center") - legend.get_frame().set_facecolor((0, 0, 1, 0.1)) - legend.get_frame().set_alpha(None) - plt.tight_layout() - - 
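-    # write the figure into the report folder, alongside the CSV reports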
fig1.savefig(f"{report_folder}/training_throughput_{plot}_plot.png") - - return fig1 - - -def get_peak_trainong_throughput_plot(short_report, report_folder): - # a bar plot with one bar per group, representing the max attainable throughput in tokens/s - fig, ax = plt.subplots() - - max_training_throughput = short_report.groupby("Group")["Training Throughput (samples/s)"].max().reset_index() - max_training_throughput = ( - short_report.merge(max_training_throughput, on=["Group", "Training Throughput (samples/s)"]) - .sort_values(by="Training Throughput (samples/s)", ascending=True) - .reset_index() - ) - - ax.bar( - max_training_throughput["Group"], - max_training_throughput["Training Throughput (samples/s)"], - color=plt.cm.Paired(np.arange(len(max_training_throughput))), - ) - - for i, v in enumerate(max_training_throughput["Effective Batch Size"]): - ax.text( - i, - max_training_throughput["Training Throughput (samples/s)"].iloc[i], - f"bs={v}", - ha="center", - va="bottom", - ) - - ax.set_xlabel("Group") - ax.set_ylabel("Peak Training Throughput (samples/s)") - ax.set_title(f"Peak Training Throughput ({short_report['Model'].unique()[0]})") - ax.set_ylim(top=max_training_throughput["Training Throughput (samples/s)"].max() * 1.1) - - plt.xticks(rotation=45, ha="right") - plt.tight_layout() - - fig.savefig(f"{report_folder}/peak_training_throughput.png") - - return fig - - -def generate_report(): - parser = ArgumentParser() - parser.add_argument( - "--experiments-folders", - "-e", - type=Path, - nargs="+", - required=True, - help="The folder containing the results of experiments.", - ) - parser.add_argument( - "--report-name", - "-r", - type=str, - required=False, - default="artifacts", - help="The name of the report.", - ) - - args = parser.parse_args() - report_folder = args.report_name - experiments_folders = args.experiments_folders - - Path(report_folder).mkdir(parents=True, exist_ok=True) - - # gather experiments results - full_report = gather_full_report( - root_folders=experiments_folders, - report_folder=report_folder, - ) - short_report = get_short_report( - full_report, - report_folder=report_folder, - ) - for plot in ["bar", "line"]: - _ = get_batch_plots( - short_report, - report_folder, - plot=plot, - ) - - _ = get_peak_trainong_throughput_plot( - short_report, - report_folder, - ) - print("Report generated successfully!") - - -if __name__ == "__main__": - generate_report() diff --git a/examples/whisper/README.md b/examples/whisper/README.md deleted file mode 100644 index 8adc3fd2..00000000 --- a/examples/whisper/README.md +++ /dev/null @@ -1,72 +0,0 @@ -# Optimum-Benchmark x Whisper - -A set of benchmarks on OpenAI's Whisper model, using Optimum-Benchmark. - -With Optimum-Benchmark, once you clone the repo and install it, run: - -```bash -sh benchmark.sh ${device} -sh report.sh ${device} -``` - -Where `${device}` is either `cpu` or `cuda`. - -## Metrics - -For this benchmark I tried to compare `whisper-base` model's throughputs (forward and generate). - -Forward throughput is measured in `samples/second` with the formula `number_processed_samples / total_time`. -Where `number_processed_samples = batch_size * number_forward_passes` is the number of samples processed by the model in `total_time`. - -Generate throughput is measured in `tokens/second` with the formula `number_generated_tokens / total_time`. -Where `number_generated_tokens = batch_size * num_tokens * number_generate_passes` is the number of tokens generated by the model in `total_time`. 
-
-## Search Space
-
-To be exhaustive, I benchmarked the auto optimization configurations supported by Optimum on both GPU and CPU, and the auto quantization configurations on CPU only.
-
-I also added the Hydra sweep overrides `benchmark.batch_size=64,128 benchmark.new_tokens=10,100` to compare behavior across different batch sizes and numbers of generated tokens.
-
-## GPU Results (NVIDIA A100)
-
-### Batch Size = 64, New Tokens = 10
-