Reorganize directories
TaekyungHeo committed May 3, 2024
1 parent fd333de commit 08f1e4e
Showing 93 changed files with 749 additions and 1,154 deletions.
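The reorganization is largely a package rename: modules that lived under param_bench.train.comms.pt and param_bench.train.compute.python move to the flatter param.comm, param.comp/param.compute, and top-level param modules, as the import changes below show. As a rough, hedged sketch of what this means for downstream code (assuming the renamed param package from this commit is installed), old imports map to new ones roughly like this:

# Hypothetical migration sketch; the authoritative mappings are the diffs below.
# Old layout (before this commit):
#   from param_bench.train.comms.pt import comms_utils
#   from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace
# New layout (after this commit):
from param.comm import comms_utils
from param.execution_trace import ExecutionTrace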
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
+build/
+pyproject/
+param/
+param.egg-info/
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to PARAM_Bench
+# Contributing to PARAM
We want to make contributing to this project as easy and transparent as
possible.

605 changes: 605 additions & 0 deletions README.md

Large diffs are not rendered by default.

File renamed without changes.
86 changes: 0 additions & 86 deletions docs/using_ET.md

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 7 additions & 7 deletions train/comms/pt/comms.py → param/comm/comms.py
@@ -17,24 +17,24 @@
# pytorch
import torch

-from param_bench.train.comms.pt import comms_utils
-from param_bench.train.comms.pt.comms_utils import (
+from param.comm import comms_utils
+from param.comm.comms_utils import (
bootstrap_info_holder,
commsParamsHolderBase,
ensureTensorFlush,
paramCommsBench,
paramDeviceTimer,
paramStreamGuard,
)
-from param_bench.train.comms.pt.logger_utils import (
+from param.comm.logger_utils import (
benchType,
commsCollPerfMetrics,
commsPt2PtPerfMetrics,
commsQuantCollPerfMetrics,
customized_perf_loggers,
)

-from param_bench.train.comms.pt.pytorch_backend_utils import (
+from param.comm.pytorch_backend_utils import (
pt2ptPatterns,
supportedC10dBackends,
supportedCollectives,
@@ -1760,13 +1760,13 @@ def initBackend(
commsParams.nw_stack == "pytorch-dist"
and commsParams.backend in supportedC10dBackends
):
-from param_bench.train.comms.pt.pytorch_dist_backend import (
+from param.comm.pytorch_dist_backend import (
PyTorchDistBackend,
)

backendObj = PyTorchDistBackend(bootstrap_info, commsParams)
elif commsParams.nw_stack == "pytorch-xla-tpu":
-from param_bench.train.comms.pt.pytorch_tpu_backend import PyTorchTPUBackend
+from param.comm.pytorch_tpu_backend import PyTorchTPUBackend

backendObj = PyTorchTPUBackend(bootstrap_info, commsParams)
else:
Expand All @@ -1775,7 +1775,7 @@ def initBackend(
logging.warning(
f"Attempt loading customized backend {commsParams.backend} if registered. Note that this is not officially supported. Use it with caution and at your own risk."
)
-from param_bench.train.comms.pt.pytorch_backend_utils import (
+from param.comm.pytorch_backend_utils import (
customized_backend,
)

@@ -5,11 +5,11 @@

from typing import List, Tuple

-from param_bench.train.comms.pt import comms_utils
-from param_bench.train.comms.pt.comms_utils import commsArgs
-from param_bench.train.comms.pt.pytorch_backend_utils import supportedP2pOps
+from param.comm import comms_utils
+from param.comm.comms_utils import commsArgs
+from param.comm.pytorch_backend_utils import supportedP2pOps

-from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace
+from param.execution_trace import ExecutionTrace

tensorDtypeMap = {
"Tensor(int)": "int",
@@ -162,7 +162,7 @@ def _parseKinetoUnitrace(in_trace: List, target_rank: int) -> List:

if (
"name" in entry
and entry["name"] == "record_param_comms"
and entry["name"] == "record_param.comm"
and entry["args"]["rank"] == target_rank
):

@@ -256,7 +256,7 @@ def _parseExecutionTrace(

# Parse comms nodes
for node in in_trace.nodes.values():
-if node.name == "record_param_comms":
+if node.name == "record_param.comm":
shift = (
0 if len(node.inputs) == 8 or len(node.inputs) == 10 else 1
) # wait/barrier ops do not have an input tensor (len=7), shift index one over
8 changes: 4 additions & 4 deletions train/comms/pt/comms_utils.py → param/comm/comms_utils.py
@@ -23,7 +23,7 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

try:
-from param_bench.train.comms.pt.fb.internals import (
+from param.comm.fb.internals import (
fbInitProfiler,
fbSampleProfiler,
fbStartProfiler,
@@ -38,8 +38,8 @@

import numpy as np
import torch
-from param_bench.train.comms.pt.param_profile import paramTimer
-from param_bench.train.comms.pt.pytorch_backend_utils import (
+from param.comm.param_profile import paramTimer
+from param.comm.pytorch_backend_utils import (
backendFunctions,
collectiveArgsHolder,
customized_backend,
@@ -893,7 +893,7 @@ def __init__(


class paramCommsBench(ABC):
"""Abstract class for any param comms benchmark."""
"""Abstract class for any param.comm benchmark."""

def __init__(self, supportedNwstacks: List[str] = None) -> None:
self.supportedNwstacks = supportedNwstacks
@@ -8,7 +8,7 @@
from enum import Enum
from typing import abstractmethod, Dict, Optional

-from param_bench.train.comms.pt.pytorch_backend_utils import backendFunctions
+from param.comm.pytorch_backend_utils import backendFunctions

logger = logging.getLogger(__name__)

File renamed without changes.
@@ -9,7 +9,7 @@

import torch

-from param_bench.train.comms.pt.param_profile import paramTimer
+from param.comm.param_profile import paramTimer

from torch.distributed import ProcessGroup

@@ -13,14 +13,14 @@
import torch
import torch.distributed as dist
import torch.nn as nn
-from param_bench.train.comms.pt.param_profile import paramProfile
-from param_bench.train.comms.pt.pytorch_backend_utils import (
+from param.comm.param_profile import paramProfile
+from param.comm.pytorch_backend_utils import (
backendFunctions,
collectiveArgsHolder,
)

try:
-from param_bench.train.comms.pt.fb.internals import (
+from param.comm.fb.internals import (
all_to_all_internal,
all_to_allv_internal,
extend_distributed,
File renamed without changes.
1 change: 1 addition & 0 deletions param/comp/__init__.py
@@ -0,0 +1 @@
+__version__ = "2.0.0"
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -22,7 +22,7 @@ def init_logging(log_level):
FORMAT = "[%(asctime)s] %(process)d %(filename)s:%(lineno)-3d [%(levelname)s]: %(message)s"
else:
FORMAT = "[%(asctime)s] %(process)d [%(levelname)s]: %(message)s"
_logger = logging.getLogger("param_bench")
_logger = logging.getLogger("param")
_logger.setLevel(log_level)
# Reset the stream handlers to avoid multiple outputs.
_logger.handlers.clear()
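Because the hunk above renames the logger namespace from "param_bench" to "param", code that tunes the benchmark's log verbosity would now attach to the new name. A minimal sketch, assuming the logger configured by init_logging above:

import logging

# After this commit the library's handlers live under the "param" logger
# (previously "param_bench"), so verbosity is adjusted on that name.
logging.getLogger("param").setLevel(logging.DEBUG)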
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -7,19 +7,19 @@

import torch

-from param_bench.train.compute.python.lib import __version__, pytorch as lib_pytorch
-from param_bench.train.compute.python.lib.config import BenchmarkConfig
-from param_bench.train.compute.python.lib.init_helper import load_modules
-from param_bench.train.compute.python.lib.pytorch.benchmark import (
+from param.comp.python.lib import __version__, pytorch as lib_pytorch
+from param.comp.python.lib.config import BenchmarkConfig
+from param.comp.python.lib.init_helper import load_modules
+from param.comp.python.lib.pytorch.benchmark import (
make_default_benchmark,
)
-from param_bench.train.compute.python.lib.pytorch.config_util import (
+from param.comp.python.lib.pytorch.config_util import (
ExecutionPass,
get_benchmark_options,
get_sys_info,
OpExecutionMode,
)
-from param_bench.train.compute.python.workloads import pytorch as workloads_pytorch
+from param.comp.python.workloads import pytorch as workloads_pytorch
from torch.autograd.profiler import record_function
from torch.profiler import _ExperimentalConfig, ExecutionTraceObserver

@@ -296,15 +296,15 @@ def _run_ncu(self, shm, config):

ncu_bin = self.run_options["ncu_bin"]

param_bench_range = "param_bench:measure"
param_range = "param:measure"
start_input_id = self.input_config_queue[0]["id"]
out_file_prefix = self.run_options["out_file_prefix"]
timestamp = int(datetime.timestamp(datetime.now()))
ncu_log_file = f"{out_file_prefix}_{os.getpid()}_{timestamp}_ncu.log"
ncu_extra_args = self.run_options["ncu_args"]
ncu_options = (
f"--log-file {ncu_log_file} --csv --app-replay-buffer file --nvtx "
f"--nvtx-include {param_bench_range} --target-processes all"
f"--nvtx-include {param_range} --target-processes all"
)
if ncu_extra_args:
ncu_options += f" {ncu_extra_args}"
@@ -9,7 +9,7 @@
import torch
from torch.utils.collect_env import get_nvidia_driver_version, run as run_cmd

-from ...lib import __version__
+from param.comp import __version__


@enum.unique
@@ -60,7 +60,7 @@ def get_benchmark_options() -> Dict[str, Any]:
"nsys_iteration": 10,
"run_batch_size": 50,
"batch_cuda_device": 1,
"batch_cmd": "python -m param_bench.train.compute.python.pytorch.run_batch",
"batch_cmd": "python -m param.comp.python.pytorch.run_batch",
"resume_op_run_id": None,
"stop_op_run_id": None,
}
@@ -130,7 +130,7 @@ def get_sys_info():
"pid": os.getpid(),
"cwd": os.getcwd(),
"python_version": platform.python_version(),
"param_train_compute_version": __version__,
"param.comp_version": __version__,
"cuda_available": cuda_available,
**cuda_info,
"pytorch_version": torch.__version__,
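The options dict above is returned by get_benchmark_options, so callers now see the relocated batch-runner module path. A hedged usage sketch, assuming the param.comp package from this commit is importable:

from param.comp.python.lib.pytorch.config_util import get_benchmark_options

options = get_benchmark_options()
# After this commit the batch runner is launched via the new module path.
print(options["batch_cmd"])  # expected: python -m param.comp.python.pytorch.run_batch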
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -4,10 +4,10 @@
import torch
from fbgemm_gpu.split_table_batched_embeddings_ops import PoolingMode, WeightDecayMode

-from param_bench.train.compute.python.lib.pytorch.config_util import create_op_args
-from param_bench.train.compute.python.tools.execution_trace import NodeType
+from param.compute.python.lib.pytorch.config_util import create_op_args
+from param.execution_trace import ExecutionTrace

-from param_bench.train.compute.python.workloads.pytorch.split_table_batched_embeddings_ops import (
+from param.compute.python.workloads.pytorch.split_table_batched_embeddings_ops import (
SplitTableBatchedEmbeddingBagsCodegenInputDataGenerator,
SplitTableBatchedEmbeddingBagsCodegenOp,
)
@@ -153,7 +153,7 @@ def skip_op(op):
and "thread" in op.parent.name
and op.tid == 2
)
or (op.name == "record_param_comms" and op.inputs[3] == "init")
or (op.name == "record_param.comm" and op.inputs[3] == "init")
or (op.name == "aten::view" and "aten::view.dtype" in op.op_schema)
)

@@ -469,11 +469,11 @@ def generate_prefix(label, skip_nodes, et_input, cuda, compute_only, tf32, rows)
import os
import time
from datetime import datetime
-from param_bench.train.comms.pt import comms_utils
+from param.comm import comms_utils
import torch
-from param_bench.train.comms.pt import commsTraceReplay
-from param_bench.train.compute.python.tools.et_replay_utils import (
+from param.comm import commsTraceReplay
+from param.compute.python.tools.et_replay_utils import (
build_fbgemm_func,
build_torchscript_func,
generate_fbgemm_tensors,
@@ -482,8 +482,8 @@ def generate_prefix(label, skip_nodes, et_input, cuda, compute_only, tf32, rows)
is_qualified,
)
-from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace
-from param_bench.train.compute.python.tools.utility import trace_handler
+from param.compute.python.tools.execution_trace import ExecutionTrace
+from param.compute.python.tools.utility import trace_handler
print("PyTorch version: ", torch.__version__)
@@ -519,7 +519,7 @@ def dfs_traverse(node):
if "://" in \"{et_input}\":
try:
-from param_bench.train.compute.python.tools.fb.internals import (
+from param.compute.python.tools.fb.internals import (
read_remote_trace,
)
except ImportError:
@@ -655,7 +655,7 @@ def generate_suffix(warmup_iter, replay_iter, cuda_id, profile_replay):
end_time = datetime.now()
try:
-from param_bench.train.compute.python.tools.fb.internals import (
+from param.compute.python.tools.fb.internals import (
generate_query_url,
)
except ImportError:
File renamed without changes.