Reorganize directories
TaekyungHeo committed May 3, 2024
1 parent fd333de commit 08f1e4e
Showing 93 changed files with 749 additions and 1,154 deletions.
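The reorganization is largely a package rename: modules that lived under param_bench.train.comms.pt and param_bench.train.compute.python move to the flatter param.comm, param.comp/param.compute, and top-level param modules, as the import changes below show. As a rough, hedged sketch of what this means for downstream code (assuming the renamed param package from this commit is installed), old imports map to new ones roughly like this:

# Hypothetical migration sketch; the authoritative mappings are the diffs below.
# Old layout (before this commit):
#   from param_bench.train.comms.pt import comms_utils
#   from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace
# New layout (after this commit):
from param.comm import comms_utils
from param.execution_trace import ExecutionTrace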
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
+build/
+pyproject/
+param/
+param.egg-info/
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to PARAM_Bench
+# Contributing to PARAM
We want to make contributing to this project as easy and transparent as
possible.

605 changes: 605 additions & 0 deletions README.md

Large diffs are not rendered by default.

File renamed without changes.
86 changes: 0 additions & 86 deletions docs/using_ET.md

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 7 additions & 7 deletions train/comms/pt/comms.py → param/comm/comms.py
@@ -17,24 +17,24 @@
# pytorch
import torch

-from param_bench.train.comms.pt import comms_utils
-from param_bench.train.comms.pt.comms_utils import (
+from param.comm import comms_utils
+from param.comm.comms_utils import (
bootstrap_info_holder,
commsParamsHolderBase,
ensureTensorFlush,
paramCommsBench,
paramDeviceTimer,
paramStreamGuard,
)
-from param_bench.train.comms.pt.logger_utils import (
+from param.comm.logger_utils import (
benchType,
commsCollPerfMetrics,
commsPt2PtPerfMetrics,
commsQuantCollPerfMetrics,
customized_perf_loggers,
)

-from param_bench.train.comms.pt.pytorch_backend_utils import (
+from param.comm.pytorch_backend_utils import (
pt2ptPatterns,
supportedC10dBackends,
supportedCollectives,
@@ -1760,13 +1760,13 @@ def initBackend(
commsParams.nw_stack == "pytorch-dist"
and commsParams.backend in supportedC10dBackends
):
-from param_bench.train.comms.pt.pytorch_dist_backend import (
+from param.comm.pytorch_dist_backend import (
PyTorchDistBackend,
)

backendObj = PyTorchDistBackend(bootstrap_info, commsParams)
elif commsParams.nw_stack == "pytorch-xla-tpu":
-from param_bench.train.comms.pt.pytorch_tpu_backend import PyTorchTPUBackend
+from param.comm.pytorch_tpu_backend import PyTorchTPUBackend

backendObj = PyTorchTPUBackend(bootstrap_info, commsParams)
else:
Expand All @@ -1775,7 +1775,7 @@ def initBackend(
logging.warning(
f"Attempt loading customized backend {commsParams.backend} if registered. Note that this is not officially supported. Use it with caution and at your own risk."
)
-from param_bench.train.comms.pt.pytorch_backend_utils import (
+from param.comm.pytorch_backend_utils import (
customized_backend,
)

@@ -5,11 +5,11 @@

from typing import List, Tuple

-from param_bench.train.comms.pt import comms_utils
-from param_bench.train.comms.pt.comms_utils import commsArgs
-from param_bench.train.comms.pt.pytorch_backend_utils import supportedP2pOps
+from param.comm import comms_utils
+from param.comm.comms_utils import commsArgs
+from param.comm.pytorch_backend_utils import supportedP2pOps

-from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace
+from param.execution_trace import ExecutionTrace

tensorDtypeMap = {
"Tensor(int)": "int",
@@ -162,7 +162,7 @@ def _parseKinetoUnitrace(in_trace: List, target_rank: int) -> List:

if (
"name" in entry
and entry["name"] == "record_param_comms"
and entry["name"] == "record_param.comm"
and entry["args"]["rank"] == target_rank
):

@@ -256,7 +256,7 @@ def _parseExecutionTrace(

# Parse comms nodes
for node in in_trace.nodes.values():
-if node.name == "record_param_comms":
+if node.name == "record_param.comm":
shift = (
0 if len(node.inputs) == 8 or len(node.inputs) == 10 else 1
) # wait/barrier ops do not have an input tensor (len=7), shift index one over
8 changes: 4 additions & 4 deletions train/comms/pt/comms_utils.py → param/comm/comms_utils.py
@@ -23,7 +23,7 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

try:
-from param_bench.train.comms.pt.fb.internals import (
+from param.comm.fb.internals import (
fbInitProfiler,
fbSampleProfiler,
fbStartProfiler,
@@ -38,8 +38,8 @@

import numpy as np
import torch
-from param_bench.train.comms.pt.param_profile import paramTimer
-from param_bench.train.comms.pt.pytorch_backend_utils import (
+from param.comm.param_profile import paramTimer
+from param.comm.pytorch_backend_utils import (
backendFunctions,
collectiveArgsHolder,
customized_backend,
@@ -893,7 +893,7 @@ def __init__(


class paramCommsBench(ABC):
"""Abstract class for any param comms benchmark."""
"""Abstract class for any param.comm benchmark."""

def __init__(self, supportedNwstacks: List[str] = None) -> None:
self.supportedNwstacks = supportedNwstacks
@@ -8,7 +8,7 @@
from enum import Enum
from typing import abstractmethod, Dict, Optional

-from param_bench.train.comms.pt.pytorch_backend_utils import backendFunctions
+from param.comm.pytorch_backend_utils import backendFunctions

logger = logging.getLogger(__name__)

File renamed without changes.
@@ -9,7 +9,7 @@

import torch

-from param_bench.train.comms.pt.param_profile import paramTimer
+from param.comm.param_profile import paramTimer

from torch.distributed import ProcessGroup

@@ -13,14 +13,14 @@
import torch
import torch.distributed as dist
import torch.nn as nn
-from param_bench.train.comms.pt.param_profile import paramProfile
-from param_bench.train.comms.pt.pytorch_backend_utils import (
+from param.comm.param_profile import paramProfile
+from param.comm.pytorch_backend_utils import (
backendFunctions,
collectiveArgsHolder,
)

try:
-from param_bench.train.comms.pt.fb.internals import (
+from param.comm.fb.internals import (
all_to_all_internal,
all_to_allv_internal,
extend_distributed,
File renamed without changes.
1 change: 1 addition & 0 deletions param/comp/__init__.py
@@ -0,0 +1 @@
+__version__ = "2.0.0"
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -22,7 +22,7 @@ def init_logging(log_level):
FORMAT = "[%(asctime)s] %(process)d %(filename)s:%(lineno)-3d [%(levelname)s]: %(message)s"
else:
FORMAT = "[%(asctime)s] %(process)d [%(levelname)s]: %(message)s"
_logger = logging.getLogger("param_bench")
_logger = logging.getLogger("param")
_logger.setLevel(log_level)
# Reset the stream handlers to avoid multiple outputs.
_logger.handlers.clear()
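Because the hunk above renames the logger namespace from "param_bench" to "param", code that tunes the benchmark's log verbosity would now attach to the new name. A minimal sketch, assuming the logger configured by init_logging above:

import logging

# After this commit the library's handlers live under the "param" logger
# (previously "param_bench"), so verbosity is adjusted on that name.
logging.getLogger("param").setLevel(logging.DEBUG)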
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -7,19 +7,19 @@

import torch

-from param_bench.train.compute.python.lib import __version__, pytorch as lib_pytorch
-from param_bench.train.compute.python.lib.config import BenchmarkConfig
-from param_bench.train.compute.python.lib.init_helper import load_modules
-from param_bench.train.compute.python.lib.pytorch.benchmark import (
+from param.comp.python.lib import __version__, pytorch as lib_pytorch
+from param.comp.python.lib.config import BenchmarkConfig
+from param.comp.python.lib.init_helper import load_modules
+from param.comp.python.lib.pytorch.benchmark import (
make_default_benchmark,
)
-from param_bench.train.compute.python.lib.pytorch.config_util import (
+from param.comp.python.lib.pytorch.config_util import (
ExecutionPass,
get_benchmark_options,
get_sys_info,
OpExecutionMode,
)
-from param_bench.train.compute.python.workloads import pytorch as workloads_pytorch
+from param.comp.python.workloads import pytorch as workloads_pytorch
from torch.autograd.profiler import record_function
from torch.profiler import _ExperimentalConfig, ExecutionTraceObserver

@@ -296,15 +296,15 @@ def _run_ncu(self, shm, config):

ncu_bin = self.run_options["ncu_bin"]

param_bench_range = "param_bench:measure"
param_range = "param:measure"
start_input_id = self.input_config_queue[0]["id"]
out_file_prefix = self.run_options["out_file_prefix"]
timestamp = int(datetime.timestamp(datetime.now()))
ncu_log_file = f"{out_file_prefix}_{os.getpid()}_{timestamp}_ncu.log"
ncu_extra_args = self.run_options["ncu_args"]
ncu_options = (
f"--log-file {ncu_log_file} --csv --app-replay-buffer file --nvtx "
f"--nvtx-include {param_bench_range} --target-processes all"
f"--nvtx-include {param_range} --target-processes all"
)
if ncu_extra_args:
ncu_options += f" {ncu_extra_args}"
@@ -9,7 +9,7 @@
import torch
from torch.utils.collect_env import get_nvidia_driver_version, run as run_cmd

-from ...lib import __version__
+from param.comp import __version__


@enum.unique
@@ -60,7 +60,7 @@ def get_benchmark_options() -> Dict[str, Any]:
"nsys_iteration": 10,
"run_batch_size": 50,
"batch_cuda_device": 1,
"batch_cmd": "python -m param_bench.train.compute.python.pytorch.run_batch",
"batch_cmd": "python -m param.comp.python.pytorch.run_batch",
"resume_op_run_id": None,
"stop_op_run_id": None,
}
@@ -130,7 +130,7 @@ def get_sys_info():
"pid": os.getpid(),
"cwd": os.getcwd(),
"python_version": platform.python_version(),
"param_train_compute_version": __version__,
"param.comp_version": __version__,
"cuda_available": cuda_available,
**cuda_info,
"pytorch_version": torch.__version__,
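The options dict above is returned by get_benchmark_options, so callers now see the relocated batch-runner module path. A hedged usage sketch, assuming the param.comp package from this commit is importable:

from param.comp.python.lib.pytorch.config_util import get_benchmark_options

options = get_benchmark_options()
# After this commit the batch runner is launched via the new module path.
print(options["batch_cmd"])  # expected: python -m param.comp.python.pytorch.run_batch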
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -4,10 +4,10 @@
import torch
from fbgemm_gpu.split_table_batched_embeddings_ops import PoolingMode, WeightDecayMode

-from param_bench.train.compute.python.lib.pytorch.config_util import create_op_args
-from param_bench.train.compute.python.tools.execution_trace import NodeType
+from param.compute.python.lib.pytorch.config_util import create_op_args
+from param.execution_trace import ExecutionTrace

-from param_bench.train.compute.python.workloads.pytorch.split_table_batched_embeddings_ops import (
+from param.compute.python.workloads.pytorch.split_table_batched_embeddings_ops import (
SplitTableBatchedEmbeddingBagsCodegenInputDataGenerator,
SplitTableBatchedEmbeddingBagsCodegenOp,
)
@@ -153,7 +153,7 @@ def skip_op(op):
and "thread" in op.parent.name
and op.tid == 2
)
or (op.name == "record_param_comms" and op.inputs[3] == "init")
or (op.name == "record_param.comm" and op.inputs[3] == "init")
or (op.name == "aten::view" and "aten::view.dtype" in op.op_schema)
)

@@ -469,11 +469,11 @@ def generate_prefix(label, skip_nodes, et_input, cuda, compute_only, tf32, rows)
import os
import time
from datetime import datetime
-from param_bench.train.comms.pt import comms_utils
+from param.comm import comms_utils
import torch
-from param_bench.train.comms.pt import commsTraceReplay
-from param_bench.train.compute.python.tools.et_replay_utils import (
+from param.comm import commsTraceReplay
+from param.compute.python.tools.et_replay_utils import (
build_fbgemm_func,
build_torchscript_func,
generate_fbgemm_tensors,
@@ -482,8 +482,8 @@ def generate_prefix(label, skip_nodes, et_input, cuda, compute_only, tf32, rows)
is_qualified,
)
-from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace
-from param_bench.train.compute.python.tools.utility import trace_handler
+from param.compute.python.tools.execution_trace import ExecutionTrace
+from param.compute.python.tools.utility import trace_handler
print("PyTorch version: ", torch.__version__)
@@ -519,7 +519,7 @@ def dfs_traverse(node):
if "://" in \"{et_input}\":
try:
-from param_bench.train.compute.python.tools.fb.internals import (
+from param.compute.python.tools.fb.internals import (
read_remote_trace,
)
except ImportError:
@@ -655,7 +655,7 @@ def generate_suffix(warmup_iter, replay_iter, cuda_id, profile_replay):
end_time = datetime.now()
try:
-from param_bench.train.compute.python.tools.fb.internals import (
+from param.compute.python.tools.fb.internals import (
generate_query_url,
)
except ImportError:
File renamed without changes.