[Dy2St] Add place hash to scope cache key to avoid conflict with executor cache #71505

Merged
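
Summary (a reader's sketch of the change, not text from the PR): the Dy2St scope cache was previously keyed by program_id alone, so runs of the same program with inputs placed on different devices could share one cached scope and, per the title, conflict with the executor cache. The PR adds a calc_place_hash binding on the C++ side that hashes the device type of each input tensor's place, and folds that value into the scope cache key in pir_partial_program.py. A minimal Python sketch of the resulting key derivation (the device-type values and program_id are illustrative; hash_with_seed mirrors the helper added in the diff):

    def hash_with_seed(value, seed):
        # boost-style hash_combine, truncated to 64 bits as in the Python helper below
        return (seed + 0x9E3779B9 + (value << 6) + (value >> 2)) & ((1 << 64) - 1)

    # calc_place_hash folds each input tensor's device type into one integer;
    # 0 and 1 here stand in for two different device types.
    cpu_place_hash = hash_with_seed(0, 0)  # accumulator 0, device type 0
    gpu_place_hash = hash_with_seed(0, 1)  # accumulator 0, device type 1

    program_id = 12345
    # Same program, different input placement -> different scope cache keys.
    assert hash_with_seed(program_id, cpu_place_hash) != hash_with_seed(program_id, gpu_place_hash)
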
44 changes: 44 additions & 0 deletions paddle/fluid/pybind/eager_utils.cc
@@ -2853,6 +2853,46 @@ PyMODINIT_FUNC PyInit__static_op_arg_pre_cast_hook() {
return nullptr;
}

PyObject* CalcPlaceHash(PyObject* dummy, PyObject* tensors) {
PADDLE_ENFORCE_EQ(PyList_Check(tensors) || PyTuple_Check(tensors),
true,
common::errors::InvalidArgument(
"The input tensors should be a list/tuple of Tensor."));
std::vector<const paddle::Tensor*> tensors_vec;
const auto& GetSequenceItem = [](PyObject* seq, Py_ssize_t i) {
if (PyList_Check(seq)) {
return PyList_GetItem(seq, i);
} else {
return PyTuple_GetItem(seq, i);
}
};
const auto& GetSequenceSize = [](PyObject* seq) {
if (PyList_Check(seq)) {
return PyList_Size(seq);
} else {
return PyTuple_Size(seq);
}
};
for (Py_ssize_t i = 0; i < GetSequenceSize(tensors); ++i) {
PyObject* item = GetSequenceItem(tensors, i);
if (PyObject_TypeCheck(item, p_tensor_type)) {
tensors_vec.push_back(&(reinterpret_cast<TensorObject*>(item)->tensor));
} else {
PADDLE_THROW(common::errors::InvalidArgument(
"The input tensors should be a list of Tensor."));
}
}
const auto& hash_with_seed = [](int64_t value, int64_t seed) {
return seed + 0x9e3779b9 + (value << 6) + (value >> 2);
};
int64_t place_hash_key = 0;
for (const paddle::Tensor* tensor : tensors_vec) {
int64_t device_type = static_cast<int64_t>(tensor->place().GetType());
place_hash_key = hash_with_seed(place_hash_key, device_type);
}
return ToPyObject(place_hash_key);
}

/* ------------------ for auto parallel ----------------------- */

static PyMethodDef EagerUtilMethods[] = { // NOLINT
@@ -2868,6 +2908,10 @@ static PyMethodDef EagerUtilMethods[] = { // NOLINT
(PyCFunction)SetStaticOpArgPreCastHook,
METH_O,
"Set hook for pre cast a static OP argument."},
{"calc_place_hash",
(PyCFunction)CalcPlaceHash,
METH_O,
"Calculate the hash value by tensors place."},
{nullptr, nullptr, 0, nullptr}};

void BindEagerUtils(PyObject* module) {
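
From Python, the new binding is reachable as paddle.base.libpaddle.calc_place_hash, which is what _calc_input_places_hash calls in the next file. A small usage sketch, assuming a Paddle build that already contains this PR:

    import paddle

    x = paddle.to_tensor([1.0, 2.0])
    y = paddle.to_tensor([3.0, 4.0])

    # Accepts a list or tuple of Tensors; only the device type of each
    # tensor's place feeds the hash, so both calls return the same value.
    key_from_list = paddle.base.libpaddle.calc_place_hash([x, y])
    key_from_tuple = paddle.base.libpaddle.calc_place_hash((x, y))
    assert key_from_list == key_from_tuple
    # Passing a non-Tensor element raises an InvalidArgument error.
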
42 changes: 32 additions & 10 deletions python/paddle/jit/dy2static/pir_partial_program.py
@@ -51,6 +51,11 @@
FAKE_VALUE_NAME = "FakeValue"


+def hash_with_seed(value, seed):
+    result = seed + 0x9E3779B9 + (value << 6) + (value >> 2)
+    return result & ((1 << 64) - 1)


def get_value_name(value):
if is_fake_value(value):
return FAKE_VALUE_NAME
@@ -718,12 +723,18 @@ def __call__(self, inputs):
in_vars = self._prepare_inputs(inputs)
out_vars = self._prepare_outputs()
attrs = self._prepare_attributes(in_sot_mode=False)
+        inputs = self._valid_vars(in_vars)
_legacy_C_ops.pir_run_program(
-            self._valid_vars(in_vars),
+            inputs,
self._valid_vars(self._params),
self._valid_vars(out_vars),
self._create_scope_vec(
-                program_id=self.program_id, use_scope_cache=True
+                cache_key=(
+                    hash_with_seed(
+                        self.program_id, self._calc_input_places_hash(inputs)
+                    )
+                ),
+                use_scope_cache=True,
),
self._cuda_graph_vec,
*attrs,
@@ -737,12 +748,18 @@ def sot_call(self, inputs):
"""
out_vars = self._prepare_outputs()
attrs = self._prepare_attributes(in_sot_mode=True)
+        inputs = self._valid_vars(inputs)
_legacy_C_ops.pir_run_program(
-            self._valid_vars(inputs),
+            inputs,
self._valid_vars(self._params),
self._valid_vars(out_vars),
self._create_scope_vec(
-                program_id=self.program_id, use_scope_cache=True
+                cache_key=(
+                    hash_with_seed(
+                        self.program_id, self._calc_input_places_hash(inputs)
+                    )
+                ),
+                use_scope_cache=True,
),
self._cuda_graph_vec,
*attrs,
@@ -767,19 +784,24 @@ def origin_runnable_program(self) -> RunnableProgram:
def add_hooker(self, hooker):
self._hookers.append(hooker)

-    def _get_scope(self, program_id=None, use_scope_cache=False):
+    def _get_scope(self, cache_key=None, use_scope_cache=False):
if not use_scope_cache:
return core.Scope()
-        if program_id not in self._scope_cache:
-            self._scope_cache[program_id] = []
-        cached_scopes = self._scope_cache[program_id]
+        if cache_key not in self._scope_cache:
+            self._scope_cache[cache_key] = []
+        cached_scopes = self._scope_cache[cache_key]
for scope in cached_scopes:
if scope._can_reused:
return scope
scope = core.Scope()
cached_scopes.append(scope)
return scope

+    def _calc_input_places_hash(self, inputs):
+        if not inputs:
+            return 0
+        return paddle.base.libpaddle.calc_place_hash(inputs)

# whole
@switch_to_static_graph
def _create_program(self, is_infer_mode=False) -> RunnableProgram:
@@ -1171,9 +1193,9 @@ def _prepare_outputs(self):
self._outputs.var_list
)

-    def _create_scope_vec(self, program_id=None, use_scope_cache=False):
+    def _create_scope_vec(self, cache_key=None, use_scope_cache=False):
inner_scope = self._get_scope(
-            program_id=program_id, use_scope_cache=use_scope_cache
+            cache_key=cache_key, use_scope_cache=use_scope_cache
)
return [inner_scope]
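
The cache structure around the new key is unchanged: each key maps to a list of scopes, and a scope is handed out only while its _can_reused flag permits. A simplified stand-alone model of that pooling behavior (plain Python; FakeScope and the literal keys are illustrative, not Paddle types):

    class FakeScope:
        def __init__(self):
            self._can_reused = True  # Paddle clears this while a run owns the scope

    scope_cache = {}

    def get_scope(cache_key, use_scope_cache=True):
        if not use_scope_cache:
            return FakeScope()
        cached_scopes = scope_cache.setdefault(cache_key, [])
        for scope in cached_scopes:
            if scope._can_reused:
                return scope
        scope = FakeScope()
        cached_scopes.append(scope)
        return scope

    # Keys that differ only by input placement now get separate scope pools,
    # so a CPU run and a GPU run of the same program no longer share a scope.
    assert get_scope(cache_key=101) is not get_scope(cache_key=202)
    assert get_scope(cache_key=101) is get_scope(cache_key=101)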
