Skip to content

Commit

Permalink
Reset schedule earlier to allow overlap with ggml graph computation on device (ggerganov#6933)
Browse files Browse the repository at this point in the history

* Reset schedule earlier to allow overlap with graph computation on device
  • Loading branch information
agray3 authored Apr 26, 2024
1 parent 0c4d489 commit 928e0b7
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
12 changes: 7 additions & 5 deletions ggml-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1784,12 +1784,14 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {

// Reset the scheduler's per-run bookkeeping so it is ready for the next run.
//
// The hash-set keys, tensor backend ids and tensor copies are cleared only when
// the scheduler is not already flagged as reset, so calling this function again
// on an already-reset scheduler skips the memsets. The allocation flag is
// cleared on every call.
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
    // reset state for the next run
    if (!sched->is_reset) {
        // all three arrays are cleared over hash_set.size elements
        size_t hash_size = sched->hash_set.size;
        memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
        // memset fills bytes: every byte becomes 0xFF, which reads back as -1
        // for signed integer elements
        memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
        memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);

        sched->is_reset = true;
    }
    sched->is_alloc = false;
}

Expand Down
4 changes: 4 additions & 0 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11473,6 +11473,10 @@ static int llama_decode_internal(
}
}

// Reset state for the next token before backend sync, to allow the CPU activities in the reset to
// overlap with device computation.
ggml_backend_sched_reset(lctx.sched);

return 0;
}

Expand Down

0 comments on commit 928e0b7

Please sign in to comment.