Skip to content

Commit

Permalink
Update from the failed branch where we try to replace core exec_dep t…
Browse files Browse the repository at this point in the history
…o this one.

The main reason the core exec_dep cannot be replaced with this one is that
hop estimation is not exact. There are cases where, after chain decomposition,
we simply cannot find how many hops there are along the chain (because hops
are the longest distance, not the shortest). This doesn't seem easily solvable
unless we start to record distances within a chain.
  • Loading branch information
liuliu committed Dec 3, 2024
1 parent 77b1346 commit 787623a
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 42 deletions.
32 changes: 29 additions & 3 deletions lib/nnc/_ccv_nnc_symbolic_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,36 @@ typedef struct {
ccv_sparse_matrix_t* deps;
} ccv_nnc_exec_dep_t;

ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_visit_t* const visit, const ccv_nnc_graph_visit_t* const reversed_visit);
int ccv_nnc_exec_dep_hop(const ccv_nnc_exec_dep_t exec_dep, const int d, ccv_sparse_matrix_vector_t* const vector, const int dd);
int ccv_nnc_exec_dep_check(const ccv_nnc_exec_dep_t exec_dep, const int d, const int dd);
ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_visit_t* const visit);
void ccv_nnc_exec_dep_free(const ccv_nnc_exec_dep_t exec_dep);

// Estimate how many hops separate node d from node dd, where dd is expected to be d's ancestor.
// - exec_dep: the chain decomposition (chain_ids / chain_pos per node, plus the sparse deps matrix).
// - d: the node whose dependencies are queried.
// - vector: optional pre-fetched row of exec_dep.deps; when it is 0, the cell is looked up with d as the row index.
// - dd: the candidate ancestor.
// Returns -1 when the deps matrix has no usable entry reaching dd. Note that when d and dd share
// a chain, the plain position difference is returned, which is 0 or negative if dd is not
// strictly ahead of d on that chain.
inline static int ccv_nnc_exec_dep_hop(const ccv_nnc_exec_dep_t exec_dep, const int d, ccv_sparse_matrix_vector_t* const vector, const int dd)
{
	const int ancestor_chain = exec_dep.chain_ids[dd];
	const int ancestor_pos = exec_dep.chain_pos[dd];
	// Both nodes sit on one chain: the distance is simply the difference of their positions.
	if (ancestor_chain == exec_dep.chain_ids[d])
		return exec_dep.chain_pos[d] - ancestor_pos;
	// Otherwise look up what d records against dd's chain, reusing the caller-supplied row if present.
	ccv_numeric_data_t dep;
	if (vector)
		dep = ccv_get_sparse_matrix_cell_from_vector(exec_dep.deps, vector, ancestor_chain);
	else
		dep = ccv_get_sparse_matrix_cell(exec_dep.deps, d, ancestor_chain);
	// No entry, a non-positive recorded position, or a recorded position before dd: dd is not an ancestor.
	if (!dep.i32 || dep.i32[0] <= 0 || dep.i32[0] < ancestor_pos)
		return -1;
	// i32[0] is the recorded position on dd's chain; i32[1] is added on top
	// (presumably the hops from that position to d -- TODO confirm against ccv_nnc_exec_dep_new).
	return dep.i32[0] - ancestor_pos + dep.i32[1];
}

// Test whether node dd is an ancestor of node d according to the chain decomposition in exec_dep.
// Returns non-zero when dd is an ancestor of d, 0 otherwise.
inline static int ccv_nnc_exec_dep_check(const ccv_nnc_exec_dep_t exec_dep, const int d, const int dd)
{
	const int ancestor_chain = exec_dep.chain_ids[dd];
	const int ancestor_pos = exec_dep.chain_pos[dd];
	if (exec_dep.chain_ids[d] != ancestor_chain)
	{
		// Different chains: consult what d records against dd's chain.
		const ccv_numeric_data_t dep = ccv_get_sparse_matrix_cell(exec_dep.deps, d, ancestor_chain);
		// A missing entry or a non-positive recorded position means no dependency on that chain.
		if (!dep.i32 || dep.i32[0] <= 0)
			return 0;
		// dd is an ancestor when the recorded position is at or past dd's position on the chain.
		return dep.i32[0] >= ancestor_pos;
	}
	// Same chain: dd is an ancestor only if d comes strictly later on it.
	return exec_dep.chain_pos[d] > ancestor_pos;
}

#endif

10 changes: 10 additions & 0 deletions lib/nnc/ccv_nnc_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,16 @@ static inline void ccv_nnc_graph_visit_free(ccv_nnc_graph_visit_t* graph_visit)
#define ccv_nnc_graph_visit_for(graph_visit, nodes, ...) \
CCV_NNC_GRAPH_VISIT_FOR1(graph_visit, nodes, ##__VA_ARGS__, _node_unused_, _index_unused_, _term_unused_)

// Loop header that walks graph_visit's node list backwards, from index size - 1 down to 0.
// For each entry it binds _index_ and _term_ from the visit record and _node_ to (nodes) + _index_.
// The macro opens two braces (an outer block and the for body) and leaves them open on purpose;
// the loop must be terminated with ccv_nnc_graph_visit_endfor, which supplies the matching "} }".
// The bound names are marked unused so that callers who ignore some of them get no warnings.
#define CCV_NNC_GRAPH_VISIT_FOR1_REVERSED(graph_visit, nodes, _node_, _index_, _term_, ...) { \
int _i_; \
for (_i_ = (graph_visit)->size - 1; _i_ >= 0; _i_--) { \
const int _index_ __attribute__((unused)) = (graph_visit)->node[_i_].index; \
const int _term_ __attribute__((unused)) = (graph_visit)->node[_i_].term; \
typeof ((nodes)) const _node_ __attribute__((unused)) = (nodes) + _index_; \

// User-facing form of the reversed loop. Callers may name the node, index and term variables
// through the variadic arguments; the trailing _node_unused_ / _index_unused_ / _term_unused_
// placeholders fill in whichever of those names the caller leaves out.
#define ccv_nnc_graph_visit_for_reversed(graph_visit, nodes, ...) \
CCV_NNC_GRAPH_VISIT_FOR1_REVERSED(graph_visit, nodes, ##__VA_ARGS__, _node_unused_, _index_unused_, _term_unused_)

#define ccv_nnc_graph_visit_endfor } }

#define CCV_NNC_GRAPH_VISIT_NEW_VISITOR1(_, _index_, _term_) \
Expand Down
45 changes: 9 additions & 36 deletions lib/nnc/ccv_nnc_symbolic_graph_chain_decomposition.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "_ccv_nnc_symbolic_graph.h"

// Implement the new method for exec_dep. We use chain decomposition such that each node only needs to log which chain and at which node to be dependent on.
ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_visit_t* const visit, const ccv_nnc_graph_visit_t* const reversed_visit)
ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_visit_t* const visit)
{
const int exec_symbol_info_size = graph->exec_symbol_info->rnum;
int* chain_ids = ccmalloc(sizeof(int) * exec_symbol_info_size * 2);
Expand All @@ -15,7 +15,9 @@ ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const gr
const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0);
int i, j;
// Go reverse order to generate the distance from sink.
ccv_nnc_graph_visit_for(reversed_visit, exec_symbol_info, node, idx, term) {
ccv_nnc_graph_visit_for_reversed(visit, exec_symbol_info, node, idx, term) {
if (node->flags & CCV_NNC_GRAPH_EXEC_DEAD)
continue;
chain_ids[idx] = -1;
if (!node->outgoings || node->outgoings->rnum == 0)
{
Expand All @@ -35,6 +37,8 @@ ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const gr
// Note that we cannot use depth so-far because then multiple exit nodes are equally good to "inherit" the chain selection.
int chain_count = 0;
ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx, term) {
if (node->flags & CCV_NNC_GRAPH_EXEC_DEAD)
continue;
int chain_id = chain_ids[idx];
if (chain_ids[idx] < 0)
{
Expand All @@ -45,7 +49,7 @@ ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const gr
}
if (!node->outgoings || node->outgoings->rnum == 0)
continue;
int depth = 0;
int depth = -1;
int next_idx = -1;
for (i = 0; i < node->outgoings->rnum; i++)
{
Expand All @@ -56,7 +60,8 @@ ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const gr
if (next_idx >= 0)
{
chain_ids[next_idx] = chain_id;
chain_pos[next_idx] = chain_pos[idx] + 1;
assert(reversed_depth[idx] - depth >= 1);
chain_pos[next_idx] = chain_pos[idx] + (reversed_depth[idx] - depth);
}
} ccv_nnc_graph_visit_endfor
if (exec_symbol_info_size < chain_count * 3) // Be more conservative on RAM usage.
Expand Down Expand Up @@ -135,38 +140,6 @@ ccv_nnc_exec_dep_t ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const gr
return exec_dep;
}

// Estimate how many hops separate node d from node dd, where dd is expected to be d's ancestor.
// - exec_dep: the chain decomposition (chain_ids / chain_pos per node, plus the sparse deps matrix).
// - d: the node whose dependencies are queried.
// - vector: optional pre-fetched row of exec_dep.deps; when it is 0, the cell is looked up with d as the row index.
// - dd: the candidate ancestor.
// Returns -1 when the deps matrix has no usable entry reaching dd. Note that when d and dd share
// a chain, the plain position difference is returned, which is 0 or negative if dd is not
// strictly ahead of d on that chain.
int ccv_nnc_exec_dep_hop(const ccv_nnc_exec_dep_t exec_dep, const int d, ccv_sparse_matrix_vector_t* const vector, const int dd)
{
	const int ancestor_chain = exec_dep.chain_ids[dd];
	const int ancestor_pos = exec_dep.chain_pos[dd];
	// Both nodes sit on one chain: the distance is simply the difference of their positions.
	if (ancestor_chain == exec_dep.chain_ids[d])
		return exec_dep.chain_pos[d] - ancestor_pos;
	// Otherwise look up what d records against dd's chain, reusing the caller-supplied row if present.
	ccv_numeric_data_t dep;
	if (vector)
		dep = ccv_get_sparse_matrix_cell_from_vector(exec_dep.deps, vector, ancestor_chain);
	else
		dep = ccv_get_sparse_matrix_cell(exec_dep.deps, d, ancestor_chain);
	// No entry, a non-positive recorded position, or a recorded position before dd: dd is not an ancestor.
	if (!dep.i32 || dep.i32[0] <= 0 || dep.i32[0] < ancestor_pos)
		return -1;
	// i32[0] is the recorded position on dd's chain; i32[1] is added on top
	// (presumably the hops from that position to d -- TODO confirm against ccv_nnc_exec_dep_new).
	return dep.i32[0] - ancestor_pos + dep.i32[1];
}

// Test whether node dd is an ancestor of node d according to the chain decomposition in exec_dep.
// Returns non-zero when dd is an ancestor of d, 0 otherwise.
int ccv_nnc_exec_dep_check(const ccv_nnc_exec_dep_t exec_dep, const int d, const int dd)
{
	const int ancestor_chain = exec_dep.chain_ids[dd];
	const int ancestor_pos = exec_dep.chain_pos[dd];
	if (exec_dep.chain_ids[d] != ancestor_chain)
	{
		// Different chains: consult what d records against dd's chain.
		const ccv_numeric_data_t dep = ccv_get_sparse_matrix_cell(exec_dep.deps, d, ancestor_chain);
		// A missing entry or a non-positive recorded position means no dependency on that chain.
		if (!dep.i32 || dep.i32[0] <= 0)
			return 0;
		// dd is an ancestor when the recorded position is at or past dd's position on the chain.
		return dep.i32[0] >= ancestor_pos;
	}
	// Same chain: dd is an ancestor only if d comes strictly later on it.
	return exec_dep.chain_pos[d] > ancestor_pos;
}

void ccv_nnc_exec_dep_free(const ccv_nnc_exec_dep_t exec_dep)
{
ccfree(exec_dep.chain_ids);
Expand Down
4 changes: 1 addition & 3 deletions lib/nnc/ccv_nnc_symbolic_graph_memory_reduction.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ void ccv_nnc_symbolic_graph_memory_reduction(ccv_nnc_symbolic_graph_t* const gra
if ((tensor_marked[d >> 5] & (1u << (d & 0x1f))))
tensor_marked[d >> 5] &= ~(1u << (d & 0x1f));
}
ccv_nnc_graph_visit_t* const reversed_visit = ccv_nnc_graph_visit_new(graph, reversed_nodes, exec_symbol_info_size, destinations, destination_size, sources, source_size, 0);
ccv_nnc_exec_dep_t exec_deps = ccv_nnc_exec_dep_new(graph, visit, reversed_visit);
ccv_nnc_graph_visit_free(reversed_visit);
ccv_nnc_exec_dep_t exec_deps = ccv_nnc_exec_dep_new(graph, visit);
// Now tensor_marked only contains the tensors that we think beneficial to reconvert. Find the best place to insert conversion.
ccv_nnc_conversion_info_t* const conversion_info = cccalloc(tensor_symbol_info_size, sizeof(ccv_nnc_conversion_info_t));
ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
Expand Down

0 comments on commit 787623a

Please sign in to comment.