% chapterGraph.tex
\chapter{Graph}
\section{Basic}
\runinhead{Graph representation.} Represent the graph by a map $V$ from each vertex to its list of neighbors; this mapping encodes the edge set $E$.
\begin{python}
V = defaultdict(list)
\end{python}
\runinhead{Complexity.} Basic complexities:
\begin{table}
\begin{tabular}{lll}
\hline\noalign{\smallskip}
\textbf{Algorithm} & \textbf{Time} & \textbf{Space}\\
\noalign{\smallskip}\hline\noalign{\smallskip}
dfs & $O(|E|)$ & $O(|V|), O(\text{longest path})$ \\
bfs & $O(|E|)$ & $O(|V|)$ \\
\noalign{\smallskip}\hline\noalign{\smallskip}
\end{tabular}
\caption{Time complexities}
\end{table}
\runinhead{Graph \& Tree.} For an undirected graph to be a tree, it needs to satisfy two conditions:
\begin{enumerate}
\item Acyclic
\item All connected
\end{enumerate}
\section{DFS}
\rih{Number of Islands.} The most fundamental and classical problem.
\begin{python}
11000
11000
00100
00011
Answer: 3
\end{python}
\rih{Clue}:
\begin{enumerate}
\item Iterative DFS
\end{enumerate}
\begin{python}
class Solution(object):
    def __init__(self):
        # 4-directional neighbor offsets: up, down, left, right
        self.dirs = [(-1, 0), (1, 0), (0, -1), (0, 1)]

    def numIslands(self, grid):
        """Count the connected components of "1" cells in ``grid``.

        Each unvisited "1" seeds one dfs that marks its whole island
        visited, so every island is counted exactly once.
        """
        if not grid or not grid[0]:  # empty input: no islands
            return 0
        m, n = len(grid), len(grid[0])  # fixed: m, n were undefined
        cnt = 0
        visited = [[False for _ in range(n)]
                   for _ in range(m)]
        for i in range(m):
            for j in range(n):
                if not visited[i][j] and grid[i][j] == "1":
                    self.dfs(grid, i, j, visited)
                    cnt += 1
        return cnt
\end{python}
\begin{python}
def dfs(self, grid, i, j, visited):
    """Flood fill: mark cell (i, j) and every 4-connected "1"
    reachable from it as visited."""
    rows, cols = len(grid), len(grid[0])
    visited[i][j] = True
    for di, dj in self.dirs:
        ni, nj = i + di, j + dj
        inside = 0 <= ni < rows and 0 <= nj < cols
        if inside and not visited[ni][nj] and grid[ni][nj] == "1":
            self.dfs(grid, ni, nj, visited)
\end{python}
If the islands are constantly updating and the query for number of islands is called multiple times, need to use Union-Find (Section \ref{section:unionFind}) to reduce each query's complexity from $O(mn)$ to $O(\log mn)$.
Without updating, DFS itself is enough.
\section{BFS}
\subsection{BFS with Abstract Level}
BFS goes through the vertices level by level in a queue.
Distance can be arbitrarily defined. BFS can start with a set of vertices in abstract level of distance, not necessarily neighboring vertices.
Example: $-1$ denotes obstacles, $0$ denotes targets, calculate all other vertices' Manhattan distance to its nearest target:
$$
\begin{bmatrix}
\infty & -1 & 0 & \infty \\
\infty & \infty & \infty & -1 \\
\infty & -1 & \infty & -1 \\
0 & -1 & \infty & \infty \\
\end{bmatrix}
$$
Then it is calculated as:
$$
\begin{bmatrix}
3 & -1 & 0 & 1 \\
2 & 2 & 1 & -1 \\
1 & -1 & 2 & -1 \\
0 & -1 & 3 & 4 \\
\end{bmatrix}
$$
\rih{Code:}
\begin{python}
self.dirs = ((-1, 0), (1, 0), (0, -1), (0, 1))
def wallsAndGates(self, mat):
    """Multi-source BFS: fill every reachable cell of ``mat`` in place
    with its Manhattan distance to the nearest 0 (target).

    -1 cells are obstacles; unreachable cells keep their value.
    """
    if not mat or not mat[0]:
        return
    m, n = len(mat), len(mat[0])  # fixed: m, n were undefined
    q = [
        (i, j)
        for i, row in enumerate(mat)
        for j, cell in enumerate(row)
        if cell == 0
    ]
    for i, j in q:  # q grows during iteration; a plain list works as the queue
        for d in self.dirs:
            I, J = i + d[0], j + d[1]
            # relax only when strictly closer; this also skips -1 obstacles
            # (fixed: the original multi-line condition lacked parentheses)
            if (0 <= I < m and 0 <= J < n
                    and mat[I][J] > mat[i][j] + 1):
                mat[I][J] = mat[i][j] + 1
                q.append((I, J))
\end{python}
\section{Detect Acyclic}
\begin{enumerate}
\item \pythoninline{path} represent the current path, and it is reset after a dfs.
\item \pythoninline{visited} should be updated only in the end of the dfs.
\item For directed graph:
\begin{enumerate}
\item Should dfs for all neighbors except for vertices in \pythoninline{visited}, to avoid revisiting. For example, avoid revisiting A, B when start from C in the graph $C \rightarrow A \rightarrow B$.
\item Excluding predecessor \pythoninline{pi} is wrong considering the case of $A \leftrightarrow B$
\end{enumerate}
\item For undirected graph:
\begin{enumerate}
\item Should dfs for all neighbors except for the predecessor \pythoninline{pi}. $A-B$.
\item Excluding neighbors in \pythoninline{visited} is redundant, due to \pyinline{pi}.
\end{enumerate}
\end{enumerate}
\subsection{Directed Graph}
Detect cycles (any) in directed graph.
\begin{python}
def dfs(self, V, v, visited, path):
    """Return True iff no cycle is reachable from ``v``.

    ``path`` holds the vertices on the current recursion stack; meeting
    one again means a back edge, i.e. a cycle.  ``visited`` collects
    vertices whose subtrees are already known to be acyclic.
    """
    if v in path:  # back edge
        return False
    path.add(v)
    for nbr in (u for u in V[v] if u not in visited):
        if not self.dfs(V, nbr, visited, path):
            return False
    path.remove(v)
    visited.add(v)
    return True
\end{python}
\subsection{Undirected Graph}
Detect cycles (any) in undirected graph.
\begin{python}
def dfs(self, V, v, pi, visited, path):
    """Return True iff the component containing ``v`` is acyclic.

    ``pi`` is the vertex we arrived from; the edge straight back to it
    is the one legitimate revisit in an undirected graph, so skip it.
    """
    if v in path:  # reached an ancestor via a non-tree edge -> cycle
        return False
    path.add(v)
    for nbr in V[v]:
        if nbr == pi:  # don't walk back along the tree edge
            continue
        if not self.dfs(V, nbr, v, visited, path):
            return False
    path.remove(v)
    visited.add(v)
    return True
\end{python}
\section{Directed Graph}
Use \pyinline{G = defaultdict(dict)} to represent directed graph, so that later on the edge weight can be accessed as \pyinline{G[s][e]}. The \pyinline{s} start and \pyinline{e} end are the vertices, and \pyinline{G[s][e]} returns edge weight.
\runinhead{DFS.} DFS in directed graph:
\begin{python}
def dfs(self, G, s, e, path):
    """Return the product of edge weights along some s -> e path,
    or INVALID when no path exists.

    ``path`` holds already-explored vertices so the search cannot loop.
    # assumes INVALID is the module-level "no result" sentinel -- the
    # original compared against a hard-coded -1.0 here, which silently
    # breaks if INVALID is ever changed; compare against the sentinel.
    """
    if s not in G or e not in G:
        return INVALID
    if e in G[s]:  # direct edge: its weight is the answer
        return G[s][e]
    for nbr in G[s]:
        if nbr not in path:
            path.add(nbr)
            val = self.dfs(G, nbr, e, path)
            if val != INVALID:  # fixed: was ``val != -1.0``
                return val * G[s][nbr]
            path.remove(nbr)
    return INVALID
\end{python}
\section{Paths}
\subsection{Euler Path - Every Edge Once}
An Eulerian path is a path in a graph which visits every edge exactly once ($\forall e \in E$). Vertices can be repeated.
Hierholzer's algorithm finds an Euler path; the version below assumes a directed graph.
\runinhead{Core clue.} The algorithm exhaustively visits all the edges during the dfs. We must \textbf{remove} each edge after visiting it; otherwise the search would traverse the same cycle forever.
\begin{python}
def findItinerary(self, tickets):
    """Reconstruct the lexically smallest Euler path over ``tickets``
    starting from 'JFK' (Hierholzer with min-heap adjacency lists)."""
    G = defaultdict(list)
    for src, dst in tickets:
        heapq.heappush(G[src], dst)  # heap lexical order
    itinerary = deque()
    self.dfs(G, 'JFK', itinerary)
    return list(itinerary)

def dfs(self, G, cur, ret):
    """Consume (remove) every outgoing edge of ``cur`` before recording
    ``cur``; popping the edge is what prevents infinite cycling."""
    while G[cur]:
        self.dfs(G, heapq.heappop(G[cur]), ret)
    ret.appendleft(cur)
\end{python}
Instead of using \pyinline{heapq}, we can use a \pyinline{deque} with each adjacency list sorted once up front.
\begin{python}
def findItinerary(self, tickets):
    """Same Euler-path reconstruction as the heapq version, but each
    adjacency list is sorted once into a deque instead of heapified."""
    G = defaultdict(deque)
    for s, e in tickets:
        G[s].append(e)
    for s, l in G.items():
        G[s] = deque(sorted(l))  # popleft then yields lexical order
    ret = deque()
    self.dfs(G, "JFK", ret)
    return list(ret)

def dfs(self, G, cur, ret):
    while G[cur]:
        # need to remove the edge after visiting
        nbr = G[cur].popleft()
        self.dfs(G, nbr, ret)
    ret.appendleft(cur)  # fixed: this line was missing, so ret stayed empty
\end{python}
\subsection{Hamiltonian Path - Every Vertex Once}
A Hamiltonian path is a path in a graph which visits every vertex exactly once ($\forall v \in V$). Deciding whether such a path exists is NP-complete.
\section{Topological Sorting}
For a graph $G=\{V, E\}$, if $A \rightarrow B $, then $A$ is before $B$ in the ordered list.
\subsection{Algorithm}
\rih{Core clues}:
\begin{enumerate}
\item \textbf{DFS neighbors first}. If the neighbors of current node is $\neg$visited, then dfs the neighbors
\item \textbf{Process current node}. After visiting all the neighbors, then visit the current node and push it to the result queue.
\end{enumerate}
Notice:
\begin{enumerate}
\item Need to check ascending order or descending order.
\item Need to \textbf{detect cycle}; thus the dfs need to construct result queue and detect cycle simultaneously, by using two sets: $visited$ and $path$.
\end{enumerate}
\begin{python}
from collections import deque

def topological_sort(self, V):
    """Topological sort via post-order DFS.

    Returns the vertices of ``V`` in topological order, or [] when the
    graph contains a cycle.
    """
    visited = set()
    order = deque()
    for source in V.keys():
        if source in visited:
            continue
        if not self.dfs_topo(V, source, visited, set(), order):
            return []  # contains cycle
    return list(order)

# return whether the subgraph reachable from ``v`` is acyclic
def dfs_topo(self, V, v, visited, path, ret) -> bool:
    if v in path:  # ``v`` is already on the current stack -> cycle
        return False
    path.add(v)
    for nbr in V[v]:
        if nbr in visited:
            continue
        if not self.dfs_topo(V, nbr, visited, path, ret):
            return False
    path.remove(v)
    visited.add(v)
    ret.appendleft(v)  # post-order, prepended => topological order
    return True
# encode the visited state using 0, 1, 2 instead of two sets
def dfs_topo(
    self,
    G: Dict[int, List[int]],
    u: int,
    visited: Dict[int, int],
    ret: deque,
) -> bool:
    """
    Topological sort
    G = defaultdict(list)
    visited = defaultdict(int)
    # 0 not visited, 1 visiting, 2 visited
    pre-condition: u is not visited (0)
    """
    visited[u] = 1  # "visiting": u is on the current DFS stack
    for nbr in G[u]:
        if visited[nbr] == 1:  # back edge to the stack -> cycle
            return False
        if visited[nbr] == 0:
            # fixed: recursive call was self.topo_dfs, which does not exist
            if not self.dfs_topo(G, nbr, visited, ret):
                return False
    visited[u] = 2  # fully processed
    ret.appendleft(u)  # post-order prepend => topological order
    return True
\end{python}
The time complexity of topological sorting is $O(|E|+|V|)$, since it visits every edge and every vertex once.
\subsection{Applications}
\begin{enumerate}
\item Course scheduling problem with pre-requisite.
\end{enumerate}
\section{Union-Find}\label{section:unionFind}
\subsection{Simplified Union Find}
Simplified code with unbalanced size. Union-find and disjoint set are used interchangeably. Union-find emphasizes on algorithm while disjoint set emphasizes on data structure.
\rih{Core clues:}
\begin{enumerate}
\item \textbf{$\pi$ array}: an array storing each item's predecessor. The predecessors are lazily updated to the ancestor. When \pyinline{x == pi[x]}, \pyinline{x} is the ancestor (i.e.\ root).
\end{enumerate}
\begin{python}
class DisjointSet:
    """Simplified union-find: lazily-created nodes and path
    compression, but no size balancing."""

    def __init__(self):
        self.pi = {}  # item -> parent; a root satisfies pi[x] == x

    def union(self, x, y):
        """Merge the sets of x and y (y's root is hung under x's)."""
        root_x = self.find(x)
        root_y = self.find(y)
        self.pi[root_y] = root_x

    def find(self, x):
        """Return the root of x, inserting x as a fresh singleton when
        unseen; compresses the path on the way back out."""
        if x not in self.pi:
            self.pi[x] = x
        elif self.pi[x] != x:
            self.pi[x] = self.find(self.pi[x])
        return self.pi[x]
\end{python}
Note: for the final count of components, a final \pyinline{find} must be run on every node to update its ancestor pointer.
\begin{python}
component_cnt = len(set(
ds.find(x)
for x in ds.pi.keys()
))
\end{python}
Improvements:
\begin{enumerate}
\item Weighting: size-balanced tree
\item Path Compression.
\end{enumerate}
\subsection{Algorithm}
Weighted union-find with path compression.\\
\rih{Core clues:}
\begin{enumerate}
\item \textbf{$\pi$ array}: predecessor pi.
\item \textbf{Size-balanced}: merge the tree according to the size to maintain balance.
\item \textbf{Path compression}: Make the ptr in $\pi$ array to point to its root rather than its immediate parent.
\end{enumerate}
\begin{figure}[]
\centering
\subfloat{\includegraphics[width=\linewidth]{uf}}
\caption{Weighted quick-union traces}
\label{fig:union_find}
\end{figure}
\begin{python}
class UnionFind(object):  # or DisjointSet
    """Weighted union-find with path compression."""

    def __init__(self):
        self.pi = {}  # item -> parent pointer
        self.sz = {}  # root -> size of its tree

    def __len__(self):
        """Number of unions; only root nodes carry a size entry."""
        return len(self.sz)

    def add(self, x):
        """Register x as a fresh singleton set; no-op when known."""
        if x in self.pi:
            return
        self.pi[x] = x
        self.sz[x] = 1

    def root(self, x):
        """Return x's root, compressing the path along the way."""
        parent = self.pi[x]
        if x != parent:
            self.pi[x] = self.root(parent)
        return self.pi[x]

    def unionize(self, x, y):
        """Merge the sets of x and y, smaller tree under the larger."""
        r1 = self.root(x)
        r2 = self.root(y)
        if r1 == r2:
            return
        if self.sz[r1] > self.sz[r2]:
            r1, r2 = r2, r1  # keep r1 as the smaller tree
        # size balancing
        self.pi[r1] = r2
        self.sz[r2] += self.sz[r1]
        del self.sz[r1]

    def isunion(self, x, y):
        """Whether x and y share a set (False when either is unknown)."""
        if x not in self.pi or y not in self.pi:
            return False
        return self.root(x) == self.root(y)
\end{python}
\subsection{Complexity}
$m$ union-find with $n$ objects: $O(n)+m O(\lg n)$
\section{Axis Projection}
Project the mat dimension from 2D to 1D, using \textit{orthogonal axis}.
\runinhead{Smallest bounding box.} Given the location $(x, y)$ of one of the 1's, return the area of the smallest bounding box that encloses 1's.
$$
\begin{bmatrix}
0& 0& 1& 0 \\
0& 1& 1& 0 \\
0& 1& 0& 0 \\
\end{bmatrix}
$$
\rih{Clues:}
\begin{enumerate}
\item Project the 1's onto x-axis, binary search for the left bound and right bound of the bounding box.
\item We don't pre-project the axis beforehand, since it will take $O(mn)$ to collect the projected 1d array. Instead, we only project it during binary search when checking the mid item. Checking takes $O(m)$, searching takes $O(\log n)$.
\item Do the same for y-axis.
\end{enumerate}
Time complexity: $O(m\log n + n \log m)$, where $O(m), O(n)$ is for projection complexity.
\section{MST}
Minimum spanning tree.
\subsection{Kruskal's algorithm}
\textbf{Core clues:}
\begin{enumerate}
\item Vertices $v \in V$ are divided into different sets
\item Extract min edges to unionize the sets
\item Terminates when $\forall v\in V$ are in the same set.
\end{enumerate}
\textbf{Code:}
\begin{python}[mathescape]
def kruskal(G):
    """Kruskal's minimum spanning tree.

    G.V: iterable of vertices; G.E: list of edges sorted ascending by
    weight (NOTE(review): edges unpack as (u, v) pairs, so the weight
    presumably lives elsewhere -- confirm the edge representation).
    Returns the list of (u, v) edges selected for the tree.
    """
    ret = []
    uf = UnionFind()
    for v in G.V:
        uf.add(v)
    G.E.sort()  # sort by weights
    for u, v in G.E:
        if not uf.isunion(u, v):
            ret.append((u, v))  # fixed: was appended to undefined ``A``
            uf.unionize(u, v)
    return ret  # fixed: the result was never returned
\end{python}
Complexity: $O(|E|\log |E|)$.