-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtokenizer.mal
70 lines (62 loc) · 2.18 KB
/
tokenizer.mal
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
;; Build a set-like hash-map from a string: every character of `s` becomes a
;; key whose value is true, so membership can be tested with `contains?`.
(def! string->set
  (fn* [s]
    (let* [letters (seq s)
           pairs   (map (fn* [ch] [ch true]) letters)
           flat    (apply concat pairs)]
      ;; flat is an alternating key/value sequence: ch1 true ch2 true ...
      (apply hash-map flat))))
;; Characters skipped between tokens: space and newline.
(def! *whitespace-chars*
  (string->set " \n"))

;; Characters that form a complete token on their own.
(def! *single-chars-tokens*
  (string->set "[]{}()'`^@"))

;; Characters that terminate a word token (see read-word-token).
(def! *separator-chars*
  (string->set " \n[]{}('\"`,;)"))
;; True when `ch` is a key of *whitespace-chars*.
(def! whitespace?
  (fn* [ch]
    (contains? *whitespace-chars* ch)))

;; True when `ch` is a key of *single-chars-tokens*.
(def! single-char-token?
  (fn* [ch]
    (contains? *single-chars-tokens* ch)))

;; True when `ch` is a key of *separator-chars*.
(def! separator-char?
  (fn* [ch]
    (contains? *separator-chars* ch)))
;; Skip a comment: discard characters up to and including the next newline
;; (or up to end of input when there is none).
;; Returns [nil remaining-chars]; the nil means "no token produced".
(def! read-comment-token
  (fn* [chars]
    (cond
      ;; Input ran out before a newline was seen.
      (empty? chars)          [nil chars]
      ;; The newline ends the comment and is consumed with it.
      (= "\n" (first chars))  [nil (rest chars)]
      ;; Still inside the comment: drop this character and continue.
      :else                   (read-comment-token (rest chars)))))
;; Accumulate a word token (symbol, number, keyword, ...).
;; `word` is the text gathered so far, `chars` the unread characters.
;; Stops at end of input or at the first separator character, which is left
;; unread. Returns [word remaining-chars].
(def! read-word-token
  (fn* [word chars]
    (cond
      (empty? chars)                   [word chars]
      (separator-char? (first chars))  [word chars]
      :else                            (read-word-token (str word (first chars))
                                                        (rest chars)))))
;; Read the remainder of a string literal.
;; `word` holds the text gathered so far (the caller passes the opening
;; quote), `chars` the unread characters.
;; Returns [token remaining-chars], where token includes the closing quote.
;; Throws "Expected closing quotes, got EOF" when input ends before the
;; closing quote.
(def! read-string-token
  (fn* [word chars]
    (if (empty? chars)
      (throw "Expected closing quotes, got EOF")
      (let* [ch     (first chars)
             tail   (rest chars)
             grown  (str word ch)]
        (if (= "\"" ch)
          ;; Closing quote: the token is complete.
          [grown tail]
          (if (= "\\" ch)
            ;; Backslash: take the following character verbatim so an escaped
            ;; quote does not terminate the string.
            (read-string-token (str grown (first tail)) (rest tail))
            (read-string-token grown tail)))))))
;; Read one token from the front of `chars` (a non-empty sequence of
;; one-character strings).
;; Returns [token remaining-chars]. token is nil when the consumed input
;; produced no token (whitespace, a comma, or a comment).
(def! read-next-token
  (fn* [chars]
    (let* [c0 (first chars)
           rc (rest chars)]
      (cond
        (whitespace? c0)         [nil rc]
        ;; Commas are skipped like whitespace.
        (= "," c0)               [nil rc]
        ;; "~" alone or the two-character "~@". Peek with (first rc), which
        ;; returns nil on an empty sequence, rather than (nth chars 1), which
        ;; raises an out-of-range error when "~" is the last character.
        (= "~" c0)               (if (= "@" (first rc))
                                   ["~@" (rest rc)]
                                   [c0 rc])
        (single-char-token? c0)  [c0 rc]
        (= ";" c0)               (read-comment-token chars)
        ;; The opening quote is passed along so it becomes part of the token.
        (= "\"" c0)              (read-string-token c0 rc)
        :else                    (read-word-token "" chars)))))
;; Tokenize the character sequence `chars`, appending each token to `tokens`.
;; Returns the accumulated tokens once `chars` is exhausted. Reads that yield
;; a nil token (whitespace, commas, comments) add nothing.
(def! tokenize-chars
  (fn* [chars tokens]
    (if (empty? chars)
      tokens
      (let* [step       (read-next-token chars)
             tok        (first step)
             remaining  (nth step 1)
             collected  (if (nil? tok)
                          tokens
                          (concat tokens [tok]))]
        (tokenize-chars remaining collected)))))
;; Split the source string `s` into a sequence of token strings.
;; An empty string yields an empty token sequence.
(def! tokenize
  (fn* [s]
    (let* [chars (seq s)]
      ;; seq returns nil for an empty string; substitute an empty vector so
      ;; tokenize-chars never calls empty? on nil, which some hosts reject.
      (tokenize-chars (if (nil? chars) [] chars) []))))