-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.scm
195 lines (185 loc) · 7.16 KB
/
parser.scm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; File: PARSER.SCM
;; Author: Hoa Long Tam ([email protected])
;;
;; Adapted for use in Python from a Logo-in-Scheme interpreter written by Brian
;; Harvey ([email protected]), available at ~cs61a/lib/logo.scm
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Accessors for the list returned by py-read, which has the shape
;; (indentation token1 token2 ...).
(define (indentation line)
  ;; First element: the indentation count.
  (car line))
(define (tokens line)
  ;; Everything after the first element: the token list.
  (cdr line))
(define (make-line-obj line)
  ;; Builds a line-obj from LINE, splitting it with the indentation and tokens
  ;; selectors and passing both parts to the line-obj constructor.
  (let ((indent (indentation line))
        (toks (tokens line)))
    (instantiate line-obj indent toks)))
;; A mutable queue of tokens for one input line, consumed by the evaluator.
;; Instantiation variables: INDENTATION and TOKENS (the list of remaining
;; tokens).
(define-class (line-obj indentation tokens)
  ;; True when no tokens remain.
  (method (empty?)
    (null? tokens))
  ;; True (a non-#f list tail) when the remaining tokens are exactly
  ;; exit() or quit().
  (method (exit?)
    (member tokens '((exit |(| |)|) (quit |(| |)|))))
  ;; Returns the first remaining token without removing it.
  (method (peek)
    (car tokens))
  ;; Puts TOKEN back on the front of the remaining tokens.
  (method (push token)
    (set! tokens (cons token tokens)))
  ;; Removes and returns the first remaining token.
  (method (next)
    (let ((head (car tokens)))
      (set! tokens (cdr tokens))
      head)))
;; Parser utility functions

;; Converts a single character into the one-character symbol with that name.
(define (char->symbol ch)
  (string->symbol (string ch)))

;; Characters that can begin an operator token.
(define operators (string->list "+-*/%<>!="))

;; Predicates for the punctuation symbols produced by the tokenizer.
(define (comma? symbol) (eq? '|,| symbol))
(define (colon? symbol) (eq? '|:| symbol))

;; Symbols for each kind of opening and closing brace.
(define open-brace-symbol '|{|)
(define close-brace-symbol '|}|)
(define open-paren-symbol '|(|)
(define close-paren-symbol '|)|)
(define open-bracket-symbol '|[|)
(define close-bracket-symbol '|]|)
(define (char-newline? char)
  ;; True when CHAR ends a line: either a bare newline, or a carriage return
  ;; whose next character on the current input port is a newline.  In the
  ;; second case that newline is consumed and returned as the (true) result.
  ;;
  ;; The carriage-return test looks at the port with peek-char, so it only
  ;; sees the character *after* CHAR when the caller has already consumed CHAR
  ;; with read-char.
  (cond ((eq? char #\newline) #t)
        ((eq? char #\return)
         (and (eq? (peek-char) #\newline)
              (read-char)))             ; chomp off the newline
        (else #f)))
;;;;
;; The main tokenizer.  Reads in a line from the current input port and
;; returns a list of the form (indentation token1 token2 token3 ...).  Turns
;; the line 'def foo(a,b):' into (def foo |(| a |,| b |)| :).
;;
;; Indentation is the number of leading space characters.  A line that ends
;; while a (, [ or { is still open is continued onto the following line(s).
;; Syntax problems are reported through read-error.
;;;
(define (py-read)

  (define (get-indent-and-tokens)
    ;; Counts the leading spaces, then tokenizes the rest of the line.
    (let count-spaces ((count 0))
      (if (eqv? (peek-char) #\space)
          (begin (read-char)
                 (count-spaces (+ count 1)))
          (cons count (get-tokens '())))))

  (define (reverse-brace char)
    ;; Maps a brace character to its partner, e.g. #\( to #\).
    (let ((result (assv char '((#\{ . #\}) (#\} . #\{)
                               (#\( . #\)) (#\) . #\()
                               (#\[ . #\]) (#\] . #\[)))))
      (if result
          (cdr result)
          (read-error "SyntaxError: bad closing brace: " char))))

  (define (get-tokens braces)
    ;; Reads in until the end of the line and breaks the stream of input into
    ;; a list of tokens.  Braces is a list of characters representing the
    ;; currently open brace ([, (, and {) tokens, innermost first, so it can
    ;; throw an error if braces are mismatched.  If it reaches the end of a
    ;; line while inside braces, it keeps reading until the braces are closed.
    ;;
    ;; Each token is bound with let before the recursive call so that the
    ;; characters are consumed before the rest of the line is read, whatever
    ;; order cons evaluates its arguments in.
    (let ((char (peek-char)))
      (cond
        ((char-newline? char)
         (read-char)
         (if (null? braces)
             '()
             (get-tokens braces)))
        ((eof-object? char)
         (if (null? braces)
             '()
             (read-error "SyntaxError: End of file inside expression")))
        ;; CHAR has only been peeked, so char-newline? cannot see the newline
        ;; that follows a carriage return.  Drop the carriage return and let
        ;; the newline clause above finish the line.
        ((eqv? char #\return)
         (read-char)
         (get-tokens braces))
        ((eqv? char #\space)
         (read-char)
         (get-tokens braces))
        ;; ignore-comment stops just before the newline (or at end of input);
        ;; resuming here consumes that newline and keeps the open-brace
        ;; bookkeeping intact instead of leaving a phantom blank line behind.
        ((eqv? char #\#)
         (ignore-comment)
         (get-tokens braces))
        ((memv char '(#\[ #\( #\{))
         (let ((sym (char->symbol (read-char))))
           (cons sym (get-tokens (cons char braces)))))
        ((memv char '(#\] #\) #\}))
         (if (and (not (null? braces))
                  (eqv? char (reverse-brace (car braces))))
             (let ((sym (char->symbol (read-char))))
               (cons sym (get-tokens (cdr braces))))
             (read-error "SyntaxError: mismatched brace: " char)))
        ((memv char '(#\, #\:))
         (let ((sym (char->symbol (read-char))))
           (cons sym (get-tokens braces))))
        ((memv char '(#\" #\'))
         (let ((str (list->string (get-string (read-char)))))
           (cons str (get-tokens braces))))
        ((memv char operators)
         (let ((op (get-operator)))
           (cons op (get-tokens braces))))
        ((char-numeric? char)
         (let ((num (get-num "")))
           (cons (if (string? num) (string->number num) num)
                 (get-tokens braces))))
        (else
         (let ((token (get-token (char->symbol (read-char)))))
           (cond
             ((and (string? token)
                   (> (string-length token) 1)
                   (eqv? (string-ref token 0) #\.)
                   (char-numeric? (string-ref token 1)))
              (cons (word (string->symbol (string-append "0" token)))
                    (get-tokens braces)))
             ((string? token)
              (cons (string->symbol token) (get-tokens braces)))
             (else (cons token (get-tokens braces)))))))))

  (define (get-token so-far)
    ;; Extends SO-FAR with every following letter, digit or underscore and
    ;; returns the result.  Stops cleanly at end of input.
    (let ((char (peek-char)))
      (if (and (char? char)
               (or (char-alphabetic? char)
                   (char-numeric? char)
                   (eqv? char #\_)))
          (get-token (word so-far (char->symbol (read-char))))
          so-far)))

  (define (get-num num-so-far)
    ;; Reads in a number.  Num-so-far is a Scheme word (it is converted back
    ;; into a Scheme number in get-tokens if necessary).  Accepts digits and
    ;; at most one decimal point; stops cleanly at end of input.
    (let scan ((so-far num-so-far) (seen-dot? #f))
      (let ((char (peek-char)))
        (cond ((eof-object? char) so-far)
              ((char-numeric? char)
               (scan (word so-far (char->symbol (read-char))) seen-dot?))
              ((and (not seen-dot?) (eqv? char #\.))
               (scan (word so-far (char->symbol (read-char))) #t))
              (else so-far)))))

  (define (get-operator)
    ;; Reads one operator token and returns it as a symbol.  let* is required:
    ;; CHAR must be consumed before NEXT is peeked, and plain let does not
    ;; promise any evaluation order for its bindings.
    (let* ((char (read-char))
           (next (peek-char))
           ;; PLAIN, or AUGMENTED (consuming the =) when an = follows.
           (maybe-assign
            (lambda (plain augmented)
              (if (eqv? next #\=)
                  (begin (read-char) augmented)
                  plain))))
      (cond ((eqv? char #\+) (maybe-assign '+ '+=))
            ((eqv? char #\-) (maybe-assign '- '-=))
            ((eqv? char #\%) (maybe-assign '% '%=))
            ((eqv? char #\<) (maybe-assign '< '<=))
            ((eqv? char #\>) (maybe-assign '> '>=))
            ((eqv? char #\=) (maybe-assign '= '==))
            ((eqv? char #\/) (maybe-assign '/ '/=))
            ((eqv? char #\!)
             (if (eqv? next #\=)
                 (begin (read-char) '!=)
                 (read-error "Unknown operator: !")))
            ((eqv? char #\*)
             (if (eqv? next #\*)
                 (begin (read-char)
                        (if (eqv? (peek-char) #\=)
                            (begin (read-char) '**=)
                            '**))
                 (maybe-assign '* '*=))))))

  (define (get-string type)
    ;; Reads in a string and returns a list of Scheme characters, up to, but
    ;; not including the closing quote.  Type is the Scheme character that
    ;; opened the string.  The first character returned by (read-char) when
    ;; this function is executed will be the first character of the desired
    ;; string.  Running out of input before the closing quote is an error
    ;; rather than an endless loop.
    (let ((char (read-char)))
      (cond ((eof-object? char)
             (read-error "SyntaxError: EOF while scanning string literal"))
            ((eqv? char type) '())
            (else (cons char (get-string type))))))

  (define (ignore-comment)
    ;; Discards characters up to, but not including, the next newline (or up
    ;; to end of input).
    (let ((next (peek-char)))
      (if (or (eof-object? next) (eqv? next #\newline))
          '*COMMENT-IGNORED*
          (begin (read-char)
                 (ignore-comment)))))

  (get-indent-and-tokens))
;; Error handler for py-read.  Discards the rest of the current input line
;; (through the newline, or up to end of input) and then applies py-error to
;; ARGS, so leftover characters from the bad line are not read as the next
;; line.
(define (read-error . args)
  (let skip-rest ((char (read-char)))
    (if (or (char-newline? char) (eof-object? char))
        (apply py-error args)
        (skip-rest (read-char)))))