This repository was archived by the owner on Aug 15, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathcodegen.rs
More file actions
246 lines (202 loc) · 7.49 KB
/
codegen.rs
File metadata and controls
246 lines (202 loc) · 7.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
use lexer::Lexer;
use std::vec_ng::Vec;
use syntax::ast;
use syntax::codemap::Span;
use syntax::diagnostic::SpanHandler;
use syntax::ext::base::AnyMacro;
use syntax::ext::base::ExtCtxt;
use syntax::ext::build::AstBuilder;
use syntax::util::small_vector::SmallVector;
// Value returned by `codegen`: the generated items plus what is needed
// to report a misuse of the macro.
// It implements `AnyMacro`, a trait whose methods are called by libsyntax
// on macro expansion.
struct CodeGenerator {
// diagnostic handler and span, kept so that an error can be
// reported when the macro is not called correctly
// (`make_expr` uses both for its fatal error)
handler: @SpanHandler,
span: Span,
// generated items; `make_items` hands a clone of them back
items: Vec<@ast::Item>
}
// Methods called by libsyntax on macro expansion, one per syntactic
// context. Only the item context produces something; the statement and
// expression contexts are rejected with a fatal diagnostic attached to
// the stored span.
impl AnyMacro for CodeGenerator {
    // item context: hand back a copy of the generated items
    fn make_items(&self) -> SmallVector<@ast::Item> {
        SmallVector::many(self.items.clone())
    }

    // statement context: not implemented. The error goes through the
    // span handler, exactly like `make_expr`, so that the user gets a
    // diagnostic located at the macro invocation instead of a bare
    // task failure.
    fn make_stmt(&self) -> @ast::Stmt {
        self.handler.span_fatal(self.span,
            "invoking rustlex on statement context is not implemented");
    }

    // expression context: rejected
    fn make_expr(&self) -> @ast::Expr {
        self.handler.span_fatal(self.span,
            "rustlex! invoked on expression context");
    }
}
// Builds the item declaring the `InputBuffer` struct of the generated
// lexer: a byte vector `buf` and an index `current_pos` into it.
// `cx` is the expansion context the declaration is quoted in.
// Fails (through `unwrap`) if the quotation does not yield an item.
#[inline(always)]
pub fn bufferStruct<'a>(cx: &mut ExtCtxt) -> @ast::Item {
    let quoted = quote_item!(cx,
        struct InputBuffer {
            buf: ~[u8],
            current_pos: uint
        }
    );
    quoted.unwrap()
}
// Builds the item declaring the `Lexer` struct of the generated lexer:
// the reader the input comes from (`stream`), the buffer holding the
// bytes read so far (`inp`) and the current condition (`condition`),
// which the generated `next` method uses as its starting state.
// `cx` is the expansion context the declaration is quoted in.
// Fails (through `unwrap`) if the quotation does not yield an item.
#[inline(always)]
pub fn lexerStruct<'a>(cx: &mut ExtCtxt) -> @ast::Item {
    let quoted = quote_item!(cx,
        struct Lexer {
            stream: ~std::io::Reader,
            inp: ~InputBuffer,
            condition: uint
        }
    );
    quoted.unwrap()
}
// Generates all the items making up the lexer described by `lex` and
// wraps them in a `CodeGenerator`.
//
// * `lex` - lexer description: automaton states (`auto.states`),
//   conditions (`conditions`) and user actions (`actions`)
// * `cx`  - expansion context used to build and quote the AST
// * `sp`  - span attached to the generated nodes and stored in the
//   result for error reporting
//
// The items are pushed in this order: the two tables, one constant per
// condition, INPUT_BUFSIZE, the InputBuffer and Lexer structs and
// finally the `impl Lexer` block.
pub fn codegen<'a>(lex: &Lexer, cx: &mut ExtCtxt, sp: Span) -> ~CodeGenerator {
    let nstates = lex.auto.states.len();
    let mut items = Vec::new();

    // tables
    // * trans_table: an array of N arrays of 256 uints, N being the number
    //   of states in the FSM, which gives the transitions between states
    // * accepting: an array of N uints, giving the action associated to
    //   each state
    let ty_vec = cx.ty(sp, ast::TyFixedLengthVec(
        cx.ty_ident(sp, cx.ident_of("uint")),
        cx.expr_uint(sp, 256)));

    let ty_transtable = cx.ty(sp, ast::TyFixedLengthVec(
        ty_vec,
        cx.expr_uint(sp, nstates)));

    let ty_acctable = cx.ty(sp, ast::TyFixedLengthVec(
        cx.ty_ident(sp, cx.ident_of("uint")),
        cx.expr_uint(sp, nstates)));

    // one row of transitions and one action per state
    let mut transtable = Vec::with_capacity(nstates);
    let mut acctable = Vec::with_capacity(nstates);

    for st in lex.auto.states.iter() {
        let mut vec = Vec::new();
        for i in st.trans.iter() {
            vec.push(cx.expr_uint(sp, *i));
        }
        let trans_expr = cx.expr_vec(sp, vec);
        let acc_expr = cx.expr_uint(sp, st.action);
        transtable.push(trans_expr);
        acctable.push(acc_expr);
    }

    // wrap both tables into immutable statics
    let transtable = cx.expr_vec(sp, transtable);
    let transtable = ast::ItemStatic(ty_transtable, ast::MutImmutable, transtable);
    let transtable = cx.item(sp, cx.ident_of("transition_table"), Vec::new(),
        transtable);

    let acctable = cx.expr_vec(sp, acctable);
    let acctable = ast::ItemStatic(ty_acctable, ast::MutImmutable, acctable);
    let acctable = cx.item(sp, cx.ident_of("accepting"), Vec::new(),
        acctable);

    items.push(transtable);
    items.push(acctable);

    // constants
    //   a constant per condition, whose value is the initial
    //   state of the DFA corresponding to that condition in
    //   the main big DFA
    //   the INPUT_BUFSIZE constant is used by the Lexer methods
    for &(cond, st) in lex.conditions.iter() {
        let cond = ast::Ident::new(cond);
        items.push(quote_item!(&*cx,
            static $cond: uint = $st;
        ).unwrap());
    }

    items.push(quote_item!(&*cx, static INPUT_BUFSIZE: uint = 256;).unwrap());

    // structs
    items.push(bufferStruct(cx));
    items.push(lexerStruct(cx));

    // functions of the Lexer and InputBuffer structs; the first entry of
    // `lex.actions` is skipped by `actionsMatch`
    let acts_match = actionsMatch(lex.actions, cx, sp);
    let lex_impl = lexerImpl(cx, acts_match);
    items.push(lex_impl);

    // no progress message here: this runs during macro expansion and
    // anything printed would end up in the output of the compilation

    ~CodeGenerator {
        span: sp,
        handler: cx.parse_sess.span_diagnostic,
        items: items
    }
}
// Builds the `match last_matching_action { ... }` expression that is
// spliced into the generated `next` method.
//
// * `acts` - user actions; the first entry is skipped, the entry at
//   index `n` (n >= 1) becomes the arm for the literal `n`
// * `cx`   - expansion context used to build and quote the AST
// * `sp`   - span attached to the generated nodes
//
// Every user action is preceded by a statement binding `yystr` to the
// matched input. A final wildcard arm holds the default action.
// The quoted code refers to `self`, `oldpos` and `last_matching_action`,
// which must be in scope where the expression is spliced.
pub fn actionsMatch(acts: &[@ast::Stmt], cx: &mut ExtCtxt, sp: Span) -> @ast::Expr {
    let scrutinee = quote_expr!(&*cx, last_matching_action);

    // FIXME: unused variable in generated code
    // a syntax reg as var => like OCamllex would be better
    let bind_yystr = quote_stmt!(&*cx,
        let yystr = ::std::str::from_utf8(self.inp.buf.slice(
            oldpos, self.inp.current_pos)).unwrap();
    );

    // one arm per user action plus the default one
    let mut arms = Vec::with_capacity(acts.len());

    for (num, act) in acts.iter().enumerate().skip(1) {
        let lit = quote_expr!(&*cx, $num);
        let arm_pat = cx.pat_lit(sp, lit);
        let body = cx.block(sp, vec!(bind_yystr, *act), None);
        let body_expr = quote_expr!(&*cx, $body);
        arms.push(cx.arm(sp, vec!(arm_pat), body_expr));
    }

    // default action: step one byte past the start of the token and
    // print that part of the input on stdout
    let fallback = quote_expr!(&*cx, {
        self.go_back(oldpos + 1);
        let s = self.inp.buf.slice(oldpos, self.inp.current_pos);
        print!("{:s}", ::std::str::from_utf8(s).unwrap());
    });
    let wildcard = cx.pat_wild(sp);
    arms.push(cx.arm(sp, vec!(wildcard), fallback));

    cx.expr_match(sp, scrutinee, arms)
}
// Builds the `impl Lexer` item of the generated lexer, i.e. the code
// simulating the automaton at run time.
//
// * `cx`            - expansion context the code is quoted in
// * `actions_match` - expression spliced into `next` once a token has
//   been recognized (see `actionsMatch`); it may refer to `self`,
//   `oldpos` and `last_matching_action`
//
// Generated methods:
// * `next_input` - next input byte, refilling the buffer from the
//   stream when it is exhausted; `None` when no byte can be obtained
// * `go_back`    - moves the read position inside the buffer
// * `next`       - runs the automaton from the current condition and
//   executes the action of the last accepting state reached
// * `new`        - lexer over `stream`, starting in condition INITIAL
//
// Fails (through `unwrap`) if the quotation does not yield an item.
pub fn lexerImpl(cx: &mut ExtCtxt, actions_match: @ast::Expr) -> @ast::Item {
    // the actual simulation code
    (quote_item!(cx,
        impl Lexer {
            fn next_input(&mut self) -> Option<u8> {
                if self.inp.current_pos == self.inp.buf.len() {
                    // more input
                    // read into a fresh buffer and only install it once
                    // the read has succeeded: the current buffer must
                    // stay valid on failure since `next` may still
                    // slice the pending token out of it
                    let mut fresh = ::std::vec::from_elem(INPUT_BUFSIZE, 0 as u8);
                    match self.stream.read(fresh.mut_slice_from(0)) {
                        Err(_) => return None,
                        Ok(b) => if b < INPUT_BUFSIZE {
                            fresh.truncate(b);
                        }
                    }

                    // a read of zero bytes gives nothing to index below
                    if fresh.len() == 0 {
                        return None;
                    }

                    // NOTE(review): replacing the buffer invalidates
                    // positions saved by `next` (`oldpos`, `advance`)
                    // when a token straddles two reads - to be confirmed
                    // and handled separately
                    self.inp.buf = fresh;
                    self.inp.current_pos = 0;
                }

                let ret = self.inp.buf[self.inp.current_pos];
                self.inp.current_pos += 1;
                Some(ret)
            }

            fn go_back(&mut self, pos: uint) {
                self.inp.current_pos = pos;
            }

            fn next<'a>(&'a mut self) -> Option<Token> {
                let oldpos = self.inp.current_pos;
                let mut advance = self.inp.current_pos;
                let mut last_matching_action = 0;
                let mut current_st = self.condition;

                while current_st != 0 {
                    let i = match self.next_input() {
                        Some(i) => i,
                        // out of input: give up only if nothing has
                        // been matched, otherwise the pending token
                        // would be silently dropped
                        None => if last_matching_action == 0 {
                            return None
                        } else {
                            break
                        }
                    };

                    let new_st = transition_table[current_st][i];
                    let action = accepting[new_st];

                    if action != 0 {
                        advance = self.inp.current_pos;

                        // final state
                        last_matching_action = action;
                    }

                    current_st = new_st;
                }

                // go back to last matching state in the input
                self.go_back(advance);

                // execute action corresponding to found state
                $actions_match

                // if the user code did not return, continue
                self.next()
            }

            fn new(stream: ~::std::io::Reader) -> ~Lexer {
                let buf = ~InputBuffer { buf: ~[], current_pos: 0 };
                ~Lexer { stream: stream, inp: buf, condition: INITIAL }
            }
        }
    )).unwrap()
}