Subversion Repositories WoWGM

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3 tristanc 1
/*
2
** $Id: llex.c,v 2.20 2006/03/09 18:14:31 roberto Exp $
3
** Lexical Analyzer
4
** See Copyright Notice in lua.h
5
*/
6
 
7
 
8
#include <ctype.h>
9
#include <locale.h>
10
#include <string.h>
11
 
12
#define llex_c
13
#define LUA_CORE
14
 
15
#include "lua.h"
16
 
17
#include "ldo.h"
18
#include "llex.h"
19
#include "lobject.h"
20
#include "lparser.h"
21
#include "lstate.h"
22
#include "lstring.h"
23
#include "ltable.h"
24
#include "lzio.h"
25
 
26
 
27
 
28
/* advance the input: read the next character from the stream `z' into `current' */
#define next(ls) ((ls)->current = zgetc((ls)->z))
29
 
30
 
31
 
32
 
33
/* true when the current character is one of the line-break characters `\n' or `\r' */
#define currIsNewline(ls)	((ls)->current == '\n' || (ls)->current == '\r')
34
 
35
 
36
/* ORDER RESERVED */
/*
** Printable text of every multi-character token, terminated by NULL.
** Entry `i' is the text of token code `FIRST_RESERVED + i' (see
** luaX_token2str); luaX_init registers the first NUM_RESERVED entries
** as reserved words.
*/
const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    /* multi-character operators */
    "..", "...", "==", ">=", "<=", "~=",
    /* token classes */
    "<number>", "<name>", "<string>", "<eof>",
    NULL
};
46
 
47
 
48
/* append the current character to the token buffer, then advance the input */
#define save_and_next(ls) (save((ls), (ls)->current), next(ls))
49
 
50
 
51
/*
** Append character `c' to the lexer's token buffer.
** When the buffer is full its size is doubled first; if the current size
** is already at least MAX_SIZET/2 a lexical error is raised instead.
*/
static void save (LexState *ls, int c) {
  Mbuffer *b = ls->buff;
  if (b->n + 1 > b->buffsize) {  /* no room for one more character? */
    size_t newsize;
    if (b->buffsize >= MAX_SIZET/2)  /* doubling would overflow? */
      luaX_lexerror(ls, "lexical element too long", 0);
    newsize = b->buffsize * 2;
    luaZ_resizebuffer(ls->L, b, newsize);
  }
  b->buffer[b->n++] = cast(char, c);
}
62
 
63
 
64
/*
** Create a string object for each of the first NUM_RESERVED entries of
** `luaX_tokens', fix it so that it is never collected, and tag it with
** its 1-based position in the table (`reserved' == 0 is therefore left
** for strings that are not reserved words; see the test in `llex').
*/
void luaX_init (lua_State *L) {
  int i;
  for (i=0; i<NUM_RESERVED; i++) {
    TString *ts = luaS_new(L, luaX_tokens[i]);
    luaS_fix(ts);  /* reserved words are never collected */
    lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
    ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
  }
}
73
 
74
 
75
/* size of the local buffer that luaX_lexerror hands to luaO_chunkid for the source name */
#define MAXSRC          80
76
 
77
 
78
/*
** Return a printable representation of `token'.
** Codes below FIRST_RESERVED are single characters: control characters
** are formatted as "char(<code>)", any other character as itself (both
** through luaO_pushfstring). Codes from FIRST_RESERVED on are looked up
** in `luaX_tokens'.
*/
const char *luaX_token2str (LexState *ls, int token) {
  if (token < FIRST_RESERVED) {
    lua_assert(token == cast(unsigned char, token));
    return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
                              luaO_pushfstring(ls->L, "%c", token);
  }
  else
    return luaX_tokens[token-FIRST_RESERVED];
}
87
 
88
 
89
/*
** Text to show for `token' in an error message.
** For names, strings and numbers this is the raw text accumulated in the
** token buffer (a terminating '\0' is appended to it first); every other
** token is rendered by luaX_token2str.
*/
static const char *txtToken (LexState *ls, int token) {
  switch (token) {
    case TK_NAME:
    case TK_STRING:
    case TK_NUMBER:
      save(ls, '\0');  /* make the buffer a C string */
      return luaZ_buffer(ls->buff);
    default:
      return luaX_token2str(ls, token);
  }
}
100
 
101
 
102
/*
** Build the message "<source>:<line>: <msg>" and, when `token' is
** non-zero, extend it with " near <token text>"; then throw
** LUA_ERRSYNTAX through luaD_throw.
*/
void luaX_lexerror (LexState *ls, const char *msg, int token) {
  char buff[MAXSRC];
  luaO_chunkid(buff, getstr(ls->source), MAXSRC);
  msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
  if (token)  /* is there an offending token to report? */
    luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
  luaD_throw(ls->L, LUA_ERRSYNTAX);
}
110
 
111
 
112
/*
** Raise a lexical error with message `msg', reporting the current
** token (`ls->t.token') as the offending one.
*/
void luaX_syntaxerror (LexState *ls, const char *msg) {
  luaX_lexerror(ls, msg, ls->t.token);
}
115
 
116
 
117
/*
** Create a string object from the `l' bytes at `str' and make sure it
** has an entry in the table `ls->fs->h': a new (nil) entry is set to
** boolean true so that the string will not be collected.
** Returns the string object.
*/
TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
  lua_State *L = ls->L;
  TString *ts = luaS_newlstr(L, str, l);
  TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
  if (ttisnil(o))
    setbvalue(o, 1);  /* make sure `str' will not be collected */
  return ts;
}
125
 
126
 
127
/*
** Consume one line break and increment the line counter.
** A line break is a single `\n' or `\r', or one of each in either order
** (two equal characters in a row count as two separate line breaks).
** Raises a syntax error when the counter reaches MAX_INT.
*/
static void inclinenumber (LexState *ls) {
  int old = ls->current;
  lua_assert(currIsNewline(ls));
  next(ls);  /* skip `\n' or `\r' */
  if (currIsNewline(ls) && ls->current != old)
    next(ls);  /* skip `\n\r' or `\r\n' */
  if (++ls->linenumber >= MAX_INT)
    luaX_syntaxerror(ls, "chunk has too many lines");
}
136
 
137
 
138
/*
** Prepare `ls' to read from stream `z', whose name is `source':
** reset the decimal point to '.', clear the look-ahead token, start
** line counting at 1, give the token buffer its minimum size and read
** the first character of the input.
*/
void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
  ls->decpoint = '.';
  ls->L = L;
  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  ls->z = z;
  ls->fs = NULL;
  ls->linenumber = 1;
  ls->lastline = 1;
  ls->source = source;
  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
  next(ls);  /* read first char */
}
150
 
151
 
152
 
153
/*
154
** =======================================================
155
** LEXICAL ANALYZER
156
** =======================================================
157
*/
158
 
159
 
160
 
161
/*
** If the current character belongs to `set', save it, advance the input
** and return 1; otherwise leave the input untouched and return 0.
** A NUL character in the input never matches: strchr would otherwise
** report a match against the terminating '\0' of `set'.
*/
static int check_next (LexState *ls, const char *set) {
  if (ls->current == '\0' || !strchr(set, ls->current))
    return 0;
  save_and_next(ls);
  return 1;
}
167
 
168
 
169
/*
** Replace every occurrence of character `from' by `to' in the current
** contents of the token buffer.
*/
static void buffreplace (LexState *ls, char from, char to) {
  size_t n = luaZ_bufflen(ls->buff);
  char *p = luaZ_buffer(ls->buff);
  while (n--)  /* walk the buffer from its last character to its first */
    if (p[n] == from) p[n] = to;
}
175
 
176
 
177
/*
** Second attempt at converting the numeral in the token buffer, used
** after a conversion with the cached decimal point has failed.
** Refreshes `ls->decpoint' from localeconv() (falling back to '.'),
** rewrites the buffer accordingly and converts again. On a second
** failure the buffer is restored to use '.' and a "malformed number"
** error is raised.
*/
static void trydecpoint (LexState *ls, SemInfo *seminfo) {
  /* format error: try to update decimal point separator */
  struct lconv *cv = localeconv();
  char old = ls->decpoint;
  ls->decpoint = (cv ? cv->decimal_point[0] : '.');
  buffreplace(ls, old, ls->decpoint);  /* try updated decimal separator */
  if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
    /* format error with correct decimal point: no more options */
    buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
    luaX_lexerror(ls, "malformed number", TK_NUMBER);
  }
}
189
 
190
 
191
/* LUA_NUMBER */
192
static void read_numeral (LexState *ls, SemInfo *seminfo) {
193
  lua_assert(isdigit(ls->current));
194
  do {
195
    save_and_next(ls);
196
  } while (isdigit(ls->current) || ls->current == '.');
197
  if (check_next(ls, "Ee"))  /* `E'? */
198
    check_next(ls, "+-");  /* optional exponent sign */
199
  while (isalnum(ls->current) || ls->current == '_')
200
    save_and_next(ls);
201
  save(ls, '\0');
202
  buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
203
  if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
204
    trydecpoint(ls, seminfo); /* try to update decimal point separator */
205
}
206
 
207
 
208
/*
** Scan a long-bracket delimiter candidate: the current bracket (`[' or
** `]') followed by zero or more `='. Everything scanned is saved in the
** token buffer.
** Returns the number of `=' signs when the next character is the same
** bracket again (a complete delimiter; that second bracket is NOT
** consumed), otherwise -(count) - 1. In particular -1 means a lone
** bracket with no `=' after it.
*/
static int skip_sep (LexState *ls) {
  int count = 0;
  int s = ls->current;
  lua_assert(s == '[' || s == ']');
  save_and_next(ls);
  while (ls->current == '=') {
    save_and_next(ls);
    count++;
  }
  return (ls->current == s) ? count : (-count) - 1;
}
219
 
220
 
221
/*
** Read the body of a long string or long comment whose opening
** delimiter has `sep' `=' signs. On entry the current character is the
** second `[' of the opening delimiter (skip_sep has already saved the
** rest of it).
** When `seminfo' is non-NULL the text is a long string: its contents,
** without the delimiters, are stored in `seminfo->ts'. When `seminfo' is
** NULL the text is a long comment and its contents are discarded.
** A line break immediately after the opening delimiter is skipped;
** every other line break is stored as a single `\n'.
** Reaching the end of the input before the closing delimiter raises an
** error.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  int cont = 0;  /* nesting counter, only used under LUA_COMPAT_LSTR */
  (void)(cont);  /* avoid warnings when `cont' is not used */
  save_and_next(ls);  /* skip 2nd `[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  for (;;) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
                                   "unfinished long comment", TK_EOS);
        break;  /* to avoid warnings */
#if defined(LUA_COMPAT_LSTR)
      case '[': {
        if (skip_sep(ls) == sep) {  /* nested opening delimiter? */
          save_and_next(ls);  /* skip 2nd `[' */
          cont++;
#if LUA_COMPAT_LSTR == 1
          if (sep == 0)
            luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
#endif
        }
        break;
      }
#endif
      case ']': {
        if (skip_sep(ls) == sep) {  /* closing delimiter of the same level? */
          save_and_next(ls);  /* skip 2nd `]' */
#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
          cont--;
          if (sep == 0 && cont >= 0) break;  /* closed a nested [[...]] only */
#endif
          goto endloop;
        }
        break;
      }
      case '\n':
      case '\r': {
        save(ls, '\n');  /* normalize the line break */
        inclinenumber(ls);
        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
        break;
      }
      default: {
        if (seminfo) save_and_next(ls);
        else next(ls);  /* comment text is not kept */
      }
    }
  } endloop:
  if (seminfo)  /* strip the (2 + sep)-character delimiter from both ends */
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
                                     luaZ_bufflen(ls->buff) - 2*(2 + sep));
}
274
 
275
 
276
/*
** Read a quoted string delimited by character `del' (the current
** character on entry) and store its contents, without the delimiters,
** in `seminfo->ts'.
** Escapes handled: \a \b \f \n \r \t \v, a backslash followed by a real
** line break (stored as `\n'), up to three decimal digits (\ddd, which
** must not exceed UCHAR_MAX), and any other character taken literally.
** An unescaped line break or the end of the input inside the string
** raises "unfinished string".
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep the opening delimiter (stripped at the end) */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, "unfinished string", TK_EOS);
        continue;  /* to avoid warnings */
      case '\n':
      case '\r':
        luaX_lexerror(ls, "unfinished string", TK_STRING);
        continue;  /* to avoid warnings */
      case '\\': {
        int c;
        next(ls);  /* do not save the `\' */
        switch (ls->current) {
          case 'a': c = '\a'; break;
          case 'b': c = '\b'; break;
          case 'f': c = '\f'; break;
          case 'n': c = '\n'; break;
          case 'r': c = '\r'; break;
          case 't': c = '\t'; break;
          case 'v': c = '\v'; break;
          case '\n':  /* go through */
          case '\r': save(ls, '\n'); inclinenumber(ls); continue;
          case EOZ: continue;  /* will raise an error next loop */
          default: {
            if (!isdigit(ls->current))
              save_and_next(ls);  /* handles \\, \", \', and \? */
            else {  /* \xxx */
              int i = 0;
              c = 0;
              do {  /* accumulate at most three decimal digits */
                c = 10*c + (ls->current-'0');
                next(ls);
              } while (++i<3 && isdigit(ls->current));
              if (c > UCHAR_MAX)
                luaX_lexerror(ls, "escape sequence too large", TK_STRING);
              save(ls, c);
            }
            continue;
          }
        }
        /* single-letter escape: store its value and skip the letter */
        save(ls, c);
        next(ls);
        continue;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
330
 
331
 
332
/*
** Scan and return the next token from the input.
** White space, line breaks and comments are skipped. Single-character
** tokens are returned as the character itself; the others as TK_*
** codes. For TK_STRING and TK_NAME the text is stored in `seminfo->ts',
** for TK_NUMBER the value is stored in `seminfo->r'. Reserved words are
** returned as their own token codes and leave `seminfo' untouched.
** Returns TK_EOS at the end of the input.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r': {
        inclinenumber(ls);
        continue;
      }
      case '-': {
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* long comment */
            luaZ_resetbuffer(ls->buff);
            continue;
          }
        }
        /* else short comment */
        while (!currIsNewline(ls) && ls->current != EOZ)
          next(ls);
        continue;
      }
      case '[': {
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        else if (sep == -1) return '[';  /* lone bracket */
        else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
        break;  /* to avoid warnings (and a fall through into the next case) */
      }
      case '=': {
        next(ls);
        if (ls->current != '=') return '=';
        else { next(ls); return TK_EQ; }
      }
      case '<': {
        next(ls);
        if (ls->current != '=') return '<';
        else { next(ls); return TK_LE; }
      }
      case '>': {
        next(ls);
        if (ls->current != '=') return '>';
        else { next(ls); return TK_GE; }
      }
      case '~': {
        next(ls);
        if (ls->current != '=') return '~';
        else { next(ls); return TK_NE; }
      }
      case '"':
      case '\'': {
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {
        save_and_next(ls);
        if (check_next(ls, ".")) {
          if (check_next(ls, "."))
            return TK_DOTS;   /* ... */
          else return TK_CONCAT;   /* .. */
        }
        else if (!isdigit(ls->current)) return '.';
        else {  /* numeral that starts with a dot */
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        if (isspace(ls->current)) {
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        else if (isdigit(ls->current)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        else if (isalpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (isalnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          else {
            seminfo->ts = ts;
            return TK_NAME;
          }
        }
        else {
          int c = ls->current;
          next(ls);
          return c;  /* single-char tokens (+ - / ...) */
        }
      }
    }
  }
}
444
 
445
 
446
/*
** Advance to the next token, leaving it in `ls->t'.
** `lastline' is first set to the current line number. A pending
** look-ahead token is consumed if there is one; otherwise a new token
** is scanned from the input.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
    ls->t = ls->lookahead;  /* use this one */
    ls->lookahead.token = TK_EOS;  /* and discharge it */
  }
  else
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
}
455
 
456
 
457
/*
** Scan one token ahead into `ls->lookahead' without changing the
** current token. There must be no pending look-ahead token.
*/
void luaX_lookahead (LexState *ls) {
  lua_assert(ls->lookahead.token == TK_EOS);
  ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
}
461