You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

535 lines
15 KiB

  1. /*
  2. ** $Id: llex.c,v 2.61 2012/01/23 23:05:51 roberto Exp $
  3. ** Lexical Analyzer
  4. ** See Copyright Notice in lua.h
  5. */
  6. #if defined HAVE_CONFIG_H // LOL BEGIN
  7. # include "config.h"
  8. #endif // LOL END
  9. #include <locale.h>
  10. #include <string.h>
  11. #if defined __ANDROID__ // LOL BEGIN
  12. # define getlocaledecpoint() '.'
  13. #endif // LOL END
  14. #define llex_c
  15. #define LUA_CORE
  16. #include "lua.h"
  17. #include "lctype.h"
  18. #include "ldo.h"
  19. #include "llex.h"
  20. #include "lobject.h"
  21. #include "lparser.h"
  22. #include "lstate.h"
  23. #include "lstring.h"
  24. #include "ltable.h"
  25. #include "lzio.h"
  /*
  ** Advance the lexer by one character: the value produced by
  ** 'zgetc(ls->z)' is stored in 'ls->current' and is also the value of
  ** the whole expression.  The parameter is parenthesized at every use so
  ** that any pointer expression (for instance '&state') expands correctly.
  */
  #define next(ls)  ((ls)->current = zgetc((ls)->z))

  /*
  ** True when the current character is '\n' or '\r'.
  ** 'ls' is expanded twice, so it must not have side effects.
  */
  #define currIsNewline(ls)  ((ls)->current == '\n' || (ls)->current == '\r')
  /*
  ** Printable form of every token, indexed by (token - FIRST_RESERVED).
  ** ORDER RESERVED: the entries must keep this exact order.
  */
  static const char *const luaX_tokens [] = {
    /* words */
    "and", "break", "do", "else", "elseif", "end", "false", "for",
    "function", "goto", "if", "in", "local", "nil", "not", "or",
    "repeat", "return", "then", "true", "until", "while",
    /* multi-character symbols */
    "..", "...", "==", ">=", "<=", "~=", "::",
    /* end of stream and tokens that carry a value */
    "<eof>", "<number>", "<name>", "<string>"
  };
  37. #define save_and_next(ls) (save(ls, ls->current), next(ls))
  38. static l_noret lexerror (LexState *ls, const char *msg, int token);
  39. static void save (LexState *ls, int c) {
  40. Mbuffer *b = ls->buff;
  41. if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
  42. size_t newsize;
  43. if (luaZ_sizebuffer(b) >= MAX_SIZET/2)
  44. lexerror(ls, "lexical element too long", 0);
  45. newsize = luaZ_sizebuffer(b) * 2;
  46. luaZ_resizebuffer(ls->L, b, newsize);
  47. }
  48. b->buffer[luaZ_bufflen(b)++] = cast(char, c);
  49. }
  50. void luaX_init (lua_State *L) {
  51. int i;
  52. for (i=0; i<NUM_RESERVED; i++) {
  53. TString *ts = luaS_new(L, luaX_tokens[i]);
  54. luaS_fix(ts); /* reserved words are never collected */
  55. ts->tsv.extra = cast_byte(i+1); /* reserved word */
  56. }
  57. }
  58. const char *luaX_token2str (LexState *ls, int token) {
  59. if (token < FIRST_RESERVED) {
  60. lua_assert(token == cast(unsigned char, token));
  61. return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :
  62. luaO_pushfstring(ls->L, "char(%d)", token);
  63. }
  64. else {
  65. const char *s = luaX_tokens[token - FIRST_RESERVED];
  66. if (token < TK_EOS)
  67. return luaO_pushfstring(ls->L, LUA_QS, s);
  68. else
  69. return s;
  70. }
  71. }
  72. static const char *txtToken (LexState *ls, int token) {
  73. switch (token) {
  74. case TK_NAME:
  75. case TK_STRING:
  76. case TK_NUMBER:
  77. save(ls, '\0');
  78. return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
  79. default:
  80. return luaX_token2str(ls, token);
  81. }
  82. }
  83. static l_noret lexerror (LexState *ls, const char *msg, int token) {
  84. char buff[LUA_IDSIZE];
  85. luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);
  86. msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
  87. if (token)
  88. luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
  89. luaD_throw(ls->L, LUA_ERRSYNTAX);
  90. }
  91. l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
  92. lexerror(ls, msg, ls->t.token);
  93. }
  94. /*
  95. ** creates a new string and anchors it in function's table so that
  96. ** it will not be collected until the end of the function's compilation
  97. ** (by that time it should be anchored in function's prototype)
  98. */
  99. TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
  100. lua_State *L = ls->L;
  101. TValue *o; /* entry for `str' */
  102. TString *ts = luaS_newlstr(L, str, l); /* create new string */
  103. setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */
  104. o = luaH_set(L, ls->fs->h, L->top - 1);
  105. if (ttisnil(o)) { /* not in use yet? (see 'addK') */
  106. /* boolean value does not need GC barrier;
  107. table has no metatable, so it does not need to invalidate cache */
  108. setbvalue(o, 1); /* t[string] = true */
  109. luaC_checkGC(L);
  110. }
  111. L->top--; /* remove string from stack */
  112. return ts;
  113. }
  114. /*
  115. ** increment line number and skips newline sequence (any of
  116. ** \n, \r, \n\r, or \r\n)
  117. */
  118. static void inclinenumber (LexState *ls) {
  119. int old = ls->current;
  120. lua_assert(currIsNewline(ls));
  121. next(ls); /* skip `\n' or `\r' */
  122. if (currIsNewline(ls) && ls->current != old)
  123. next(ls); /* skip `\n\r' or `\r\n' */
  124. if (++ls->linenumber >= MAX_INT)
  125. luaX_syntaxerror(ls, "chunk has too many lines");
  126. }
  127. void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
  128. int firstchar) {
  129. ls->decpoint = '.';
  130. ls->L = L;
  131. ls->current = firstchar;
  132. ls->lookahead.token = TK_EOS; /* no look-ahead token */
  133. ls->z = z;
  134. ls->fs = NULL;
  135. ls->linenumber = 1;
  136. ls->lastline = 1;
  137. ls->source = source;
  138. ls->envn = luaS_new(L, LUA_ENV); /* create env name */
  139. luaS_fix(ls->envn); /* never collect this name */
  140. luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
  141. }
  142. /*
  143. ** =======================================================
  144. ** LEXICAL ANALYZER
  145. ** =======================================================
  146. */
  147. static int check_next (LexState *ls, const char *set) {
  148. if (ls->current == '\0' || !strchr(set, ls->current))
  149. return 0;
  150. save_and_next(ls);
  151. return 1;
  152. }
  153. /*
  154. ** change all characters 'from' in buffer to 'to'
  155. */
  156. static void buffreplace (LexState *ls, char from, char to) {
  157. size_t n = luaZ_bufflen(ls->buff);
  158. char *p = luaZ_buffer(ls->buff);
  159. while (n--)
  160. if (p[n] == from) p[n] = to;
  161. }
  /*
  ** Decimal-point character of the current C locale; only defined here
  ** when nothing earlier in the file (e.g. the Android override) has
  ** already provided it.
  */
  #if !defined(getlocaledecpoint)
  #define getlocaledecpoint()  (localeconv()->decimal_point[0])
  #endif

  /*
  ** Convert the text in buffer 'mb' to a number stored through 'out',
  ** using 'luaO_str2d'.  The length passed leaves out the last buffer
  ** character (callers save a terminating '\0' before converting).
  */
  #define buff2d(mb,out)  luaO_str2d(luaZ_buffer(mb), luaZ_bufflen(mb) - 1, out)
  166. /*
  167. ** in case of format error, try to change decimal point separator to
  168. ** the one defined in the current locale and check again
  169. */
  170. static void trydecpoint (LexState *ls, SemInfo *seminfo) {
  171. char old = ls->decpoint;
  172. ls->decpoint = getlocaledecpoint();
  173. buffreplace(ls, old, ls->decpoint); /* try new decimal separator */
  174. if (!buff2d(ls->buff, &seminfo->r)) {
  175. /* format error with correct decimal point: no more options */
  176. buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */
  177. lexerror(ls, "malformed number", TK_NUMBER);
  178. }
  179. }
  180. /* LUA_NUMBER */
  181. /*
  182. ** this function is quite liberal in what it accepts, as 'luaO_str2d'
  183. ** will reject ill-formed numerals.
  184. */
  185. static void read_numeral (LexState *ls, SemInfo *seminfo) {
  186. const char *expo = "Ee";
  187. int first = ls->current;
  188. lua_assert(lisdigit(ls->current));
  189. save_and_next(ls);
  190. if (first == '0' && check_next(ls, "Xx")) /* hexadecimal? */
  191. expo = "Pp";
  192. for (;;) {
  193. if (check_next(ls, expo)) /* exponent part? */
  194. check_next(ls, "+-"); /* optional exponent sign */
  195. if (lisxdigit(ls->current) || ls->current == '.')
  196. save_and_next(ls);
  197. else break;
  198. }
  199. save(ls, '\0');
  200. buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */
  201. if (!buff2d(ls->buff, &seminfo->r)) /* format error? */
  202. trydecpoint(ls, seminfo); /* try to update decimal point separator */
  203. }
  204. /*
  205. ** skip a sequence '[=*[' or ']=*]' and return its number of '='s or
  206. ** -1 if sequence is malformed
  207. */
  208. static int skip_sep (LexState *ls) {
  209. int count = 0;
  210. int s = ls->current;
  211. lua_assert(s == '[' || s == ']');
  212. save_and_next(ls);
  213. while (ls->current == '=') {
  214. save_and_next(ls);
  215. count++;
  216. }
  217. return (ls->current == s) ? count : (-count) - 1;
  218. }
  219. static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  220. save_and_next(ls); /* skip 2nd `[' */
  221. if (currIsNewline(ls)) /* string starts with a newline? */
  222. inclinenumber(ls); /* skip it */
  223. for (;;) {
  224. switch (ls->current) {
  225. case EOZ:
  226. lexerror(ls, (seminfo) ? "unfinished long string" :
  227. "unfinished long comment", TK_EOS);
  228. break; /* to avoid warnings */
  229. case ']': {
  230. if (skip_sep(ls) == sep) {
  231. save_and_next(ls); /* skip 2nd `]' */
  232. goto endloop;
  233. }
  234. break;
  235. }
  236. case '\n': case '\r': {
  237. save(ls, '\n');
  238. inclinenumber(ls);
  239. if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */
  240. break;
  241. }
  242. default: {
  243. if (seminfo) save_and_next(ls);
  244. else next(ls);
  245. }
  246. }
  247. } endloop:
  248. if (seminfo)
  249. seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
  250. luaZ_bufflen(ls->buff) - 2*(2 + sep));
  251. }
  252. static void escerror (LexState *ls, int *c, int n, const char *msg) {
  253. int i;
  254. luaZ_resetbuffer(ls->buff); /* prepare error message */
  255. save(ls, '\\');
  256. for (i = 0; i < n && c[i] != EOZ; i++)
  257. save(ls, c[i]);
  258. lexerror(ls, msg, TK_STRING);
  259. }
  260. static int readhexaesc (LexState *ls) {
  261. int c[3], i; /* keep input for error message */
  262. int r = 0; /* result accumulator */
  263. c[0] = 'x'; /* for error message */
  264. for (i = 1; i < 3; i++) { /* read two hexa digits */
  265. c[i] = next(ls);
  266. if (!lisxdigit(c[i]))
  267. escerror(ls, c, i + 1, "hexadecimal digit expected");
  268. r = (r << 4) + luaO_hexavalue(c[i]);
  269. }
  270. return r;
  271. }
  272. static int readdecesc (LexState *ls) {
  273. int c[3], i;
  274. int r = 0; /* result accumulator */
  275. for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */
  276. c[i] = ls->current;
  277. r = 10*r + c[i] - '0';
  278. next(ls);
  279. }
  280. if (r > UCHAR_MAX)
  281. escerror(ls, c, i, "decimal escape too large");
  282. return r;
  283. }
  284. static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  285. save_and_next(ls); /* keep delimiter (for error messages) */
  286. while (ls->current != del) {
  287. switch (ls->current) {
  288. case EOZ:
  289. lexerror(ls, "unfinished string", TK_EOS);
  290. break; /* to avoid warnings */
  291. case '\n':
  292. case '\r':
  293. lexerror(ls, "unfinished string", TK_STRING);
  294. break; /* to avoid warnings */
  295. case '\\': { /* escape sequences */
  296. int c; /* final character to be saved */
  297. next(ls); /* do not save the `\' */
  298. switch (ls->current) {
  299. case 'a': c = '\a'; goto read_save;
  300. case 'b': c = '\b'; goto read_save;
  301. case 'f': c = '\f'; goto read_save;
  302. case 'n': c = '\n'; goto read_save;
  303. case 'r': c = '\r'; goto read_save;
  304. case 't': c = '\t'; goto read_save;
  305. case 'v': c = '\v'; goto read_save;
  306. case 'x': c = readhexaesc(ls); goto read_save;
  307. case '\n': case '\r':
  308. inclinenumber(ls); c = '\n'; goto only_save;
  309. case '\\': case '\"': case '\'':
  310. c = ls->current; goto read_save;
  311. case EOZ: goto no_save; /* will raise an error next loop */
  312. case 'z': { /* zap following span of spaces */
  313. next(ls); /* skip the 'z' */
  314. while (lisspace(ls->current)) {
  315. if (currIsNewline(ls)) inclinenumber(ls);
  316. else next(ls);
  317. }
  318. goto no_save;
  319. }
  320. default: {
  321. if (!lisdigit(ls->current))
  322. escerror(ls, &ls->current, 1, "invalid escape sequence");
  323. /* digital escape \ddd */
  324. c = readdecesc(ls);
  325. goto only_save;
  326. }
  327. }
  328. read_save: next(ls); /* read next character */
  329. only_save: save(ls, c); /* save 'c' */
  330. no_save: break;
  331. }
  332. default:
  333. save_and_next(ls);
  334. }
  335. }
  336. save_and_next(ls); /* skip delimiter */
  337. seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
  338. luaZ_bufflen(ls->buff) - 2);
  339. }
  340. static int llex (LexState *ls, SemInfo *seminfo) {
  341. luaZ_resetbuffer(ls->buff);
  342. for (;;) {
  343. switch (ls->current) {
  344. case '\n': case '\r': { /* line breaks */
  345. inclinenumber(ls);
  346. break;
  347. }
  348. case ' ': case '\f': case '\t': case '\v': { /* spaces */
  349. next(ls);
  350. break;
  351. }
  352. case '-': { /* '-' or '--' (comment) */
  353. next(ls);
  354. if (ls->current != '-') return '-';
  355. /* else is a comment */
  356. next(ls);
  357. if (ls->current == '[') { /* long comment? */
  358. int sep = skip_sep(ls);
  359. luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */
  360. if (sep >= 0) {
  361. read_long_string(ls, NULL, sep); /* skip long comment */
  362. luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */
  363. break;
  364. }
  365. }
  366. /* else short comment */
  367. while (!currIsNewline(ls) && ls->current != EOZ)
  368. next(ls); /* skip until end of line (or end of file) */
  369. break;
  370. }
  371. case '[': { /* long string or simply '[' */
  372. int sep = skip_sep(ls);
  373. if (sep >= 0) {
  374. read_long_string(ls, seminfo, sep);
  375. return TK_STRING;
  376. }
  377. else if (sep == -1) return '[';
  378. else lexerror(ls, "invalid long string delimiter", TK_STRING);
  379. }
  380. case '=': {
  381. next(ls);
  382. if (ls->current != '=') return '=';
  383. else { next(ls); return TK_EQ; }
  384. }
  385. case '<': {
  386. next(ls);
  387. if (ls->current != '=') return '<';
  388. else { next(ls); return TK_LE; }
  389. }
  390. case '>': {
  391. next(ls);
  392. if (ls->current != '=') return '>';
  393. else { next(ls); return TK_GE; }
  394. }
  395. case '~': {
  396. next(ls);
  397. if (ls->current != '=') return '~';
  398. else { next(ls); return TK_NE; }
  399. }
  400. case ':': {
  401. next(ls);
  402. if (ls->current != ':') return ':';
  403. else { next(ls); return TK_DBCOLON; }
  404. }
  405. case '"': case '\'': { /* short literal strings */
  406. read_string(ls, ls->current, seminfo);
  407. return TK_STRING;
  408. }
  409. case '.': { /* '.', '..', '...', or number */
  410. save_and_next(ls);
  411. if (check_next(ls, ".")) {
  412. if (check_next(ls, "."))
  413. return TK_DOTS; /* '...' */
  414. else return TK_CONCAT; /* '..' */
  415. }
  416. else if (!lisdigit(ls->current)) return '.';
  417. /* else go through */
  418. }
  419. case '0': case '1': case '2': case '3': case '4':
  420. case '5': case '6': case '7': case '8': case '9': {
  421. read_numeral(ls, seminfo);
  422. return TK_NUMBER;
  423. }
  424. case EOZ: {
  425. return TK_EOS;
  426. }
  427. default: {
  428. if (lislalpha(ls->current)) { /* identifier or reserved word? */
  429. TString *ts;
  430. do {
  431. save_and_next(ls);
  432. } while (lislalnum(ls->current));
  433. ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
  434. luaZ_bufflen(ls->buff));
  435. seminfo->ts = ts;
  436. if (isreserved(ts)) /* reserved word? */
  437. return ts->tsv.extra - 1 + FIRST_RESERVED;
  438. else {
  439. return TK_NAME;
  440. }
  441. }
  442. else { /* single-char tokens (+ - / ...) */
  443. int c = ls->current;
  444. next(ls);
  445. return c;
  446. }
  447. }
  448. }
  449. }
  450. }
  451. void luaX_next (LexState *ls) {
  452. ls->lastline = ls->linenumber;
  453. if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */
  454. ls->t = ls->lookahead; /* use this one */
  455. ls->lookahead.token = TK_EOS; /* and discharge it */
  456. }
  457. else
  458. ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */
  459. }
  460. int luaX_lookahead (LexState *ls) {
  461. lua_assert(ls->lookahead.token == TK_EOS);
  462. ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
  463. return ls->lookahead.token;
  464. }