Join GitHub today
GitHub is home to over 50 million developers working together to host and review code, manage projects, and build software together.
Sign up| %{ | |
| /* | |
| * R : A Computer Langage for Statistical Data Analysis | |
| * Copyright (C) 1995, 1996, 1997 Robert Gentleman and Ross Ihaka | |
| * Copyright (C) 1997--2014 The R Core Team | |
| * Copyright (C) 2009--2011 Romain Francois | |
| * | |
| * This program is free software; you can redistribute it and/or modify | |
| * it under the terms of the GNU General Public License as published by | |
| * the Free Software Foundation; either version 2 of the License, or | |
| * (at your option) any later version. | |
| * | |
| * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | |
| * | |
| * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, a copy is available at | |
| * http://www.r-project.org/Licenses/ | |
| */ | |
| #ifdef HAVE_CONFIG_H | |
| #include <config.h> | |
| #endif | |
| #include "IOStuff.h" /*-> Defn.h */ | |
| #include "Fileio.h" | |
| #include "Parse.h" | |
| #include <R_ext/Print.h> | |
| #if !defined(__STDC_ISO_10646__) && (defined(__APPLE__) || defined(__FreeBSD__)) | |
| /* This may not be 100% true (see the comment in rlocale.h), | |
| but it seems true in normal locales */ | |
| # define __STDC_ISO_10646__ | |
| #endif | |
| #define YYERROR_VERBOSE 1 | |
| #define PARSE_ERROR_SIZE 256 /* Parse error messages saved here */ | |
| #define PARSE_CONTEXT_SIZE 256 /* Recent parse context kept in a circular buffer */ | |
| static Rboolean busy = FALSE; | |
| static int identifier ; | |
| static void incrementId(void); | |
| static void initData(void); | |
| static void initId(void); | |
| static void record_( int, int, int, int, int, int, char* ) ; | |
| static void yyerror(const char *); | |
| static int yylex(); | |
| int yyparse(void); | |
| static FILE *fp_parse; | |
| static int (*ptr_getc)(void); | |
| static int SavedToken; | |
| static SEXP SavedLval; | |
| #define yyconst const | |
| typedef struct yyltype | |
| { | |
| int first_line; | |
| int first_column; | |
| int first_byte; | |
| int last_line; | |
| int last_column; | |
| int last_byte; | |
| int first_parsed; | |
| int last_parsed; | |
| int id; | |
| } yyltype; | |
| #define INIT_DATA_COUNT 16384 /* init parser data to this size */ | |
| #define MAX_DATA_COUNT 65536 /* release it at the end if it is this size or larger*/ | |
| #define DATA_COUNT (ParseState.data ? length( ParseState.data ) / DATA_ROWS : 0) | |
| #define ID_COUNT ((ParseState.ids ? length( ParseState.ids ) / 2 : 0) - 1) | |
| static void finalizeData( ) ; | |
| static void growData( ) ; | |
| static void growID( int ) ; | |
| #define DATA_ROWS 8 | |
| #define _FIRST_PARSED( i ) INTEGER( ParseState.data )[ DATA_ROWS*(i) ] | |
| #define _FIRST_COLUMN( i ) INTEGER( ParseState.data )[ DATA_ROWS*(i) + 1 ] | |
| #define _LAST_PARSED( i ) INTEGER( ParseState.data )[ DATA_ROWS*(i) + 2 ] | |
| #define _LAST_COLUMN( i ) INTEGER( ParseState.data )[ DATA_ROWS*(i) + 3 ] | |
| #define _TERMINAL( i ) INTEGER( ParseState.data )[ DATA_ROWS*(i) + 4 ] | |
| #define _TOKEN( i ) INTEGER( ParseState.data )[ DATA_ROWS*(i) + 5 ] | |
| #define _ID( i ) INTEGER( ParseState.data )[ DATA_ROWS*(i) + 6 ] | |
| #define _PARENT(i) INTEGER( ParseState.data )[ DATA_ROWS*(i) + 7 ] | |
| #define ID_ID( i ) INTEGER(ParseState.ids)[ 2*(i) ] | |
| #define ID_PARENT( i ) INTEGER(ParseState.ids)[ 2*(i) + 1 ] | |
| static void modif_token( yyltype*, int ) ; | |
| static void recordParents( int, yyltype*, int) ; | |
| static int _current_token ; | |
| /** | |
| * Records an expression (non terminal symbol 'expr') and gives it an id | |
| * | |
| * @param expr expression we want to record and flag with the next id | |
| * @param loc the location of the expression | |
| */ | |
| static void setId( SEXP expr, yyltype loc){ | |
| record_( | |
| (loc).first_parsed, (loc).first_column, (loc).last_parsed, (loc).last_column, | |
| _current_token, (loc).id, 0 ) ; | |
| } | |
| # define YYLTYPE yyltype | |
| # define YYLLOC_DEFAULT(Current, Rhs, N) \ | |
| do { \ | |
| if (N){ \ | |
| (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ | |
| (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ | |
| (Current).first_byte = YYRHSLOC (Rhs, 1).first_byte; \ | |
| (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ | |
| (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ | |
| (Current).last_byte = YYRHSLOC (Rhs, N).last_byte; \ | |
| (Current).first_parsed = YYRHSLOC (Rhs, 1).first_parsed; \ | |
| (Current).last_parsed = YYRHSLOC (Rhs, N).last_parsed; \ | |
| incrementId( ) ; \ | |
| (Current).id = identifier ; \ | |
| _current_token = yyr1[yyn] ; \ | |
| yyltype childs[N] ; \ | |
| int ii = 0; \ | |
| for( ii=0; ii<N; ii++){ \ | |
| childs[ii] = YYRHSLOC (Rhs, (ii+1) ) ; \ | |
| } \ | |
| recordParents( identifier, childs, N) ; \ | |
| } else { \ | |
| (Current).first_line = (Current).last_line = \ | |
| YYRHSLOC (Rhs, 0).last_line; \ | |
| (Current).first_column = YYRHSLOC (Rhs, 0).last_column; \ | |
| (Current).last_column = (Current).first_column - 1; \ | |
| (Current).first_byte = YYRHSLOC (Rhs, 0).last_byte; \ | |
| (Current).last_byte = (Current).first_byte - 1; \ | |
| (Current).id = NA_INTEGER; \ | |
| } \ | |
| } while (0) | |
| # define YY_LOCATION_PRINT(Loc) \ | |
| fprintf ( stderr, "%d.%d.%d-%d.%d.%d (%d)", \ | |
| (Loc).first_line, (Loc).first_column, (Loc).first_byte, \ | |
| (Loc).last_line, (Loc).last_column, (Loc).last_byte, \ | |
| (Loc).id ) | |
| /* Useful defines so editors don't get confused ... */ | |
| #define LBRACE '{' | |
| #define RBRACE '}' | |
| /* Functions used in the parsing process */ | |
| static void CheckFormalArgs(SEXP, SEXP, YYLTYPE *); | |
| static SEXP FirstArg(SEXP, SEXP); | |
| static SEXP GrowList(SEXP, SEXP); | |
| static void IfPush(void); | |
| static int KeywordLookup(const char *); | |
| static SEXP NewList(void); | |
| static SEXP NextArg(SEXP, SEXP, SEXP); | |
| static SEXP TagArg(SEXP, SEXP, YYLTYPE *); | |
| static int processLineDirective(); | |
| /* These routines allocate constants */ | |
| static SEXP mkComplex(const char *); | |
| SEXP mkFalse(void); | |
| static SEXP mkFloat(const char *); | |
| static SEXP mkInt(const char *); | |
| static SEXP mkNA(void); | |
| SEXP mkTrue(void); | |
| /* Internal lexer / parser state variables */ | |
| static int EatLines = 0; | |
| static int GenerateCode = 0; | |
| static int EndOfFile = 0; | |
| static int xxgetc(); | |
| static int xxungetc(int); | |
| static int xxcharcount, xxcharsave; | |
| static int xxlinesave, xxbytesave, xxcolsave, xxparsesave; | |
| static SEXP SrcRefs; | |
| static SrcRefState ParseState; | |
| static PROTECT_INDEX srindex; | |
| #include <rlocale.h> | |
| #ifdef HAVE_LANGINFO_CODESET | |
| # include <langinfo.h> | |
| #endif | |
| static int mbcs_get_next(int c, wchar_t *wc) | |
| { | |
| int i, res, clen = 1; char s[9]; | |
| mbstate_t mb_st; | |
| s[0] = (char) c; | |
| /* This assumes (probably OK) that all MBCS embed ASCII as single-byte | |
| lead bytes, including control chars */ | |
| if((unsigned int) c < 0x80) { | |
| *wc = (wchar_t) c; | |
| return 1; | |
| } | |
| if(utf8locale) { | |
| clen = utf8clen((char) c); | |
| for(i = 1; i < clen; i++) { | |
| s[i] = (char) xxgetc(); | |
| if(s[i] == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno); | |
| } | |
| s[clen] ='\0'; /* x86 Solaris requires this */ | |
| res = (int) mbrtowc(wc, s, clen, NULL); | |
| if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno); | |
| } else { | |
| /* This is not necessarily correct for stateful MBCS */ | |
| while(clen <= MB_CUR_MAX) { | |
| mbs_init(&mb_st); | |
| res = (int) mbrtowc(wc, s, clen, &mb_st); | |
| if(res >= 0) break; | |
| if(res == -1) | |
| error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno); | |
| /* so res == -2 */ | |
| c = xxgetc(); | |
| if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno); | |
| s[clen++] = (char) c; | |
| } /* we've tried enough, so must be complete or invalid by now */ | |
| } | |
| for(i = clen - 1; i > 0; i--) xxungetc(s[i]); | |
| return clen; | |
| } | |
| /* Soon to be defunct entry points */ | |
| void R_SetInput(int); | |
| int R_fgetc(FILE*); | |
| static int colon ; | |
| /* Routines used to build the parse tree */ | |
| static SEXP xxnullformal(void); | |
| static SEXP xxfirstformal0(SEXP); | |
| static SEXP xxfirstformal1(SEXP, SEXP); | |
| static SEXP xxaddformal0(SEXP, SEXP, YYLTYPE *); | |
| static SEXP xxaddformal1(SEXP, SEXP, SEXP, YYLTYPE *); | |
| static SEXP xxexprlist0(); | |
| static SEXP xxexprlist1(SEXP, YYLTYPE *); | |
| static SEXP xxexprlist2(SEXP, SEXP, YYLTYPE *); | |
| static SEXP xxsub0(void); | |
| static SEXP xxsub1(SEXP, YYLTYPE *); | |
| static SEXP xxsymsub0(SEXP, YYLTYPE *); | |
| static SEXP xxsymsub1(SEXP, SEXP, YYLTYPE *); | |
| static SEXP xxnullsub0(YYLTYPE *); | |
| static SEXP xxnullsub1(SEXP, YYLTYPE *); | |
| static SEXP xxsublist1(SEXP); | |
| static SEXP xxsublist2(SEXP, SEXP); | |
| static SEXP xxcond(SEXP); | |
| static SEXP xxifcond(SEXP); | |
| static SEXP xxif(SEXP, SEXP, SEXP); | |
| static SEXP xxifelse(SEXP, SEXP, SEXP, SEXP); | |
| static SEXP xxforcond(SEXP, SEXP); | |
| static SEXP xxfor(SEXP, SEXP, SEXP); | |
| static SEXP xxwhile(SEXP, SEXP, SEXP); | |
| static SEXP xxrepeat(SEXP, SEXP); | |
| static SEXP xxnxtbrk(SEXP); | |
| static SEXP xxfuncall(SEXP, SEXP); | |
| static SEXP xxdefun(SEXP, SEXP, SEXP, YYLTYPE *); | |
| static SEXP xxunary(SEXP, SEXP); | |
| static SEXP xxbinary(SEXP, SEXP, SEXP); | |
| static SEXP xxparen(SEXP, SEXP); | |
| static SEXP xxsubscript(SEXP, SEXP, SEXP); | |
| static SEXP xxexprlist(SEXP, YYLTYPE *, SEXP); | |
| static int xxvalue(SEXP, int, YYLTYPE *); | |
| #define YYSTYPE SEXP | |
| %} | |
| %token-table | |
| %token END_OF_INPUT ERROR | |
| %token STR_CONST NUM_CONST NULL_CONST SYMBOL FUNCTION | |
| %token INCOMPLETE_STRING | |
| %token LEFT_ASSIGN EQ_ASSIGN RIGHT_ASSIGN LBB | |
| %token FOR IN IF ELSE WHILE NEXT BREAK REPEAT | |
| %token GT GE LT LE EQ NE AND OR AND2 OR2 | |
| %token NS_GET NS_GET_INT | |
| %token COMMENT LINE_DIRECTIVE | |
| %token SYMBOL_FORMALS | |
| %token EQ_FORMALS | |
| %token EQ_SUB SYMBOL_SUB | |
| %token SYMBOL_FUNCTION_CALL | |
| %token SYMBOL_PACKAGE | |
| %token COLON_ASSIGN | |
| %token SLOT | |
| /* This is the precedence table, low to high */ | |
| %left '?' | |
| %left LOW WHILE FOR REPEAT | |
| %right IF | |
| %left ELSE | |
| %right LEFT_ASSIGN | |
| %right EQ_ASSIGN | |
| %left RIGHT_ASSIGN | |
| %left '~' TILDE | |
| %left OR OR2 | |
| %left AND AND2 | |
| %left UNOT NOT | |
| %nonassoc GT GE LT LE EQ NE | |
| %left '+' '-' | |
| %left '*' '/' | |
| %left SPECIAL | |
| %left ':' | |
| %left UMINUS UPLUS | |
| %right '^' | |
| %left '$' '@' | |
| %left NS_GET NS_GET_INT | |
| %nonassoc '(' '[' LBB | |
| %% | |
| prog : END_OF_INPUT { return 0; } | |
| | '\n' { return xxvalue(NULL,2,NULL); } | |
| | expr_or_assign '\n' { return xxvalue($1,3,&@1); } | |
| | expr_or_assign ';' { return xxvalue($1,4,&@1); } | |
| | error { YYABORT; } | |
| ; | |
| expr_or_assign : expr { $$ = $1; } | |
| | equal_assign { $$ = $1; } | |
| ; | |
| equal_assign : expr EQ_ASSIGN expr_or_assign { $$ = xxbinary($2,$1,$3); } | |
| ; | |
| expr : NUM_CONST { $$ = $1; setId( $$, @$); } | |
| | STR_CONST { $$ = $1; setId( $$, @$); } | |
| | NULL_CONST { $$ = $1; setId( $$, @$); } | |
| | SYMBOL { $$ = $1; setId( $$, @$); } | |
| | '{' exprlist '}' { $$ = xxexprlist($1,&@1,$2); setId( $$, @$); } | |
| | '(' expr_or_assign ')' { $$ = xxparen($1,$2); setId( $$, @$); } | |
| | '-' expr %prec UMINUS { $$ = xxunary($1,$2); setId( $$, @$); } | |
| | '+' expr %prec UMINUS { $$ = xxunary($1,$2); setId( $$, @$); } | |
| | '!' expr %prec UNOT { $$ = xxunary($1,$2); setId( $$, @$); } | |
| | '~' expr %prec TILDE { $$ = xxunary($1,$2); setId( $$, @$); } | |
| | '?' expr { $$ = xxunary($1,$2); setId( $$, @$); } | |
| | expr ':' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '+' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '-' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '*' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '/' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '^' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr SPECIAL expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '%' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '~' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '?' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr LT expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr LE expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr EQ expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr NE expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr GE expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr GT expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr AND expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr OR expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr AND2 expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr OR2 expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr LEFT_ASSIGN expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr RIGHT_ASSIGN expr { $$ = xxbinary($2,$3,$1); setId( $$, @$); } | |
| | FUNCTION '(' formlist ')' cr expr_or_assign %prec LOW | |
| { $$ = xxdefun($1,$3,$6,&@$); setId( $$, @$); } | |
| | expr '(' sublist ')' { $$ = xxfuncall($1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_FUNCTION_CALL ) ; } | |
| | IF ifcond expr_or_assign { $$ = xxif($1,$2,$3); setId( $$, @$); } | |
| | IF ifcond expr_or_assign ELSE expr_or_assign { $$ = xxifelse($1,$2,$3,$5); setId( $$, @$); } | |
| | FOR forcond expr_or_assign %prec FOR { $$ = xxfor($1,$2,$3); setId( $$, @$); } | |
| | WHILE cond expr_or_assign { $$ = xxwhile($1,$2,$3); setId( $$, @$); } | |
| | REPEAT expr_or_assign { $$ = xxrepeat($1,$2); setId( $$, @$); } | |
| | expr LBB sublist ']' ']' { $$ = xxsubscript($1,$2,$3); setId( $$, @$); } | |
| | expr '[' sublist ']' { $$ = xxsubscript($1,$2,$3); setId( $$, @$); } | |
| | SYMBOL NS_GET SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ; } | |
| | SYMBOL NS_GET STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ; } | |
| | STR_CONST NS_GET SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | STR_CONST NS_GET STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | SYMBOL NS_GET_INT SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ;} | |
| | SYMBOL NS_GET_INT STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ;} | |
| | STR_CONST NS_GET_INT SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | STR_CONST NS_GET_INT STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '$' SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '$' STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | expr '@' SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@3, SLOT ) ; } | |
| | expr '@' STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); } | |
| | NEXT { $$ = xxnxtbrk($1); setId( $$, @$); } | |
| | BREAK { $$ = xxnxtbrk($1); setId( $$, @$); } | |
| ; | |
| cond : '(' expr ')' { $$ = xxcond($2); } | |
| ; | |
| ifcond : '(' expr ')' { $$ = xxifcond($2); } | |
| ; | |
| forcond : '(' SYMBOL IN expr ')' { $$ = xxforcond($2,$4); setId( $$, @$); } | |
| ; | |
| exprlist: { $$ = xxexprlist0(); setId( $$, @$); } | |
| | expr_or_assign { $$ = xxexprlist1($1, &@1); } | |
| | exprlist ';' expr_or_assign { $$ = xxexprlist2($1, $3, &@3); } | |
| | exprlist ';' { $$ = $1; setId( $$, @$); } | |
| | exprlist '\n' expr_or_assign { $$ = xxexprlist2($1, $3, &@3); } | |
| | exprlist '\n' { $$ = $1;} | |
| ; | |
| sublist : sub { $$ = xxsublist1($1); } | |
| | sublist cr ',' sub { $$ = xxsublist2($1,$4); } | |
| ; | |
| sub : { $$ = xxsub0(); } | |
| | expr { $$ = xxsub1($1, &@1); } | |
| | SYMBOL EQ_ASSIGN { $$ = xxsymsub0($1, &@1); modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; } | |
| | SYMBOL EQ_ASSIGN expr { $$ = xxsymsub1($1,$3, &@1); modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; } | |
| | STR_CONST EQ_ASSIGN { $$ = xxsymsub0($1, &@1); modif_token( &@2, EQ_SUB ) ; } | |
| | STR_CONST EQ_ASSIGN expr { $$ = xxsymsub1($1,$3, &@1); modif_token( &@2, EQ_SUB ) ; } | |
| | NULL_CONST EQ_ASSIGN { $$ = xxnullsub0(&@1); modif_token( &@2, EQ_SUB ) ; } | |
| | NULL_CONST EQ_ASSIGN expr { $$ = xxnullsub1($3, &@1); modif_token( &@2, EQ_SUB ) ; } | |
| ; | |
| formlist: { $$ = xxnullformal(); } | |
| | SYMBOL { $$ = xxfirstformal0($1); modif_token( &@1, SYMBOL_FORMALS ) ; } | |
| | SYMBOL EQ_ASSIGN expr { $$ = xxfirstformal1($1,$3); modif_token( &@1, SYMBOL_FORMALS ) ; modif_token( &@2, EQ_FORMALS ) ; } | |
| | formlist ',' SYMBOL { $$ = xxaddformal0($1,$3, &@3); modif_token( &@3, SYMBOL_FORMALS ) ; } | |
| | formlist ',' SYMBOL EQ_ASSIGN expr | |
| { $$ = xxaddformal1($1,$3,$5,&@3); modif_token( &@3, SYMBOL_FORMALS ) ; modif_token( &@4, EQ_FORMALS ) ;} | |
| ; | |
| cr : { EatLines = 1; } | |
| ; | |
| %% | |
| /*----------------------------------------------------------------------------*/ | |
| static int (*ptr_getc)(void); | |
| /* Private pushback, since file ungetc only guarantees one byte. | |
| We need up to one MBCS-worth */ | |
| #define DECLARE_YYTEXT_BUFP(bp) char *bp = yytext ; | |
| #define YYTEXT_PUSH(c, bp) do { \ | |
| if ((bp) - yytext >= sizeof(yytext) - 1){ \ | |
| error(_("input buffer overflow at line %d"), ParseState.xxlineno); \ | |
| } \ | |
| *(bp)++ = ((char)c); \ | |
| } while(0) ; | |
| #define PUSHBACK_BUFSIZE 16 | |
| static int pushback[PUSHBACK_BUFSIZE]; | |
| static unsigned int npush = 0; | |
| static int prevpos = 0; | |
| static int prevlines[PUSHBACK_BUFSIZE]; | |
| static int prevcols[PUSHBACK_BUFSIZE]; | |
| static int prevbytes[PUSHBACK_BUFSIZE]; | |
| static int prevparse[PUSHBACK_BUFSIZE]; | |
| static int xxgetc(void) | |
| { | |
| int c, oldpos; | |
| if(npush) c = pushback[--npush]; else c = ptr_getc(); | |
| oldpos = prevpos; | |
| prevpos = (prevpos + 1) % PUSHBACK_BUFSIZE; | |
| prevbytes[prevpos] = ParseState.xxbyteno; | |
| prevlines[prevpos] = ParseState.xxlineno; | |
| prevparse[prevpos] = ParseState.xxparseno; | |
| /* We only advance the column for the 1st byte in UTF-8, so handle later bytes specially */ | |
| if (0x80 <= (unsigned char)c && (unsigned char)c <= 0xBF && known_to_be_utf8) { | |
| ParseState.xxcolno--; | |
| prevcols[prevpos] = prevcols[oldpos]; | |
| } else | |
| prevcols[prevpos] = ParseState.xxcolno; | |
| if (c == EOF) { | |
| EndOfFile = 1; | |
| return R_EOF; | |
| } | |
| R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE; | |
| R_ParseContext[R_ParseContextLast] = (char) c; | |
| if (c == '\n') { | |
| ParseState.xxlineno += 1; | |
| ParseState.xxcolno = 0; | |
| ParseState.xxbyteno = 0; | |
| ParseState.xxparseno += 1; | |
| } else { | |
| ParseState.xxcolno++; | |
| ParseState.xxbyteno++; | |
| } | |
| if (c == '\t') ParseState.xxcolno = ((ParseState.xxcolno + 7) & ~7); | |
| R_ParseContextLine = ParseState.xxlineno; | |
| xxcharcount++; | |
| return c; | |
| } | |
| static int xxungetc(int c) | |
| { | |
| /* this assumes that c was the result of xxgetc; if not, some edits will be needed */ | |
| ParseState.xxlineno = prevlines[prevpos]; | |
| ParseState.xxbyteno = prevbytes[prevpos]; | |
| ParseState.xxcolno = prevcols[prevpos]; | |
| ParseState.xxparseno = prevparse[prevpos]; | |
| prevpos = (prevpos + PUSHBACK_BUFSIZE - 1) % PUSHBACK_BUFSIZE; | |
| R_ParseContextLine = ParseState.xxlineno; | |
| xxcharcount--; | |
| R_ParseContext[R_ParseContextLast] = '\0'; | |
| /* precaution as to how % is implemented for < 0 numbers */ | |
| R_ParseContextLast = (R_ParseContextLast + PARSE_CONTEXT_SIZE -1) % PARSE_CONTEXT_SIZE; | |
| if(npush >= PUSHBACK_BUFSIZE) return EOF; | |
| pushback[npush++] = c; | |
| return c; | |
| } | |
| /* | |
| * Increments/inits the token/grouping counter | |
| */ | |
| static void incrementId(void){ | |
| identifier++; | |
| } | |
| static void initId(void){ | |
| identifier = 0 ; | |
| } | |
| static SEXP makeSrcref(YYLTYPE *lloc, SEXP srcfile) | |
| { | |
| SEXP val; | |
| PROTECT(val = allocVector(INTSXP, 8)); | |
| INTEGER(val)[0] = lloc->first_line; | |
| INTEGER(val)[1] = lloc->first_byte; | |
| INTEGER(val)[2] = lloc->last_line; | |
| INTEGER(val)[3] = lloc->last_byte; | |
| INTEGER(val)[4] = lloc->first_column; | |
| INTEGER(val)[5] = lloc->last_column; | |
| INTEGER(val)[6] = lloc->first_parsed; | |
| INTEGER(val)[7] = lloc->last_parsed; | |
| setAttrib(val, R_SrcfileSymbol, srcfile); | |
| setAttrib(val, R_ClassSymbol, mkString("srcref")); | |
| UNPROTECT(1); | |
| return val; | |
| } | |
| static SEXP attachSrcrefs(SEXP val) | |
| { | |
| SEXP srval; | |
| PROTECT(val); | |
| PROTECT(srval = PairToVectorList(SrcRefs)); | |
| setAttrib(val, R_SrcrefSymbol, srval); | |
| setAttrib(val, R_SrcfileSymbol, ParseState.SrcFile); | |
| { | |
| YYLTYPE wholeFile; | |
| wholeFile.first_line = 1; | |
| wholeFile.first_byte = 0; | |
| wholeFile.first_column = 0; | |
| wholeFile.last_line = ParseState.xxlineno; | |
| wholeFile.last_byte = ParseState.xxbyteno; | |
| wholeFile.last_column = ParseState.xxcolno; | |
| wholeFile.first_parsed = 1; | |
| wholeFile.last_parsed = ParseState.xxparseno; | |
| setAttrib(val, R_WholeSrcrefSymbol, makeSrcref(&wholeFile, ParseState.SrcFile)); | |
| } | |
| REPROTECT(SrcRefs = R_NilValue, srindex); | |
| ParseState.didAttach = TRUE; | |
| UNPROTECT(2); | |
| return val; | |
| } | |
| static int xxvalue(SEXP v, int k, YYLTYPE *lloc) | |
| { | |
| if (k > 2) { | |
| if (ParseState.keepSrcRefs) | |
| REPROTECT(SrcRefs = listAppend(SrcRefs, list1(makeSrcref(lloc, ParseState.SrcFile))), srindex); | |
| UNPROTECT_PTR(v); | |
| } | |
| R_CurrentExpr = v; | |
| return k; | |
| } | |
| static SEXP xxnullformal() | |
| { | |
| SEXP ans; | |
| PROTECT(ans = R_NilValue); | |
| return ans; | |
| } | |
| static SEXP xxfirstformal0(SEXP sym) | |
| { | |
| SEXP ans; | |
| UNPROTECT_PTR(sym); | |
| if (GenerateCode) | |
| PROTECT(ans = FirstArg(R_MissingArg, sym)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| return ans; | |
| } | |
| static SEXP xxfirstformal1(SEXP sym, SEXP expr) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = FirstArg(expr, sym)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| UNPROTECT_PTR(sym); | |
| return ans; | |
| } | |
| static SEXP xxaddformal0(SEXP formlist, SEXP sym, YYLTYPE *lloc) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) { | |
| CheckFormalArgs(formlist, sym, lloc); | |
| PROTECT(ans = NextArg(formlist, R_MissingArg, sym)); | |
| } | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(sym); | |
| UNPROTECT_PTR(formlist); | |
| return ans; | |
| } | |
| static SEXP xxaddformal1(SEXP formlist, SEXP sym, SEXP expr, YYLTYPE *lloc) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) { | |
| CheckFormalArgs(formlist, sym, lloc); | |
| PROTECT(ans = NextArg(formlist, expr, sym)); | |
| } | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| UNPROTECT_PTR(sym); | |
| UNPROTECT_PTR(formlist); | |
| return ans; | |
| } | |
| static SEXP xxexprlist0(void) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) { | |
| PROTECT(ans = NewList()); | |
| if (ParseState.keepSrcRefs) { | |
| setAttrib(ans, R_SrcrefSymbol, SrcRefs); | |
| REPROTECT(SrcRefs = R_NilValue, srindex); | |
| } | |
| } | |
| else | |
| PROTECT(ans = R_NilValue); | |
| return ans; | |
| } | |
| static SEXP xxexprlist1(SEXP expr, YYLTYPE *lloc) | |
| { | |
| SEXP ans,tmp; | |
| if (GenerateCode) { | |
| PROTECT(tmp = NewList()); | |
| if (ParseState.keepSrcRefs) { | |
| setAttrib(tmp, R_SrcrefSymbol, SrcRefs); | |
| REPROTECT(SrcRefs = list1(makeSrcref(lloc, ParseState.SrcFile)), srindex); | |
| } | |
| PROTECT(ans = GrowList(tmp, expr)); | |
| UNPROTECT_PTR(tmp); | |
| } | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| return ans; | |
| } | |
| static SEXP xxexprlist2(SEXP exprlist, SEXP expr, YYLTYPE *lloc) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) { | |
| if (ParseState.keepSrcRefs) | |
| REPROTECT(SrcRefs = listAppend(SrcRefs, list1(makeSrcref(lloc, ParseState.SrcFile))), srindex); | |
| PROTECT(ans = GrowList(exprlist, expr)); | |
| } | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| UNPROTECT_PTR(exprlist); | |
| return ans; | |
| } | |
| static SEXP xxsub0(void) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = lang2(R_MissingArg,R_NilValue)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| return ans; | |
| } | |
| static SEXP xxsub1(SEXP expr, YYLTYPE *lloc) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = TagArg(expr, R_NilValue, lloc)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| return ans; | |
| } | |
| static SEXP xxsymsub0(SEXP sym, YYLTYPE *lloc) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = TagArg(R_MissingArg, sym, lloc)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(sym); | |
| return ans; | |
| } | |
| static SEXP xxsymsub1(SEXP sym, SEXP expr, YYLTYPE *lloc) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = TagArg(expr, sym, lloc)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| UNPROTECT_PTR(sym); | |
| return ans; | |
| } | |
| static SEXP xxnullsub0(YYLTYPE *lloc) | |
| { | |
| SEXP ans; | |
| UNPROTECT_PTR(R_NilValue); | |
| if (GenerateCode) | |
| PROTECT(ans = TagArg(R_MissingArg, install("NULL"), lloc)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| return ans; | |
| } | |
| static SEXP xxnullsub1(SEXP expr, YYLTYPE *lloc) | |
| { | |
| SEXP ans = install("NULL"); | |
| UNPROTECT_PTR(R_NilValue); | |
| if (GenerateCode) | |
| PROTECT(ans = TagArg(expr, ans, lloc)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| return ans; | |
| } | |
| static SEXP xxsublist1(SEXP sub) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = FirstArg(CAR(sub),CADR(sub))); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(sub); | |
| return ans; | |
| } | |
| static SEXP xxsublist2(SEXP sublist, SEXP sub) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = NextArg(sublist, CAR(sub), CADR(sub))); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(sub); | |
| UNPROTECT_PTR(sublist); | |
| return ans; | |
| } | |
| static SEXP xxcond(SEXP expr) | |
| { | |
| EatLines = 1; | |
| return expr; | |
| } | |
| static SEXP xxifcond(SEXP expr) | |
| { | |
| EatLines = 1; | |
| return expr; | |
| } | |
| static SEXP xxif(SEXP ifsym, SEXP cond, SEXP expr) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = lang3(ifsym, cond, expr)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| UNPROTECT_PTR(cond); | |
| return ans; | |
| } | |
| static SEXP xxifelse(SEXP ifsym, SEXP cond, SEXP ifexpr, SEXP elseexpr) | |
| { | |
| SEXP ans; | |
| if( GenerateCode) | |
| PROTECT(ans = lang4(ifsym, cond, ifexpr, elseexpr)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(elseexpr); | |
| UNPROTECT_PTR(ifexpr); | |
| UNPROTECT_PTR(cond); | |
| return ans; | |
| } | |
| static SEXP xxforcond(SEXP sym, SEXP expr) | |
| { | |
| SEXP ans; | |
| EatLines = 1; | |
| if (GenerateCode) | |
| PROTECT(ans = LCONS(sym, expr)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(expr); | |
| UNPROTECT_PTR(sym); | |
| return ans; | |
| } | |
| static SEXP xxfor(SEXP forsym, SEXP forcond, SEXP body) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = lang4(forsym, CAR(forcond), CDR(forcond), body)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(body); | |
| UNPROTECT_PTR(forcond); | |
| return ans; | |
| } | |
| static SEXP xxwhile(SEXP whilesym, SEXP cond, SEXP body) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = lang3(whilesym, cond, body)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(body); | |
| UNPROTECT_PTR(cond); | |
| return ans; | |
| } | |
| static SEXP xxrepeat(SEXP repeatsym, SEXP body) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = lang2(repeatsym, body)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(body); | |
| return ans; | |
| } | |
| static SEXP xxnxtbrk(SEXP keyword) | |
| { | |
| if (GenerateCode) | |
| PROTECT(keyword = lang1(keyword)); | |
| else | |
| PROTECT(keyword = R_NilValue); | |
| return keyword; | |
| } | |
| static SEXP xxfuncall(SEXP expr, SEXP args) | |
| { | |
| SEXP ans, sav_expr = expr; | |
| if(GenerateCode) { | |
| if (isString(expr)) | |
| expr = installChar(STRING_ELT(expr, 0)); | |
| PROTECT(expr); | |
| if (length(CDR(args)) == 1 && CADR(args) == R_MissingArg && TAG(CDR(args)) == R_NilValue ) | |
| ans = lang1(expr); | |
| else | |
| ans = LCONS(expr, CDR(args)); | |
| UNPROTECT(1); | |
| PROTECT(ans); | |
| } | |
| else { | |
| PROTECT(ans = R_NilValue); | |
| } | |
| UNPROTECT_PTR(args); | |
| UNPROTECT_PTR(sav_expr); | |
| return ans; | |
| } | |
| static SEXP mkString2(const char *s, size_t len, Rboolean escaped) | |
| { | |
| SEXP t; | |
| cetype_t enc = CE_NATIVE; | |
| if(known_to_be_latin1) enc= CE_LATIN1; | |
| else if(!escaped && known_to_be_utf8) enc = CE_UTF8; | |
| PROTECT(t = allocVector(STRSXP, 1)); | |
| SET_STRING_ELT(t, 0, mkCharLenCE(s, (int) len, enc)); | |
| UNPROTECT(1); | |
| return t; | |
| } | |
| static SEXP xxdefun(SEXP fname, SEXP formals, SEXP body, YYLTYPE *lloc) | |
| { | |
| SEXP ans, srcref; | |
| if (GenerateCode) { | |
| if (ParseState.keepSrcRefs) { | |
| srcref = makeSrcref(lloc, ParseState.SrcFile); | |
| ParseState.didAttach = TRUE; | |
| } else | |
| srcref = R_NilValue; | |
| PROTECT(ans = lang4(fname, CDR(formals), body, srcref)); | |
| } else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(body); | |
| UNPROTECT_PTR(formals); | |
| return ans; | |
| } | |
| static SEXP xxunary(SEXP op, SEXP arg) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = lang2(op, arg)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(arg); | |
| return ans; | |
| } | |
| static SEXP xxbinary(SEXP n1, SEXP n2, SEXP n3) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = lang3(n1, n2, n3)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(n2); | |
| UNPROTECT_PTR(n3); | |
| return ans; | |
| } | |
| static SEXP xxparen(SEXP n1, SEXP n2) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = lang2(n1, n2)); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(n2); | |
| return ans; | |
| } | |
| /* This should probably use CONS rather than LCONS, but | |
| it shouldn't matter and we would rather not meddle | |
| See PR#7055 */ | |
| static SEXP xxsubscript(SEXP a1, SEXP a2, SEXP a3) | |
| { | |
| SEXP ans; | |
| if (GenerateCode) | |
| PROTECT(ans = LCONS(a2, CONS(a1, CDR(a3)))); | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(a3); | |
| UNPROTECT_PTR(a1); | |
| return ans; | |
| } | |
| static SEXP xxexprlist(SEXP a1, YYLTYPE *lloc, SEXP a2) | |
| { | |
| SEXP ans; | |
| SEXP prevSrcrefs; | |
| EatLines = 0; | |
| if (GenerateCode) { | |
| SET_TYPEOF(a2, LANGSXP); | |
| SETCAR(a2, a1); | |
| if (ParseState.keepSrcRefs) { | |
| PROTECT(prevSrcrefs = getAttrib(a2, R_SrcrefSymbol)); | |
| REPROTECT(SrcRefs = CONS(makeSrcref(lloc, ParseState.SrcFile), SrcRefs), srindex); | |
| PROTECT(ans = attachSrcrefs(a2)); | |
| REPROTECT(SrcRefs = prevSrcrefs, srindex); | |
| /* SrcRefs got NAMED by being an attribute... */ | |
| SET_NAMED(SrcRefs, 0); | |
| UNPROTECT_PTR(prevSrcrefs); | |
| } | |
| else | |
| PROTECT(ans = a2); | |
| } | |
| else | |
| PROTECT(ans = R_NilValue); | |
| UNPROTECT_PTR(a2); | |
| return ans; | |
| } | |
| /*--------------------------------------------------------------------------*/ | |
| static SEXP TagArg(SEXP arg, SEXP tag, YYLTYPE *lloc) | |
| { | |
| switch (TYPEOF(tag)) { | |
| case STRSXP: | |
| tag = installTrChar(STRING_ELT(tag, 0)); | |
| case NILSXP: | |
| case SYMSXP: | |
| return lang2(arg, tag); | |
| default: | |
| error(_("incorrect tag type at line %d"), lloc->first_line); return R_NilValue/* -Wall */; | |
| } | |
| } | |
| /* Stretchy List Structures : Lists are created and grown using a special */ | |
| /* dotted pair. The CAR of the list points to the last cons-cell in the */ | |
| /* list and the CDR points to the first. The list can be extracted from */ | |
| /* the pair by taking its CDR, while the CAR gives fast access to the end */ | |
| /* of the list. */ | |
| /* Create a stretchy-list dotted pair */ | |
| static SEXP NewList(void) | |
| { | |
| SEXP s = CONS(R_NilValue, R_NilValue); | |
| SETCAR(s, s); | |
| return s; | |
| } | |
| /* Add a new element at the end of a stretchy list */ | |
| static SEXP GrowList(SEXP l, SEXP s) | |
| { | |
| SEXP tmp; | |
| PROTECT(s); | |
| tmp = CONS(s, R_NilValue); | |
| UNPROTECT(1); | |
| SETCDR(CAR(l), tmp); | |
| SETCAR(l, tmp); | |
| return l; | |
| } | |
| static SEXP FirstArg(SEXP s, SEXP tag) | |
| { | |
| SEXP tmp; | |
| PROTECT(s); | |
| PROTECT(tag); | |
| PROTECT(tmp = NewList()); | |
| tmp = GrowList(tmp, s); | |
| SET_TAG(CAR(tmp), tag); | |
| UNPROTECT(3); | |
| return tmp; | |
| } | |
| static SEXP NextArg(SEXP l, SEXP s, SEXP tag) | |
| { | |
| PROTECT(tag); | |
| PROTECT(l); | |
| l = GrowList(l, s); | |
| SET_TAG(CAR(l), tag); | |
| UNPROTECT(2); | |
| return l; | |
| } | |
| /*--------------------------------------------------------------------------*/ | |
| /* | |
| * Parsing Entry Points: | |
| * | |
| * The Following entry points provide language parsing facilities. | |
| * Note that there are separate entry points for parsing IoBuffers | |
| * (i.e. interactve use), files and R character strings. | |
| * | |
| * The entry points provide the same functionality, they just | |
| * set things up in slightly different ways. | |
| * | |
| * The following routines parse a single expression: | |
| * | |
| * | |
| * SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status, Rboolean first) | |
| * (used for R_ReplFile in main.c) | |
| * | |
| * SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status, Rboolean first) | |
| * (used for ReplIteration and R_ReplDLLdo1 in main.c) | |
| * | |
| * The success of the parse is indicated as folllows: | |
| * | |
| * | |
| * status = PARSE_NULL - there was no statement to parse | |
| * PARSE_OK - complete statement | |
| * PARSE_INCOMPLETE - incomplete statement | |
| * PARSE_ERROR - syntax error | |
| * PARSE_EOF - end of file | |
| * | |
| * | |
| * The following routines parse several expressions and return | |
| * their values in a single expression vector. | |
| * | |
| * SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile) | |
| * (used for do_edit in file edit.c) | |
| * | |
| * SEXP R_ParseVector(SEXP *text, int n, ParseStatus *status, SEXP srcfile) | |
| * (public, and used by parse(text=) in file source.c) | |
| * | |
| * SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt, SEXP srcfile) | |
| * (used by parse(file="") in file source.c) | |
| * | |
| * SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile) | |
| * (used by parse(file=) in file source.c) | |
| * | |
| * Here, status is 1 for a successful parse and 0 if parsing failed | |
| * for some reason. | |
| */ | |
| #define CONTEXTSTACK_SIZE 50 | |
| static int SavedToken; | |
| static SEXP SavedLval; | |
| static char contextstack[CONTEXTSTACK_SIZE], *contextp; | |
| static void PutSrcRefState(SrcRefState *state); | |
| static void UseSrcRefState(SrcRefState *state); | |
| /* This is called once when R starts up. */ | |
| attribute_hidden | |
| void InitParser(void) | |
| { | |
| ParseState.data = NULL; | |
| ParseState.ids = NULL; | |
| } | |
| /* This is called each time a new parse sequence begins */ | |
| attribute_hidden | |
| void R_InitSrcRefState(void) | |
| { | |
| if (busy) { | |
| SrcRefState *prev = malloc(sizeof(SrcRefState)); | |
| PutSrcRefState(prev); | |
| ParseState.prevState = prev; | |
| ParseState.data = NULL; | |
| ParseState.ids = NULL; | |
| } else | |
| ParseState.prevState = NULL; | |
| ParseState.keepSrcRefs = FALSE; | |
| ParseState.didAttach = FALSE; | |
| PROTECT_WITH_INDEX(ParseState.SrcFile = R_NilValue, &(ParseState.SrcFileProt)); | |
| PROTECT_WITH_INDEX(ParseState.Original = R_NilValue, &(ParseState.OriginalProt)); | |
| ParseState.data_count = 0; | |
| ParseState.xxlineno = 1; | |
| ParseState.xxcolno = 0; | |
| ParseState.xxbyteno = 0; | |
| ParseState.xxparseno = 1; | |
| busy = TRUE; | |
| } | |
| attribute_hidden | |
| void R_FinalizeSrcRefState(void) | |
| { | |
| UNPROTECT_PTR(ParseState.SrcFile); | |
| UNPROTECT_PTR(ParseState.Original); | |
| /* Free the data, text and ids if we are restoring a previous state, | |
| or if they have grown too large */ | |
| if (ParseState.data) { | |
| if (ParseState.prevState || DATA_COUNT > MAX_DATA_COUNT) { | |
| R_ReleaseObject(ParseState.data); | |
| R_ReleaseObject(ParseState.text); | |
| ParseState.data = NULL; | |
| } else /* Remove all the strings from the text vector so they don't take up memory, and clean up data */ | |
| for (int i=0; i < ParseState.data_count; i++) { | |
| SET_STRING_ELT(ParseState.text, i, R_BlankString); | |
| _PARENT(i) = 0; | |
| } | |
| } | |
| if (ParseState.ids) { | |
| if (ParseState.prevState || ID_COUNT > MAX_DATA_COUNT) { | |
| R_ReleaseObject(ParseState.ids); | |
| ParseState.ids = NULL; | |
| } else {/* Remove the parent records */ | |
| if (identifier > ID_COUNT) identifier = ID_COUNT; | |
| for (int i=0; i < identifier; i++) | |
| ID_PARENT(i) = 0; | |
| } | |
| } | |
| ParseState.SrcFileProt = NA_INTEGER; | |
| ParseState.OriginalProt = NA_INTEGER; | |
| ParseState.data_count = NA_INTEGER; | |
| if (ParseState.prevState) { | |
| SrcRefState *prev = ParseState.prevState; | |
| UseSrcRefState(prev); | |
| free(prev); | |
| } else | |
| busy = FALSE; | |
| } | |
| static void UseSrcRefState(SrcRefState *state) | |
| { | |
| ParseState.keepSrcRefs = state->keepSrcRefs; | |
| ParseState.SrcFile = state->SrcFile; | |
| ParseState.Original = state->Original; | |
| ParseState.SrcFileProt = state->SrcFileProt; | |
| ParseState.OriginalProt = state->OriginalProt; | |
| ParseState.data = state->data; | |
| ParseState.text = state->text; | |
| ParseState.ids = state->ids; | |
| ParseState.data_count = state->data_count; | |
| ParseState.xxlineno = state->xxlineno; | |
| ParseState.xxcolno = state->xxcolno; | |
| ParseState.xxbyteno = state->xxbyteno; | |
| ParseState.xxparseno = state->xxparseno; | |
| ParseState.prevState = state->prevState; | |
| busy = TRUE; | |
| } | |
| static void PutSrcRefState(SrcRefState *state) | |
| { | |
| if (state) { | |
| state->keepSrcRefs = ParseState.keepSrcRefs; | |
| state->SrcFile = ParseState.SrcFile; | |
| state->Original = ParseState.Original; | |
| state->SrcFileProt = ParseState.SrcFileProt; | |
| state->OriginalProt = ParseState.OriginalProt; | |
| state->data = ParseState.data; | |
| state->text = ParseState.text; | |
| state->ids = ParseState.ids; | |
| state->data_count = ParseState.data_count; | |
| state->xxlineno = ParseState.xxlineno; | |
| state->xxcolno = ParseState.xxcolno; | |
| state->xxbyteno = ParseState.xxbyteno; | |
| state->xxparseno = ParseState.xxparseno; | |
| state->prevState = ParseState.prevState; | |
| } else | |
| R_FinalizeSrcRefState(); | |
| } | |
| static void ParseInit(void) | |
| { | |
| contextp = contextstack; | |
| *contextp = ' '; | |
| SavedToken = 0; | |
| SavedLval = R_NilValue; | |
| EatLines = 0; | |
| EndOfFile = 0; | |
| xxcharcount = 0; | |
| npush = 0; | |
| } | |
| static void initData(void) | |
| { | |
| ParseState.data_count = 0 ; | |
| for (int i = 1; i <= ID_COUNT; i++) | |
| ID_ID( i ) = 0; | |
| } | |
| static void ParseContextInit(void) | |
| { | |
| R_ParseContextLast = 0; | |
| R_ParseContext[0] = '\0'; | |
| colon = 0 ; | |
| /* starts the identifier counter*/ | |
| initId(); | |
| initData(); | |
| } | |
| static SEXP R_Parse1(ParseStatus *status) | |
| { | |
| switch(yyparse()) { | |
| case 0: /* End of file */ | |
| *status = PARSE_EOF; | |
| if (EndOfFile == 2) *status = PARSE_INCOMPLETE; | |
| break; | |
| case 1: /* Syntax error / incomplete */ | |
| *status = PARSE_ERROR; | |
| if (EndOfFile) *status = PARSE_INCOMPLETE; | |
| break; | |
| case 2: /* Empty Line */ | |
| *status = PARSE_NULL; | |
| break; | |
| case 3: /* Valid expr '\n' terminated */ | |
| case 4: /* Valid expr ';' terminated */ | |
| *status = PARSE_OK; | |
| break; | |
| } | |
| return R_CurrentExpr; | |
| } | |
| static FILE *fp_parse; | |
| static int file_getc(void) | |
| { | |
| return R_fgetc(fp_parse); | |
| } | |
| /* used in main.c */ | |
| attribute_hidden | |
| SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status) | |
| { | |
| int savestack; | |
| savestack = R_PPStackTop; | |
| ParseInit(); | |
| ParseContextInit(); | |
| GenerateCode = gencode; | |
| fp_parse = fp; | |
| ptr_getc = file_getc; | |
| R_Parse1(status); | |
| R_PPStackTop = savestack; | |
| return R_CurrentExpr; | |
| } | |
| static IoBuffer *iob; | |
| static int buffer_getc(void) | |
| { | |
| return R_IoBufferGetc(iob); | |
| } | |
| /* Used only in main.c */ | |
| attribute_hidden | |
| SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status) | |
| { | |
| Rboolean keepSource = FALSE; | |
| int savestack; | |
| R_InitSrcRefState(); | |
| savestack = R_PPStackTop; | |
| if (gencode) { | |
| keepSource = asLogical(GetOption1(install("keep.source"))); | |
| if (keepSource) { | |
| ParseState.keepSrcRefs = TRUE; | |
| REPROTECT(ParseState.SrcFile = NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv), ParseState.SrcFileProt); | |
| REPROTECT(ParseState.Original = ParseState.SrcFile, ParseState.OriginalProt); | |
| PROTECT_WITH_INDEX(SrcRefs = R_NilValue, &srindex); | |
| } | |
| } | |
| ParseInit(); | |
| ParseContextInit(); | |
| GenerateCode = gencode; | |
| iob = buffer; | |
| ptr_getc = buffer_getc; | |
| R_Parse1(status); | |
| if (gencode && keepSource) { | |
| if (ParseState.didAttach) { | |
| int buflen = R_IoBufferReadOffset(buffer); | |
| char buf[buflen+1]; | |
| SEXP class; | |
| R_IoBufferReadReset(buffer); | |
| for (int i=0; i<buflen; i++) | |
| buf[i] = (char) R_IoBufferGetc(buffer); | |
| buf[buflen] = 0; | |
| defineVar(install("filename"), ScalarString(mkChar("")), ParseState.Original); | |
| defineVar(install("lines"), ScalarString(mkChar(buf)), ParseState.Original); | |
| PROTECT(class = allocVector(STRSXP, 2)); | |
| SET_STRING_ELT(class, 0, mkChar("srcfilecopy")); | |
| SET_STRING_ELT(class, 1, mkChar("srcfile")); | |
| setAttrib(ParseState.Original, R_ClassSymbol, class); | |
| UNPROTECT(1); | |
| } | |
| } | |
| R_PPStackTop = savestack; | |
| R_FinalizeSrcRefState(); | |
| return R_CurrentExpr; | |
| } | |
| static TextBuffer *txtb; | |
| static int text_getc(void) | |
| { | |
| return R_TextBufferGetc(txtb); | |
| } | |
| static SEXP R_Parse(int n, ParseStatus *status, SEXP srcfile) | |
| { | |
| int savestack; | |
| int i; | |
| SEXP t, rval; | |
| R_InitSrcRefState(); | |
| savestack = R_PPStackTop; | |
| ParseContextInit(); | |
| PROTECT(t = NewList()); | |
| REPROTECT(ParseState.SrcFile = srcfile, ParseState.SrcFileProt); | |
| REPROTECT(ParseState.Original = srcfile, ParseState.OriginalProt); | |
| if (isEnvironment(ParseState.SrcFile)) { | |
| ParseState.keepSrcRefs = TRUE; | |
| PROTECT_WITH_INDEX(SrcRefs = R_NilValue, &srindex); | |
| } | |
| for(i = 0; ; ) { | |
| if(n >= 0 && i >= n) break; | |
| ParseInit(); | |
| rval = R_Parse1(status); | |
| switch(*status) { | |
| case PARSE_NULL: | |
| break; | |
| case PARSE_OK: | |
| t = GrowList(t, rval); | |
| i++; | |
| break; | |
| case PARSE_INCOMPLETE: | |
| case PARSE_ERROR: | |
| if (ParseState.keepSrcRefs) | |
| finalizeData(); | |
| R_PPStackTop = savestack; | |
| R_FinalizeSrcRefState(); | |
| return R_NilValue; | |
| break; | |
| case PARSE_EOF: | |
| goto finish; | |
| break; | |
| } | |
| } | |
| finish: | |
| t = CDR(t); | |
| PROTECT(rval = allocVector(EXPRSXP, length(t))); | |
| for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t)) | |
| SET_VECTOR_ELT(rval, n, CAR(t)); | |
| if (ParseState.keepSrcRefs) { | |
| finalizeData(); | |
| rval = attachSrcrefs(rval); | |
| } | |
| R_PPStackTop = savestack; /* UNPROTECT lots! */ | |
| R_FinalizeSrcRefState(); | |
| *status = PARSE_OK; | |
| return rval; | |
| } | |
| /* used in edit.c */ | |
| attribute_hidden | |
| SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile) | |
| { | |
| GenerateCode = 1; | |
| fp_parse = fp; | |
| ptr_getc = file_getc; | |
| return R_Parse(n, status, srcfile); | |
| } | |
| #include "Rconnections.h" | |
| static Rconnection con_parse; | |
| /* need to handle incomplete last line */ | |
| static int con_getc(void) | |
| { | |
| int c; | |
| static int last=-1000; | |
| c = Rconn_fgetc(con_parse); | |
| if (c == EOF && last != '\n') c = '\n'; | |
| return (last = c); | |
| } | |
| /* used in source.c */ | |
| attribute_hidden | |
| SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile) | |
| { | |
| GenerateCode = 1; | |
| con_parse = con; | |
| ptr_getc = con_getc; | |
| return R_Parse(n, status, srcfile); | |
| } | |
| /* This one is public, and used in source.c */ | |
| SEXP R_ParseVector(SEXP text, int n, ParseStatus *status, SEXP srcfile) | |
| { | |
| SEXP rval; | |
| TextBuffer textb; | |
| R_TextBufferInit(&textb, text); | |
| txtb = &textb; | |
| GenerateCode = 1; | |
| ptr_getc = text_getc; | |
| rval = R_Parse(n, status, srcfile); | |
| R_TextBufferFree(&textb); | |
| return rval; | |
| } | |
| static const char *Prompt(SEXP prompt, int type) | |
| { | |
| if(type == 1) { | |
| if(length(prompt) <= 0) { | |
| return CHAR(STRING_ELT(GetOption1(install("prompt")), 0)); | |
| } | |
| else | |
| return CHAR(STRING_ELT(prompt, 0)); | |
| } | |
| else { | |
| return CHAR(STRING_ELT(GetOption1(install("continue")), 0)); | |
| } | |
| } | |
| /* used in source.c */ | |
| attribute_hidden | |
| SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt, | |
| SEXP srcfile) | |
| { | |
| SEXP rval, t; | |
| char *bufp, buf[CONSOLE_BUFFER_SIZE]; | |
| int c, i, prompt_type = 1; | |
| int savestack; | |
| R_IoBufferWriteReset(buffer); | |
| buf[0] = '\0'; | |
| bufp = buf; | |
| R_InitSrcRefState(); | |
| savestack = R_PPStackTop; | |
| PROTECT(t = NewList()); | |
| GenerateCode = 1; | |
| iob = buffer; | |
| ptr_getc = buffer_getc; | |
| REPROTECT(ParseState.SrcFile = srcfile, ParseState.SrcFileProt); | |
| REPROTECT(ParseState.Original = srcfile, ParseState.OriginalProt); | |
| if (isEnvironment(ParseState.SrcFile)) { | |
| ParseState.keepSrcRefs = TRUE; | |
| PROTECT_WITH_INDEX(SrcRefs = R_NilValue, &srindex); | |
| } | |
| for(i = 0; ; ) { | |
| if(n >= 0 && i >= n) break; | |
| if (!*bufp) { | |
| if(R_ReadConsole((char *) Prompt(prompt, prompt_type), | |
| (unsigned char *)buf, CONSOLE_BUFFER_SIZE, 1) == 0) | |
| goto finish; | |
| bufp = buf; | |
| } | |
| while ((c = *bufp++)) { | |
| R_IoBufferPutc(c, buffer); | |
| if (c == ';' || c == '\n') break; | |
| } | |
| /* Was a call to R_Parse1Buffer, but we don't want to reset | |
| xxlineno and xxcolno */ | |
| ParseInit(); | |
| ParseContextInit(); | |
| R_Parse1(status); | |
| rval = R_CurrentExpr; | |
| switch(*status) { | |
| case PARSE_NULL: | |
| break; | |
| case PARSE_OK: | |
| t = GrowList(t, rval); | |
| i++; | |
| break; | |
| case PARSE_INCOMPLETE: | |
| case PARSE_ERROR: | |
| R_IoBufferWriteReset(buffer); | |
| R_PPStackTop = savestack; | |
| R_FinalizeSrcRefState(); | |
| return R_NilValue; | |
| break; | |
| case PARSE_EOF: | |
| goto finish; | |
| break; | |
| } | |
| } | |
| finish: | |
| R_IoBufferWriteReset(buffer); | |
| t = CDR(t); | |
| PROTECT(rval = allocVector(EXPRSXP, length(t))); | |
| for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t)) | |
| SET_VECTOR_ELT(rval, n, CAR(t)); | |
| if (ParseState.keepSrcRefs) { | |
| finalizeData(); | |
| rval = attachSrcrefs(rval); | |
| } | |
| R_PPStackTop = savestack; /* UNPROTECT lots! */ | |
| R_FinalizeSrcRefState(); | |
| *status = PARSE_OK; | |
| return rval; | |
| } | |
| /*---------------------------------------------------------------------------- | |
| * | |
| * The Lexical Analyzer: | |
| * | |
| * Basic lexical analysis is performed by the following | |
| * routines. Input is read a line at a time, and, if the | |
| * program is in batch mode, each input line is echoed to | |
| * standard output after it is read. | |
| * | |
| * The function yylex() scans the input, breaking it into | |
| * tokens which are then passed to the parser. The lexical | |
| * analyser maintains a symbol table (in a very messy fashion). | |
| * | |
| * The fact that if statements need to parse differently | |
| * depending on whether the statement is being interpreted or | |
| * part of the body of a function causes the need for ifpop | |
| * and IfPush. When an if statement is encountered an 'i' is | |
| * pushed on a stack (provided there are parentheses active). | |
| * At later points this 'i' needs to be popped off of the if | |
| * stack. | |
| * | |
| */ | |
| static void IfPush(void) | |
| { | |
| if (*contextp==LBRACE || | |
| *contextp=='[' || | |
| *contextp=='(' || | |
| *contextp == 'i') { | |
| if(contextp - contextstack >= CONTEXTSTACK_SIZE) | |
| error(_("contextstack overflow")); | |
| *++contextp = 'i'; | |
| } | |
| } | |
| static void ifpop(void) | |
| { | |
| if (*contextp=='i') | |
| *contextp-- = 0; | |
| } | |
| /* This is only called following ., so we only care if it is | |
| an ANSI digit or not */ | |
| static int typeofnext(void) | |
| { | |
| int k, c; | |
| c = xxgetc(); | |
| if (isdigit(c)) k = 1; else k = 2; | |
| xxungetc(c); | |
| return k; | |
| } | |
| static int nextchar(int expect) | |
| { | |
| int c = xxgetc(); | |
| if (c == expect) | |
| return 1; | |
| else | |
| xxungetc(c); | |
| return 0; | |
| } | |
| /* Special Symbols */ | |
| /* Syntactic Keywords + Symbolic Constants */ | |
| struct { | |
| char *name; | |
| int token; | |
| } | |
| static keywords[] = { | |
| { "NULL", NULL_CONST }, | |
| { "NA", NUM_CONST }, | |
| { "TRUE", NUM_CONST }, | |
| { "FALSE", NUM_CONST }, | |
| { "Inf", NUM_CONST }, | |
| { "NaN", NUM_CONST }, | |
| { "NA_integer_", NUM_CONST }, | |
| { "NA_real_", NUM_CONST }, | |
| { "NA_character_", NUM_CONST }, | |
| { "NA_complex_", NUM_CONST }, | |
| { "function", FUNCTION }, | |
| { "while", WHILE }, | |
| { "repeat", REPEAT }, | |
| { "for", FOR }, | |
| { "if", IF }, | |
| { "in", IN }, | |
| { "else", ELSE }, | |
| { "next", NEXT }, | |
| { "break", BREAK }, | |
| { "...", SYMBOL }, | |
| { 0, 0 } | |
| }; | |
| /* KeywordLookup has side effects, it sets yylval */ | |
| static int KeywordLookup(const char *s) | |
| { | |
| int i; | |
| for (i = 0; keywords[i].name; i++) { | |
| if (strcmp(keywords[i].name, s) == 0) { | |
| switch (keywords[i].token) { | |
| case NULL_CONST: | |
| PROTECT(yylval = R_NilValue); | |
| break; | |
| case NUM_CONST: | |
| if(GenerateCode) { | |
| switch(i) { | |
| case 1: | |
| PROTECT(yylval = mkNA()); | |
| break; | |
| case 2: | |
| PROTECT(yylval = mkTrue()); | |
| break; | |
| case 3: | |
| PROTECT(yylval = mkFalse()); | |
| break; | |
| case 4: | |
| PROTECT(yylval = allocVector(REALSXP, 1)); | |
| REAL(yylval)[0] = R_PosInf; | |
| break; | |
| case 5: | |
| PROTECT(yylval = allocVector(REALSXP, 1)); | |
| REAL(yylval)[0] = R_NaN; | |
| break; | |
| case 6: | |
| PROTECT(yylval = allocVector(INTSXP, 1)); | |
| INTEGER(yylval)[0] = NA_INTEGER; | |
| break; | |
| case 7: | |
| PROTECT(yylval = allocVector(REALSXP, 1)); | |
| REAL(yylval)[0] = NA_REAL; | |
| break; | |
| case 8: | |
| PROTECT(yylval = allocVector(STRSXP, 1)); | |
| SET_STRING_ELT(yylval, 0, NA_STRING); | |
| break; | |
| case 9: | |
| PROTECT(yylval = allocVector(CPLXSXP, 1)); | |
| COMPLEX(yylval)[0].r = COMPLEX(yylval)[0].i = NA_REAL; | |
| break; | |
| } | |
| } else | |
| PROTECT(yylval = R_NilValue); | |
| break; | |
| case FUNCTION: | |
| case WHILE: | |
| case REPEAT: | |
| case FOR: | |
| case IF: | |
| case NEXT: | |
| case BREAK: | |
| yylval = install(s); | |
| break; | |
| case IN: | |
| case ELSE: | |
| break; | |
| case SYMBOL: | |
| PROTECT(yylval = install(s)); | |
| break; | |
| } | |
| return keywords[i].token; | |
| } | |
| } | |
| return 0; | |
| } | |
| static SEXP mkFloat(const char *s) | |
| { | |
| return ScalarReal(R_atof(s)); | |
| } | |
| static SEXP mkInt(const char *s) | |
| { | |
| double f = R_atof(s); /* or R_strtol? */ | |
| return ScalarInteger((int) f); | |
| } | |
| static SEXP mkComplex(const char *s) | |
| { | |
| SEXP t = R_NilValue; | |
| double f; | |
| f = R_atof(s); /* FIXME: make certain the value is legitimate. */ | |
| if(GenerateCode) { | |
| t = allocVector(CPLXSXP, 1); | |
| COMPLEX(t)[0].r = 0; | |
| COMPLEX(t)[0].i = f; | |
| } | |
| return t; | |
| } | |
| static SEXP mkNA(void) | |
| { | |
| SEXP t = allocVector(LGLSXP, 1); | |
| LOGICAL(t)[0] = NA_LOGICAL; | |
| return t; | |
| } | |
| attribute_hidden | |
| SEXP mkTrue(void) | |
| { | |
| SEXP s = allocVector(LGLSXP, 1); | |
| LOGICAL(s)[0] = 1; | |
| return s; | |
| } | |
| SEXP mkFalse(void) | |
| { | |
| SEXP s = allocVector(LGLSXP, 1); | |
| LOGICAL(s)[0] = 0; | |
| return s; | |
| } | |
| static void yyerror(const char *s) | |
| { | |
| static const char *const yytname_translations[] = | |
| { | |
| /* the left column are strings coming from bison, the right | |
| column are translations for users. | |
| The first YYENGLISH from the right column are English to be translated, | |
| the rest are to be copied literally. The #if 0 block below allows xgettext | |
| to see these. | |
| */ | |
| #define YYENGLISH 8 | |
| "$undefined", "input", | |
| "END_OF_INPUT", "end of input", | |
| "ERROR", "input", | |
| "STR_CONST", "string constant", | |
| "NUM_CONST", "numeric constant", | |
| "SYMBOL", "symbol", | |
| "LEFT_ASSIGN", "assignment", | |
| "'\\n'", "end of line", | |
| "NULL_CONST", "'NULL'", | |
| "FUNCTION", "'function'", | |
| "EQ_ASSIGN", "'='", | |
| "RIGHT_ASSIGN", "'->'", | |
| "LBB", "'[['", | |
| "FOR", "'for'", | |
| "IN", "'in'", | |
| "IF", "'if'", | |
| "ELSE", "'else'", | |
| "WHILE", "'while'", | |
| "NEXT", "'next'", | |
| "BREAK", "'break'", | |
| "REPEAT", "'repeat'", | |
| "GT", "'>'", | |
| "GE", "'>='", | |
| "LT", "'<'", | |
| "LE", "'<='", | |
| "EQ", "'=='", | |
| "NE", "'!='", | |
| "AND", "'&'", | |
| "OR", "'|'", | |
| "AND2", "'&&'", | |
| "OR2", "'||'", | |
| "NS_GET", "'::'", | |
| "NS_GET_INT", "':::'", | |
| 0 | |
| }; | |
| static char const yyunexpected[] = "syntax error, unexpected "; | |
| static char const yyexpecting[] = ", expecting "; | |
| char *expecting; | |
| R_ParseError = yylloc.first_line; | |
| R_ParseErrorCol = yylloc.first_column; | |
| R_ParseErrorFile = ParseState.SrcFile; | |
| if (!strncmp(s, yyunexpected, sizeof yyunexpected -1)) { | |
| int i; | |
| /* Edit the error message */ | |
| expecting = strstr(s + sizeof yyunexpected -1, yyexpecting); | |
| if (expecting) *expecting = '\0'; | |
| for (i = 0; yytname_translations[i]; i += 2) { | |
| if (!strcmp(s + sizeof yyunexpected - 1, yytname_translations[i])) { | |
| switch(i/2) | |
| { | |
| case 0: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input")); | |
| break; | |
| case 1: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of input")); | |
| break; | |
| case 2: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input")); | |
| break; | |
| case 3: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected string constant")); | |
| break; | |
| case 4: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected numeric constant")); | |
| break; | |
| case 5: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected symbol")); | |
| break; | |
| case 6: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected assignment")); | |
| break; | |
| case 7: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of line")); | |
| break; | |
| default: | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected %s"), | |
| yytname_translations[i+1]); | |
| break; | |
| } | |
| return; | |
| } | |
| } | |
| snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE - 1, _("unexpected %s"), | |
| s + sizeof yyunexpected - 1); | |
| } else { | |
| strncpy(R_ParseErrorMsg, s, PARSE_ERROR_SIZE - 1); | |
| R_ParseErrorMsg[PARSE_ERROR_SIZE - 1] = '\0'; | |
| } | |
| } | |
| static void CheckFormalArgs(SEXP formlist, SEXP _new, YYLTYPE *lloc) | |
| { | |
| while (formlist != R_NilValue) { | |
| if (TAG(formlist) == _new) { | |
| error(_("repeated formal argument '%s' on line %d"), EncodeChar(PRINTNAME(_new)), | |
| lloc->first_line); | |
| } | |
| formlist = CDR(formlist); | |
| } | |
| } | |
| /* This is used as the buffer for NumericValue, SpecialValue and | |
| SymbolValue. None of these could conceivably need 8192 bytes. | |
| It has not been used as the buffer for input character strings | |
| since Oct 2007 (released as 2.7.0), and for comments since 2.8.0 | |
| */ | |
| static char yytext[MAXELTSIZE]; | |
| static int SkipSpace(void) | |
| { | |
| int c; | |
| #ifdef Win32 | |
| if(!mbcslocale) { /* 0xa0 is NBSP in all 8-bit Windows locales */ | |
| while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f' || | |
| (unsigned int) c == 0xa0) ; | |
| return c; | |
| } else { | |
| int i, clen; | |
| wchar_t wc; | |
| while (1) { | |
| c = xxgetc(); | |
| if (c == ' ' || c == '\t' || c == '\f') continue; | |
| if (c == '\n' || c == R_EOF) break; | |
| if ((unsigned int) c < 0x80) break; | |
| clen = mbcs_get_next(c, &wc); /* always 2 */ | |
| if(! Ri18n_iswctype(wc, Ri18n_wctype("blank")) ) break; | |
| for(i = 1; i < clen; i++) c = xxgetc(); | |
| } | |
| return c; | |
| } | |
| #endif | |
| #if defined(__STDC_ISO_10646__) | |
| if(mbcslocale) { /* wctype functions need Unicode wchar_t */ | |
| int i, clen; | |
| wchar_t wc; | |
| while (1) { | |
| c = xxgetc(); | |
| if (c == ' ' || c == '\t' || c == '\f') continue; | |
| if (c == '\n' || c == R_EOF) break; | |
| if ((unsigned int) c < 0x80) break; | |
| clen = mbcs_get_next(c, &wc); | |
| if(! Ri18n_iswctype(wc, Ri18n_wctype("blank")) ) break; | |
| for(i = 1; i < clen; i++) c = xxgetc(); | |
| } | |
| } else | |
| #endif | |
| while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f') ; | |
| return c; | |
| } | |
| /* Note that with interactive use, EOF cannot occur inside */ | |
| /* a comment. However, semicolons inside comments make it */ | |
| /* appear that this does happen. For this reason we use the */ | |
| /* special assignment EndOfFile=2 to indicate that this is */ | |
| /* going on. This is detected and dealt with in Parse1Buffer. */ | |
| static int SkipComment(void) | |
| { | |
| int c='#', i; | |
| /* locations before the # character was read */ | |
| int _first_column = ParseState.xxcolno ; | |
| int _first_parsed = ParseState.xxparseno ; | |
| int type = COMMENT ; | |
| Rboolean maybeLine = (ParseState.xxcolno == 1); | |
| Rboolean doSave; | |
| DECLARE_YYTEXT_BUFP(yyp); | |
| if (maybeLine) { | |
| char lineDirective[] = "#line"; | |
| YYTEXT_PUSH(c, yyp); | |
| for (i=1; i<5; i++) { | |
| c = xxgetc(); | |
| if (c != (int)(lineDirective[i])) { | |
| maybeLine = FALSE; | |
| break; | |
| } | |
| YYTEXT_PUSH(c, yyp); | |
| } | |
| if (maybeLine) | |
| c = processLineDirective(&type); | |
| } | |
| // we want to track down the character | |
| // __before__ the new line character | |
| int _last_column = ParseState.xxcolno ; | |
| int _last_parsed = ParseState.xxparseno ; | |
| if (c == '\n') { | |
| _last_column = prevcols[prevpos]; | |
| _last_parsed = prevparse[prevpos]; | |
| } | |
| doSave = !maybeLine; | |
| while (c != '\n' && c != R_EOF) { | |
| // Comments can be any length; we only record the ones that fit in yytext. | |
| if (doSave) { | |
| YYTEXT_PUSH(c, yyp); | |
| doSave = (yyp - yytext) < sizeof(yytext) - 2; | |
| } | |
| _last_column = ParseState.xxcolno ; | |
| _last_parsed = ParseState.xxparseno ; | |
| c = xxgetc(); | |
| } | |
| if (c == R_EOF) EndOfFile = 2; | |
| incrementId( ) ; | |
| YYTEXT_PUSH('\0', yyp); | |
| record_( _first_parsed, _first_column, _last_parsed, _last_column, | |
| type, identifier, doSave ? yytext : 0 ) ; | |
| return c; | |
| } | |
| static int NumericValue(int c) | |
| { | |
| int seendot = (c == '.'); | |
| int seenexp = 0; | |
| int last = c; | |
| int nd = 0; | |
| int asNumeric = 0; | |
| int count = 1; /* The number of characters seen */ | |
| DECLARE_YYTEXT_BUFP(yyp); | |
| YYTEXT_PUSH(c, yyp); | |
| /* We don't care about other than ASCII digits */ | |
| while (isdigit(c = xxgetc()) || c == '.' || c == 'e' || c == 'E' | |
| || c == 'x' || c == 'X' || c == 'L') | |
| { | |
| count++; | |
| if (c == 'L') /* must be at the end. Won't allow 1Le3 (at present). */ | |
| { YYTEXT_PUSH(c, yyp); | |
| break; | |
| } | |
| if (c == 'x' || c == 'X') { | |
| if (count > 2 || last != '0') break; /* 0x must be first */ | |
| YYTEXT_PUSH(c, yyp); | |
| while(isdigit(c = xxgetc()) || ('a' <= c && c <= 'f') || | |
| ('A' <= c && c <= 'F') || c == '.') { | |
| if (c == '.') { | |
| if (seendot) return ERROR; | |
| seendot = 1; | |
| } | |
| YYTEXT_PUSH(c, yyp); | |
| nd++; | |
| } | |
| if (nd == 0) return ERROR; | |
| if (c == 'p' || c == 'P') { | |
| seenexp = 1; | |
| YYTEXT_PUSH(c, yyp); | |
| c = xxgetc(); | |
| if (!isdigit(c) && c != '+' && c != '-') return ERROR; | |
| if (c == '+' || c == '-') { | |
| YYTEXT_PUSH(c, yyp); | |
| c = xxgetc(); | |
| } | |
| for(nd = 0; isdigit(c); c = xxgetc(), nd++) | |
| YYTEXT_PUSH(c, yyp); | |
| if (nd == 0) return ERROR; | |
| } | |
| if (seendot && !seenexp) return ERROR; | |
| break; | |
| } | |
| if (c == 'E' || c == 'e') { | |
| if (seenexp) | |
| break; | |
| seenexp = 1; | |
| seendot = seendot == 1 ? seendot : 2; | |
| YYTEXT_PUSH(c, yyp); | |
| c = xxgetc(); | |
| if (!isdigit(c) && c != '+' && c != '-') return ERROR; | |
| if (c == '+' || c == '-') { | |
| YYTEXT_PUSH(c, yyp); | |
| c = xxgetc(); | |
| if (!isdigit(c)) return ERROR; | |
| } | |
| } | |
| if (c == '.') { | |
| if (seendot) | |
| break; | |
| seendot = 1; | |
| } | |
| YYTEXT_PUSH(c, yyp); | |
| last = c; | |
| } | |
| if(c == 'i') | |
| YYTEXT_PUSH(c, yyp); /* for getParseData */ | |
| YYTEXT_PUSH('\0', yyp); | |
| /* Make certain that things are okay. */ | |
| if(c == 'L') { | |
| double a = R_atof(yytext); | |
| int b = (int) a; | |
| /* We are asked to create an integer via the L, so we check that the | |
| double and int values are the same. If not, this is a problem and we | |
| will not lose information and so use the numeric value. | |
| */ | |
| if(a != (double) b) { | |
| if(GenerateCode) { | |
| if(seendot == 1 && seenexp == 0) | |
| warning(_("integer literal %s contains decimal; using numeric value"), yytext); | |
| else { | |
| /* hide the L for the warning message */ | |
| warning(_("non-integer value %s qualified with L; using numeric value"), yytext); | |
| } | |
| } | |
| asNumeric = 1; | |
| seenexp = 1; | |
| } | |
| } | |
| if(c == 'i') { | |
| yylval = GenerateCode ? mkComplex(yytext) : R_NilValue; | |
| } else if(c == 'L' && asNumeric == 0) { | |
| if(GenerateCode && seendot == 1 && seenexp == 0) | |
| warning(_("integer literal %s contains unnecessary decimal point"), yytext); | |
| yylval = GenerateCode ? mkInt(yytext) : R_NilValue; | |
| #if 0 /* do this to make 123 integer not double */ | |
| } else if(!(seendot || seenexp)) { | |
| if(c != 'L') xxungetc(c); | |
| if (GenerateCode) { | |
| double a = R_atof(yytext); | |
| int b = (int) a; | |
| yylval = (a != (double) b) ? mkFloat(yytext) : mkInt(yytext); | |
| } else yylval = R_NilValue; | |
| #endif | |
| } else { | |
| if(c != 'L') | |
| xxungetc(c); | |
| yylval = GenerateCode ? mkFloat(yytext) : R_NilValue; | |
| } | |
| PROTECT(yylval); | |
| return NUM_CONST; | |
| } | |
| /* Strings may contain the standard ANSI escapes and octal */ | |
| /* specifications of the form \o, \oo or \ooo, where 'o' */ | |
| /* is an octal digit. */ | |
| #define STEXT_PUSH(c) do { \ | |
| size_t nc = bp - stext; \ | |
| if (nc >= nstext - 1) { \ | |
| char *old = stext; \ | |
| nstext *= 2; \ | |
| stext = malloc(nstext); \ | |
| if(!stext) error(_("unable to allocate buffer for long string at line %d"), ParseState.xxlineno);\ | |
| memmove(stext, old, nc); \ | |
| if(old != st0) free(old); \ | |
| bp = stext+nc; } \ | |
| *bp++ = ((char) c); \ | |
| } while(0) | |
| /* The idea here is that if a string contains \u escapes that are not | |
| valid in the current locale, we should switch to UTF-8 for that | |
| string. Needs Unicode wide-char support. | |
| Defining __STDC_ISO_10646__ is done by the OS (nor to) in wchar.t. | |
| Some (e.g. Solaris, FreeBSD) have Unicode wchar_t but do not define it. | |
| */ | |
| #if defined(Win32) || defined(__STDC_ISO_10646__) | |
| typedef wchar_t ucs_t; | |
| # define mbcs_get_next2 mbcs_get_next | |
| #else | |
| typedef unsigned int ucs_t; | |
| # define WC_NOT_UNICODE | |
| static int mbcs_get_next2(int c, ucs_t *wc) | |
| { | |
| int i, res, clen = 1; char s[9]; | |
| s[0] = c; | |
| /* This assumes (probably OK) that all MBCS embed ASCII as single-byte | |
| lead bytes, including control chars */ | |
| if((unsigned int) c < 0x80) { | |
| *wc = (wchar_t) c; | |
| return 1; | |
| } | |
| if(utf8locale) { | |
| clen = utf8clen(c); | |
| for(i = 1; i < clen; i++) { | |
| s[i] = xxgetc(); | |
| if(s[i] == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno); | |
| } | |
| s[clen] ='\0'; /* x86 Solaris requires this */ | |
| res = mbtoucs(wc, s, clen); | |
| if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno); | |
| } else { | |
| /* This is not necessarily correct for stateful MBCS */ | |
| while(clen <= MB_CUR_MAX) { | |
| res = mbtoucs(wc, s, clen); | |
| if(res >= 0) break; | |
| if(res == -1) | |
| error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno); | |
| /* so res == -2 */ | |
| c = xxgetc(); | |
| if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno); | |
| s[clen++] = c; | |
| } /* we've tried enough, so must be complete or invalid by now */ | |
| } | |
| for(i = clen - 1; i > 0; i--) xxungetc(s[i]); | |
| return clen; | |
| } | |
| #endif | |
| #define WTEXT_PUSH(c) do { if(wcnt < 10000) wcs[wcnt++] = c; } while(0) | |
| static SEXP mkStringUTF8(const ucs_t *wcs, int cnt) | |
| { | |
| SEXP t; | |
| int nb; | |
| /* NB: cnt includes the terminator */ | |
| #ifdef Win32 | |
| nb = cnt*4; /* UCS-2/UTF-16 so max 4 bytes per wchar_t */ | |
| #else | |
| nb = cnt*6; | |
| #endif | |
| R_CheckStack2(nb); | |
| char s[nb]; | |
| memset(s, 0, nb); /* safety */ | |
| #ifdef WC_NOT_UNICODE | |
| for(char *ss = s; *wcs; wcs++) ss += ucstoutf8(ss, *wcs); | |
| #else | |
| wcstoutf8(s, wcs, nb); | |
| #endif | |
| PROTECT(t = allocVector(STRSXP, 1)); | |
| SET_STRING_ELT(t, 0, mkCharCE(s, CE_UTF8)); | |
| UNPROTECT(1); | |
| return t; | |
| } | |
| #define CTEXT_PUSH(c) do { \ | |
| if (ct - currtext >= 1000) { \ | |
| memmove(currtext, currtext+100, 901); memmove(currtext, "... ", 4); ct -= 100; \ | |
| currtext_truncated = TRUE; \ | |
| } \ | |
| *ct++ = ((char) c); \ | |
| } while(0) | |
| #define CTEXT_POP() ct-- | |
| /* forSymbol is true when parsing backticked symbols */ | |
| static int StringValue(int c, Rboolean forSymbol) | |
| { | |
| int quote = c; | |
| char currtext[1010], *ct = currtext; | |
| char st0[MAXELTSIZE]; | |
| unsigned int nstext = MAXELTSIZE; | |
| char *stext = st0, *bp = st0; | |
| int wcnt = 0; | |
| ucs_t wcs[10001]; | |
| Rboolean oct_or_hex = FALSE, use_wcs = FALSE, currtext_truncated = FALSE; | |
| CTEXT_PUSH(c); | |
| while ((c = xxgetc()) != R_EOF && c != quote) { | |
| CTEXT_PUSH(c); | |
| if (c == '\n') { | |
| xxungetc(c); CTEXT_POP(); | |
| /* Fix suggested by Mark Bravington to allow multiline strings | |
| * by pretending we've seen a backslash. Was: | |
| * return ERROR; | |
| */ | |
| c = '\\'; | |
| } | |
| if (c == '\\') { | |
| c = xxgetc(); CTEXT_PUSH(c); | |
| if ('0' <= c && c <= '7') { | |
| int octal = c - '0'; | |
| if ('0' <= (c = xxgetc()) && c <= '7') { | |
| CTEXT_PUSH(c); | |
| octal = 8 * octal + c - '0'; | |
| if ('0' <= (c = xxgetc()) && c <= '7') { | |
| CTEXT_PUSH(c); | |
| octal = 8 * octal + c - '0'; | |
| } else { | |
| xxungetc(c); | |
| CTEXT_POP(); | |
| } | |
| } else { | |
| xxungetc(c); | |
| CTEXT_POP(); | |
| } | |
| if (!octal) | |
| error(_("nul character not allowed (line %d)"), ParseState.xxlineno); | |
| c = octal; | |
| oct_or_hex = TRUE; | |
| } | |
| else if(c == 'x') { | |
| int val = 0; int i, ext; | |
| for(i = 0; i < 2; i++) { | |
| c = xxgetc(); CTEXT_PUSH(c); | |
| if(c >= '0' && c <= '9') ext = c - '0'; | |
| else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10; | |
| else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10; | |
| else { | |
| xxungetc(c); | |
| CTEXT_POP(); | |
| if (i == 0) { /* was just \x */ | |
| *ct = '\0'; | |
| errorcall(R_NilValue, _("'\\x' used without hex digits in character string starting \"%s\""), currtext); | |
| } | |
| break; | |
| } | |
| val = 16*val + ext; | |
| } | |
| if (!val) | |
| error(_("nul character not allowed (line %d)"), ParseState.xxlineno); | |
| c = val; | |
| oct_or_hex = TRUE; | |
| } | |
| else if(c == 'u') { | |
| unsigned int val = 0; int i, ext; | |
| Rboolean delim = FALSE; | |
| if(forSymbol) | |
| error(_("\\uxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno); | |
| if((c = xxgetc()) == '{') { | |
| delim = TRUE; | |
| CTEXT_PUSH(c); | |
| } else xxungetc(c); | |
| for(i = 0; i < 4; i++) { | |
| c = xxgetc(); CTEXT_PUSH(c); | |
| if(c >= '0' && c <= '9') ext = c - '0'; | |
| else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10; | |
| else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10; | |
| else { | |
| xxungetc(c); | |
| CTEXT_POP(); | |
| if (i == 0) { /* was just \u */ | |
| *ct = '\0'; | |
| errorcall(R_NilValue, _("'\\u' used without hex digits in character string starting \"%s\""), currtext); | |
| } | |
| break; | |
| } | |
| val = 16*val + ext; | |
| } | |
| if(delim) { | |
| if((c = xxgetc()) != '}') | |
| error(_("invalid \\u{xxxx} sequence (line %d)"), | |
| ParseState.xxlineno); | |
| else CTEXT_PUSH(c); | |
| } | |
| if (!val) | |
| error(_("nul character not allowed (line %d)"), ParseState.xxlineno); | |
| WTEXT_PUSH(val); /* this assumes wchar_t is Unicode */ | |
| use_wcs = TRUE; | |
| continue; | |
| } | |
| else if(c == 'U') { | |
| unsigned int val = 0; int i, ext; | |
| Rboolean delim = FALSE; | |
| if(forSymbol) | |
| error(_("\\Uxxxxxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno); | |
| if((c = xxgetc()) == '{') { | |
| delim = TRUE; | |
| CTEXT_PUSH(c); | |
| } else xxungetc(c); | |
| for(i = 0; i < 8; i++) { | |
| c = xxgetc(); CTEXT_PUSH(c); | |
| if(c >= '0' && c <= '9') ext = c - '0'; | |
| else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10; | |
| else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10; | |
| else { | |
| xxungetc(c); | |
| CTEXT_POP(); | |
| if (i == 0) { /* was just \U */ | |
| *ct = '\0'; | |
| errorcall(R_NilValue, _("'\\U' used without hex digits in character string starting \"%s\""), currtext); | |
| } | |
| break; | |
| } | |
| val = 16*val + ext; | |
| } | |
| if(delim) { | |
| if((c = xxgetc()) != '}') | |
| error(_("invalid \\U{xxxxxxxx} sequence (line %d)"), ParseState.xxlineno); | |
| else CTEXT_PUSH(c); | |
| } | |
| if (!val) | |
| error(_("nul character not allowed (line %d)"), ParseState.xxlineno); | |
| WTEXT_PUSH(val); | |
| use_wcs = TRUE; | |
| continue; | |
| } | |
| else { | |
| switch (c) { | |
| case 'a': | |
| c = '\a'; | |
| break; | |
| case 'b': | |
| c = '\b'; | |
| break; | |
| case 'f': | |
| c = '\f'; | |
| break; | |
| case 'n': | |
| c = '\n'; | |
| break; | |
| case 'r': | |
| c = '\r'; | |
| break; | |
| case 't': | |
| c = '\t'; | |
| break; | |
| case 'v': | |
| c = '\v'; | |
| break; | |
| case '\\': | |
| c = '\\'; | |
| break; | |
| case '"': | |
| case '\'': | |
| case '`': | |
| case ' ': | |
| case '\n': | |
| break; | |
| default: | |
| *ct = '\0'; | |
| errorcall(R_NilValue, _("'\\%c' is an unrecognized escape in character string starting \"%s\""), c, currtext); | |
| } | |
| } | |
| } else if(mbcslocale) { | |
| int i, clen; | |
| ucs_t wc; | |
| clen = mbcs_get_next2(c, &wc); | |
| WTEXT_PUSH(wc); | |
| for(i = 0; i < clen - 1; i++){ | |
| STEXT_PUSH(c); | |
| c = xxgetc(); | |
| if (c == R_EOF) break; | |
| CTEXT_PUSH(c); | |
| if (c == '\n') { | |
| xxungetc(c); CTEXT_POP(); | |
| c = '\\'; | |
| } | |
| } | |
| if (c == R_EOF) break; | |
| STEXT_PUSH(c); | |
| continue; | |
| } | |
| STEXT_PUSH(c); | |
| if ((unsigned int) c < 0x80) WTEXT_PUSH(c); | |
| else { /* have an 8-bit char in the current encoding */ | |
| #ifdef WC_NOT_UNICODE | |
| ucs_t wc; | |
| char s[2] = " "; | |
| s[0] = (char) c; | |
| mbtoucs(&wc, s, 2); | |
| #else | |
| wchar_t wc; | |
| char s[2] = " "; | |
| s[0] = (char) c; | |
| mbrtowc(&wc, s, 2, NULL); | |
| #endif | |
| WTEXT_PUSH(wc); | |
| } | |
| } | |
| STEXT_PUSH('\0'); | |
| WTEXT_PUSH(0); | |
| yytext[0] = '\0'; | |
| if (c == R_EOF) { | |
| if(stext != st0) free(stext); | |
| PROTECT(yylval = R_NilValue); | |
| return INCOMPLETE_STRING; | |
| } else { | |
| CTEXT_PUSH(c); | |
| CTEXT_PUSH('\0'); | |
| } | |
| if (!currtext_truncated) | |
| strcpy(yytext, currtext); | |
| if(forSymbol) { | |
| PROTECT(yylval = install(stext)); | |
| if(stext != st0) free(stext); | |
| return SYMBOL; | |
| } else { | |
| if(use_wcs) { | |
| if(oct_or_hex) | |
| error(_("mixing Unicode and octal/hex escapes in a string is not allowed")); | |
| if(wcnt < 10000) | |
| PROTECT(yylval = mkStringUTF8(wcs, wcnt)); /* include terminator */ | |
| else | |
| error(_("string at line %d containing Unicode escapes not in this locale\nis too long (max 10000 chars)"), ParseState.xxlineno); | |
| } else | |
| PROTECT(yylval = mkString2(stext, bp - stext - 1, oct_or_hex)); | |
| if(stext != st0) free(stext); | |
| return STR_CONST; | |
| } | |
| } | |
| static int SpecialValue(int c) | |
| { | |
| DECLARE_YYTEXT_BUFP(yyp); | |
| YYTEXT_PUSH(c, yyp); | |
| while ((c = xxgetc()) != R_EOF && c != '%') { | |
| if (c == '\n') { | |
| xxungetc(c); | |
| return ERROR; | |
| } | |
| YYTEXT_PUSH(c, yyp); | |
| } | |
| if (c == '%') | |
| YYTEXT_PUSH(c, yyp); | |
| YYTEXT_PUSH('\0', yyp); | |
| yylval = install(yytext); | |
| return SPECIAL; | |
| } | |
| /* return 1 if name is a valid name 0 otherwise */ | |
| attribute_hidden | |
| int isValidName(const char *name) | |
| { | |
| const char *p = name; | |
| int i; | |
| if(mbcslocale) { | |
| /* the only way to establish which chars are alpha etc is to | |
| use the wchar variants */ | |
| size_t n = strlen(name), used; | |
| wchar_t wc; | |
| used = Mbrtowc(&wc, p, n, NULL); p += used; n -= used; | |
| if(used == 0) return 0; | |
| if (wc != L'.' && !iswalpha(wc) ) return 0; | |
| if (wc == L'.') { | |
| /* We don't care about other than ASCII digits */ | |
| if(isdigit(0xff & (int)*p)) return 0; | |
| /* Mbrtowc(&wc, p, n, NULL); if(iswdigit(wc)) return 0; */ | |
| } | |
| while((used = Mbrtowc(&wc, p, n, NULL))) { | |
| if (!(iswalnum(wc) || wc == L'.' || wc == L'_')) break; | |
| p += used; n -= used; | |
| } | |
| if (*p != '\0') return 0; | |
| } else { | |
| int c = 0xff & *p++; | |
| if (c != '.' && !isalpha(c) ) return 0; | |
| if (c == '.' && isdigit(0xff & (int)*p)) return 0; | |
| while ( c = 0xff & *p++, (isalnum(c) || c == '.' || c == '_') ) ; | |
| if (c != '\0') return 0; | |
| } | |
| if (strcmp(name, "...") == 0) return 1; | |
| for (i = 0; keywords[i].name != NULL; i++) | |
| if (strcmp(keywords[i].name, name) == 0) return 0; | |
| return 1; | |
| } | |
| static int SymbolValue(int c) | |
| { | |
| int kw; | |
| DECLARE_YYTEXT_BUFP(yyp); | |
| if(mbcslocale) { | |
| wchar_t wc; int i, clen; | |
| clen = mbcs_get_next(c, &wc); | |
| while(1) { | |
| /* at this point we have seen one char, so push its bytes | |
| and get one more */ | |
| for(i = 0; i < clen; i++) { | |
| YYTEXT_PUSH(c, yyp); | |
| c = xxgetc(); | |
| } | |
| if(c == R_EOF) break; | |
| if(c == '.' || c == '_') { | |
| clen = 1; | |
| continue; | |
| } | |
| clen = mbcs_get_next(c, &wc); | |
| if(!iswalnum(wc)) break; | |
| } | |
| } else | |
| do { | |
| YYTEXT_PUSH(c, yyp); | |
| } while ((c = xxgetc()) != R_EOF && | |
| (isalnum(c) || c == '.' || c == '_')); | |
| xxungetc(c); | |
| YYTEXT_PUSH('\0', yyp); | |
| if ((kw = KeywordLookup(yytext))) | |
| return kw; | |
| PROTECT(yylval = install(yytext)); | |
| return SYMBOL; | |
| } | |
| static void setParseFilename(SEXP newname) { | |
| SEXP class; | |
| if (isEnvironment(ParseState.SrcFile)) { | |
| SEXP oldname = findVar(install("filename"), ParseState.SrcFile); | |
| if (isString(oldname) && length(oldname) > 0 && | |
| strcmp(CHAR(STRING_ELT(oldname, 0)), | |
| CHAR(STRING_ELT(newname, 0))) == 0) return; | |
| REPROTECT(ParseState.SrcFile = NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv), ParseState.SrcFileProt); | |
| defineVar(install("filename"), newname, ParseState.SrcFile); | |
| defineVar(install("original"), ParseState.Original, ParseState.SrcFile); | |
| PROTECT(class = allocVector(STRSXP, 2)); | |
| SET_STRING_ELT(class, 0, mkChar("srcfilealias")); | |
| SET_STRING_ELT(class, 1, mkChar("srcfile")); | |
| setAttrib(ParseState.SrcFile, R_ClassSymbol, class); | |
| UNPROTECT(1); | |
| } else { | |
| REPROTECT(ParseState.SrcFile = duplicate(newname), ParseState.SrcFileProt); | |
| } | |
| UNPROTECT_PTR(newname); | |
| } | |
| static int processLineDirective(int *type) | |
| { | |
| int c, tok, linenumber; | |
| c = SkipSpace(); | |
| if (!isdigit(c)) return(c); | |
| tok = NumericValue(c); | |
| linenumber = atoi(yytext); | |
| c = SkipSpace(); | |
| if (c == '"') | |
| tok = StringValue(c, FALSE); | |
| else | |
| xxungetc(c); | |
| if (tok == STR_CONST) | |
| setParseFilename(yylval); | |
| while ((c = xxgetc()) != '\n' && c != R_EOF) /* skip */ ; | |
| ParseState.xxlineno = linenumber; | |
| *type = LINE_DIRECTIVE; | |
| /* we don't change xxparseno here: it counts parsed lines, not official lines */ | |
| R_ParseContext[R_ParseContextLast] = '\0'; /* Context report shouldn't show the directive */ | |
| return(c); | |
| } | |
| /* Get the R symbol, and set yytext at the same time */ | |
| static SEXP install_and_save(char * text) | |
| { | |
| strcpy(yytext, text); | |
| return install(text); | |
| } | |
| /* Get an R symbol, and set different yytext. Used for translation of -> to <-. ->> to <<- */ | |
| static SEXP install_and_save2(char * text, char * savetext) | |
| { | |
| strcpy(yytext, savetext); | |
| return install(text); | |
| } | |
| /* Split the input stream into tokens. */ | |
| /* This is the lowest of the parsing levels. */ | |
| static int token(void) | |
| { | |
| int c; | |
| wchar_t wc; | |
| if (SavedToken) { | |
| c = SavedToken; | |
| yylval = SavedLval; | |
| SavedLval = R_NilValue; | |
| SavedToken = 0; | |
| yylloc.first_line = xxlinesave; | |
| yylloc.first_column = xxcolsave; | |
| yylloc.first_byte = xxbytesave; | |
| yylloc.first_parsed = xxparsesave; | |
| return c; | |
| } | |
| xxcharsave = xxcharcount; /* want to be able to go back one token */ | |
| c = SkipSpace(); | |
| if (c == '#') c = SkipComment(); | |
| yylloc.first_line = ParseState.xxlineno; | |
| yylloc.first_column = ParseState.xxcolno; | |
| yylloc.first_byte = ParseState.xxbyteno; | |
| yylloc.first_parsed = ParseState.xxparseno; | |
| if (c == R_EOF) return END_OF_INPUT; | |
| /* Either digits or symbols can start with a "." */ | |
| /* so we need to decide which it is and jump to */ | |
| /* the correct spot. */ | |
| if (c == '.' && typeofnext() >= 2) goto symbol; | |
| /* literal numbers */ | |
| if (c == '.') return NumericValue(c); | |
| /* We don't care about other than ASCII digits */ | |
| if (isdigit(c)) return NumericValue(c); | |
| /* literal strings */ | |
| if (c == '\"' || c == '\'') | |
| return StringValue(c, FALSE); | |
| /* special functions */ | |
| if (c == '%') | |
| return SpecialValue(c); | |
| /* functions, constants and variables */ | |
| if (c == '`') | |
| return StringValue(c, TRUE); | |
| symbol: | |
| if (c == '.') return SymbolValue(c); | |
| if(mbcslocale) { | |
| mbcs_get_next(c, &wc); | |
| if (iswalpha(wc)) return SymbolValue(c); | |
| } else | |
| if (isalpha(c)) return SymbolValue(c); | |
| /* compound tokens */ | |
| switch (c) { | |
| case '<': | |
| if (nextchar('=')) { | |
| yylval = install_and_save("<="); | |
| return LE; | |
| } | |
| if (nextchar('-')) { | |
| yylval = install_and_save("<-"); | |
| return LEFT_ASSIGN; | |
| } | |
| if (nextchar('<')) { | |
| if (nextchar('-')) { | |
| yylval = install_and_save("<<-"); | |
| return LEFT_ASSIGN; | |
| } | |
| else | |
| return ERROR; | |
| } | |
| yylval = install_and_save("<"); | |
| return LT; | |
| case '-': | |
| if (nextchar('>')) { | |
| if (nextchar('>')) { | |
| yylval = install_and_save2("<<-", "->>"); | |
| return RIGHT_ASSIGN; | |
| } | |
| else { | |
| yylval = install_and_save2("<-", "->"); | |
| return RIGHT_ASSIGN; | |
| } | |
| } | |
| yylval = install_and_save("-"); | |
| return '-'; | |
| case '>': | |
| if (nextchar('=')) { | |
| yylval = install_and_save(">="); | |
| return GE; | |
| } | |
| yylval = install_and_save(">"); | |
| return GT; | |
| case '!': | |
| if (nextchar('=')) { | |
| yylval = install_and_save("!="); | |
| return NE; | |
| } | |
| yylval = install_and_save("!"); | |
| return '!'; | |
| case '=': | |
| if (nextchar('=')) { | |
| yylval = install_and_save("=="); | |
| return EQ; | |
| } | |
| yylval = install_and_save("="); | |
| return EQ_ASSIGN; | |
| case ':': | |
| if (nextchar(':')) { | |
| if (nextchar(':')) { | |
| yylval = install_and_save(":::"); | |
| return NS_GET_INT; | |
| } | |
| else { | |
| yylval = install_and_save("::"); | |
| return NS_GET; | |
| } | |
| } | |
| if (nextchar('=')) { | |
| yylval = install_and_save(":="); | |
| return LEFT_ASSIGN; | |
| } | |
| yylval = install_and_save(":"); | |
| return ':'; | |
| case '&': | |
| if (nextchar('&')) { | |
| yylval = install_and_save("&&"); | |
| return AND2; | |
| } | |
| yylval = install_and_save("&"); | |
| return AND; | |
| case '|': | |
| if (nextchar('|')) { | |
| yylval = install_and_save("||"); | |
| return OR2; | |
| } | |
| yylval = install_and_save("|"); | |
| return OR; | |
| case LBRACE: | |
| yylval = install_and_save("{"); | |
| return c; | |
| case RBRACE: | |
| strcpy(yytext, "}"); | |
| return c; | |
| case '(': | |
| yylval = install_and_save("("); | |
| return c; | |
| case ')': | |
| strcpy(yytext, ")"); | |
| return c; | |
| case '[': | |
| if (nextchar('[')) { | |
| yylval = install_and_save("[["); | |
| return LBB; | |
| } | |
| yylval = install_and_save("["); | |
| return c; | |
| case ']': | |
| strcpy(yytext, "]"); | |
| return c; | |
| case '?': | |
| yylval = install_and_save("?"); | |
| return c; | |
| case '*': | |
| /* Replace ** by ^. This has been here since 1998, but is | |
| undocumented (at least in the obvious places). It is in | |
| the index of the Blue Book with a reference to p. 431, the | |
| help for 'Deprecated'. S-PLUS 6.2 still allowed this, so | |
| presumably it was for compatibility with S. */ | |
| if (nextchar('*')) { | |
| yylval = install_and_save2("^", "**"); | |
| return '^'; | |
| } else | |
| yylval = install_and_save("*"); | |
| return c; | |
| case '+': | |
| case '/': | |
| case '^': | |
| case '~': | |
| case '$': | |
| case '@': | |
| yytext[0] = (char) c; | |
| yytext[1] = '\0'; | |
| yylval = install(yytext); | |
| return c; | |
| default: | |
| yytext[0] = (char) c; | |
| yytext[1] = '\0'; | |
| return c; | |
| } | |
| } | |
| /** | |
| * Sets the first elements of the yyloc structure with current | |
| * information | |
| */ | |
| static void setfirstloc(void) | |
| { | |
| yylloc.first_line = ParseState.xxlineno; | |
| yylloc.first_column = ParseState.xxcolno; | |
| yylloc.first_byte = ParseState.xxbyteno; | |
| yylloc.first_parsed = ParseState.xxparseno; | |
| } | |
| static void setlastloc(void) | |
| { | |
| yylloc.last_line = ParseState.xxlineno; | |
| yylloc.last_column = ParseState.xxcolno; | |
| yylloc.last_byte = ParseState.xxbyteno; | |
| yylloc.last_parsed = ParseState.xxparseno; | |
| } | |
| /** | |
| * Wrap around the token function. Returns the same result | |
| * but increments the identifier, after a call to token_, | |
| * the identifier variable contains the id of the token | |
| * just returned | |
| * | |
| * @return the same as token | |
| */ | |
| static int token_(void){ | |
| // capture the position before retrieving the token | |
| setfirstloc( ) ; | |
| // get the token | |
| int res = token( ) ; | |
| // capture the position after | |
| int _last_col = ParseState.xxcolno ; | |
| int _last_parsed = ParseState.xxparseno ; | |
| _current_token = res ; | |
| incrementId( ) ; | |
| yylloc.id = identifier ; | |
| // record the position | |
| if( res != '\n' && res != END_OF_INPUT) | |
| record_( yylloc.first_parsed, yylloc.first_column, | |
| _last_parsed, _last_col, | |
| res, identifier, yytext ); | |
| return res; | |
| } | |
| static int yylex(void) | |
| { | |
| int tok; | |
| again: | |
| tok = token_(); | |
| /* Newlines must be handled in a context */ | |
| /* sensitive way. The following block of */ | |
| /* deals directly with newlines in the */ | |
| /* body of "if" statements. */ | |
| if (tok == '\n') { | |
| if (EatLines || *contextp == '[' || *contextp == '(') | |
| goto again; | |
| /* The essence of this is that in the body of */ | |
| /* an "if", any newline must be checked to */ | |
| /* see if it is followed by an "else". */ | |
| /* such newlines are discarded. */ | |
| if (*contextp == 'i') { | |
| /* Find the next non-newline token */ | |
| while(tok == '\n') | |
| tok = token_(); | |
| /* If we encounter "}", ")" or "]" then */ | |
| /* we know that all immediately preceding */ | |
| /* "if" bodies have been terminated. */ | |
| /* The corresponding "i" values are */ | |
| /* popped off the context stack. */ | |
| if (tok == RBRACE || tok == ')' || tok == ']' ) { | |
| while (*contextp == 'i') | |
| ifpop(); | |
| *contextp-- = 0; | |
| setlastloc(); | |
| return tok; | |
| } | |
| /* When a "," is encountered, it terminates */ | |
| /* just the immediately preceding "if" body */ | |
| /* so we pop just a single "i" of the */ | |
| /* context stack. */ | |
| if (tok == ',') { | |
| ifpop(); | |
| setlastloc(); | |
| return tok; | |
| } | |
| /* Tricky! If we find an "else" we must */ | |
| /* ignore the preceding newline. Any other */ | |
| /* token means that we must return the newline */ | |
| /* to terminate the "if" and "push back" that */ | |
| /* token so that we will obtain it on the next */ | |
| /* call to token. In either case sensitivity */ | |
| /* is lost, so we pop the "i" from the context */ | |
| /* stack. */ | |
| if(tok == ELSE) { | |
| EatLines = 1; | |
| ifpop(); | |
| setlastloc(); | |
| return ELSE; | |
| } | |
| else { | |
| ifpop(); | |
| SavedToken = tok; | |
| xxlinesave = yylloc.first_line; | |
| xxcolsave = yylloc.first_column; | |
| xxbytesave = yylloc.first_byte; | |
| xxparsesave = yylloc.first_parsed; | |
| SavedLval = yylval; | |
| setlastloc(); | |
| if (yytext[0]) /* unrecord the pushed back token if not null */ | |
| ParseState.data_count--; | |
| return '\n'; | |
| } | |
| } | |
| else { | |
| setlastloc(); | |
| return '\n'; | |
| } | |
| } | |
| /* Additional context sensitivities */ | |
| switch(tok) { | |
| /* Any newlines immediately following the */ | |
| /* the following tokens are discarded. The */ | |
| /* expressions are clearly incomplete. */ | |
| case '+': | |
| case '-': | |
| case '*': | |
| case '/': | |
| case '^': | |
| case LT: | |
| case LE: | |
| case GE: | |
| case GT: | |
| case EQ: | |
| case NE: | |
| case OR: | |
| case AND: | |
| case OR2: | |
| case AND2: | |
| case SPECIAL: | |
| case FUNCTION: | |
| case WHILE: | |
| case REPEAT: | |
| case FOR: | |
| case IN: | |
| case '?': | |
| case '!': | |
| case '=': | |
| case ':': | |
| case '~': | |
| case '$': | |
| case '@': | |
| case LEFT_ASSIGN: | |
| case RIGHT_ASSIGN: | |
| case EQ_ASSIGN: | |
| EatLines = 1; | |
| break; | |
| /* Push any "if" statements found and */ | |
| /* discard any immediately following newlines. */ | |
| case IF: | |
| IfPush(); | |
| EatLines = 1; | |
| break; | |
| /* Terminate any immediately preceding "if" */ | |
| /* statements and discard any immediately */ | |
| /* following newlines. */ | |
| case ELSE: | |
| ifpop(); | |
| EatLines = 1; | |
| break; | |
| /* These tokens terminate any immediately */ | |
| /* preceding "if" statements. */ | |
| case ';': | |
| case ',': | |
| ifpop(); | |
| break; | |
| /* Any newlines following these tokens can */ | |
| /* indicate the end of an expression. */ | |
| case SYMBOL: | |
| case STR_CONST: | |
| case NUM_CONST: | |
| case NULL_CONST: | |
| case NEXT: | |
| case BREAK: | |
| EatLines = 0; | |
| break; | |
| /* Handle brackets, braces and parentheses */ | |
| case LBB: | |
| if(contextp - contextstack >= CONTEXTSTACK_SIZE - 1) | |
| error(_("contextstack overflow at line %d"), ParseState.xxlineno); | |
| *++contextp = '['; | |
| *++contextp = '['; | |
| break; | |
| case '[': | |
| if(contextp - contextstack >= CONTEXTSTACK_SIZE) | |
| error(_("contextstack overflow at line %d"), ParseState.xxlineno); | |
| *++contextp = (char) tok; | |
| break; | |
| case LBRACE: | |
| if(contextp - contextstack >= CONTEXTSTACK_SIZE) | |
| error(_("contextstack overflow at line %d"), ParseState.xxlineno); | |
| *++contextp = (char) tok; | |
| EatLines = 1; | |
| break; | |
| case '(': | |
| if(contextp - contextstack >= CONTEXTSTACK_SIZE) | |
| error(_("contextstack overflow at line %d"), ParseState.xxlineno); | |
| *++contextp = (char) tok; | |
| break; | |
| case ']': | |
| while (*contextp == 'i') | |
| ifpop(); | |
| *contextp-- = 0; | |
| EatLines = 0; | |
| break; | |
| case RBRACE: | |
| while (*contextp == 'i') | |
| ifpop(); | |
| *contextp-- = 0; | |
| break; | |
| case ')': | |
| while (*contextp == 'i') | |
| ifpop(); | |
| *contextp-- = 0; | |
| EatLines = 0; | |
| break; | |
| } | |
| setlastloc(); | |
| return tok; | |
| } | |
| /** | |
| * Records location information about a symbol. The information is | |
| * used to fill the data | |
| * | |
| */ | |
| static void record_( int first_parsed, int first_column, int last_parsed, int last_column, | |
| int token, int id, char* text_in ){ | |
| if( token == LEFT_ASSIGN && colon == 1){ | |
| token = COLON_ASSIGN ; | |
| colon = 0 ; | |
| } | |
| if (!ParseState.keepSrcRefs || id == NA_INTEGER) return; | |
| // don't care about zero sized things | |
| if( !yytext[0] ) return ; | |
| if (ParseState.data_count == DATA_COUNT) | |
| growData(); | |
| _FIRST_COLUMN( ParseState.data_count ) = first_column; | |
| _FIRST_PARSED( ParseState.data_count ) = first_parsed; | |
| _LAST_COLUMN( ParseState.data_count ) = last_column; | |
| _LAST_PARSED( ParseState.data_count ) = last_parsed; | |
| _TOKEN( ParseState.data_count ) = token; | |
| _ID( ParseState.data_count ) = id ; | |
| _PARENT(ParseState.data_count) = 0 ; | |
| if ( text_in ) | |
| SET_STRING_ELT(ParseState.text, ParseState.data_count, mkChar(text_in)); | |
| else | |
| SET_STRING_ELT(ParseState.text, ParseState.data_count, mkChar("")); | |
| if( id > ID_COUNT ){ | |
| growID(id) ; | |
| } | |
| ID_ID( id ) = ParseState.data_count ; | |
| ParseState.data_count++ ; | |
| } | |
| /** | |
| * records parent as the parent of all its childs. This grows the | |
| * parents list with a new vector. The first element of the new | |
| * vector is the parent id, and other elements are childs id | |
| * | |
| * @param parent id of the parent expression | |
| * @param childs array of location information for all child symbols | |
| * @param nchilds number of childs | |
| */ | |
| static void recordParents( int parent, yyltype * childs, int nchilds){ | |
| if( parent > ID_COUNT ){ | |
| growID(parent) ; | |
| } | |
| /* some of the childs might be an empty token (like cr) | |
| which we do not want to track */ | |
| int ii; /* loop index */ | |
| yyltype loc ; | |
| for( ii=0; ii<nchilds; ii++){ | |
| loc = childs[ii] ; | |
| if( loc.id == NA_INTEGER || (loc.first_line == loc.last_line && loc.first_byte > loc.last_byte) ) | |
| continue ; | |
| /* This shouldn't happen... */ | |
| if (loc.id < 0 || loc.id > identifier) { | |
| error(_("internal parser error at line %d"), ParseState.xxlineno); | |
| } | |
| ID_PARENT( (childs[ii]).id ) = parent ; | |
| } | |
| } | |
| /** | |
| * The token pointed by the location has the wrong token type, | |
| * This updates the type | |
| * | |
| * @param loc location information for the token to track | |
| */ | |
| static void modif_token( yyltype* loc, int tok ){ | |
| int id = loc->id ; | |
| if (!ParseState.keepSrcRefs || id < 0 || id > ID_COUNT) | |
| return; | |
| if( tok == SYMBOL_FUNCTION_CALL ){ | |
| // looking for first child of id | |
| int j = ID_ID( id ) ; | |
| int parent = id ; | |
| if (j < 0 || j > ID_COUNT) | |
| return; | |
| while( ID_PARENT( _ID(j) ) != parent ){ | |
| j-- ; | |
| if (j < 0) | |
| return; | |
| } | |
| if( _TOKEN(j) == SYMBOL ){ | |
| _TOKEN(j) = SYMBOL_FUNCTION_CALL ; | |
| } | |
| } else{ | |
| _TOKEN( ID_ID(id) ) = tok ; | |
| } | |
| } | |
| /* this local version of lengthgets() always copies and doesn't fill with NA */ | |
| static SEXP lengthgets2(SEXP x, int len) { | |
| SEXP result; | |
| PROTECT(result = allocVector( TYPEOF(x), len )); | |
| len = (len < length(x)) ? len : length(x); | |
| switch(TYPEOF(x)) { | |
| case INTSXP: | |
| for (int i = 0; i < len; i++) | |
| INTEGER(result)[i] = INTEGER(x)[i]; | |
| for (int i = len; i < length(result); i++) | |
| INTEGER(result)[i] = 0; | |
| break; | |
| case STRSXP: | |
| for (int i = 0; i < len; i++) | |
| SET_STRING_ELT(result, i, STRING_ELT(x, i)); | |
| break; | |
| default: | |
| UNIMPLEMENTED_TYPE("lengthgets2", x); | |
| } | |
| UNPROTECT(1); | |
| return result; | |
| } | |
| static void finalizeData( ){ | |
| int nloc = ParseState.data_count ; | |
| // int maxId = _ID(nloc-1) ; | |
| int i, j, id ; | |
| int parent ; | |
| /* attach comments to closest enclosing symbol */ | |
| int comment_line, comment_first_col; | |
| int this_first_parsed, this_last_parsed, this_first_col ; | |
| int orphan ; | |
| for( i=0; i<nloc; i++){ | |
| if( _TOKEN(i) == COMMENT ){ | |
| comment_line = _FIRST_PARSED( i ) ; | |
| comment_first_col = _FIRST_COLUMN( i ) ; | |
| orphan = 1 ; | |
| for( j=i+1; j<nloc; j++){ | |
| this_first_parsed = _FIRST_PARSED( j ) ; | |
| this_first_col = _FIRST_COLUMN( j ) ; | |
| this_last_parsed = _LAST_PARSED( j ) ; | |
| /* the comment needs to start after the current symbol */ | |
| if( comment_line < this_first_parsed ) continue ; | |
| if( (comment_line == this_first_parsed) & (comment_first_col < this_first_col) ) continue ; | |
| /* the current symbol must finish after the comment */ | |
| if( this_last_parsed <= comment_line ) continue ; | |
| /* we have a match, record the parent and stop looking */ | |
| ID_PARENT( _ID(i) ) = _ID(j) ; | |
| orphan = 0; | |
| break ; | |
| } | |
| if(orphan){ | |
| ID_PARENT( _ID(i) ) = 0 ; | |
| } | |
| } | |
| } | |
| int idp; | |
| /* store parents in the data */ | |
| for( i=0; i<nloc; i++){ | |
| id = _ID(i); | |
| parent = ID_PARENT( id ) ; | |
| if( parent == 0 ){ | |
| _PARENT(i)=parent; | |
| continue; | |
| } | |
| while( 1 ){ | |
| idp = ID_ID( parent ) ; | |
| if( idp > 0 ) break ; | |
| if( parent == 0 ){ | |
| break ; | |
| } | |
| parent = ID_PARENT( parent ) ; | |
| } | |
| _PARENT(i) = parent ; | |
| } | |
| /* now rework the parents of comments, we try to attach | |
| comments that are not already attached (parent=0) to the next | |
| enclosing top-level expression */ | |
| for( i=0; i<nloc; i++){ | |
| int token = _TOKEN(i); | |
| if( token == COMMENT && _PARENT(i) == 0 ){ | |
| for( j=i; j<nloc; j++){ | |
| int token_j = _TOKEN(j); | |
| if( token_j == COMMENT ) continue ; | |
| if( _PARENT(j) != 0 ) continue ; | |
| _PARENT(i) = - _ID(j) ; | |
| break ; | |
| } | |
| } | |
| } | |
| /* attach the token names as an attribute so we don't need to switch to a dataframe, and decide on terminals */ | |
| SEXP tokens; | |
| PROTECT(tokens = allocVector( STRSXP, nloc ) ); | |
| for (int i=0; i<nloc; i++) { | |
| int token = _TOKEN(i); | |
| int xlat = yytranslate[token]; | |
| if (xlat == 2) /* "unknown" */ | |
| xlat = token; | |
| if (xlat < YYNTOKENS + YYNNTS) | |
| SET_STRING_ELT(tokens, i, mkChar(yytname[xlat])); | |
| else { /* we have a token which doesn't have a name, e.g. an illegal character as in PR#15518 */ | |
| char name[2]; | |
| name[0] = (char) xlat; | |
| name[1] = 0; | |
| SET_STRING_ELT(tokens, i, mkChar(name)); | |
| } | |
| _TERMINAL(i) = xlat < YYNTOKENS; | |
| } | |
| SEXP dims, newdata, newtext; | |
| if (nloc) { | |
| PROTECT( newdata = lengthgets2(ParseState.data, nloc * DATA_ROWS)); | |
| PROTECT( newtext = lengthgets2(ParseState.text, nloc)); | |
| } else { | |
| PROTECT( newdata = allocVector( INTSXP, 0)); | |
| PROTECT( newtext = allocVector( STRSXP, 0)); | |
| } | |
| PROTECT( dims = allocVector( INTSXP, 2 ) ) ; | |
| INTEGER(dims)[0] = DATA_ROWS ; | |
| INTEGER(dims)[1] = nloc ; | |
| setAttrib( newdata, install( "dim" ), dims ) ; | |
| setAttrib( newdata, install("tokens"), tokens ); | |
| setAttrib( newdata, install("text"), newtext ); | |
| setAttrib(newdata, R_ClassSymbol, mkString("parseData")); | |
| /* Put it into the srcfile environment */ | |
| if (isEnvironment(ParseState.SrcFile)) | |
| defineVar(install("parseData"), newdata, ParseState.SrcFile); | |
| UNPROTECT(4); | |
| } | |
| /** | |
| * Grows the data | |
| */ | |
| static void growData(){ | |
| SEXP bigger, biggertext ; | |
| int new_data_count; | |
| if (!ParseState.data) { | |
| new_data_count = INIT_DATA_COUNT; | |
| R_PreserveObject(ParseState.data = allocVector(INTSXP, 0)); | |
| R_PreserveObject(ParseState.text = allocVector(STRSXP, 0)); | |
| } else | |
| new_data_count = 2*DATA_COUNT; | |
| R_PreserveObject( bigger = lengthgets2(ParseState.data, new_data_count * DATA_ROWS ) ) ; | |
| R_PreserveObject( biggertext = lengthgets2(ParseState.text, new_data_count ) ); | |
| R_ReleaseObject( ParseState.data ); | |
| R_ReleaseObject( ParseState.text ); | |
| ParseState.data = bigger; | |
| ParseState.text = biggertext; | |
| } | |
| /** | |
| * Grows the ids vector so that ID_ID(target) can be called | |
| */ | |
| static void growID( int target ){ | |
| SEXP bigger; | |
| int new_count; | |
| if (!ParseState.ids) { | |
| new_count = INIT_DATA_COUNT/2 - 1; | |
| R_PreserveObject(ParseState.ids = allocVector(INTSXP, 0)); | |
| } else | |
| new_count = ID_COUNT; | |
| while (target > new_count) | |
| new_count = 2*new_count + 1; | |
| if (new_count <= ID_COUNT) | |
| return; | |
| int new_size = (1 + new_count)*2; | |
| R_PreserveObject( bigger = lengthgets2(ParseState.ids, new_size ) ); | |
| R_ReleaseObject( ParseState.ids ); | |
| ParseState.ids = bigger; | |
| } |