/*
 * LuCI Template - Parser implementation
 *
 *   Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

#include "template_parser.h"


/*
 * Leading ([n][0]) and trailing ([n][1]) Lua snippets wrapped around a
 * chunk. The table is indexed by the chunk type stored in the parser state;
 * a NULL entry means nothing is emitted on that side.
 */
const char * gen_code[7][2] = {
	{ "write(\"",                  "\")"       },
	{ NULL,                        NULL        },
	{ "write(tostring(",           " or \"\"))" },
	{ "include(\"",                "\")"       },
	{ "write(pcdata(translate(\"", "\")))"     },
	{ "write(translate(\"",        "\"))"      },
	{ NULL,                        " "         }
};

/*
 * Length-bounded substring search, comparable to strstr() but working on
 * explicit lengths instead of NUL terminators.
 *
 * Looks for the first ndlen bytes of needle inside the first hslen bytes of
 * haystack and returns a pointer to the first occurrence, or NULL when there
 * is none. A needle that does not fit completely into the remaining haystack
 * never matches.
 */
static char *strfind(char *haystack, int hslen, const char *needle, int ndlen)
{
	int pos;
	int k;

	for (pos = 0; pos < hslen; pos++) {
		/* Cheap rejection on the first byte */
		if (haystack[pos] != needle[0])
			continue;

		/* A multi-byte needle has to fit into what is left of the haystack */
		if ((ndlen != 1) && ((pos + ndlen) > hslen))
			continue;

		/* Compare the remaining needle bytes */
		for (k = 1; k < ndlen; k++) {
			if (haystack[pos + k] != needle[k])
				break;
		}

		/* Loop ran to completion: every byte matched */
		if (k >= ndlen)
			return &haystack[pos];
	}

	return NULL;
}

/*
 * Inspect current read buffer and find the number of "vague" characters at the end
 * which could indicate an opening token. Returns the number of "vague" chars.
 * The last continuous sequence of whitespace, optionally followed by a "<" is
 * treated as "vague" because whitespace may be discarded if the upcoming opening
 * token indicates pre-whitespace-removal ("<%-"). A single remaining "<" char
 * can't be differentiated from an opening token ("<%"), so it's kept to be processed
 * in the next cycle.
*/ static int stokscan(struct template_parser *data, int off, int no_whitespace) { int i; int skip = 0; int tokoff = data->bufsize - 1; for( i = tokoff; i >= off; i-- ) { if( data->buf[i] == T_TOK_START[0] ) { skip = tokoff - i + 1; tokoff = i - 1; break; } } if( !no_whitespace ) { for( i = tokoff; i >= off; i-- ) { if( isspace(data->buf[i]) ) skip++; else break; } } return skip; } /* * Similar to stokscan() but looking for closing token indicators. * Matches "-", optionally followed by a "%" char. */ static int etokscan(struct template_parser *data) { int skip = 0; if( (data->bufsize > 0) && (data->buf[data->bufsize-1] == T_TOK_END[0]) ) skip++; if( (data->bufsize > skip) && (data->buf[data->bufsize-skip-1] == T_TOK_SKIPWS[0]) ) skip++; return skip; } /* * Generate Lua expressions from the given raw code, write it into the * output buffer and set the lua_Reader specific size pointer. * Takes parser-state, lua_Reader's size pointer and generator flags * as parameter. The given flags indicate whether leading or trailing * code should be added. Returns a pointer to the output buffer. 
*/ static const char * generate_expression(struct template_parser *data, size_t *sz, int what) { char tmp[T_OUTBUFSZ]; int i; int size = 0; int start = 0; int whitespace = 0; memset(tmp, 0, T_OUTBUFSZ); /* Inject leading expression code (if any) */ if( (what & T_GEN_START) && (gen_code[data->type][0] != NULL) ) { memcpy(tmp, gen_code[data->type][0], strlen(gen_code[data->type][0])); size += strlen(gen_code[data->type][0]); } /* Parse source buffer */ for( i = 0; i < data->outsize; i++ ) { /* Skip leading whitespace for non-raw and non-expr chunks */ if( !start && isspace(data->out[i]) && (data->type == T_TYPE_I18N || data->type == T_TYPE_I18N_RAW || data->type == T_TYPE_INCLUDE) ) continue; else if( !start ) start = 1; /* Found whitespace after i18n key */ if( data->type == T_TYPE_I18N || data->type == T_TYPE_I18N_RAW ) { /* Is initial whitespace, insert space */ if( !whitespace && isspace(data->out[i]) ) { tmp[size++] = ' '; whitespace = 1; } /* Suppress subsequent whitespace, escape special chars */ else if( !isspace(data->out[i]) ) { if( data->out[i] == '\\' || data->out[i] == '"' ) tmp[size++] = '\\'; tmp[size++] = data->out[i]; whitespace = 0; } } /* Escape quotes, backslashes and newlines for plain and include expressions */ else if( (data->type == T_TYPE_TEXT || data->type == T_TYPE_INCLUDE) && (data->out[i] == '\\' || data->out[i] == '"' || data->out[i] == '\n' || data->out[i] == '\t') ) { tmp[size++] = '\\'; switch(data->out[i]) { case '\n': tmp[size++] = 'n'; break; case '\t': tmp[size++] = 't'; break; default: tmp[size++] = data->out[i]; } } /* Normal char */ else { tmp[size++] = data->out[i]; } } /* Inject trailing expression code (if any) */ if( (what & T_GEN_END) && (gen_code[data->type][1] != NULL) ) { /* Strip trailing space for i18n expressions */ if( data->type == T_TYPE_I18N || data->type == T_TYPE_I18N_RAW ) if( (size > 0) && (tmp[size-1] == ' ') ) size--; memcpy(&tmp[size], gen_code[data->type][1], strlen(gen_code[data->type][1])); size += 
strlen(gen_code[data->type][1]); } *sz = data->outsize = size; memset(data->out, 0, T_OUTBUFSZ); memcpy(data->out, tmp, size); //printf("<<<%i|%i|%i|%s>>>\n", what, data->type, *sz, data->out); return data->out; } /* * Move the number of bytes specified in data->bufsize from the * given source pointer to the beginning of the read buffer. */ static void bufmove(struct template_parser *data, const char *src) { if( data->bufsize > 0 ) memmove(data->buf, src, data->bufsize); else if( data->bufsize < 0 ) data->bufsize = 0; data->buf[data->bufsize] = 0; } /* * Move the given amount of bytes from the given source pointer * to the output buffer and set data->outputsize. */ static void bufout(struct template_parser *data, const char *src, int len) { if( len >= 0 ) { memset(data->out, 0, T_OUTBUFSZ); memcpy(data->out, src, len); data->outsize = len; } else { data->outsize = 0; } } /* * lua_Reader compatible function that parses template code on demand from * the given file handle. */ const char *template_reader(lua_State *L, void *ud, size_t *sz) { struct template_parser *data = ud; char *match = NULL; int off = 0; int ignore = 0; int genflags = 0; int readlen = 0; int vague = 0; while( !(data->flags & T_FLAG_EOF) || (data->bufsize > 0) ) { /* Fill buffer */ if( !(data->flags & T_FLAG_EOF) && (data->bufsize < T_READBUFSZ) ) { if( (readlen = read(data->fd, &data->buf[data->bufsize], T_READBUFSZ - data->bufsize)) > 0 ) data->bufsize += readlen; else if( readlen == 0 ) data->flags |= T_FLAG_EOF; else return NULL; } /* Evaluate state */ switch(data->state) { /* Plain text chunk (before "<%") */ case T_STATE_TEXT_INIT: case T_STATE_TEXT_NEXT: off = 0; ignore = 0; *sz = 0; data->type = T_TYPE_TEXT; /* Skip leading whitespace if requested */ if( data->flags & T_FLAG_SKIPWS ) { data->flags &= ~T_FLAG_SKIPWS; while( (off < data->bufsize) && isspace(data->buf[off]) ) off++; } /* Found "<%" */ if( (match = strfind(&data->buf[off], data->bufsize - off - 1, T_TOK_START, 
strlen(T_TOK_START))) != NULL ) { readlen = (int)(match - &data->buf[off]); data->bufsize -= (readlen + strlen(T_TOK_START) + off); match += strlen(T_TOK_START); /* Check for leading '-' */ if( match[0] == T_TOK_SKIPWS[0] ) { data->bufsize--; match++; while( (readlen > 1) && isspace(data->buf[off+readlen-1]) ) { readlen--; } } bufout(data, &data->buf[off], readlen); bufmove(data, match); data->state = T_STATE_CODE_INIT; } /* Maybe plain chunk */ else { /* Preserve trailing "<" or white space, maybe a start token */ vague = stokscan(data, off, 0); /* We can process some bytes ... */ if( vague < data->bufsize ) { readlen = data->bufsize - vague - off; } /* No bytes to process, so try to remove at least whitespace ... */ else { /* ... but try to preserve trailing "<" ... */ vague = stokscan(data, off, 1); if( vague < data->bufsize ) { readlen = data->bufsize - vague - off; } /* ... no chance, push out buffer */ else { readlen = vague - off; vague = 0; } } bufout(data, &data->buf[off], readlen); data->state = T_STATE_TEXT_NEXT; data->bufsize = vague; bufmove(data, &data->buf[off+readlen]); } if( ignore || data->outsize == 0 ) continue; else return generate_expression(data, sz, T_GEN_START | T_GEN_END); break; /* Ignored chunk (inside "<%# ... %>") */ case T_STATE_SKIP: ignore = 1; /* Initial code chunk ("<% ...") */ case T_STATE_CODE_INIT: off = 0; /* Check for leading '-' */ if( data->buf[off] == T_TOK_SKIPWS[0] ) off++; /* Determine code type */ switch(data->buf[off]) { case '#': ignore = 1; off++; data->type = T_TYPE_COMMENT; break; case '=': off++; data->type = T_TYPE_EXPR; break; case '+': off++; data->type = T_TYPE_INCLUDE; break; case ':': off++; data->type = T_TYPE_I18N; break; case '_': off++; data->type = T_TYPE_I18N_RAW; break; default: data->type = T_TYPE_CODE; break; } /* Subsequent code chunk ("..." or "... 
%>") */ case T_STATE_CODE_NEXT: /* Found "%>" */ if( (match = strfind(&data->buf[off], data->bufsize - off, T_TOK_END, strlen(T_TOK_END))) != NULL ) { genflags = ( data->state == T_STATE_CODE_INIT ) ? (T_GEN_START | T_GEN_END) : T_GEN_END; readlen = (int)(match - &data->buf[off]); /* Check for trailing '-' */ if( (match > data->buf) && (*(match-1) == T_TOK_SKIPWS[0]) ) { readlen--; data->flags |= T_FLAG_SKIPWS; } bufout(data, &data->buf[off], readlen); data->state = T_STATE_TEXT_INIT; data->bufsize -= ((int)(match - &data->buf[off]) + strlen(T_TOK_END) + off); bufmove(data, &match[strlen(T_TOK_END)]); } /* Code chunk */ else { genflags = ( data->state == T_STATE_CODE_INIT ) ? T_GEN_START : 0; /* Preserve trailing "%" and "-", maybe an end token */ vague = etokscan(data); readlen = data->bufsize - off - vague; bufout(data, &data->buf[off], readlen); data->state = T_STATE_CODE_NEXT; data->bufsize = vague; bufmove(data, &data->buf[readlen+off]); } if( ignore || (data->outsize == 0 && !genflags) ) continue; else return generate_expression(data, sz, genflags); break; } } *sz = 0; return NULL; }