mirror of
https://github.com/netsurf-browser/netsurf
synced 2024-12-26 14:07:05 +03:00
d7a4adf481
CSS parsing improvements: new tokeniser using re2c, improve memory-exhaustion behaviour, plug leaks, reduce memory usage, clean up code, add source documentation. svn path=/import/netsurf/; revision=806
99 lines
2.6 KiB
Plaintext
99 lines
2.6 KiB
Plaintext
/*
|
|
* This file is part of NetSurf, http://netsurf.sourceforge.net/
|
|
* Licensed under the GNU General Public License,
|
|
* http://www.opensource.org/licenses/gpl-license
|
|
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
|
|
*/
|
|
|
|
/** \file
|
|
* CSS tokeniser using re2c.
|
|
*
|
|
* see CSS2 Specification, chapter 4
|
|
* http://www.w3.org/TR/REC-CSS2/syndata.html,
|
|
* and errata
|
|
* http://www.w3.org/Style/css2-updates/REC-CSS2-19980512-errata
|
|
*/
|
|
|
|
#include <stdbool.h>
|
|
#define CSS_INTERNALS
|
|
#include "netsurf/css/css.h"
|
|
#include "netsurf/css/parser.h"
|
|
|
|
/*
 * Interface macros read by the code re2c generates.  They expand to the
 * parameters (buffer, end) and the local variable (marker) of
 * css_tokenise(), so they are only meaningful inside that function.
 */
#define YYCTYPE unsigned char	/* type of one input character */
#define YYCURSOR (*buffer)	/* scan position, kept in the caller's pointer */
#define YYLIMIT end		/* end of the input */
#define YYMARKER marker		/* saved position for backtracking */
/* The input is never refilled: a refill request ends the scan with token 0. */
#define YYFILL(n) { return 0; }
|
|
|
|
|
|
/**
|
|
* Identify a CSS source token.
|
|
*
|
|
* \param buffer source to tokenise, updated to new position
|
|
* \param end end of source
|
|
* \param token_text updated to start of recognized token
|
|
* \return token number
|
|
*/
|
|
|
|
int css_tokenise(unsigned char **buffer, unsigned char *end,
|
|
unsigned char **token_text)
|
|
{
|
|
unsigned char *marker;
|
|
|
|
start:
|
|
*token_text = YYCURSOR;
|
|
|
|
/*!re2c
|
|
nonascii = [\200-\377];
|
|
unicode = "\\" [0-9a-f]+ [ \n\r\t\f]?;
|
|
escape = unicode | "\\" [ -~\200-\377];
|
|
nmchar = [-a-zA-Z0-9_] | nonascii | escape;
|
|
nmstart = [a-zA-Z_] | nonascii | escape;
|
|
ident = nmstart nmchar*;
|
|
name = nmchar+;
|
|
num = [+-]? [0-9]+ | [0-9]* "." [0-9]+;
|
|
nl = "\n" | "\r\n" | "\r" | "\f";
|
|
string1 = "\"" ([\t !#$%&(-~] | "\\" nl | "'" | nonascii | escape)* "\"";
|
|
string2 = "'" ([\t !#$%&(-~] | "\\" nl | "\""| nonascii | escape)* "'";
|
|
string = string1 | string2;
|
|
w = [ \t\r\n\f]*;
|
|
any = [\000-\377];
|
|
|
|
ident { return IDENT; }
|
|
"@" ident { return ATKEYWORD; }
|
|
string { return STRING; }
|
|
"#" name { return HASH; }
|
|
num { return NUMBER; }
|
|
num "%" { return PERCENTAGE; }
|
|
num ident { return DIMENSION; }
|
|
"url(" w string w ")" | "url(" w ([!#$%&*-~]|nonascii|escape)* w ")"
|
|
{ return URI; }
|
|
"U+" [0-9A-F?]+ ("-" [0-9A-F]+ )?
|
|
{ return UNICODE_RANGE; }
|
|
"<!--" { goto start; /* ignore CDO */ }
|
|
"-->" { goto start; /* ignore CDC */ }
|
|
";" { return SEMI; }
|
|
"{" { return LBRACE; }
|
|
"}" { return RBRACE; }
|
|
"(" { return LPAREN; }
|
|
")" { return RPAREN; }
|
|
"[" { return LBRAC; }
|
|
"]" { return RBRAC; }
|
|
[ \t\r\n\f]+ { goto start; /* ignore whitespace */ }
|
|
"/*" (any\[*])* "*"+ ((any\[/]) (any\[*])* "*"+)* "/"
|
|
{ goto start; /* ignore comments */ }
|
|
ident "(" { return FUNCTION; }
|
|
"=" { return EQUALS; }
|
|
"~=" { return INCLUDES; }
|
|
"|=" { return DASHMATCH; }
|
|
":" { return COLON; }
|
|
"," { return COMMA; }
|
|
"+" { return PLUS; }
|
|
">" { return GT; }
|
|
"." { return DOT; }
|
|
"*" { return ASTERISK; }
|
|
any { return DELIM; }
|
|
*/
|
|
|
|
}
|