netsurf/css/scanner.l
John Mark Bell 8f999376d5 Bring definition of {escape} in line with CSS 2.1 spec.
Require units when parsing css lengths.
Correctly ignore background: meta-properties which have multiple instances of sub-properties (e.g. foo { background: red pink; })

svn path=/trunk/netsurf/; revision=3893
2008-03-06 23:20:32 +00:00

113 lines
2.6 KiB
Plaintext

/*
* This file is part of NetSurf, http://netsurf-browser.org/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
*/
/** \file
* CSS tokeniser using re2c.
*
* see CSS2 Specification, chapter 4
* http://www.w3.org/TR/REC-CSS2/syndata.html,
* and errata
* http://www.w3.org/Style/css2-updates/REC-CSS2-19980512-errata
*/
#include <stdbool.h>
#define CSS_INTERNALS
#include "css/css.h"
#include "css/parser.h"
#define YYCTYPE unsigned char
#define YYCURSOR (*buffer)
#define YYLIMIT end
#define YYMARKER marker
#define YYFILL(n) { return 0; }
/**
 * Identify the next CSS source token.
 *
 * Scanning begins at *buffer. CSS comments and the SGML comment
 * delimiters "<!--" and "-->" are not reported: after matching one of
 * them the scan restarts at the following byte, and *token_text is reset
 * to that new position. Any single byte that no other rule matches is
 * reported as DELIM.
 *
 * Hexadecimal digits in backslash escapes are accepted in either case.
 * The CSS grammar is case-insensitive, but re2c character classes are
 * not, so both cases have to be listed explicitly.
 *
 * \param buffer      source to tokenise, updated to new position
 * \param end         end of source
 * \param token_text  updated to start of recognized token
 * \return token number, or 0 if the scanner asks for more input than is
 *         available (see YYFILL)
 */
int css_tokenise(unsigned char **buffer, unsigned char *end,
		unsigned char **token_text)
{
	unsigned char *marker;

start:
	/* Re-executed after every skipped comment / CDO / CDC so that the
	 * reported token start never points at ignored text. */
	*token_text = YYCURSOR;

	/*
	 * Rule block processed by re2c.
	 *
	 * The named definitions follow the lexical grammar of the CSS
	 * specification.  "unicode" is a backslash followed by hex digits
	 * and one optional whitespace terminator; "escape" additionally
	 * allows a backslash followed by any byte that is neither a newline
	 * character nor a hex digit.  Upper-case A-F must be listed in both
	 * places: otherwise "\4A " would be read as the escape "\4"
	 * followed by the name character "A", and the terminating
	 * whitespace would be left behind as a separate S token.
	 *
	 * The order of the rules is significant and must not be changed:
	 * when two rules match the same text, the earlier rule is chosen.
	 */
/*!re2c
	nonascii = [\200-\377];
	unicode = "\\" [0-9a-fA-F]+ ("\r\n" | [ \n\r\t\f])?;
	escape = unicode | "\\" [^\n\r\f0-9a-fA-F];
	nmchar = [-a-zA-Z0-9_] | nonascii | escape;
	nmstart = [a-zA-Z_] | nonascii | escape;
	ident = [-]? nmstart nmchar*;
	name = nmchar+;
	num = [+-]? ([0-9]+ | [0-9]* "." [0-9]+);
	nl = "\n" | "\r\n" | "\r" | "\f";
	string1 = "\"" ([\t !#$%&(-~] | "\\" nl | "'" | nonascii | escape)* "\"";
	string2 = "'" ([\t !#$%&(-~] | "\\" nl | "\""| nonascii | escape)* "'";
	string = string1 | string2;
	s = [ \t\r\n\f];
	w = s*;
	any = [\000-\377];

	ident { return IDENT; }
	"@" ident { return ATKEYWORD; }
	string { return STRING; }
	"#" name { return HASH; }
	num { return NUMBER; }
	num "%" { return PERCENTAGE; }
	num ident { return DIMENSION; }
	"url(" w string w ")" | "url(" w ([!#$%&*-~]|nonascii|escape)* w ")"
		{ return URI; }
	"U+" [0-9A-F?]+ ("-" [0-9A-F]+ )?
		{ return UNICODE_RANGE; }
	"<!--" { goto start; /* ignore CDO */ }
	"-->" { goto start; /* ignore CDC */ }
	";" { return SEMI; }
	"{" { return LBRACE; }
	"}" { return RBRACE; }
	"(" { return LPAREN; }
	")" { return RPAREN; }
	"[" { return LBRAC; }
	"]" { return RBRAC; }
	s+ { return S; }
	"/*" (any\[*])* "*"+ ((any\[/*]) (any\[*])* "*"+)* "/"
		{ goto start; /* ignore comments */ }
	ident "(" { return FUNCTION; }
	"~=" { return INCLUDES; }
	"|=" { return DASHMATCH; }
	"^=" { return PREFIX; }
	"$=" { return SUFFIX; }
	"*=" { return SUBSTR; }
	"=" { return EQUALS; }
	":" { return COLON; }
	"," { return COMMA; }
	"+" { return PLUS; }
	">" { return GT; }
	"." { return DOT; }
	"*" { return ASTERISK; }
	any { return DELIM; }
*/
}