mirror of
https://github.com/lexborisov/Modest
synced 2024-11-26 07:39:34 +03:00
209 lines
5.7 KiB
Perl
209 lines
5.7 KiB
Perl
|
#!/usr/bin/perl -w

# Generator for the MyCSS tokenizer "first character" tables.
#
# The script classifies every byte value 0..255 using the $char_index table
# below and prints C source fragments to STDOUT (see create_first).

BEGIN {
    use FindBin;
    # Make modules that live next to this script, in ../ext/, loadable.
    push @INC, $FindBin::Bin. "/../ext/";
};

use bytes;
use strict;
use Encode;

# First numeric value used when numbering the generated state constants.
my $enum_begin_count = 1;

# Prefix put in front of every generated state name.
my $mycss_state_prefix = "MyCSS_TOKENIZER_STATE_";

# State name used for bytes that no $char_index entry claims.
my $skip_char_state = "delim single code point";

# State name and comment used for 'U'/'u', which are claimed both by the
# "LATIN ... LETTER U" entry and by the generic name-start entry.
my $conflict_fix_name         = "name-start code point";
my $conflict_fix_name_comment = "name-start code point and LATIN CAPITAL AND SMALL LETTER U";

# Classification table.
#   key   - a regular-expression fragment that is matched (anchored) against
#           a single character, or the special form '>=N' meaning "byte value
#           is at least N" (see is_it);
#   value - [ state name, human readable comment for the generated table ].
my $char_index = {
    '[\n\t ]'   => ["whitespace",            "whitespace"],
    '"'         => ["QUOTATION MARK",        "U+0022 QUOTATION MARK (\")"],
    '\#'        => ["NUMBER SIGN",           "U+0023 NUMBER SIGN (#)"],
    '\$'        => ["DOLLAR SIGN",           "U+0024 DOLLAR SIGN (\$)"],
    "'"         => ["APOSTROPHE",            "U+0027 APOSTROPHE (')"],
    '\('        => ["LEFT PARENTHESIS",      "U+0028 LEFT PARENTHESIS (()"],
    '\)'        => ["RIGHT PARENTHESIS",     "U+0029 RIGHT PARENTHESIS ())"],
    '\*'        => ["ASTERISK",              "U+002A ASTERISK (*)"],
    '\+'        => ["PLUS SIGN",             "U+002B PLUS SIGN (+)"],
    ','         => ["COMMA",                 "U+002C COMMA (,)"],
    '-'         => ["HYPHEN-MINUS",          "U+002D HYPHEN-MINUS (-)"],
    '\.'        => ["FULL STOP",             "U+002E FULL STOP (.)"],
    '/'         => ["SOLIDUS",               "U+002F SOLIDUS (/)"],
    '\:'        => ["COLON",                 "U+003A COLON (:)"],
    ';'         => ["SEMICOLON",             "U+003B SEMICOLON (;)"],
    '<'         => ["LESS-THAN SIGN",        "U+003C LESS-THAN SIGN (<)"],
    '\@'        => ["COMMERCIAL AT",         "U+0040 COMMERCIAL AT (@)"],
    '\['        => ["LEFT SQUARE BRACKET",   "U+005B LEFT SQUARE BRACKET ([)"],
    # The backslash must be doubled inside a double-quoted string; a lone
    # "\)" would silently drop it and the comment would read "()".
    '\\\\'      => ["REVERSE SOLIDUS",       "U+005C REVERSE SOLIDUS (\\)"],
    '\]'        => ["RIGHT SQUARE BRACKET",  "U+005D RIGHT SQUARE BRACKET (])"],
    '\^'        => ["CIRCUMFLEX ACCENT",     "U+005E CIRCUMFLEX ACCENT (^)"],
    '\{'        => ["LEFT CURLY BRACKET",    "U+007B LEFT CURLY BRACKET ({)"],
    '\}'        => ["RIGHT CURLY BRACKET",   "U+007D RIGHT CURLY BRACKET (})"],
    '[0-9]'     => ["digit",                 "digit"],
    '(?:u|U)'   => ["LATIN CAPITAL AND SMALL LETTER U", "U+0055 LATIN CAPITAL LETTER U (U) or U+0075 LATIN SMALL LETTER U (u)"],
    '[a-zA-Z_]' => [$conflict_fix_name,      "name-start code point"],
    '>=128'     => [$conflict_fix_name,      "name-start code point"],
    '\|'        => ["VERTICAL LINE",         "U+007C VERTICAL LINE (|)"],
    '\~'        => ["TILDE",                 "U+007E TILDE (~)"]
};

# Generate and print everything.
create_first($char_index);
|
||
|
|
||
|
# Classifies every byte value 0..255 with the given character table and
# prints the resulting C source fragments to STDOUT:
#
#   1. mycss_begin_chars_state_map[]     - tokenizer state per first byte;
#   2. mycss_chars_name_code_point_map[] - the byte's own index for digits,
#      name-start code points and HYPHEN-MINUS, 0xff otherwise;
#   3. numbered entries for every distinct state name;
#   4. function prototypes, empty function stubs and init assignments for
#      every distinct state name.
#
# Parameters:
#   $char_index - hash ref: pattern => [ state name, comment ] (see is_it for
#                 the pattern forms).
#
# Dies when a byte other than 'U' (85) / 'u' (117) is claimed by more than
# one pattern. Returns nothing useful.
sub create_first {
    my ($char_index) = @_;

    # For every byte value collect the patterns that claim it.
    my $res = {};
    foreach my $code (0 .. 255) {
        $res->{$code} = {};
        is_it($char_index, $code, $res->{$code});
    }

    # One [ full state name, comment ] pair per byte value, in byte order.
    my @static_data;
    foreach my $code (0 .. 255) {
        my @keys     = keys %{ $res->{$code} };
        my $end_name = "";
        my $comment  = "";

        if (@keys > 1) {
            # 'U' and 'u' match both the dedicated letter-U pattern and the
            # generic name-start pattern; that overlap is expected.
            if ($code == 85 || $code == 117) {
                $end_name = convert_name($conflict_fix_name);
                $comment  = $conflict_fix_name_comment;
            }
            else {
                die "Conflict for CODE $code: ", join(", ", map {$char_index->{$_}->[0]} @keys), "\n";
            }
        }
        elsif (@keys) {
            $end_name = convert_name($char_index->{ $keys[0] }->[0]);
            $comment  = $char_index->{ $keys[0] }->[1];
        }
        else {
            # Unclaimed byte: fall back to the generic delimiter state.
            $end_name = convert_name($skip_char_state);
        }

        push @static_data, ["$mycss_state_prefix$end_name", $comment];
    }

    # Only the name width is needed for column alignment.
    my ($max_len) = get_max_length(\@static_data);

    # state name => padding width, remembered for the numbered list below.
    my $uniq = {};

    print "//\n";
    print "// generated by Perl script utils/mycss_first_char.pl\n";

    print "static const mycss_tokenizer_state_t mycss_begin_chars_state_map[] = {\n";
    foreach my $idx (0 .. $#static_data) {
        my $entry  = $static_data[$idx];
        my $st_len = $max_len - length($entry->[0]);

        # The last element gets a space instead of a comma so that the
        # trailing comments stay aligned.
        print "\t", $entry->[0], ($idx == $#static_data ? " " : ",");
        print " " x ($st_len + 1);
        print "// $entry->[1]\n";

        $uniq->{ $entry->[0] } = $st_len unless exists $uniq->{ $entry->[0] };
    }
    # A C array initializer has to be terminated with a semicolon.
    print "};\n";

    # NOTE(review): with the default table byte 255 is a name-start code
    # point, so its own index 0xff is indistinguishable from the 0xff filler
    # used for "not a name code point" - confirm how the C side treats 0xff.
    print "\n\nstatic const unsigned char mycss_chars_name_code_point_map[] = {\n";
    foreach my $idx (0 .. $#static_data) {
        my $name = $static_data[$idx]->[0];

        if ($name eq "MyCSS_TOKENIZER_STATE_DIGIT" ||
            $name eq "MyCSS_TOKENIZER_STATE_NAME_START_CODE_POINT" ||
            $name eq "MyCSS_TOKENIZER_STATE_HYPHEN_MINUS")
        {
            print sprintf("0x%02x, ", $idx);
        }
        else {
            print "0xff, ";
        }

        # Ten values per row.
        print "\n" unless ($idx + 1) % 10;
    }
    # Finish a partially filled last row before closing the initializer.
    print "\n" if @static_data % 10;
    print "};\n";

    my @state_names = sort { $a cmp $b } keys %$uniq;
    my $c_params    = "mycss_entry_t* entry, mythread_queue_node_t* qnode, const char* css, size_t css_offset, size_t css_size";

    my $count = $enum_begin_count;

    print "\n\n//\n";
    print "// generated by Perl script utils/mycss_first_char.pl\n";

    foreach my $key (@state_names) {
        print "\t", $key;
        print " " x ($uniq->{$key} + 1);
        print " = 0x", sprintf("%03x", $count++), ",\n";
    }

    print "\n\n//\n// for header:\n";
    foreach my $key (@state_names) {
        print "size_t ", lc($key), "($c_params);\n";
    }

    print "\n\n//\n// functions:\n";
    foreach my $key (@state_names) {
        print "\nsize_t ", lc($key), "($c_params)\n{\nreturn 0;\n}\n";
    }

    print "\n\n//\n// for init:\n";
    foreach my $key (@state_names) {
        print "mycss->parse_state_func[$key] = ", lc($key), ";\n";
    }

    return;
}
|
||
|
|
||
|
# Turns a human readable state name into an upper-case identifier part:
# every run of hyphens and/or whitespace becomes a single underscore and the
# result is upper-cased, e.g. "name-start code point" gives
# "NAME_START_CODE_POINT".
sub convert_name {
    my ($label) = @_;

    # A limit of -1 keeps trailing empty fields, so a separator run at the
    # very end still produces a trailing underscore.
    my @words = split /[-\s]+/, $label, -1;

    return uc join('_', @words);
}
|
||
|
|
||
|
# Records which patterns of a character table claim a given byte value.
#
# Arguments:
#   0 - hash ref whose keys are the patterns: either '>=N' (byte value is at
#       least N) or a regular-expression fragment matched, anchored, against
#       the single character chr(code);
#   1 - the numeric byte value to classify;
#   2 - hash ref in which the counter of every matching pattern is bumped.
#
# Always returns undef; the result is delivered through argument 2.
sub is_it {
    my ($index, $code) = @_;

    for my $pattern (keys %{$index}) {
        if (my ($threshold) = $pattern =~ /^>=([0-9]+)/) {
            # Numeric range rule.
            next if $code < $threshold;
        }
        else {
            # Regular-expression rule against the single character.
            next if chr($code) !~ /^$pattern$/;
        }

        # Written through $_[2] so the caller's own variable is the one
        # that gets updated.
        $_[2]->{$pattern}++;
    }

    undef;
}
|
||
|
|
||
|
# Finds the widest string in each of the first two columns of a table.
#
# Takes an array ref of [ name, comment ] pairs and returns the list
# (longest name length, longest comment length); both are 0 for an empty
# table.
sub get_max_length {
    my @widest = (0, 0);

    for my $row (@{$_[0]}) {
        for my $col (0, 1) {
            my $width = length($row->[$col]);
            $widest[$col] = $width if $width > $widest[$col];
        }
    }

    # Kept as a two-element list expression rather than returning the array.
    ($widest[0], $widest[1]);
}
|