Modest/utils/MyCSS/mycss_first_char.pl
2016-12-26 16:58:10 +03:00

209 lines
5.7 KiB
Perl
Executable File

#!/usr/bin/perl -w
# Runs at compile time: appends "<directory of this script>/../ext/" to the
# module search path (@INC) so that later `use` statements can resolve
# modules located there.
BEGIN {
use FindBin;
push @INC, $FindBin::Bin. "/../ext/";
};
use bytes;
use strict;
use Encode;
# First numeric value assigned to the generated state enum entries.
my $enum_begin_count = 1;

# Prefix prepended to every generated tokenizer state constant.
my $mycss_state_prefix = "MyCSS_TOKENIZER_STATE_";

# State name used for every byte that matches no pattern in $char_index.
my $skip_char_state = "delim single code point";

# 'U' and 'u' match both the dedicated letter-U entry and the generic
# name-start class; create_first() resolves that overlap to this state name
# and uses the second string as the comment.
my $conflict_fix_name         = "name-start code point";
my $conflict_fix_name_comment = "name-start code point and LATIN CAPITAL AND SMALL LETTER U";

# Classification table: pattern => [ state name, comment for the generated C ].
#
# A key is either a regex fragment that is_it() matches (anchored) against a
# single character, or a ">=N" threshold compared against the numeric code.
my $char_index = {
    '[\n\t ]'   => ["whitespace", "whitespace"],
    '"'         => ["QUOTATION MARK", "U+0022 QUOTATION MARK (\")"],
    '\#'        => ["NUMBER SIGN", "U+0023 NUMBER SIGN (#)"],
    '\$'        => ["DOLLAR SIGN", "U+0024 DOLLAR SIGN (\$)"],
    "'"         => ["APOSTROPHE", "U+0027 APOSTROPHE (')"],
    '\('        => ["LEFT PARENTHESIS", "U+0028 LEFT PARENTHESIS (()"],
    '\)'        => ["RIGHT PARENTHESIS", "U+0029 RIGHT PARENTHESIS ())"],
    '\*'        => ["ASTERISK", "U+002A ASTERISK (*)"],
    '\+'        => ["PLUS SIGN", "U+002B PLUS SIGN (+)"],
    ','         => ["COMMA", "U+002C COMMA (,)"],
    '-'         => ["HYPHEN-MINUS", "U+002D HYPHEN-MINUS (-)"],
    '\.'        => ["FULL STOP", "U+002E FULL STOP (.)"],
    '/'         => ["SOLIDUS", "U+002F SOLIDUS (/)"],
    '\:'        => ["COLON", "U+003A COLON (:)"],
    ';'         => ["SEMICOLON", "U+003B SEMICOLON (;)"],
    '<'         => ["LESS-THAN SIGN", "U+003C LESS-THAN SIGN (<)"],
    '\@'        => ["COMMERCIAL AT", "U+0040 COMMERCIAL AT (@)"],
    '\['        => ["LEFT SQUARE BRACKET", "U+005B LEFT SQUARE BRACKET ([)"],
    # The backslash must be doubled inside a double-quoted string; a lone
    # "\)" collapses to ")" and the generated comment would lose the character.
    '\\\\'      => ["REVERSE SOLIDUS", "U+005C REVERSE SOLIDUS (\\)"],
    '\]'        => ["RIGHT SQUARE BRACKET", "U+005D RIGHT SQUARE BRACKET (])"],
    '\^'        => ["CIRCUMFLEX ACCENT", "U+005E CIRCUMFLEX ACCENT (^)"],
    '\{'        => ["LEFT CURLY BRACKET", "U+007B LEFT CURLY BRACKET ({)"],
    '\}'        => ["RIGHT CURLY BRACKET", "U+007D RIGHT CURLY BRACKET (})"],
    '[0-9]'     => ["digit", "digit"],
    '(?:u|U)'   => ["LATIN CAPITAL AND SMALL LETTER U", "U+0055 LATIN CAPITAL LETTER U (U) or U+0075 LATIN SMALL LETTER U (u)"],
    '[a-zA-Z_]' => [$conflict_fix_name, "name-start code point"],
    '>=128'     => [$conflict_fix_name, "name-start code point"],
    '\|'        => ["VERTICAL LINE", "U+007C VERTICAL LINE (|)"],
    '\~'        => ["TILDE", "U+007E TILDE (~)"]
};

# Generate and print all tables and boilerplate to STDOUT.
create_first($char_index);
# Classify every byte value (0..255) with the given pattern table and print
# the generated C fragments to STDOUT, in this order:
#   1. mycss_begin_chars_state_map[]     - tokenizer start state per byte
#   2. mycss_chars_name_code_point_map[] - the byte's own value when its state
#      is digit / name-start code point / hyphen-minus, 0xff otherwise
#   3. enum entries for every distinct state (numbered from $enum_begin_count)
#   4. function prototypes, empty function stubs and init assignments
#
# Arguments:
#   $char_index - hash ref: pattern => [ state name, comment ]; patterns are
#                 interpreted by is_it().
#
# Dies when a byte matches more than one pattern, except for 'U' (85) and
# 'u' (117), which are deliberately resolved to $conflict_fix_name.
sub create_first {
    my ($char_index) = @_;

    # One [ state constant, comment ] pair per byte value, in byte order.
    my @static_data;
    foreach my $code (0..255) {
        my %matched;
        is_it($char_index, $code, \%matched);
        my @keys = keys %matched;

        my $end_name;
        my $comment = "";
        if (@keys > 1) {
            if ($code == 85 || $code == 117) {
                # 'U'/'u' hit both the letter-U entry and the name-start class.
                $end_name = convert_name($conflict_fix_name);
                $comment  = $conflict_fix_name_comment;
            }
            else {
                die "Conflict for CODE $code: ", join(", ", map {$char_index->{$_}->[0]} @keys), "\n";
            }
        }
        elsif (@keys) {
            $end_name = convert_name($char_index->{ $keys[0] }->[0]);
            $comment  = $char_index->{ $keys[0] }->[1];
        }
        else {
            # No pattern matched: fall back to the generic delimiter state.
            $end_name = convert_name($skip_char_state);
        }
        push @static_data, ["$mycss_state_prefix$end_name", $comment];
    }

    my ($max_len) = get_max_length(\@static_data);

    # state constant => padding width; reused to align the enum listing.
    my %uniq;

    print "//\n";
    print "// generated by Perl script utils/mycss_first_char.pl\n";
    print "static const mycss_tokenizer_state_t mycss_begin_chars_state_map[] = {\n";
    foreach my $idx (0..$#static_data) {
        my ($state, $comment) = @{ $static_data[$idx] };
        my $st_len = $max_len - length($state);

        # The last initializer gets a space instead of a comma so that the
        # trailing comments stay aligned.
        my $separator = $idx == $#static_data ? " " : ",";
        print "\t", $state, $separator, " " x ($st_len + 1), "// $comment\n";

        $uniq{$state} = $st_len unless exists $uniq{$state};
    }
    # A C array definition has to be terminated with a semicolon.
    print "};\n";

    # States whose bytes may appear inside a name; derived from the same
    # prefix and name conversion that produced @static_data.
    my %is_name_state = map { $mycss_state_prefix . convert_name($_) => 1 }
        ("digit", $conflict_fix_name, "HYPHEN-MINUS");

    print "\n\nstatic const unsigned char mycss_chars_name_code_point_map[] = {\n";
    foreach my $idx (0..$#static_data) {
        if ($is_name_state{ $static_data[$idx]->[0] }) {
            print sprintf("0x%02x, ", $idx);
        }
        else {
            print "0xff, ";
        }
        print "\n" unless ($idx + 1) % 10;
    }
    # Finish a partially filled last row before closing the initializer.
    print "\n" if @static_data % 10;
    print "};\n";

    my @states = sort {$a cmp $b} keys %uniq;

    my $count = $enum_begin_count;
    print "\n\n//\n";
    print "// generated by Perl script utils/mycss_first_char.pl\n";
    foreach my $key (@states) {
        print "\t", $key, " " x ($uniq{$key} + 1);
        print " = 0x", sprintf("%03x", $count++), ",\n";
    }

    print "\n\n//\n// for header:\n";
    foreach my $key (@states) {
        print "size_t ", lc($key), "(mycss_entry_t* entry, mythread_queue_node_t* qnode, const char* css, size_t css_offset, size_t css_size);\n";
    }

    print "\n\n//\n// functions:\n";
    foreach my $key (@states) {
        print "\nsize_t ", lc($key), "(mycss_entry_t* entry, mythread_queue_node_t* qnode, const char* css, size_t css_offset, size_t css_size)\n{\nreturn 0;\n}\n";
    }

    print "\n\n//\n// for init:\n";
    foreach my $key (@states) {
        print "mycss->parse_state_func[$key] = ", lc($key), ";\n";
    }
}
# Turn a human-readable state name into the identifier suffix used in the
# generated C code: every run of hyphens and/or whitespace becomes a single
# underscore and the result is upper-cased.
#
# Arguments:
#   the state name (string)
# Returns:
#   the converted, upper-cased identifier part
sub convert_name {
    my ($label) = @_;
    (my $ident = $label) =~ s/[-\s]+/_/g;
    return uc $ident;
}
# Record which patterns of an index a given character code matches.
#
# Arguments (positional):
#   1. hash ref whose keys are the patterns to try (values are not read here)
#   2. numeric code of the character under test
#   3. hash ref that gets a counter incremented for every matching pattern
#
# A key starting with ">=N" matches when the code is numerically >= N; any
# other key is used as a regex that must match the single character chr(code)
# from start to end.
#
# Always returns undef; the result is delivered through the third argument.
sub is_it {
    my ($patterns, $code, $hits) = @_;
    my $char = chr($code);

    for my $pattern (keys %$patterns) {
        if ($pattern =~ /^>=([0-9]+)/) {
            # Threshold entry: compare numerically, never as a regex.
            next if $code < $1;
        }
        elsif ($char !~ /^$pattern$/) {
            next;
        }
        $hits->{$pattern}++;
    }

    return undef;
}
# Find the widest entries in a list of [ name, comment ] pairs.
#
# Arguments:
#   array ref of array refs; element 0 is the name, element 1 the comment
# Returns:
#   a two-element list: (longest name length, longest comment length);
#   both are 0 for an empty list.
sub get_max_length {
    my ($rows) = @_;
    my ($widest_name, $widest_comment) = (0, 0);

    for my $row (@$rows) {
        my $name_len    = length($row->[0]);
        my $comment_len = length($row->[1]);
        $widest_name    = $name_len    if $name_len > $widest_name;
        $widest_comment = $comment_len if $comment_len > $widest_comment;
    }

    return ($widest_name, $widest_comment);
}