mirror of
https://github.com/lexborisov/Modest
synced 2024-11-22 22:01:57 +03:00
235 lines
7.6 KiB
Perl
Executable File
235 lines
7.6 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
|
|
BEGIN {
|
|
use FindBin;
|
|
push @INC, $FindBin::Bin. "/../ext/";
|
|
};
|
|
|
|
use utf8;
|
|
use strict;
|
|
use MyHTML::Base;
|
|
use JSON::XS;
|
|
|
|
# Modulus handed to get_index_id() (via create_result()) and upper bound of
# the primary slots emitted by create_static_list_index().
my $static_list_index_length = 419;

# Read the encodings description through the project helper and decode it.
my $base = MyHTML::Base->new(
    dirs => {
        source   => "../../source/myhtml",
        template => "tmpl",
    },
);
my $tmpl_lines = $base->read_tmpl_raw("encoding/encodings.json");
my $json_text  = join("", @$tmpl_lines);
my $encodings  = decode_json($json_text);

# Optional search over candidate table lengths; disabled by default.
#test_result();

# Bucket every label and collect the set of encoding names, then emit the C
# enum and the tables that depend on it.
my ($label_buckets, $encoding_names) = create_result($encodings, $static_list_index_length);
print_enum($encoding_names);

# Alternative outputs, disabled by default.
#print_for_test_meta($label_buckets);

#my $static_list = create_static_list_index($label_buckets);
#print $static_list, "\n";

print "";
|
|
|
|
# Print one test stanza per collected label: the encoding name on its own
# line, followed by a <meta http-equiv="Content-Type"> tag that declares the
# label as charset, followed by a blank line.
#
# Parameters:
#   $result - hash ref mapping a bucket id to an array ref of entries shaped
#             like [encoding name, label, ...] (as built by create_result()).
#
# Output is written with plain print(), i.e. to the currently selected handle.
sub print_for_test_meta {
    my ($result) = @_;

    foreach my $key (sort { $a cmp $b } keys %$result) {
        # Entries are array refs; comparing them directly would order them by
        # their stringified addresses, which changes from run to run. Order
        # by encoding name, then by label, so the output is reproducible.
        my @entries = sort {
            $a->[0] cmp $b->[0] || $a->[1] cmp $b->[1]
        } @{ $result->{$key} };

        foreach my $encode_entry (@entries) {
            print $encode_entry->[0], "\n";
            print '<meta http-equiv="Content-Type" Content="text/html; charset=', $encode_entry->[1], '">', "\n\n";
        }
    }
}
|
|
|
|
# Walk the decoded encodings description and group every label into a bucket.
#
# Parameters:
#   $encodings                - array ref of groups; each group carries an
#                               {encodings} array ref whose items have a
#                               {name} and a {labels} array ref.
#   $static_list_index_length - modulus forwarded to get_index_id().
#
# Returns a two-element list:
#   1. hash ref: bucket id => array ref of
#      [encoding name, label, length of label, length of encoding name]
#   2. hash ref: encoding name => 1 for every encoding that was kept.
#
# Encodings whose name matches /replacement/i are skipped entirely.
sub create_result {
    my ($encodings, $static_list_index_length) = @_;

    my %bucket_for;
    my %seen_name;

    for my $group (@$encodings) {
        for my $encoding (@{ $group->{encodings} }) {
            my $name = $encoding->{name};
            next if $name =~ /replacement/i;

            $seen_name{$name} = 1;

            # Labels are visited in string order so bucket contents are
            # filled deterministically.
            for my $label (sort { $a cmp $b } @{ $encoding->{labels} }) {
                my $bucket_id = get_index_id($label, $static_list_index_length);
                my @row = ($name, $label, length($label), length($name));

                push @{ $bucket_for{$bucket_id} }, \@row;
            }
        }
    }

    (\%bucket_for, \%seen_name);
}
|
|
|
|
# Try every table length from 1 to 2048 and report the one whose fullest
# bucket is smallest (the first such length wins on ties).
#
# Reads the file-level $encodings and prints the winning length together with
# its largest bucket size.
sub test_result {
    my ($best_length, $best_max) = (0, undef);

    foreach my $idx (1..2048) {
        # create_result() returns the list (buckets, names). The assignment
        # must be in list context: a scalar assignment would receive the last
        # element (the names hash) instead of the buckets.
        my ($result) = create_result($encodings, $idx);
        my $res_max = test_result_max_value($result, 0);

        if (!defined($best_max) || $best_max > $res_max) {
            $best_length = $idx;
            $best_max    = $res_max;
        }
    }

    print "Best:\n";
    print $best_length, ": ", $best_max, "\n";
}
|
|
|
|
# Return the size of the largest bucket in $res (0 when $res has no buckets).
#
# Parameters:
#   $res      - hash ref: bucket id => array ref of entries.
#   $is_print - when true, print "<id>: <size>" for each bucket, smallest
#               bucket first.
sub test_result_max_value {
    my ($res, $is_print) = @_;

    my @ids_by_size = sort {
        scalar(@{$res->{$a}}) <=> scalar(@{$res->{$b}})
    } keys %$res;

    my $max = 0;
    for my $id (@ids_by_size) {
        my $size = scalar(@{ $res->{$id} });

        print "$id: ", $size, "\n" if $is_print;

        $max = $size if $size > $max;
    }

    $max;
}
|
|
|
|
# Compute the bucket id for a label.
#
# Arguments (positional):
#   1. the label string
#   2. the modulus (table length)
#
# The id is ((first * last * count) % modulus) + 1, where first/last are the
# ord() values of the lower-cased first and last elements produced by
# splitting the label, and count is the number of elements. The result is
# therefore always in 1..modulus.
sub get_index_id {
    use bytes;

    my ($label, $modulus) = @_;

    my @units = split //, $label;

    my $first_code = ord(lc($units[0]));
    my $last_code  = ord(lc($units[-1]));
    my $product    = $first_code * $last_code * scalar(@units);

    ($product % $modulus) + 1;
}
|
|
|
|
# Append the overflow rows of one bucket to @$struct.
#
# Parameters:
#   $result - array ref with the bucket's entries, each shaped like
#             [encoding name, label, length of label, length of name].
#   $struct - array ref receiving one formatted C initializer row per
#             overflow entry.
#   $offset - table index that the first appended row will occupy.
#
# The entries are ordered by label length; the first one is skipped here
# because create_static_list_index() emits it in the primary slot. Every
# remaining entry becomes a row whose last two fields are the index of the
# following row in the chain (0 for the final row) and the row's own index.
#
# Returns the offset advanced by the number of rows appended.
sub create_sub_static_list_index {
    my ($result, $struct, $offset) = @_;

    my @list_sorted = sort { $a->[2] <=> $b->[2] } @$result;

    foreach my $i (1..$#list_sorted) {
        my $cur = $offset;
        $offset++;

        my $entry = $list_sorted[$i];
        my $next  = $i < $#list_sorted ? $offset : 0;

        # Build the row as ONE string so @$struct holds exactly one element
        # per table row (a comma here would split the row into two elements).
        push @$struct, "\t{".
            '"'. $entry->[0] .'", '. $entry->[3] .', '.
            '"'. $entry->[1] .'", '. $entry->[2] .', '.
            name_to_myhtml_encoding($entry->[0]) .', '.
            "$next, $cur},\n";
    }

    $offset;
}
|
|
|
|
# Render the C source of the label lookup table.
#
# Parameters:
#   $result - hash ref: bucket id => array ref of entries shaped like
#             [encoding name, label, length of label, length of name].
#
# Uses the file-level $static_list_index_length. Slots 0 through that length
# form the primary part of the table; a slot without entries is written as a
# NULL row. For a bucket with several entries, the one with the shortest
# label goes into the primary slot and the others are appended after the
# primary part by create_sub_static_list_index(). The sixth field of a
# primary row is the index of the bucket's first overflow row (0 when there
# is none) and the seventh field is the slot's own index.
#
# Returns the complete array definition as a single string.
sub create_static_list_index {
    my ($result) = @_;

    my @res;
    my $struct = [];
    my $offset = $static_list_index_length + 1;

    foreach my $i (0..$static_list_index_length) {
        # A bucket that exists but holds no entries is treated like a missing
        # one; otherwise a row would be built from undefined fields.
        if (exists $result->{$i} && @{ $result->{$i} }) {
            my $bucket = $result->{$i};
            my $id = 0;

            if (scalar(@$bucket) > 1) {
                $offset = create_sub_static_list_index($bucket, $struct, $offset);

                # The helper advanced $offset by (entries - 1); step back to
                # the index of the first overflow row it appended.
                $id = $offset - (@$bucket - 1);
            }

            my @list_sorted = sort { $a->[2] <=> $b->[2] } @$bucket;
            my $head = $list_sorted[0];

            # One string per row (a comma here would split it into two
            # list elements).
            push @res, "\t{".
                '"'. $head->[0] .'", '. $head->[3] .', '.
                '"'. $head->[1] .'", '. $head->[2] .', '.
                name_to_myhtml_encoding($head->[0]) .', '.
                "$id, $i},\n";
        }
        else {
            push @res, "\t{NULL, 0, NULL, 0, 0, 0, 0},\n";
        }
    }

    "static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_entry_static_list_index[] = \n{\n". join("", @res, @$struct) ."};\n";
}
|
|
|
|
# Turn an encoding name into its C enum constant name.
#
# Parameters:
#   $name - encoding name, e.g. "windows-1252".
#
# Every run of hyphens and/or whitespace becomes a single underscore, the
# result is upper-cased and prefixed with "MyHTML_ENCODING_". The character
# class is the same one print_enum() applies when it generates the enum, so
# both subs derive identical constant names from the same encoding name.
sub name_to_myhtml_encoding {
    my ($name) = @_;

    (my $ident = $name) =~ s/[-\s]+/_/g;

    "MyHTML_ENCODING_". uc($ident);
}
|
|
|
|
# Print three pieces of C source derived from the set of encoding names:
#   1. the enum myhtml_encoding_list / typedef myhtml_encoding_t,
#   2. the myhtml_encoding_function_index[] table of decoder functions,
#   3. the myhtml_encoding_entry_name_index_static_list_index[] name table.
#
# Parameters:
#   $index - hash ref whose keys are encoding names (values are not used).
#
# Each row of @vals is [enum constant, hex value, decoder suffix or "NULL"].
sub print_enum {
    my ($index) = @_;

    print "enum myhtml_encoding_list {\n\t";

    # Constants with hand-assigned values; DEFAULT and UTF_8 share 0x00.
    my @vals = (
        ["MyHTML_ENCODING_DEFAULT",        "0x00", "utf_8"],
        ["MyHTML_ENCODING_AUTO",           "0x01", "NULL"],
        ["MyHTML_ENCODING_NOT_DETERMINED", "0x02", "NULL"],
        ["MyHTML_ENCODING_UTF_8",          "0x00", "utf_8"],
        ["MyHTML_ENCODING_UTF_16LE",       "0x04", "utf_16le"],
        ["MyHTML_ENCODING_UTF_16BE",       "0x05", "utf_16be"],
        ["MyHTML_ENCODING_X_USER_DEFINED", "0x06", "x_user_defined"],
    );

    # All other encodings are numbered consecutively from 7, in string order
    # of their original names.
    my $next_value = 7;
    for my $raw_name (sort { $a cmp $b } keys %$index) {
        (my $ident = $raw_name) =~ s/[-\s]+/_/g;

        # These already have a fixed row above.
        next if $ident =~ /UTF_8$/i
             || $ident =~ /UTF_16LE$/i
             || $ident =~ /UTF_16BE$/i
             || $ident =~ /X_USER_DEFINED$/i;

        my $hex_value = sprintf("0x%02x", $next_value);
        push @vals, ["MyHTML_ENCODING_". uc($ident), $hex_value, lc($ident)];

        $next_value++;
    }

    push @vals, ["MyHTML_ENCODING_LAST_ENTRY", sprintf("0x%02x", $next_value), "NULL"];

    my $enum_lines = MyHTML::Base->format_list_text(\@vals, "= ");
    print join(",\n\t", @$enum_lines), "\n";

    print "}\ntypedef myhtml_encoding_t;\n\n";

    # Decoder table: one line per row of @vals, in @vals order.
    print "static const myhtml_encoding_custom_f myhtml_encoding_function_index[] = \n{\n";
    for my $row (@vals) {
        if ($row->[2] ne "NULL") {
            print "\t", "myhtml_encoding_decode_", $row->[2], ",\t// ", $row->[0], "\n";
        }
        else {
            print "\t", $row->[2], ",\n";
        }
    }

    print "};\n";

    print "static const myhtml_encoding_entry_name_index_t myhtml_encoding_entry_name_index_static_list_index[(MyHTML_ENCODING_LAST_ENTRY + 1)] =\n{\n";

    # Name table: rows ordered by numeric enum value. The constant is turned
    # back into a display name in place: prefix removed, underscores replaced
    # by hyphens (except for SHIFT_JIS), and LAST-ENTRY blanked.
    for my $row (sort { hex($a->[1]) <=> hex($b->[1]) } @vals) {
        $row->[0] =~ s/MyHTML_ENCODING_//;

        $row->[0] =~ s/_/-/g unless $row->[0] eq "SHIFT_JIS";

        $row->[0] = "" if $row->[0] eq "LAST-ENTRY";

        print "\t", '{"', $row->[0], '", ', length($row->[0]), "}", ",\n";
    }

    print "};\n";
}
|
|
|
|
|
|
|
|
|