Shrink Unicode category table.

Code points that have no entry in the table are implicitly treated as "unassigned", so ranges of that category no longer need to be stored.

Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel@j-davis.com
This commit is contained in:
Jeff Davis 2023-12-07 15:44:03 -08:00
parent d16a0c1e2e
commit 719b342d36
3 changed files with 15 additions and 723 deletions

View File

@@ -72,7 +72,10 @@ while (my $line = <$FH>)
 # the current range, emit the current range and initialize a new
 # range representing the gap.
 if ($range_end + 1 != $code && $range_category ne $gap_category) {
-    push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+    if ($range_category ne $CATEGORY_UNASSIGNED) {
+        push(@category_ranges, {start => $range_start, end => $range_end,
+            category => $range_category});
+    }
     $range_start = $range_end + 1;
     $range_end = $code - 1;
     $range_category = $gap_category;
@@ -80,7 +83,10 @@ while (my $line = <$FH>)
 # different category; new range
 if ($range_category ne $category) {
-    push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+    if ($range_category ne $CATEGORY_UNASSIGNED) {
+        push(@category_ranges, {start => $range_start, end => $range_end,
+            category => $range_category});
+    }
     $range_start = $code;
     $range_end = $code;
     $range_category = $category;
@@ -109,14 +115,9 @@ die "<..., First> entry with no corresponding <..., Last> entry"
   if $gap_category ne $CATEGORY_UNASSIGNED;
 # emit final range
-push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
-# emit range for any unassigned code points after last entry
-if ($range_end < 0x10FFFF) {
-    $range_start = $range_end + 1;
-    $range_end = 0x10FFFF;
-    $range_category = $CATEGORY_UNASSIGNED;
-    push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+if ($range_category ne $CATEGORY_UNASSIGNED) {
+    push(@category_ranges, {start => $range_start, end => $range_end,
+        category => $range_category});
 }
 my $num_ranges = scalar @category_ranges;

View File

@@ -28,8 +28,7 @@ unicode_category(pg_wchar ucs)
 int mid;
 int max = lengthof(unicode_categories) - 1;
-Assert(ucs >= unicode_categories[0].first &&
-       ucs <= unicode_categories[max].last);
+Assert(ucs <= 0x10ffff);
 while (max >= min)
 {
@@ -42,8 +41,7 @@ unicode_category(pg_wchar ucs)
     return unicode_categories[mid].category;
 }
-Assert(false);
-return (pg_unicode_category) - 1;
+return PG_U_UNASSIGNED;
 }
 /*

File diff suppressed because it is too large Load Diff