Shrink Unicode category table.

Code points missing from the table can implicitly be considered "unassigned", so ranges in that category no longer need to be stored.

Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel@j-davis.com
This commit is contained in:
Jeff Davis 2023-12-07 15:44:03 -08:00
parent d16a0c1e2e
commit 719b342d36
3 changed files with 15 additions and 723 deletions

View File

@ -72,7 +72,10 @@ while (my $line = <$FH>)
# the current range, emit the current range and initialize a new
# range representing the gap.
if ($range_end + 1 != $code && $range_category ne $gap_category) {
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
if ($range_category ne $CATEGORY_UNASSIGNED) {
push(@category_ranges, {start => $range_start, end => $range_end,
category => $range_category});
}
$range_start = $range_end + 1;
$range_end = $code - 1;
$range_category = $gap_category;
@ -80,7 +83,10 @@ while (my $line = <$FH>)
# different category; new range
if ($range_category ne $category) {
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
if ($range_category ne $CATEGORY_UNASSIGNED) {
push(@category_ranges, {start => $range_start, end => $range_end,
category => $range_category});
}
$range_start = $code;
$range_end = $code;
$range_category = $category;
@ -109,14 +115,9 @@ die "<..., First> entry with no corresponding <..., Last> entry"
if $gap_category ne $CATEGORY_UNASSIGNED;
# emit final range
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
# emit range for any unassigned code points after last entry
if ($range_end < 0x10FFFF) {
$range_start = $range_end + 1;
$range_end = 0x10FFFF;
$range_category = $CATEGORY_UNASSIGNED;
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
if ($range_category ne $CATEGORY_UNASSIGNED) {
push(@category_ranges, {start => $range_start, end => $range_end,
category => $range_category});
}
my $num_ranges = scalar @category_ranges;

View File

@ -28,8 +28,7 @@ unicode_category(pg_wchar ucs)
int mid;
int max = lengthof(unicode_categories) - 1;
Assert(ucs >= unicode_categories[0].first &&
ucs <= unicode_categories[max].last);
Assert(ucs <= 0x10ffff);
while (max >= min)
{
@ -42,8 +41,7 @@ unicode_category(pg_wchar ucs)
return unicode_categories[mid].category;
}
Assert(false);
return (pg_unicode_category) - 1;
return PG_U_UNASSIGNED;
}
/*

File diff suppressed because it is too large Load Diff