Shrink Unicode category table by omitting unassigned ranges.
Code points with no entry in the table are implicitly treated as "unassigned". Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel@j-davis.com
This commit is contained in:
parent
d16a0c1e2e
commit
719b342d36
@ -72,7 +72,10 @@ while (my $line = <$FH>)
|
|||||||
# the current range, emit the current range and initialize a new
|
# the current range, emit the current range and initialize a new
|
||||||
# range representing the gap.
|
# range representing the gap.
|
||||||
if ($range_end + 1 != $code && $range_category ne $gap_category) {
|
if ($range_end + 1 != $code && $range_category ne $gap_category) {
|
||||||
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
|
if ($range_category ne $CATEGORY_UNASSIGNED) {
|
||||||
|
push(@category_ranges, {start => $range_start, end => $range_end,
|
||||||
|
category => $range_category});
|
||||||
|
}
|
||||||
$range_start = $range_end + 1;
|
$range_start = $range_end + 1;
|
||||||
$range_end = $code - 1;
|
$range_end = $code - 1;
|
||||||
$range_category = $gap_category;
|
$range_category = $gap_category;
|
||||||
@ -80,7 +83,10 @@ while (my $line = <$FH>)
|
|||||||
|
|
||||||
# different category; new range
|
# different category; new range
|
||||||
if ($range_category ne $category) {
|
if ($range_category ne $category) {
|
||||||
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
|
if ($range_category ne $CATEGORY_UNASSIGNED) {
|
||||||
|
push(@category_ranges, {start => $range_start, end => $range_end,
|
||||||
|
category => $range_category});
|
||||||
|
}
|
||||||
$range_start = $code;
|
$range_start = $code;
|
||||||
$range_end = $code;
|
$range_end = $code;
|
||||||
$range_category = $category;
|
$range_category = $category;
|
||||||
@ -109,14 +115,9 @@ die "<..., First> entry with no corresponding <..., Last> entry"
|
|||||||
if $gap_category ne $CATEGORY_UNASSIGNED;
|
if $gap_category ne $CATEGORY_UNASSIGNED;
|
||||||
|
|
||||||
# emit final range
|
# emit final range
|
||||||
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
|
if ($range_category ne $CATEGORY_UNASSIGNED) {
|
||||||
|
push(@category_ranges, {start => $range_start, end => $range_end,
|
||||||
# emit range for any unassigned code points after last entry
|
category => $range_category});
|
||||||
if ($range_end < 0x10FFFF) {
|
|
||||||
$range_start = $range_end + 1;
|
|
||||||
$range_end = 0x10FFFF;
|
|
||||||
$range_category = $CATEGORY_UNASSIGNED;
|
|
||||||
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
my $num_ranges = scalar @category_ranges;
|
my $num_ranges = scalar @category_ranges;
|
||||||
|
@ -28,8 +28,7 @@ unicode_category(pg_wchar ucs)
|
|||||||
int mid;
|
int mid;
|
||||||
int max = lengthof(unicode_categories) - 1;
|
int max = lengthof(unicode_categories) - 1;
|
||||||
|
|
||||||
Assert(ucs >= unicode_categories[0].first &&
|
Assert(ucs <= 0x10ffff);
|
||||||
ucs <= unicode_categories[max].last);
|
|
||||||
|
|
||||||
while (max >= min)
|
while (max >= min)
|
||||||
{
|
{
|
||||||
@ -42,8 +41,7 @@ unicode_category(pg_wchar ucs)
|
|||||||
return unicode_categories[mid].category;
|
return unicode_categories[mid].category;
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert(false);
|
return PG_U_UNASSIGNED;
|
||||||
return (pg_unicode_category) - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user