Catch up to 20 years of HTML and URL changes.

This commit is contained in:
jmcneill 2023-01-28 13:12:16 +00:00
parent 2f8c60d111
commit e0e5768dcf
2 changed files with 72 additions and 31 deletions

View File

@ -1,11 +1,49 @@
# $NetBSD: nanpa.awk,v 1.2 2003/03/13 02:55:01 jhawk Exp $
# $NetBSD: nanpa.awk,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
#
# todo:
# parse "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa?
# function=list_npa_introduced" to produce parenthetical
# notes about what area codes are overlayed by others
# (or split from).
# parse "https://nationalnanpa.com/nanp1/npa_report.csv"
# instead of scraping HTML.
#
# Return a copy of s with leading and trailing blanks (spaces and tabs)
# removed.  Interior whitespace is left alone.
function trim(s)
{
	sub(/^[ \t]+/, "", s);		# drop the leading run, if any
	sub(/[ \t]+$/, "", s);		# drop the trailing run, if any
	return s;
}
# Load the postal-abbreviation database `postdb' into the global lookup
# tables consulted by locationmap():
#	location[abbrev]	full name    (field 2, keyed by field 1)
#	country[abbrev]		country code (field 4, keyed by field 1)
#	flocation[lcname]	full name    (keyed by lower-cased field 2)
#	fcountry[lcname]	country code (keyed by lower-cased field 2)
# Records are split on the current FS.  Anything after a `#' is treated
# as a comment, and lines that are empty afterwards are skipped.
# `f' and `rc' are local scratch variables; callers pass only `postdb'.
function mapinit(postdb, f, rc)
{
	while ((rc = (getline < postdb)) > 0) {
		sub(/#.*/, "");
		if (length($0) == 0)
			continue;
		split($0, f);
		location[f[1]] = f[2];
		flocation[tolower(f[2])] = f[2];
		country[f[1]] = f[4];
		fcountry[tolower(f[2])] = f[4];
	}
	# getline yields -1 when the file cannot be opened or read; say so
	# instead of silently producing unmapped output.
	if (rc < 0)
		print "nanpa.awk: cannot read " postdb > "/dev/stderr";
	# Release the input stream so the descriptor is not held for the
	# rest of the run and a later read of the same file starts over.
	close(postdb);
}
# Expand the country codes "US" and "CA" to their display names; any
# other value is handed back unchanged.
function countrymap(s)
{
	if (s == "US")
		return "USA";
	else if (s == "CA")
		return "Canada";
	else
		return s;
}
# Translate a location string into its display form.
#   - If s is a key of location[] (filled by mapinit()), return the mapped
#     name, followed by " (<country>)" when country[] also has that key.
#   - Otherwise, if the lower-cased s is a key of flocation[], return that
#     entry, followed by " (<country>)" when fcountry[] has the same key.
#   - Otherwise return s unchanged.
# `t' is a local scratch variable.
function locationmap(s, t)
{
	# Exact match on the abbreviation table.
	if (s in location) {
		t = location[s];
		if (s in country)
			t = t " (" countrymap(country[s]) ")";
		return t;
	}

	# Case-insensitive match on the full-name table; t holds the key.
	t = tolower(s);
	if (!(t in flocation))
		return s;
	if (t in fcountry)
		return flocation[t] " (" countrymap(fcountry[t]) ")";
	return flocation[t];
}
function parse(file, ispipe, isplanning, i, planinit, t)
{
planinit = 0;
@ -13,30 +51,30 @@ function parse(file, ispipe, isplanning, i, planinit, t)
sub(/#.*/, "");
if (length($0)==0) continue;
if (isplanning) {
split($0, f);
if (!planinit && f[2]=="NEW NPA") {
NF=split($0, f);
if (!planinit && f[2]=="New NPA") {
planinit=1;
for (i=1; i<=NF; i++)
fnames[$i]=i-1;
} else if (planinit && length(f[fnames["NEW NPA"]])>1) {
t = f[fnames["LOCATION"]] FS;
if (f[fnames["OVERLAY?"]]=="Yes")
t = t "Overlay of " f[fnames["OLD NPA"]];
else if (f[fnames["OLD NPA"]])
t = t "Split of " f[fnames["OLD NPA"]];
if (f[fnames["STATUS"]])
t = t " (" f[fnames["STATUS"]] ")";
if (length(f[fnames["IN SERVICE DATE"]]) > 1)
fnames[f[i]]=i-1;
} else if (planinit && length(f[fnames["New NPA"]])>1) {
t = locationmap(trim(f[fnames["Location"]])) FS;
if (trim(f[fnames["Overlay?"]])=="Yes")
t = t "Overlay of " trim(f[fnames["Old NPA"]]);
else if (f[fnames["Old NPA"]])
t = t "Split of " trim(f[fnames["Old NPA"]]);
if (f[fnames["Status"]])
t = t " (" trim(f[fnames["Status"]]) ")";
if (length(f[fnames["In Service Date"]]) > 1)
t = t " effective " \
f[fnames["IN SERVICE DATE"]];
data[f[fnames["NEW NPA"]] "*"] = t;
trim(f[fnames["In Service Date"]]);
data[trim(f[fnames["New NPA"]]) "*"] = t;
}
} else {
# digits only
match($0, /^[0-9]/);
if (RSTART==0) continue;
i=index($0, FS);
data[substr($0, 1, i-1)]=substr($0,i+1);
data[substr($0, 1, i-1)]=locationmap(trim(substr($0,i+1)));
}
}
close(file);
@ -44,8 +82,9 @@ function parse(file, ispipe, isplanning, i, planinit, t)
BEGIN{
FS=":"
mapinit("na.postal");
print "# $""NetBSD: $";
print "# Generated from http://www.nanpa.com/area_codes/index.html";
print "# Generated from https://nationalnanpa.com/area_codes/index.html";
print "# (with local exceptions)";
print "# ";
print "# format:";
@ -54,14 +93,14 @@ BEGIN{
print "# A * in the Area Code field indicates a future area code."
print "# ";
parse("ftp -o - " \
"http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
"function=list_npa_geo_number | sed -f nanpa.sed", 1, 0);
"https://nationalnanpa.com/enas/geoAreaCodeNumberReport.do" \
" | sed -f nanpa.sed", 1, 0);
parse("ftp -o - " \
"http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
"function=list_npa_non_geo | sed -f nanpa.sed", 1, 0);
"https://nationalnanpa.com/enas/nonGeoNpaServiceReport.do" \
" | sed -f nanpa.sed", 1, 0);
parse("ftp -o - " \
"http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
"function=list_npa_not_in_service | sed -f nanpa.sed", 1, 1);
"https://nationalnanpa.com/enas/plannedNpasNotInServiceReport.do" \
" | sed -f nanpa.sed", 1, 1);
parse("na.phone.add", 0, 0);
sort="sort -n";
for (i in data)

View File

@ -1,4 +1,4 @@
# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
# $NetBSD: nanpa.sed,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
#
# Parse HTML tables output by
# http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
@ -34,7 +34,7 @@ s/\$$//
# Remove lines that do not contain a <TR> tag
/<[Tt][Rr][^>]*>/!d
# Replace all <TD> with colon
s/[ ]*<TD[^>]*> */:/g
s/[ ]*<[Tt][Dd][^>]*> */:/g
# Strip all HTML tags
s/<[^>]*>//g
# Handle HTML characters
@ -42,7 +42,9 @@ s/&nbsp;/ /g
# Compress spaces/tabs
s/[ ][ ]*/ /g
# Strip leading colons
s/^://
s/://
# Strip leading/trailing whitespace
s/^ //
s/ *//
s/ $//
# Strip HTML comments
s/^--.*$//