Catch up to 20 years of HTML and URL changes.
This commit is contained in:
parent
2f8c60d111
commit
e0e5768dcf
|
@ -1,11 +1,49 @@
|
|||
# $NetBSD: nanpa.awk,v 1.2 2003/03/13 02:55:01 jhawk Exp $
|
||||
# $NetBSD: nanpa.awk,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
|
||||
#
|
||||
# todo:
|
||||
# parse "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa?
|
||||
# function=list_npa_introduced" to produce parenthetical
|
||||
# notes about what area codes are overlayed by others
|
||||
# (or split from).
|
||||
# parse "https://nationalnanpa.com/nanp1/npa_report.csv"
|
||||
# instead of scraping HTML.
|
||||
#
|
||||
# trim(s): return a copy of s with leading and trailing blanks
# (spaces and tabs) removed.  Interior whitespace is left alone.
function trim(s)
{
	# Strip the leading run first, then the trailing run.
	sub(/^[ \t]+/, "", s);
	sub(/[ \t]+$/, "", s);
	return s;
}
|
||||
# mapinit(postdb): load the location database from the file postdb.
#
# Every line has "#" comments stripped; lines that are empty afterwards
# are skipped.  The remainder is split on the current FS and stored in
# four global lookup tables:
#	location[field1]		= field2
#	country[field1]			= field4
#	flocation[tolower(field2)]	= field2
#	fcountry[tolower(field2)]	= field4
#
# rec is a local scratch array (awk declares locals as extra
# parameters); callers pass only postdb.
function mapinit(postdb, rec)
{
	while ((getline < postdb) > 0) {
		sub(/#.*/, "");
		if (length($0) == 0)
			continue;
		# Split into a local array so neither NF nor the global
		# scratch array used elsewhere in the script is disturbed.
		split($0, rec);
		location[rec[1]] = rec[2];
		flocation[tolower(rec[2])] = rec[2];
		country[rec[1]] = rec[4];
		fcountry[tolower(rec[2])] = rec[4];
	}
	# Release the input stream; without this the file stays open for
	# the rest of the run and could not be re-read from the start.
	close(postdb);
}
|
||||
# countrymap(s): translate a two-letter country code into the name used
# in the generated output: "CA" becomes "Canada" and "US" becomes "USA".
# Any other value is returned unchanged.
function countrymap(s)
{
	return (s == "CA") ? "Canada" : ((s == "US") ? "USA" : s);
}
|
||||
# locationmap(s): turn a location string into its display form.
#
# Lookup order:
#   1. s is a key of location[]: use location[s], followed by
#      " (<country>)" when country[s] exists.
#   2. tolower(s) is a key of flocation[]: use that entry, followed by
#      " (<country>)" when fcountry[] has the same key.
#   3. otherwise s is returned unchanged.
# Country values are passed through countrymap() before being appended.
#
# t is a local scratch variable; callers pass only s.
function locationmap(s, t)
{
	# Exact match on the key.
	if (s in location) {
		if (s in country)
			return location[s] " (" countrymap(country[s]) ")";
		return location[s];
	}

	# Case-insensitive match on the stored name; t holds the folded key.
	t = tolower(s);
	if (!(t in flocation))
		return s;
	if (t in fcountry)
		return flocation[t] " (" countrymap(fcountry[t]) ")";
	return flocation[t];
}
|
||||
function parse(file, ispipe, isplanning, i, planinit, t)
|
||||
{
|
||||
planinit = 0;
|
||||
|
@ -13,30 +51,30 @@ function parse(file, ispipe, isplanning, i, planinit, t)
|
|||
sub(/#.*/, "");
|
||||
if (length($0)==0) continue;
|
||||
if (isplanning) {
|
||||
split($0, f);
|
||||
if (!planinit && f[2]=="NEW NPA") {
|
||||
NF=split($0, f);
|
||||
if (!planinit && f[2]=="New NPA") {
|
||||
planinit=1;
|
||||
for (i=1; i<=NF; i++)
|
||||
fnames[$i]=i-1;
|
||||
} else if (planinit && length(f[fnames["NEW NPA"]])>1) {
|
||||
t = f[fnames["LOCATION"]] FS;
|
||||
if (f[fnames["OVERLAY?"]]=="Yes")
|
||||
t = t "Overlay of " f[fnames["OLD NPA"]];
|
||||
else if (f[fnames["OLD NPA"]])
|
||||
t = t "Split of " f[fnames["OLD NPA"]];
|
||||
if (f[fnames["STATUS"]])
|
||||
t = t " (" f[fnames["STATUS"]] ")";
|
||||
if (length(f[fnames["IN SERVICE DATE"]]) > 1)
|
||||
fnames[f[i]]=i-1;
|
||||
} else if (planinit && length(f[fnames["New NPA"]])>1) {
|
||||
t = locationmap(trim(f[fnames["Location"]])) FS;
|
||||
if (trim(f[fnames["Overlay?"]])=="Yes")
|
||||
t = t "Overlay of " trim(f[fnames["Old NPA"]]);
|
||||
else if (f[fnames["Old NPA"]])
|
||||
t = t "Split of " trim(f[fnames["Old NPA"]]);
|
||||
if (f[fnames["Status"]])
|
||||
t = t " (" trim(f[fnames["Status"]]) ")";
|
||||
if (length(f[fnames["In Service Date"]]) > 1)
|
||||
t = t " effective " \
|
||||
f[fnames["IN SERVICE DATE"]];
|
||||
data[f[fnames["NEW NPA"]] "*"] = t;
|
||||
trim(f[fnames["In Service Date"]]);
|
||||
data[trim(f[fnames["New NPA"]]) "*"] = t;
|
||||
}
|
||||
} else {
|
||||
# digits only
|
||||
match($0, /^[0-9]/);
|
||||
if (RSTART==0) continue;
|
||||
i=index($0, FS);
|
||||
data[substr($0, 1, i-1)]=substr($0,i+1);
|
||||
data[substr($0, 1, i-1)]=locationmap(trim(substr($0,i+1)));
|
||||
}
|
||||
}
|
||||
close(file);
|
||||
|
@ -44,8 +82,9 @@ function parse(file, ispipe, isplanning, i, planinit, t)
|
|||
|
||||
BEGIN{
|
||||
FS=":"
|
||||
mapinit("na.postal");
|
||||
print "# $""NetBSD: $";
|
||||
print "# Generated from http://www.nanpa.com/area_codes/index.html";
|
||||
print "# Generated from https://nationalnanpa.com/area_codes/index.html";
|
||||
print "# (with local exceptions)";
|
||||
print "# ";
|
||||
print "# format:";
|
||||
|
@ -54,14 +93,14 @@ BEGIN{
|
|||
print "# A * in the Area Code field indicates a future area code."
|
||||
print "# ";
|
||||
parse("ftp -o - " \
|
||||
"http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
|
||||
"function=list_npa_geo_number | sed -f nanpa.sed", 1, 0);
|
||||
"https://nationalnanpa.com/enas/geoAreaCodeNumberReport.do" \
|
||||
" | sed -f nanpa.sed", 1, 0);
|
||||
parse("ftp -o - " \
|
||||
"http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
|
||||
"function=list_npa_non_geo | sed -f nanpa.sed", 1, 0);
|
||||
"https://nationalnanpa.com/enas/nonGeoNpaServiceReport.do" \
|
||||
" | sed -f nanpa.sed", 1, 0);
|
||||
parse("ftp -o - " \
|
||||
"http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
|
||||
"function=list_npa_not_in_service | sed -f nanpa.sed", 1, 1);
|
||||
"https://nationalnanpa.com/enas/plannedNpasNotInServiceReport.do" \
|
||||
" | sed -f nanpa.sed", 1, 1);
|
||||
parse("na.phone.add", 0, 0);
|
||||
sort="sort -n";
|
||||
for (i in data)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
|
||||
# $NetBSD: nanpa.sed,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
|
||||
#
|
||||
# Parse HTML tables output by
|
||||
# http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
|
||||
|
@ -34,7 +34,7 @@ s/\$$//
|
|||
# Remove lines not starting with <TR>
|
||||
/<[Tt][Rr][^>]*>/!d
|
||||
# Replace all <TD> with colon
|
||||
s/[ ]*<TD[^>]*> */:/g
|
||||
s/[ ]*<[Tt][Dd][^>]*> */:/g
|
||||
# Strip all HTML tags
|
||||
s/<[^>]*>//g
|
||||
# Handle HTML characters
|
||||
|
@ -42,7 +42,9 @@ s/ / /g
|
|||
# Compress spaces/tabs
|
||||
s/[ ][ ]*/ /g
|
||||
# Strip leading colons
|
||||
s/^://
|
||||
s/://
|
||||
# Strip leading/trailing whitespace
|
||||
s/^ //
|
||||
s/ *//
|
||||
s/ $//
|
||||
# Strip HTML comments
|
||||
s/^--.*$//
|
||||
|
|
Loading…
Reference in New Issue