# $NetBSD: nanpa.sed,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
#
# Parse HTML tables output by
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each HTML table row (TR),
# print the <TD> elements separated by colons.
#
# How it works: physical lines are accumulated in the pattern space until
# a complete row (terminated by </TR>) has been collected; the row is then
# reduced to "cell:cell:...".  Every "b top" restarts the script on the
# enlarged pattern space, so the clean-up at the top is re-applied to each
# newly appended line.
#
# Caveats:
#   - This could break on HTML comments.
#   - A literal "$" is used as the temporary end-of-row marker, so cell
#     text containing "$" can confuse the row-joining tests.
#   - What "N" does when there is no next input line differs between sed
#     implementations, so a row left unterminated at end of input may or
#     may not be printed.
#
:top

# Strip the carriage return of DOS (CRLF) line endings.  This is written
# with a POSIX character class instead of a raw ^M byte, which viewers and
# copy/paste tend to destroy, and instead of "\r", which POSIX sed does
# not define.  It removes any run of control characters at the end of the
# pattern space; because every appended line passes through here while it
# is still the tail of the pattern space, each line's CR is removed.
s/[[:cntrl:]]*$//

# Join all lines with unterminated HTML tags: a "<" with no ">" after it
# before the end of the pattern space.  The newline is kept here; it sits
# inside the tag and disappears when the tag is stripped.
/<[^>]*$/{
    N
    b top
}

# Replace every </TR> with the end-of-row marker "$".
s;</[Tt][Rr]>;$;g

# The pattern space ends with a <TR ...> tag: the row's cells are on the
# following line(s), so pull in the next line and glue it on.
/<[Tt][Rr][^>]*>$/{
    N
    s/\n//g
    b top
}

# Keep joining while a row is still open, i.e. while some opening tag is
# followed by no "$" marker up to the end of the pattern space.
# NOTE: [TtRr] is a single-character class, so this matches any tag whose
# name begins with T or R (<TR>, <TD>, <TH>, <TABLE>, ...), not only <TR>.
/<[TtRr][^>]*>[^$]*$/{
    N
    s/\n//g
    b top
}

# Remove the end-of-row marker at the end of the line.
s/\$$//

# Discard anything that does not contain a <TR ...> tag (the test is not
# anchored to the start of the line).
/<[Tt][Rr][^>]*>/!d

# Replace each <TD ...>, together with blanks before it and spaces after
# it, with a colon.
s/[[:blank:]]*<[Tt][Dd][^>]*> */:/g

# Strip all remaining HTML tags.
s/<[^>]*>//g

# Handle HTML character entities: non-breaking space becomes a space.
s/&nbsp;/ /g

# Compress each run of spaces/tabs into a single space.
s/[[:blank:]][[:blank:]]*/ /g

# Drop the first colon on the line; normally that is the one generated
# for the row's first <TD>.
s/://

# Strip leading/trailing whitespace.  Without the "g" flag the first
# substitution applies at the start of the line (possibly matching
# nothing), so it removes leading spaces only.
s/ *//
s/ $//

# Blank out what is left of an HTML comment: a line that now begins with
# "--" is emptied (an empty line is still printed).
s/^--.*$//