diff --git a/share/misc/nanpa.sed b/share/misc/nanpa.sed
new file mode 100644
index 000000000000..3de0db0cc3a7
--- /dev/null
+++ b/share/misc/nanpa.sed
@@ -0,0 +1,48 @@
+# $NetBSD: nanpa.sed,v 1.1 2003/03/03 01:13:36 jhawk Exp $
+#
+# Parse HTML tables output by
+#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
+# Specifically, for each html table row (TR),
+# print the <TD> elements separated by colons.
+#
+# This could break on HTML comments.
+#
+:top
+# Strip ^Ms (CRs); \r is a GNU sed escape -- if your sed lacks it, use a literal ^M
+s/\r//g
+# Join all lines with unterminated HTML tags (a '<' with no closing '>')
+/<[^>]*$/{
+	N
+	b top
+}
+# Replace all </TR> with EOL tag ('$' marks the end of a completed row)
+s;</[Tt][Rr]>;$;g
+# Join lines with only <TR>.
+/<[Tt][Rr][^>]*>$/{
+	N
+	s/\n//g
+	b top
+}
+# Also, join all lines starting with <TR> that have no EOL marker yet.
+/<[TtRr][^>]*>[^$]*$/{
+	N
+	s/\n//g
+	b top
+}
+# Remove EOL markers
+s/\$$//
+# Remove lines not starting with <TR>
+/<[Tt][Rr][^>]*>/!d
+# Replace all <TD> with colon
+s/[[:blank:]]*<[Tt][Dd][^>]*> */:/g
+# Strip all HTML tags
+s/<[^>]*>//g
+# Handle HTML characters
+s/&nbsp;/ /g
+# Compress spaces/tabs
+s/[[:blank:]][[:blank:]]*/ /g
+# Strip leading colons
+s/^://
+# Strip leading/trailing whitespace
+s/^ //
+s/ $//