NetBSD/usr.bin/fmt/fmt.c
2017-10-13 00:11:56 +00:00

558 lines
12 KiB
C

/* $NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $ */
/*
* Copyright (c) 1980, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1980, 1993\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
#ifndef lint
#if 0
static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
#endif
__RCSID("$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $");
#endif /* not lint */
#include <wctype.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <err.h>
#include <limits.h>
#include <string.h>
#include <locale.h>
#include "buffer.h"
/*
* fmt -- format the concatenation of input files or standard input
* onto standard output. Designed for use with Mail ~|
*
* Syntax : fmt [ goal [ max ] ] [ name ... ]
* Authors: Kurt Shoens (UCB) 12/7/78;
* Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
*/
/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75
static size_t goal_length; /* Target or goal line length in output */
static size_t max_length; /* Max line length in output */
static size_t pfx; /* Current leading blank count */
static int raw; /* Don't treat mail specially */
static int lineno; /* Current input line */
static int mark; /* Last place we saw a head line */
static int center;
static struct buffer outbuf;
static const wchar_t *headnames[] = { L"To", L"Subject", L"Cc", NULL };
static void usage(void) __dead;
static int getnum(const char *, const char *, size_t *, int);
static void fmt(FILE *);
static int ispref(const wchar_t *, const wchar_t *);
static void leadin(void);
static void oflush(void);
static void pack(const wchar_t *, size_t);
static void prefix(const struct buffer *, int);
static void split(const wchar_t *, int);
static void tabulate(struct buffer *);
int ishead(const wchar_t *);
/*
* Drive the whole formatter by managing input files. Also,
* cause initialization of the output stuff and flush it out
* at the end.
*/
int
main(int argc, char **argv)
{
FILE *fi;
int errs = 0;
int compat = 1;
int c;
goal_length = GOAL_LENGTH;
max_length = MAX_LENGTH;
buf_init(&outbuf);
lineno = 1;
mark = -10;
setprogname(*argv);
(void)setlocale(LC_ALL, "");
while ((c = getopt(argc, argv, "Cg:m:rw:")) != -1)
switch (c) {
case 'C':
center++;
break;
case 'g':
(void)getnum(optarg, "goal", &goal_length, 1);
compat = 0;
break;
case 'm':
case 'w':
(void)getnum(optarg, "max", &max_length, 1);
compat = 0;
break;
case 'r':
raw++;
break;
default:
usage();
}
argc -= optind;
argv += optind;
/*
* compatibility with old usage.
*/
if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
argv++;
argc--;
if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
argv++;
argc--;
}
}
if (max_length <= goal_length) {
errx(1, "Max length (%zu) must be greater than goal "
"length (%zu)", max_length, goal_length);
}
if (argc == 0) {
fmt(stdin);
oflush();
return 0;
}
for (;argc; argc--, argv++) {
if ((fi = fopen(*argv, "r")) == NULL) {
warn("Cannot open `%s'", *argv);
errs++;
continue;
}
fmt(fi);
(void)fclose(fi);
}
oflush();
buf_end(&outbuf);
return errs;
}
static void
usage(void)
{
(void)fprintf(stderr,
"Usage: %s [-Cr] [-g <goal>] [-m|w <max>] [<files>..]\n"
"\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
getprogname(), getprogname());
exit(1);
}
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
unsigned long ul;
char *ep;
errno = 0;
ul = strtoul(str, &ep, 0);
if (*str != '\0' && *ep == '\0') {
if ((errno == ERANGE && ul == ULONG_MAX) || ul > SIZE_T_MAX)
errx(1, "%s number `%s' too big", what, str);
*res = (size_t)ul;
return 1;
} else if (badnum)
errx(1, "Bad %s number `%s'", what, str);
return 0;
}
/*
* Read up characters from the passed input file, forming lines,
* doing ^H processing, expanding tabs, stripping trailing blanks,
* and sending each line down for analysis.
*/
static void
fmt(FILE *fi)
{
struct buffer lbuf, cbuf;
wchar_t *cp, *cp2;
wint_t c;
int add_space;
size_t len, col, i;
if (center) {
for (;;) {
cp = fgetwln(fi, &len);
if (!cp)
return;
/* skip over leading space */
while (len > 0) {
if (!iswspace(*cp))
break;
cp++;
len--;
}
/* clear trailing space */
while (len > 0) {
if (!iswspace((unsigned char)cp[len-1]))
break;
len--;
}
if (len == 0) {
/* blank line */
(void)putwchar(L'\n');
continue;
}
if (goal_length > len) {
for (i = 0; i < (goal_length - len) / 2; i++) {
(void)putwchar(L' ');
}
}
for (i = 0; i < len; i++) {
(void)putwchar(cp[i]);
}
(void)putwchar(L'\n');
}
}
buf_init(&lbuf);
buf_init(&cbuf);
c = getwc(fi);
while (c != WEOF) {
/*
* Collect a line, doing ^H processing.
* Leave tabs for now.
*/
buf_reset(&lbuf);
while (c != '\n' && c != WEOF) {
if (c == '\b') {
(void)buf_unputc(&lbuf);
c = getwc(fi);
continue;
}
if(!(iswprint(c) || c == '\t' || c >= 160)) {
c = getwc(fi);
continue;
}
buf_putc(&lbuf, c);
c = getwc(fi);
}
buf_putc(&lbuf, '\0');
(void)buf_unputc(&lbuf);
add_space = c != WEOF;
/*
* Expand tabs on the way.
*/
col = 0;
cp = lbuf.bptr;
buf_reset(&cbuf);
while ((c = *cp++) != '\0') {
if (c != '\t') {
col++;
buf_putc(&cbuf, c);
continue;
}
do {
buf_putc(&cbuf, ' ');
col++;
} while ((col & 07) != 0);
}
/*
* Swipe trailing blanks from the line.
*/
for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
continue;
cbuf.ptr = cp2 + 1;
buf_putc(&cbuf, '\0');
(void)buf_unputc(&cbuf);
prefix(&cbuf, add_space);
if (c != WEOF)
c = getwc(fi);
}
buf_end(&cbuf);
buf_end(&lbuf);
}
/*
* Take a line devoid of tabs and other garbage and determine its
* blank prefix. If the indent changes, call for a linebreak.
* If the input line is blank, echo the blank line on the output.
* Finally, if the line minus the prefix is a mail header, try to keep
* it on a line by itself.
*/
static void
prefix(const struct buffer *buf, int add_space)
{
const wchar_t *cp;
const wchar_t **hp;
size_t np;
int h;
if (buf->ptr == buf->bptr) {
oflush();
(void)putwchar(L'\n');
return;
}
for (cp = buf->bptr; *cp == ' '; cp++)
continue;
np = cp - buf->bptr;
/*
* The following horrible expression attempts to avoid linebreaks
* when the indent changes due to a paragraph.
*/
if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
oflush();
if (!raw) {
if ((h = ishead(cp)) != 0) {
oflush();
mark = lineno;
}
if (lineno - mark < 3 && lineno - mark > 0)
for (hp = &headnames[0]; *hp != NULL; hp++)
if (ispref(*hp, cp)) {
h = 1;
oflush();
break;
}
if (!h && (h = (*cp == '.')))
oflush();
} else
h = 0;
pfx = np;
if (h) {
pack(cp, (size_t)(buf->ptr - cp));
oflush();
} else
split(cp, add_space);
lineno++;
}
/*
* Split up the passed line into output "words" which are
* maximal strings of non-blanks with the blank separation
* attached at the end. Pass these words along to the output
* line packer.
*/
static void
split(const wchar_t line[], int add_space)
{
const wchar_t *cp;
struct buffer word;
size_t wlen;
buf_init(&word);
cp = line;
while (*cp) {
buf_reset(&word);
wlen = 0;
/*
* Collect a 'word,' allowing it to contain escaped white
* space.
*/
while (*cp && *cp != ' ') {
if (*cp == '\\' && iswspace(cp[1]))
buf_putc(&word, *cp++);
buf_putc(&word, *cp++);
wlen++;
}
/*
* Guarantee a space at end of line. Two spaces after end of
* sentence punctuation.
*/
if (*cp == '\0' && add_space) {
buf_putc(&word, ' ');
if (strchr(".:!", cp[-1]))
buf_putc(&word, ' ');
}
while (*cp == ' ')
buf_putc(&word, *cp++);
buf_putc(&word, '\0');
(void)buf_unputc(&word);
pack(word.bptr, wlen);
}
buf_end(&word);
}
/*
* Output section.
* Build up line images from the words passed in. Prefix
* each line with correct number of blanks.
*
* At the bottom of this whole mess, leading tabs are reinserted.
*/
/*
* Pack a word onto the output line. If this is the beginning of
* the line, push on the appropriately-sized string of blanks first.
* If the word won't fit on the current line, flush and begin a new
* line. If the word is too long to fit all by itself on a line,
* just give it its own and hope for the best.
*
* LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
* goal length, take it. If not, then check to see if the line
* will be over the max length; if so put the word on the next
* line. If not, check to see if the line will be closer to the
* goal length with or without the word and take it or put it on
* the next line accordingly.
*/
static void
pack(const wchar_t *word, size_t wlen)
{
const wchar_t *cp;
size_t s, t;
if (outbuf.bptr == outbuf.ptr)
leadin();
/*
* LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
* length of the line before the word is added; t is now the length
* of the line after the word is added
*/
s = outbuf.ptr - outbuf.bptr;
t = wlen + s;
if ((t <= goal_length) || ((t <= max_length) &&
(s <= goal_length) && (t - goal_length <= goal_length - s))) {
/*
* In like flint!
*/
for (cp = word; *cp;)
buf_putc(&outbuf, *cp++);
return;
}
if (s > pfx) {
oflush();
leadin();
}
for (cp = word; *cp;)
buf_putc(&outbuf, *cp++);
}
/*
* If there is anything on the current output line, send it on
* its way. Reset outbuf.
*/
static void
oflush(void)
{
if (outbuf.bptr == outbuf.ptr)
return;
buf_putc(&outbuf, '\0');
(void)buf_unputc(&outbuf);
tabulate(&outbuf);
buf_reset(&outbuf);
}
/*
* Take the passed line buffer, insert leading tabs where possible, and
* output on standard output (finally).
*/
static void
tabulate(struct buffer *buf)
{
wchar_t *cp;
size_t b, t;
/*
* Toss trailing blanks in the output line.
*/
for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
continue;
*++cp = '\0';
/*
* Count the leading blank space and tabulate.
*/
for (cp = buf->bptr; *cp == ' '; cp++)
continue;
b = cp - buf->bptr;
t = b / 8;
b = b % 8;
if (t > 0)
do
(void)putwchar(L'\t');
while (--t);
if (b > 0)
do
(void)putwchar(L' ');
while (--b);
while (*cp)
(void)putwchar(*cp++);
(void)putwchar(L'\n');
}
/*
* Initialize the output line with the appropriate number of
* leading blanks.
*/
static void
leadin(void)
{
size_t b;
buf_reset(&outbuf);
for (b = 0; b < pfx; b++)
buf_putc(&outbuf, ' ');
}
/*
* Is s1 a prefix of s2??
*/
static int
ispref(const wchar_t *s1, const wchar_t *s2)
{
while (*s1++ == *s2)
continue;
return *s1 == '\0';
}