PR/20818: Martin Husemann: Split does not handle >= 2G files.

This commit is contained in:
christos 2003-03-20 14:12:49 +00:00
parent 91ce94db76
commit 2bdc2add9c
2 changed files with 64 additions and 45 deletions

View File

@ -1,6 +1,7 @@
# $NetBSD: Makefile,v 1.3 1994/12/21 08:20:33 jtc Exp $ # $NetBSD: Makefile,v 1.4 2003/03/20 14:12:49 christos Exp $
# @(#)Makefile 8.1 (Berkeley) 6/6/93 # @(#)Makefile 8.1 (Berkeley) 6/6/93
WARNS=2
PROG= split PROG= split
.include <bsd.prog.mk> .include <bsd.prog.mk>

View File

@ -1,4 +1,4 @@
/* $NetBSD: split.c,v 1.8 1999/11/02 10:55:47 lukem Exp $ */ /* $NetBSD: split.c,v 1.9 2003/03/20 14:12:50 christos Exp $ */
/* /*
* Copyright (c) 1987, 1993, 1994 * Copyright (c) 1987, 1993, 1994
@ -43,7 +43,7 @@ __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\n\
#if 0 #if 0
static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94"; static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94";
#endif #endif
__RCSID("$NetBSD: split.c,v 1.8 1999/11/02 10:55:47 lukem Exp $"); __RCSID("$NetBSD: split.c,v 1.9 2003/03/20 14:12:50 christos Exp $");
#endif /* not lint */ #endif /* not lint */
#include <sys/param.h> #include <sys/param.h>
@ -56,28 +56,26 @@ __RCSID("$NetBSD: split.c,v 1.8 1999/11/02 10:55:47 lukem Exp $");
#include <string.h> #include <string.h>
#include <unistd.h> #include <unistd.h>
#define DEFLINE 1000 /* Default num lines per file. */ #define DEFLINE 1000 /* Default num lines per file. */
long bytecnt; /* Byte count to split on. */ static int file_open; /* If a file open. */
long numlines; /* Line count to split on. */ static int ifd = -1, ofd = -1; /* Input/output file descriptors. */
int file_open; /* If a file open. */ static char fname[MAXPATHLEN]; /* File name prefix. */
int ifd = -1, ofd = -1; /* Input/output file descriptors. */
char bfr[MAXBSIZE]; /* I/O buffer. */
char fname[MAXPATHLEN]; /* File name prefix. */
int main __P((int, char **)); int main(int, char **);
void newfile __P((void)); static void newfile(void);
void split1 __P((void)); static void split1(unsigned long long);
void split2 __P((void)); static void split2(unsigned long long);
void usage __P((void)); static void usage(void) __attribute__((__noreturn__));
static unsigned long long bigwrite __P((int, const void *, unsigned long long));
int int
main(argc, argv) main(int argc, char *argv[])
int argc;
char *argv[];
{ {
int ch; int ch;
char *ep, *p; char *ep, *p;
unsigned long long bytecnt = 0; /* Byte count to split on. */
unsigned long long numlines = 0;/* Line count to split on. */
while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1) while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1)
switch (ch) { switch (ch) {
@ -105,18 +103,18 @@ main(argc, argv)
ifd = 0; ifd = 0;
break; break;
case 'b': /* Byte count. */ case 'b': /* Byte count. */
if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 || if ((bytecnt = strtoull(optarg, &ep, 10)) <= 0 ||
(*ep != '\0' && *ep != 'k' && *ep != 'm')) (*ep != '\0' && *ep != 'k' && *ep != 'm'))
errx(1, "%s: illegal byte count.", optarg); errx(1, "%s: illegal byte count.", optarg);
if (*ep == 'k') if (*ep == 'k')
bytecnt *= 1024; bytecnt *= 1024;
else if (*ep == 'm') else if (*ep == 'm')
bytecnt *= 1048576; bytecnt *= 1024 * 1024;
break; break;
case 'l': /* Line count. */ case 'l': /* Line count. */
if (numlines != 0) if (numlines != 0)
usage(); usage();
if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) if ((numlines = strtoull(optarg, &ep, 10)) <= 0 || *ep)
errx(1, "%s: illegal line count.", optarg); errx(1, "%s: illegal line count.", optarg);
break; break;
default: default:
@ -145,23 +143,24 @@ main(argc, argv)
ifd = 0; ifd = 0;
if (bytecnt) { if (bytecnt) {
split1(); split1(bytecnt);
exit (0); } else {
split2(numlines);
} }
split2(); return 0;
exit(0);
} }
/* /*
* split1 -- * split1 --
* Split the input by bytes. * Split the input by bytes.
*/ */
void static void
split1() split1(unsigned long long bytecnt)
{ {
long bcnt; unsigned long long bcnt, dist;
int dist, len; ssize_t len;
char *C; char *C;
char bfr[MAXBSIZE];
for (bcnt = 0;;) for (bcnt = 0;;)
switch (len = read(ifd, bfr, MAXBSIZE)) { switch (len = read(ifd, bfr, MAXBSIZE)) {
@ -177,26 +176,26 @@ split1()
} }
if (bcnt + len >= bytecnt) { if (bcnt + len >= bytecnt) {
dist = bytecnt - bcnt; dist = bytecnt - bcnt;
if (write(ofd, bfr, dist) != dist) if (bigwrite(ofd, bfr, dist) != dist)
err(1, "write"); err(1, "write");
len -= dist; len -= dist;
for (C = bfr + dist; len >= bytecnt; for (C = bfr + dist; len >= bytecnt;
len -= bytecnt, C += bytecnt) { len -= bytecnt, C += bytecnt) {
newfile(); newfile();
if (write(ofd, if (bigwrite(ofd,
C, (int)bytecnt) != bytecnt) C, (int)bytecnt) != bytecnt)
err(1, "write"); err(1, "write");
} }
if (len) { if (len) {
newfile(); newfile();
if (write(ofd, C, len) != len) if (bigwrite(ofd, C, len) != len)
err(1, "write"); err(1, "write");
} else } else
file_open = 0; file_open = 0;
bcnt = len; bcnt = len;
} else { } else {
bcnt += len; bcnt += len;
if (write(ofd, bfr, len) != len) if (bigwrite(ofd, bfr, len) != len)
err(1, "write"); err(1, "write");
} }
} }
@ -206,12 +205,13 @@ split1()
* split2 -- * split2 --
* Split the input by lines. * Split the input by lines.
*/ */
void static void
split2() split2(unsigned long long numlines)
{ {
long lcnt; unsigned long long lcnt, bcnt;
int len, bcnt; ssize_t len;
char *Ce, *Cs; char *Ce, *Cs;
char bfr[MAXBSIZE];
for (lcnt = 0;;) for (lcnt = 0;;)
switch (len = read(ifd, bfr, MAXBSIZE)) { switch (len = read(ifd, bfr, MAXBSIZE)) {
@ -228,7 +228,7 @@ split2()
for (Cs = Ce = bfr; len--; Ce++) for (Cs = Ce = bfr; len--; Ce++)
if (*Ce == '\n' && ++lcnt == numlines) { if (*Ce == '\n' && ++lcnt == numlines) {
bcnt = Ce - Cs + 1; bcnt = Ce - Cs + 1;
if (write(ofd, Cs, bcnt) != bcnt) if (bigwrite(ofd, Cs, bcnt) != bcnt)
err(1, "write"); err(1, "write");
lcnt = 0; lcnt = 0;
Cs = Ce + 1; Cs = Ce + 1;
@ -239,7 +239,7 @@ split2()
} }
if (Cs < Ce) { if (Cs < Ce) {
bcnt = Ce - Cs; bcnt = Ce - Cs;
if (write(ofd, Cs, bcnt) != bcnt) if (bigwrite(ofd, Cs, bcnt) != bcnt)
err(1, "write"); err(1, "write");
} }
} }
@ -249,10 +249,10 @@ split2()
* newfile -- * newfile --
* Open a new output file. * Open a new output file.
*/ */
void static void
newfile() newfile(void)
{ {
static long fnum; static int fnum;
static int defname; static int defname;
static char *fpnt; static char *fpnt;
@ -285,10 +285,28 @@ newfile()
err(1, "%s", fname); err(1, "%s", fname);
} }
void static unsigned long long
usage() bigwrite(int fd, const void *buf, unsigned long long len)
{
const char *ptr = buf;
unsigned long long sofar = 0;
while (len != 0) {
ssize_t w, nw = (len > INT_MAX) ? INT_MAX : (ssize_t)len;
if ((w = write(fd, ptr, nw)) == -1)
return sofar;
len -= w;
ptr += w;
sofar += w;
}
return sofar;
}
static void
usage(void)
{ {
(void)fprintf(stderr, (void)fprintf(stderr,
"usage: split [-b byte_count] [-l line_count] [file [prefix]]\n"); "Usage: %s [-b byte_count] [-l line_count] [file [prefix]]\n", getprogname());
exit(1); exit(1);
} }