fe7f0860c0
The end pointer has changed (the full buffer is used instead of the first DEFLLEN bytes), so the end pointer cannot be shared between crec and prec; we need to pass a different value in each case.
321 lines
8.3 KiB
C
321 lines
8.3 KiB
C
/* $NetBSD: msort.c,v 1.6 2000/10/17 15:16:27 jdolecek Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* Peter McIlroy.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the University of
|
|
* California, Berkeley and its contributors.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "sort.h"
|
|
#include "fsort.h"
|
|
|
|
#ifndef lint
|
|
__RCSID("$NetBSD: msort.c,v 1.6 2000/10/17 15:16:27 jdolecek Exp $");
|
|
__SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93");
|
|
#endif /* not lint */
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
/* Subroutines using comparisons: merge sort and check order */
|
|
/* Mode flag for insert(): replace the list head instead of only inserting. */
#define DELETE (1)
/*
 * Round n up to the next multiple of 4.  The argument is parenthesized so
 * that expressions using operators of lower precedence than '+' (shifts,
 * bitwise operators, ?:) are rounded as a whole.
 */
#define LALIGN(n) (((n) + 3) & ~3)
|
|
|
|
typedef struct mfile {
|
|
u_char *end;
|
|
short flno;
|
|
struct recheader rec[1];
|
|
} MFILE;
|
|
typedef struct tmfile {
|
|
u_char *end;
|
|
short flno;
|
|
struct trecheader rec[1];
|
|
} TMFILE;
|
|
u_char *wts, *wts1 = 0;
|
|
struct mfile *cfilebuf;
|
|
|
|
static int cmp __P((struct recheader *, struct recheader *));
|
|
static int insert __P((struct mfile **, struct mfile **, int, int));
|
|
|
|
void
|
|
fmerge(binno, files, nfiles, get, outfp, fput, ftbl)
|
|
union f_handle files;
|
|
int binno, nfiles;
|
|
int (*get) __P((int, union f_handle, int, struct recheader *, u_char *,
|
|
struct field *));
|
|
FILE *outfp;
|
|
void (*fput) __P((const struct recheader *, FILE *));
|
|
struct field *ftbl;
|
|
{
|
|
FILE *tout;
|
|
int i, j, last;
|
|
void (*put)(const struct recheader *, FILE *);
|
|
struct tempfile *l_fstack;
|
|
|
|
wts = ftbl->weights;
|
|
if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
|
|
wts1 = (ftbl->flags & R) ? Rascii : ascii;
|
|
if (!cfilebuf)
|
|
cfilebuf = malloc(DEFLLEN + sizeof(TMFILE));
|
|
|
|
i = min(16, nfiles) * LALIGN(DEFLLEN+sizeof(TMFILE));
|
|
if (!buffer || i > bufsize) {
|
|
buffer = buffer ? realloc(buffer, i) : malloc(i);
|
|
if (!buffer)
|
|
err(2, NULL);
|
|
if (!linebuf && !SINGL_FLD) {
|
|
linebuf_size = DEFLLEN;
|
|
linebuf = malloc(linebuf_size);
|
|
}
|
|
}
|
|
|
|
if (binno >= 0)
|
|
l_fstack = fstack + files.top;
|
|
else
|
|
l_fstack = fstack;
|
|
while (nfiles) {
|
|
put = putrec;
|
|
for (j = 0; j < nfiles; j += 16) {
|
|
if (nfiles <= 16) {
|
|
tout = outfp;
|
|
put = fput;
|
|
}
|
|
else
|
|
tout = ftmp();
|
|
last = min(16, nfiles - j);
|
|
if (binno < 0) {
|
|
for (i = 0; i < last; i++)
|
|
if (!(l_fstack[i+MAXFCT-1-16].fp =
|
|
fopen(files.names[j + i], "r")))
|
|
err(2, "%s", files.names[j+i]);
|
|
merge(MAXFCT-1-16, last, get, tout, put, ftbl);
|
|
}
|
|
else {
|
|
for (i = 0; i< last; i++)
|
|
rewind(l_fstack[i+j].fp);
|
|
merge(files.top+j, last, get, tout, put, ftbl);
|
|
}
|
|
if (nfiles > 16) l_fstack[j/16].fp = tout;
|
|
}
|
|
nfiles = (nfiles + 15) / 16;
|
|
if (nfiles == 1)
|
|
nfiles = 0;
|
|
if (binno < 0) {
|
|
binno = 0;
|
|
get = geteasy;
|
|
files.top = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
merge(infl0, nfiles, get, outfp, put, ftbl)
|
|
int infl0, nfiles;
|
|
int (*get) __P((int, union f_handle, int, struct recheader *, u_char *,
|
|
struct field *));
|
|
void (*put)(const struct recheader *, FILE *);
|
|
FILE *outfp;
|
|
struct field *ftbl;
|
|
{
|
|
int c, i, j;
|
|
union f_handle dummy = {0};
|
|
struct mfile *flist[16], *cfile;
|
|
for (i = j = 0; i < nfiles; i++) {
|
|
cfile = (MFILE *) (buffer +
|
|
i * LALIGN(DEFLLEN + sizeof(TMFILE)));
|
|
cfile->flno = j + infl0;
|
|
cfile->end = cfile->rec->data + DEFLLEN;
|
|
for (c = 1; c == 1;) {
|
|
if (EOF == (c = get(j+infl0, dummy, nfiles,
|
|
cfile->rec, cfile->end, ftbl))) {
|
|
--i;
|
|
--nfiles;
|
|
break;
|
|
}
|
|
if (i)
|
|
c = insert(flist, &cfile, i, !DELETE);
|
|
else
|
|
flist[0] = cfile;
|
|
}
|
|
j++;
|
|
}
|
|
cfile = cfilebuf;
|
|
cfile->flno = flist[0]->flno;
|
|
cfile->end = cfile->rec->data + DEFLLEN;
|
|
while (nfiles) {
|
|
for (c = 1; c == 1;) {
|
|
if (EOF == (c = get(cfile->flno, dummy, nfiles,
|
|
cfile->rec, cfile->end, ftbl))) {
|
|
put(flist[0]->rec, outfp);
|
|
memmove(flist, flist + 1,
|
|
sizeof(MFILE *) * (--nfiles));
|
|
cfile->flno = flist[0]->flno;
|
|
break;
|
|
}
|
|
if (!(c = insert(flist, &cfile, nfiles, DELETE)))
|
|
put(cfile->rec, outfp);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
|
|
* otherwise just inserts *rec in flist.
|
|
*/
|
|
static int
|
|
insert(flist, rec, ttop, delete)
|
|
struct mfile **flist, **rec;
|
|
int delete, ttop; /* delete = 0 or 1 */
|
|
{
|
|
struct mfile *tmprec;
|
|
int top, mid, bot = 0, cmpv = 1;
|
|
tmprec = *rec;
|
|
top = ttop;
|
|
for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
|
|
cmpv = cmp(tmprec->rec, flist[mid]->rec);
|
|
if (cmpv < 0)
|
|
top = mid;
|
|
else if (cmpv > 0)
|
|
bot = mid;
|
|
else {
|
|
if (!UNIQUE)
|
|
bot = mid - 1;
|
|
break;
|
|
}
|
|
}
|
|
if (delete) {
|
|
if (UNIQUE) {
|
|
if (!bot && cmpv)
|
|
cmpv = cmp(tmprec->rec, flist[0]->rec);
|
|
if (!cmpv)
|
|
return(1);
|
|
}
|
|
tmprec = flist[0];
|
|
if (bot)
|
|
memmove(flist, flist+1, bot * sizeof(MFILE **));
|
|
flist[bot] = *rec;
|
|
*rec = tmprec;
|
|
(*rec)->flno = (*flist)->flno;
|
|
return (0);
|
|
}
|
|
else {
|
|
if (!bot && !(UNIQUE && !cmpv)) {
|
|
cmpv = cmp(tmprec->rec, flist[0]->rec);
|
|
if (cmpv < 0)
|
|
bot = -1;
|
|
}
|
|
if (UNIQUE && !cmpv)
|
|
return (1);
|
|
bot++;
|
|
memmove(flist + bot+1, flist + bot,
|
|
(ttop - bot) * sizeof(MFILE **));
|
|
flist[bot] = *rec;
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* check order on one file
|
|
*/
|
|
void
|
|
order(infile, get, ftbl)
|
|
union f_handle infile;
|
|
int (*get) __P((int, union f_handle, int, struct recheader *, u_char *,
|
|
struct field *));
|
|
struct field *ftbl;
|
|
{
|
|
u_char *crec_end, *prec_end, *trec_end;
|
|
int c;
|
|
struct recheader *crec, *prec, *trec;
|
|
|
|
if (!SINGL_FLD)
|
|
linebuf = malloc(DEFLLEN);
|
|
buffer = malloc(2 * (DEFLLEN + sizeof(TRECHEADER)));
|
|
crec = (RECHEADER *) buffer;
|
|
crec_end = buffer + DEFLLEN + sizeof(TRECHEADER);
|
|
prec = (RECHEADER *) (buffer + DEFLLEN + sizeof(TRECHEADER));
|
|
prec_end = buffer + 2*(DEFLLEN + sizeof(TRECHEADER));
|
|
wts = ftbl->weights;
|
|
if (SINGL_FLD && (ftbl->flags & F))
|
|
wts1 = ftbl->flags & R ? Rascii : ascii;
|
|
else
|
|
wts1 = 0;
|
|
if (0 == get(-1, infile, 1, prec, prec_end, ftbl))
|
|
while (0 == get(-1, infile, 1, crec, crec_end, ftbl)) {
|
|
if (0 < (c = cmp(prec, crec))) {
|
|
crec->data[crec->length-1] = 0;
|
|
errx(1, "found disorder: %s", crec->data+crec->offset);
|
|
}
|
|
if (UNIQUE && !c) {
|
|
crec->data[crec->length-1] = 0;
|
|
errx(1, "found non-uniqueness: %s",
|
|
crec->data+crec->offset);
|
|
}
|
|
/*
|
|
* Swap pointers so that this record is on place pointed
|
|
* to by prec and new record is read to place pointed to by
|
|
* crec.
|
|
*/
|
|
trec = prec;
|
|
prec = crec;
|
|
crec = trec;
|
|
trec_end = prec_end;
|
|
prec_end = crec_end;
|
|
crec_end = trec_end;
|
|
}
|
|
exit(0);
|
|
}
|
|
|
|
static int
|
|
cmp(rec1, rec2)
|
|
struct recheader *rec1, *rec2;
|
|
{
|
|
int r;
|
|
u_char *pos1, *pos2, *end;
|
|
u_char *cwts;
|
|
for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
|
|
pos1 = rec1->data;
|
|
pos2 = rec2->data;
|
|
if (!SINGL_FLD && UNIQUE)
|
|
end = pos1 + min(rec1->offset, rec2->offset);
|
|
else
|
|
end = pos1 + min(rec1->length, rec2->length);
|
|
for (; pos1 < end; ) {
|
|
if ((r = cwts[*pos1++] - cwts[*pos2++]))
|
|
return (r);
|
|
}
|
|
}
|
|
return (0);
|
|
}
|