2021-03-22 06:28:55 +03:00
|
|
|
/* $NetBSD: uniq.c,v 1.7 2021/03/22 03:28:55 christos Exp $ */
|
2007-06-23 20:55:15 +04:00
|
|
|
|
|
|
|
/*-
|
|
|
|
* Copyright (c) 2007 The NetBSD Foundation, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
|
|
* by Christos Zoulas.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
2021-03-22 06:28:55 +03:00
|
|
|
__RCSID("$NetBSD: uniq.c,v 1.7 2021/03/22 03:28:55 christos Exp $");
|
2007-06-23 20:55:15 +04:00
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <db.h>
|
|
|
|
#include <err.h>
|
|
|
|
#include <util.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
2010-04-25 04:54:44 +04:00
|
|
|
#include "extern.h"
|
|
|
|
|
|
|
|
static const HASHINFO hinfo = {
|
|
|
|
.bsize = 256,
|
|
|
|
.ffactor = 4,
|
|
|
|
.nelem = 32768,
|
|
|
|
.cachesize = 1024,
|
|
|
|
.hash = NULL,
|
|
|
|
.lorder = 0
|
|
|
|
};
|
2007-06-23 20:55:15 +04:00
|
|
|
|
|
|
|
static int comp(const char *, char **, size_t *);
|
|
|
|
|
|
|
|
/*
|
2007-06-23 20:56:56 +04:00
|
|
|
* Preserve only unique content lines in a file. Input lines that have
|
2007-06-23 20:55:15 +04:00
|
|
|
* content [alphanumeric characters before a comment] are white-space
|
|
|
|
* normalized and have their comments removed. Then they are placed
|
|
|
|
* in a hash table, and only the first instance of them is printed.
|
|
|
|
* Comment lines without any alphanumeric content are always printed
|
|
|
|
* since they are there to make the file "pretty". Comment lines with
|
|
|
|
* alphanumeric content are also placed into the hash table and only
|
|
|
|
* printed once.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
uniq(const char *fname)
|
|
|
|
{
|
|
|
|
DB *db;
|
|
|
|
DBT key;
|
|
|
|
static const DBT data = { NULL, 0 };
|
|
|
|
FILE *fp;
|
|
|
|
char *line;
|
|
|
|
size_t len;
|
|
|
|
|
|
|
|
if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
|
|
|
|
err(1, "Cannot create in memory database");
|
|
|
|
|
2007-06-23 20:56:56 +04:00
|
|
|
fp = efopen(fname, "r");
|
2007-06-23 20:55:15 +04:00
|
|
|
while ((line = fgetln(fp, &len)) != NULL) {
|
|
|
|
size_t complen = len;
|
|
|
|
char *compline;
|
|
|
|
if (!comp(line, &compline, &complen)) {
|
|
|
|
(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
|
|
|
|
line);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
key.data = compline;
|
|
|
|
key.size = complen;
|
|
|
|
switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
|
|
|
|
case 0:
|
|
|
|
(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
|
|
|
|
line);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
break;
|
|
|
|
case -1:
|
|
|
|
err(1, "put");
|
2014-06-21 21:48:07 +04:00
|
|
|
/*NOTREACHED*/
|
2007-06-23 20:55:15 +04:00
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
(void)fflush(stdout);
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Normalize whitespace in the original line and place a new string
 * with each run of whitespace converted to a single space in compline.
 * Leading and trailing whitespace are dropped.  If the line contains
 * just comments, we preserve them.  If it contains data and comments,
 * we kill the comments.
 *
 * origline	the input line; it does not need to be NUL-terminated
 * compline	on a return of 1, set to a newly allocated, NUL-terminated
 *		normalized string that the caller must free(); set to
 *		NULL on a return of 0
 * len		on entry the number of bytes available in origline; on
 *		return the length of *compline (0 on a return of 0)
 *
 * Return 1 if the line had actual contents, or 0 if it was empty or
 * just a comment without alphanumeric characters.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p = (const unsigned char *)origline;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen = 0;
	int hasalnum = 0, iscomment = 0;

	/* Eat leading space */
	while (l != 0 && *p != '\0' && isspace(*p)) {
		p++;
		l--;
	}

	/*
	 * Test the remaining length first: when it is zero, p points one
	 * past the end of the caller's buffer and must not be dereferenced.
	 */
	if (l == 0 || *p == '\0') {
		*compline = NULL;
		*len = 0;
		return 0;
	}

	/* The normalized text can never be longer than what is left. */
	cline = emalloc(l + 1);
	(void)memcpy(cline, p, l);
	cline[l] = '\0';

	for (q = (unsigned char *)cline; l != 0 && *p != '\0'; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one space. */
			if (complen != 0 && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
			continue;
		}

		if (!iscomment && *p == '#') {
			/* Data followed by a comment: drop the comment. */
			if (hasalnum)
				break;
			/* Comment-only line so far: keep copying its text. */
			iscomment = 1;
		} else if (isalnum(*p)) {
			hasalnum = 1;
		}
		*q++ = *p;
		complen++;
	}

	/* Eat trailing space */
	while (complen != 0 && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';

	if (!hasalnum) {
		/* Purely decorative line: nothing for the caller to keep. */
		free(cline);
		cline = NULL;
		complen = 0;
	}

	*compline = cline;
	*len = complen;
	return hasalnum;
}
|