update with latest, from christos@deshaw.com

This commit is contained in:
cgd 1994-02-04 07:02:09 +00:00
parent 362ce33181
commit 30ac3de0e9
2 changed files with 190 additions and 119 deletions

View File

@ -31,8 +31,8 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" from: @(#)tsort.1 6.3 (Berkeley) 4/23/91
.\" $Id: tsort.1,v 1.2 1993/08/01 07:27:19 mycroft Exp $
.\" from: @(#)tsort.1 6.3 (Berkeley) 4/23/91
.\" $Id: tsort.1,v 1.3 1994/02/04 07:02:09 cgd Exp $
.\"
.Dd April 23, 1991
.Dt TSORT 1
@ -42,6 +42,7 @@
.Nd topological sort of a directed graph
.Sh SYNOPSIS
.Nm tsort
.Op Fl l
.Op Ar file
.Sh DESCRIPTION
.Nm Tsort
@ -62,6 +63,11 @@ This is useful when a node is not connected to any other nodes.
If the graph contains a cycle (and therefore cannot be properly sorted),
one of the arcs in the cycle is ignored and the sort continues.
Cycles are reported on standard error.
.Sh OPTIONS
The available options are as follows:
.Bl -tag -width indent
.It Fl l
Search for the longest cycle. Can take a long time.
.El
.Sh SEE ALSO
.Xr ar 1
.Sh HISTORY

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1989 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Michael Rendell of Memorial University of Newfoundland.
@ -35,29 +35,32 @@
*/
#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1989 The Regents of the University of California.\n\
All rights reserved.\n";
static char copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
/*static char sccsid[] = "from: @(#)tsort.c 5.3 (Berkeley) 6/1/90";*/
static char rcsid[] = "$Id: tsort.c,v 1.6 1993/12/01 22:31:06 cgd Exp $";
/* from: static char sccsid[] = "@(#)tsort.c 8.1 (Berkeley) 6/9/93"; */
static char *rcsid = "$Id: tsort.c,v 1.7 1994/02/04 07:02:11 cgd Exp $";
#endif /* not lint */
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <db.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/*
* Topological sort. Input is a list of pairs of strings seperated by
* Topological sort. Input is a list of pairs of strings separated by
* white space (spaces, tabs, and/or newlines); strings are written to
* standard output in sorted order, one per line.
*
* usage:
* tsort [inputfile]
* tsort [-l] [inputfile]
* If no input file is specified, standard input is read.
*
* Should be compatible with AT&T tsort HOWEVER the output is not identical
@ -71,19 +74,20 @@ static char rcsid[] = "$Id: tsort.c,v 1.6 1993/12/01 22:31:06 cgd Exp $";
#define HASHSIZE 53 /* doesn't need to be big */
#define NF_MARK 0x1 /* marker for cycle detection */
#define NF_ACYCLIC 0x2 /* this node is cycle free */
#define NF_NODEST 0x4 /* Unreachable */
typedef struct node_str NODE;
struct node_str {
char *n_name; /* name of this node */
NODE **n_prevp; /* pointer to previous node's n_next */
NODE *n_next; /* next node in graph */
NODE *n_hash; /* next node in hash table */
NODE **n_arcs; /* array of arcs to other nodes */
int n_narcs; /* number of arcs in n_arcs[] */
int n_arcsize; /* size of n_arcs[] array */
NODE **n_arcs; /* array of arcs to other nodes */
int n_refcnt; /* # of arcs pointing to this node */
int n_flags; /* NF_* */
char n_name[1]; /* name of this node */
};
typedef struct _buf {
@ -91,38 +95,64 @@ typedef struct _buf {
int b_bsize;
} BUF;
NODE *add_node(), *find_node();
void add_arc(), no_memory(), remove_node(), tsort();
char *grow_buf(), *malloc();
extern int errno;
DB *db;
NODE *graph;
NODE *hashtable[HASHSIZE];
NODE **cycle_buf;
NODE **longest_cycle;
int longest = 0;
int debug = 0;
void add_arc __P((char *, char *));
void err __P((const char *, ...));
int find_cycle __P((NODE *, NODE *, int, int));
NODE *get_node __P((char *));
void *grow_buf __P((void *, int));
void remove_node __P((NODE *));
void tsort __P((void));
void usage __P((void));
int
main(argc, argv)
int argc;
char **argv;
char *argv[];
{
register BUF *b;
register int c, n;
FILE *fp;
int bsize, nused;
int bsize, ch, nused;
BUF bufs[2];
if (argc < 2)
while ((ch = getopt(argc, argv, "dl")) != EOF)
switch(ch) {
case 'd':
debug = 1;
break;
case 'l':
longest = 1;
break;
case '?':
default:
usage();
}
argc -= optind;
argv += optind;
switch(argc) {
case 0:
fp = stdin;
else if (argc > 2) {
(void)fprintf(stderr, "usage: tsort [ inputfile ]\n");
exit(1);
} else if (!(fp = fopen(argv[1], "r"))) {
(void)fprintf(stderr, "tsort: %s.\n", strerror(errno));
exit(1);
break;
case 1:
if ((fp = fopen(*argv, "r")) == NULL)
err("%s: %s", *argv, strerror(errno));
break;
default:
usage();
}
for (b = bufs, n = 2; --n >= 0; b++)
b->b_buf = grow_buf((char *)NULL, b->b_bsize = 1024);
b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
/* parse input and build the graph */
for (n = 0, c = getc(fp);;) {
@ -136,10 +166,8 @@ main(argc, argv)
bsize = b->b_bsize;
do {
b->b_buf[nused++] = c;
if (nused == bsize) {
bsize *= 2;
b->b_buf = grow_buf(b->b_buf, bsize);
}
if (nused == bsize)
b->b_buf = grow_buf(b->b_buf, bsize *= 2);
c = getc(fp);
} while (c != EOF && !isspace(c));
@ -150,10 +178,8 @@ main(argc, argv)
n = !n;
}
(void)fclose(fp);
if (n) {
(void)fprintf(stderr, "tsort: odd data count.\n");
exit(1);
}
if (n)
err("odd data count");
/* do the sort */
tsort();
@ -161,16 +187,14 @@ main(argc, argv)
}
/* double the size of oldbuf and return a pointer to the new buffer. */
char *
void *
grow_buf(bp, size)
char *bp;
void *bp;
int size;
{
char *realloc();
if (!(bp = realloc(bp, (u_int)size)))
no_memory();
return(bp);
if ((bp = realloc(bp, (u_int)size)) == NULL)
err("%s", strerror(errno));
return (bp);
}
/*
@ -185,16 +209,12 @@ add_arc(s1, s2)
NODE *n2;
int bsize, i;
n1 = find_node(s1);
if (!n1)
n1 = add_node(s1);
n1 = get_node(s1);
if (!strcmp(s1, s2))
return;
n2 = find_node(s2);
if (!n2)
n2 = add_node(s2);
n2 = get_node(s2);
/*
* Check if this arc is already here.
@ -202,7 +222,6 @@ add_arc(s1, s2)
for (i = 0; i < n1->n_narcs; i++)
if (n1->n_arcs[i] == n2)
return;
/*
* Add it.
*/
@ -210,79 +229,86 @@ add_arc(s1, s2)
if (!n1->n_arcsize)
n1->n_arcsize = 10;
bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
n1->n_arcs = (NODE **)grow_buf((char *)n1->n_arcs, bsize);
n1->n_arcs = grow_buf(n1->n_arcs, bsize);
n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
}
n1->n_arcs[n1->n_narcs++] = n2;
++n2->n_refcnt;
}
hash_string(s)
char *s;
{
register int hash, i;
for (hash = 0, i = 1; *s; s++, i++)
hash += *s * i;
return(hash % HASHSIZE);
}
/*
* find a node in the graph and return a pointer to it - returns null if not
* found.
*/
/* Find a node in the graph (insert if not found) and return a pointer to it. */
NODE *
find_node(name)
get_node(name)
char *name;
{
register NODE *n;
DBT data, key;
NODE *n;
for (n = hashtable[hash_string(name)]; n; n = n->n_hash)
if (!strcmp(n->n_name, name))
return(n);
return((NODE *)NULL);
}
/*
 * Open the node-lookup database the first time it is needed.  The
 * message format takes exactly one string, the error text; the node
 * name has nothing to do with a failed open and must not be passed,
 * or it is printed in place of the error.
 */
if (db == NULL &&
    (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)
	err("db: open: %s", strerror(errno));
/* Add a node to the graph and return a pointer to it. */
NODE *
add_node(name)
char *name;
{
register NODE *n;
int hash;
key.data = name;
key.size = strlen(name) + 1;
if (!(n = (NODE *)malloc(sizeof(NODE))) || !(n->n_name = strdup(name)))
no_memory();
switch((*db->get)(db, &key, &data, 0)) {
case 0:
bcopy(data.data, &n, sizeof(n));
return (n);
case 1:
break;
default:
case -1:
err("db: get %s: %s", name, strerror(errno));
}
if ((n = malloc(sizeof(NODE) + key.size)) == NULL)
err("%s", strerror(errno));
n->n_narcs = 0;
n->n_arcsize = 0;
n->n_arcs = (NODE **)NULL;
n->n_arcs = NULL;
n->n_refcnt = 0;
n->n_flags = 0;
bcopy(name, n->n_name, key.size);
/* add to linked list */
/* Add to linked list. */
if (n->n_next = graph)
graph->n_prevp = &n->n_next;
n->n_prevp = &graph;
graph = n;
/* add to hash table */
hash = hash_string(name);
n->n_hash = hashtable[hash];
hashtable[hash] = n;
return(n);
/* Add to hash table. */
data.data = &n;
data.size = sizeof(n);
if ((*db->put)(db, &key, &data, 0))
err("db: put %s: %s", name, strerror(errno));
return (n);
}
/*
 * Walk every node still linked into the graph list and drop its
 * NF_NODEST bit, leaving all other flag bits as they were.
 */
void
clear_cycle()
{
	NODE *cur = graph;

	while (cur != NULL) {
		cur->n_flags &= ~NF_NODEST;
		cur = cur->n_next;
	}
}
/* do topological sort on graph */
void
tsort()
{
register NODE *n, *m, *next;
register NODE *n, *next;
register int cnt;
while (graph) {
/*
* keep getting rid of simple cases until there are none left,
* Keep getting rid of simple cases until there are none left,
* if there are any nodes still in the graph, then there is
* a cycle in it.
*/
@ -301,43 +327,39 @@ tsort()
if (!cycle_buf) {
/*
* allocate space for two cycle logs - one to be used
* Allocate space for two cycle logs - one to be used
* as scratch space, the other to save the longest
* cycle.
*/
for (cnt = 0, n = graph; n; n = n->n_next)
++cnt;
cycle_buf =
(NODE **)malloc((u_int)sizeof(NODE *) * cnt);
longest_cycle =
(NODE **)malloc((u_int)sizeof(NODE *) * cnt);
if (!cycle_buf || !longest_cycle)
no_memory();
cycle_buf = malloc((u_int)sizeof(NODE *) * cnt);
longest_cycle = malloc((u_int)sizeof(NODE *) * cnt);
if (cycle_buf == NULL || longest_cycle == NULL)
err("%s", strerror(errno));
}
for (n = graph; n; n = n->n_next)
if (!(n->n_flags & NF_ACYCLIC)) {
for (m=graph; m; m=m->n_next)
m->n_flags &= ~NF_MARK;
if (cnt = find_cycle(n, n, 0, 0)) {
register int i;
(void)fprintf(stderr,
"tsort: cycle in data.\n");
"tsort: cycle in data\n");
for (i = 0; i < cnt; i++)
(void)fprintf(stderr,
"tsort: %s.\n", longest_cycle[i]->n_name);
"tsort: %s\n", longest_cycle[i]->n_name);
remove_node(n);
clear_cycle();
break;
} else
} else {
/* to avoid further checks */
n->n_flags = NF_ACYCLIC;
n->n_flags |= NF_ACYCLIC;
clear_cycle();
}
}
if (!n) {
(void)fprintf(stderr,
"tsort: internal error -- could not find cycle.\n");
exit(1);
}
if (!n)
err("internal error -- could not find cycle");
}
}
@ -358,7 +380,9 @@ remove_node(n)
n->n_next->n_prevp = n->n_prevp;
}
/* look for a path from node from to node to. */
/* look for the longest? cycle from node from to node to. */
int
find_cycle(from, to, longest_len, depth)
NODE *from, *to;
int depth, longest_len;
@ -370,9 +394,9 @@ find_cycle(from, to, longest_len, depth)
* avoid infinite loops and ignore portions of the graph known
* to be acyclic
*/
if (from->n_flags & (NF_MARK|NF_ACYCLIC))
return(0);
from->n_flags = NF_MARK;
if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
return (0);
from->n_flags |= NF_MARK;
for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
cycle_buf[depth] = *np;
@ -384,19 +408,60 @@ find_cycle(from, to, longest_len, depth)
longest_len * sizeof(NODE *));
}
} else {
if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
continue;
len = find_cycle(*np, to, longest_len, depth + 1);
if (len > longest_len) {
if (debug)
printf("%*s %s->%s %d\n", depth, "",
from->n_name, to->n_name, len);
if (len == 0)
(*np)->n_flags |= NF_NODEST;
if (len > longest_len)
longest_len = len;
if (len > 0 && !longest)
break;
}
}
}
return(longest_len);
from->n_flags &= ~NF_MARK;
return (longest_len);
}
void
no_memory()
usage()
{
(void)fprintf(stderr, "tsort: %s.\n", strerror(ENOMEM));
(void)fprintf(stderr, "usage: tsort [-l] [file]\n");
exit(1);
}
#if __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif
/*
 * err --
 *	Report a fatal error and terminate.  Writes "tsort: ", then the
 *	printf-style message built from fmt and the arguments that follow
 *	it, then a newline, all to standard error, and exits with status 1.
 *	Does not return.
 *
 *	Uses an ANSI prototype and <stdarg.h> when __STDC__ is set, and the
 *	<varargs.h> calling convention otherwise.
 */
void
#if __STDC__
err(const char *fmt, ...)
#else
err(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	/* Program-name prefix, formatted message, terminating newline. */
	(void)fputs("tsort: ", stderr);
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
	(void)fputs("\n", stderr);
	exit(1);
	/* NOTREACHED */
}