update with latest, from christos@deshaw.com

This commit is contained in:
cgd 1994-02-04 07:02:09 +00:00
parent 362ce33181
commit 30ac3de0e9
2 changed files with 190 additions and 119 deletions

View File

@ -32,7 +32,7 @@
.\" SUCH DAMAGE. .\" SUCH DAMAGE.
.\" .\"
.\" from: @(#)tsort.1 6.3 (Berkeley) 4/23/91 .\" from: @(#)tsort.1 6.3 (Berkeley) 4/23/91
.\" $Id: tsort.1,v 1.2 1993/08/01 07:27:19 mycroft Exp $ .\" $Id: tsort.1,v 1.3 1994/02/04 07:02:09 cgd Exp $
.\" .\"
.Dd April 23, 1991 .Dd April 23, 1991
.Dt TSORT 1 .Dt TSORT 1
@ -42,6 +42,7 @@
.Nd topological sort of a directed graph .Nd topological sort of a directed graph
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm tsort .Nm tsort
.Op Fl l
.Op Ar file .Op Ar file
.Sh DESCRIPTION .Sh DESCRIPTION
.Nm Tsort .Nm Tsort
@ -62,6 +63,11 @@ This is useful when a node is not connected to any other nodes.
If the graph contains a cycle (and therefore cannot be properly sorted), If the graph contains a cycle (and therefore cannot be properly sorted),
one of the arcs in the cycle is ignored and the sort continues. one of the arcs in the cycle is ignored and the sort continues.
Cycles are reported on standard error. Cycles are reported on standard error.
.Sh OPTIONS
The available options are as follows:
.Bl -tag -width indent
.It Fl l
Search for the longest cycle. Can take a long time.
.El
.Sh SEE ALSO .Sh SEE ALSO
.Xr ar 1 .Xr ar 1
.Sh HISTORY .Sh HISTORY

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 1989 The Regents of the University of California. * Copyright (c) 1989, 1993
* All rights reserved. * The Regents of the University of California. All rights reserved.
* *
* This code is derived from software contributed to Berkeley by * This code is derived from software contributed to Berkeley by
* Michael Rendell of Memorial University of Newfoundland. * Michael Rendell of Memorial University of Newfoundland.
@ -35,29 +35,32 @@
*/ */
#ifndef lint #ifndef lint
char copyright[] = static char copyright[] =
"@(#) Copyright (c) 1989 The Regents of the University of California.\n\ "@(#) Copyright (c) 1989, 1993\n\
All rights reserved.\n"; The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */ #endif /* not lint */
#ifndef lint #ifndef lint
/*static char sccsid[] = "from: @(#)tsort.c 5.3 (Berkeley) 6/1/90";*/ /* from: static char sccsid[] = "@(#)tsort.c 8.1 (Berkeley) 6/9/93"; */
static char rcsid[] = "$Id: tsort.c,v 1.6 1993/12/01 22:31:06 cgd Exp $"; static char *rcsid = "$Id: tsort.c,v 1.7 1994/02/04 07:02:11 cgd Exp $";
#endif /* not lint */ #endif /* not lint */
#include <sys/types.h> #include <sys/types.h>
#include <errno.h> #include <errno.h>
#include <fcntl.h>
#include <db.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <ctype.h> #include <ctype.h>
#include <string.h> #include <string.h>
/* /*
* Topological sort. Input is a list of pairs of strings seperated by * Topological sort. Input is a list of pairs of strings separated by
* white space (spaces, tabs, and/or newlines); strings are written to * white space (spaces, tabs, and/or newlines); strings are written to
* standard output in sorted order, one per line. * standard output in sorted order, one per line.
* *
* usage: * usage:
* tsort [inputfile] * tsort [-l] [inputfile]
* If no input file is specified, standard input is read. * If no input file is specified, standard input is read.
* *
* Should be compatible with AT&T tsort HOWEVER the output is not identical * Should be compatible with AT&T tsort HOWEVER the output is not identical
@ -71,19 +74,20 @@ static char rcsid[] = "$Id: tsort.c,v 1.6 1993/12/01 22:31:06 cgd Exp $";
#define HASHSIZE 53 /* doesn't need to be big */ #define HASHSIZE 53 /* doesn't need to be big */
#define NF_MARK 0x1 /* marker for cycle detection */ #define NF_MARK 0x1 /* marker for cycle detection */
#define NF_ACYCLIC 0x2 /* this node is cycle free */ #define NF_ACYCLIC 0x2 /* this node is cycle free */
#define NF_NODEST 0x4 /* Unreachable */
typedef struct node_str NODE; typedef struct node_str NODE;
struct node_str { struct node_str {
char *n_name; /* name of this node */
NODE **n_prevp; /* pointer to previous node's n_next */ NODE **n_prevp; /* pointer to previous node's n_next */
NODE *n_next; /* next node in graph */ NODE *n_next; /* next node in graph */
NODE *n_hash; /* next node in hash table */ NODE **n_arcs; /* array of arcs to other nodes */
int n_narcs; /* number of arcs in n_arcs[] */ int n_narcs; /* number of arcs in n_arcs[] */
int n_arcsize; /* size of n_arcs[] array */ int n_arcsize; /* size of n_arcs[] array */
NODE **n_arcs; /* array of arcs to other nodes */
int n_refcnt; /* # of arcs pointing to this node */ int n_refcnt; /* # of arcs pointing to this node */
int n_flags; /* NF_* */ int n_flags; /* NF_* */
char n_name[1]; /* name of this node */
}; };
typedef struct _buf { typedef struct _buf {
@ -91,38 +95,64 @@ typedef struct _buf {
int b_bsize; int b_bsize;
} BUF; } BUF;
NODE *add_node(), *find_node(); DB *db;
void add_arc(), no_memory(), remove_node(), tsort();
char *grow_buf(), *malloc();
extern int errno;
NODE *graph; NODE *graph;
NODE *hashtable[HASHSIZE];
NODE **cycle_buf; NODE **cycle_buf;
NODE **longest_cycle; NODE **longest_cycle;
int longest = 0;
int debug = 0;
void add_arc __P((char *, char *));
void err __P((const char *, ...));
int find_cycle __P((NODE *, NODE *, int, int));
NODE *get_node __P((char *));
void *grow_buf __P((void *, int));
void remove_node __P((NODE *));
void tsort __P((void));
void usage __P((void));
int
main(argc, argv) main(argc, argv)
int argc; int argc;
char **argv; char *argv[];
{ {
register BUF *b; register BUF *b;
register int c, n; register int c, n;
FILE *fp; FILE *fp;
int bsize, nused; int bsize, ch, nused;
BUF bufs[2]; BUF bufs[2];
if (argc < 2) while ((ch = getopt(argc, argv, "dl")) != EOF)
switch(ch) {
case 'd':
debug = 1;
break;
case 'l':
longest = 1;
break;
case '?':
default:
usage();
}
argc -= optind;
argv += optind;
switch(argc) {
case 0:
fp = stdin; fp = stdin;
else if (argc > 2) { break;
(void)fprintf(stderr, "usage: tsort [ inputfile ]\n"); case 1:
exit(1); if ((fp = fopen(*argv, "r")) == NULL)
} else if (!(fp = fopen(argv[1], "r"))) { err("%s: %s", *argv, strerror(errno));
(void)fprintf(stderr, "tsort: %s.\n", strerror(errno)); break;
exit(1); default:
usage();
} }
for (b = bufs, n = 2; --n >= 0; b++) for (b = bufs, n = 2; --n >= 0; b++)
b->b_buf = grow_buf((char *)NULL, b->b_bsize = 1024); b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
/* parse input and build the graph */ /* parse input and build the graph */
for (n = 0, c = getc(fp);;) { for (n = 0, c = getc(fp);;) {
@ -136,10 +166,8 @@ main(argc, argv)
bsize = b->b_bsize; bsize = b->b_bsize;
do { do {
b->b_buf[nused++] = c; b->b_buf[nused++] = c;
if (nused == bsize) { if (nused == bsize)
bsize *= 2; b->b_buf = grow_buf(b->b_buf, bsize *= 2);
b->b_buf = grow_buf(b->b_buf, bsize);
}
c = getc(fp); c = getc(fp);
} while (c != EOF && !isspace(c)); } while (c != EOF && !isspace(c));
@ -150,10 +178,8 @@ main(argc, argv)
n = !n; n = !n;
} }
(void)fclose(fp); (void)fclose(fp);
if (n) { if (n)
(void)fprintf(stderr, "tsort: odd data count.\n"); err("odd data count");
exit(1);
}
/* do the sort */ /* do the sort */
tsort(); tsort();
@ -161,15 +187,13 @@ main(argc, argv)
} }
/* resize buffer bp to size bytes and return a pointer to the new buffer. */ /* resize buffer bp to size bytes and return a pointer to the new buffer. */
char * void *
grow_buf(bp, size) grow_buf(bp, size)
char *bp; void *bp;
int size; int size;
{ {
char *realloc(); if ((bp = realloc(bp, (u_int)size)) == NULL)
err("%s", strerror(errno));
if (!(bp = realloc(bp, (u_int)size)))
no_memory();
return (bp); return (bp);
} }
@ -185,16 +209,12 @@ add_arc(s1, s2)
NODE *n2; NODE *n2;
int bsize, i; int bsize, i;
n1 = find_node(s1); n1 = get_node(s1);
if (!n1)
n1 = add_node(s1);
if (!strcmp(s1, s2)) if (!strcmp(s1, s2))
return; return;
n2 = find_node(s2); n2 = get_node(s2);
if (!n2)
n2 = add_node(s2);
/* /*
* Check if this arc is already here. * Check if this arc is already here.
@ -202,7 +222,6 @@ add_arc(s1, s2)
for (i = 0; i < n1->n_narcs; i++) for (i = 0; i < n1->n_narcs; i++)
if (n1->n_arcs[i] == n2) if (n1->n_arcs[i] == n2)
return; return;
/* /*
* Add it. * Add it.
*/ */
@ -210,79 +229,86 @@ add_arc(s1, s2)
if (!n1->n_arcsize) if (!n1->n_arcsize)
n1->n_arcsize = 10; n1->n_arcsize = 10;
bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
n1->n_arcs = (NODE **)grow_buf((char *)n1->n_arcs, bsize); n1->n_arcs = grow_buf(n1->n_arcs, bsize);
n1->n_arcsize = bsize / sizeof(*n1->n_arcs); n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
} }
n1->n_arcs[n1->n_narcs++] = n2; n1->n_arcs[n1->n_narcs++] = n2;
++n2->n_refcnt; ++n2->n_refcnt;
} }
hash_string(s) /* Find a node in the graph (insert if not found) and return a pointer to it. */
char *s;
{
register int hash, i;
for (hash = 0, i = 1; *s; s++, i++)
hash += *s * i;
return(hash % HASHSIZE);
}
/*
* find a node in the graph and return a pointer to it - returns null if not
* found.
*/
NODE * NODE *
find_node(name) get_node(name)
char *name; char *name;
{ {
register NODE *n; DBT data, key;
NODE *n;
for (n = hashtable[hash_string(name)]; n; n = n->n_hash) if (db == NULL &&
if (!strcmp(n->n_name, name)) (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)
err("db: open: %s", name, strerror(errno));
key.data = name;
key.size = strlen(name) + 1;
switch((*db->get)(db, &key, &data, 0)) {
case 0:
bcopy(data.data, &n, sizeof(n));
return (n); return (n);
return((NODE *)NULL); case 1:
break;
default:
case -1:
err("db: get %s: %s", name, strerror(errno));
} }
/* Add a node to the graph and return a pointer to it. */ if ((n = malloc(sizeof(NODE) + key.size)) == NULL)
NODE * err("%s", strerror(errno));
add_node(name)
char *name;
{
register NODE *n;
int hash;
if (!(n = (NODE *)malloc(sizeof(NODE))) || !(n->n_name = strdup(name)))
no_memory();
n->n_narcs = 0; n->n_narcs = 0;
n->n_arcsize = 0; n->n_arcsize = 0;
n->n_arcs = (NODE **)NULL; n->n_arcs = NULL;
n->n_refcnt = 0; n->n_refcnt = 0;
n->n_flags = 0; n->n_flags = 0;
bcopy(name, n->n_name, key.size);
/* add to linked list */ /* Add to linked list. */
if (n->n_next = graph) if (n->n_next = graph)
graph->n_prevp = &n->n_next; graph->n_prevp = &n->n_next;
n->n_prevp = &graph; n->n_prevp = &graph;
graph = n; graph = n;
/* add to hash table */ /* Add to hash table. */
hash = hash_string(name); data.data = &n;
n->n_hash = hashtable[hash]; data.size = sizeof(n);
hashtable[hash] = n; if ((*db->put)(db, &key, &data, 0))
err("db: put %s: %s", name, strerror(errno));
return (n); return (n);
} }
/*
* Clear the NODEST flag from all nodes.
*/
void
clear_cycle()
{
NODE *n;
for (n = graph; n; n = n->n_next)
n->n_flags &= ~NF_NODEST;
}
/* do topological sort on graph */ /* do topological sort on graph */
void void
tsort() tsort()
{ {
register NODE *n, *m, *next; register NODE *n, *next;
register int cnt; register int cnt;
while (graph) { while (graph) {
/* /*
* keep getting rid of simple cases until there are none left, * Keep getting rid of simple cases until there are none left,
* if there are any nodes still in the graph, then there is * if there are any nodes still in the graph, then there is
* a cycle in it. * a cycle in it.
*/ */
@ -301,43 +327,39 @@ tsort()
if (!cycle_buf) { if (!cycle_buf) {
/* /*
* allocate space for two cycle logs - one to be used * Allocate space for two cycle logs - one to be used
* as scratch space, the other to save the longest * as scratch space, the other to save the longest
* cycle. * cycle.
*/ */
for (cnt = 0, n = graph; n; n = n->n_next) for (cnt = 0, n = graph; n; n = n->n_next)
++cnt; ++cnt;
cycle_buf = cycle_buf = malloc((u_int)sizeof(NODE *) * cnt);
(NODE **)malloc((u_int)sizeof(NODE *) * cnt); longest_cycle = malloc((u_int)sizeof(NODE *) * cnt);
longest_cycle = if (cycle_buf == NULL || longest_cycle == NULL)
(NODE **)malloc((u_int)sizeof(NODE *) * cnt); err("%s", strerror(errno));
if (!cycle_buf || !longest_cycle)
no_memory();
} }
for (n = graph; n; n = n->n_next) for (n = graph; n; n = n->n_next)
if (!(n->n_flags & NF_ACYCLIC)) { if (!(n->n_flags & NF_ACYCLIC)) {
for (m=graph; m; m=m->n_next)
m->n_flags &= ~NF_MARK;
if (cnt = find_cycle(n, n, 0, 0)) { if (cnt = find_cycle(n, n, 0, 0)) {
register int i; register int i;
(void)fprintf(stderr, (void)fprintf(stderr,
"tsort: cycle in data.\n"); "tsort: cycle in data\n");
for (i = 0; i < cnt; i++) for (i = 0; i < cnt; i++)
(void)fprintf(stderr, (void)fprintf(stderr,
"tsort: %s.\n", longest_cycle[i]->n_name); "tsort: %s\n", longest_cycle[i]->n_name);
remove_node(n); remove_node(n);
clear_cycle();
break; break;
} else } else {
/* to avoid further checks */ /* to avoid further checks */
n->n_flags = NF_ACYCLIC; n->n_flags |= NF_ACYCLIC;
clear_cycle();
}
} }
if (!n) { if (!n)
(void)fprintf(stderr, err("internal error -- could not find cycle");
"tsort: internal error -- could not find cycle.\n");
exit(1);
}
} }
} }
@ -358,7 +380,9 @@ remove_node(n)
n->n_next->n_prevp = n->n_prevp; n->n_next->n_prevp = n->n_prevp;
} }
/* look for a path from node from to node to. */
/* look for the longest? cycle from node from to node to. */
int
find_cycle(from, to, longest_len, depth) find_cycle(from, to, longest_len, depth)
NODE *from, *to; NODE *from, *to;
int depth, longest_len; int depth, longest_len;
@ -370,9 +394,9 @@ find_cycle(from, to, longest_len, depth)
* avoid infinite loops and ignore portions of the graph known * avoid infinite loops and ignore portions of the graph known
* to be acyclic * to be acyclic
*/ */
if (from->n_flags & (NF_MARK|NF_ACYCLIC)) if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
return (0); return (0);
from->n_flags = NF_MARK; from->n_flags |= NF_MARK;
for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
cycle_buf[depth] = *np; cycle_buf[depth] = *np;
@ -384,19 +408,60 @@ find_cycle(from, to, longest_len, depth)
longest_len * sizeof(NODE *)); longest_len * sizeof(NODE *));
} }
} else { } else {
if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
continue;
len = find_cycle(*np, to, longest_len, depth + 1); len = find_cycle(*np, to, longest_len, depth + 1);
if (len > longest_len) {
if (debug)
printf("%*s %s->%s %d\n", depth, "",
from->n_name, to->n_name, len);
if (len == 0)
(*np)->n_flags |= NF_NODEST;
if (len > longest_len)
longest_len = len; longest_len = len;
if (len > 0 && !longest)
break; break;
} }
} }
} from->n_flags &= ~NF_MARK;
return (longest_len); return (longest_len);
} }
void void
no_memory() usage()
{ {
(void)fprintf(stderr, "tsort: %s.\n", strerror(ENOMEM)); (void)fprintf(stderr, "usage: tsort [-l] [file]\n");
exit(1); exit(1);
} }
#if __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif
void
#if __STDC__
err(const char *fmt, ...)
#else
err(fmt, va_alist)
char *fmt;
va_dcl
#endif
{
va_list ap;
#if __STDC__
va_start(ap, fmt);
#else
va_start(ap);
#endif
(void)fprintf(stderr, "tsort: ");
(void)vfprintf(stderr, fmt, ap);
va_end(ap);
(void)fprintf(stderr, "\n");
exit(1);
/* NOTREACHED */
}