version 1.4, 1997/01/15 23:43:26 |
version 1.5, 2001/03/26 22:53:33 |
|
|
/* $OpenBSD$ */ |
/* $OpenBSD$ */ |
/* $NetBSD: tsort.c,v 1.11 1996/01/17 20:37:53 mycroft Exp $ */ |
/* ex:ts=8 sw=4: |
|
*/ |
|
|
/* |
/* |
* Copyright (c) 1989, 1993, 1994 |
* Copyright (c) 1999-2001 Marc Espie. |
* The Regents of the University of California. All rights reserved. |
|
* |
* |
* This code is derived from software contributed to Berkeley by |
|
* Michael Rendell of Memorial University of Newfoundland. |
|
* |
|
* Redistribution and use in source and binary forms, with or without |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* modification, are permitted provided that the following conditions |
* are met: |
* are met: |
|
|
* documentation and/or other materials provided with the distribution. |
* documentation and/or other materials provided with the distribution. |
* 3. All advertising materials mentioning features or use of this software |
* 3. All advertising materials mentioning features or use of this software |
* must display the following acknowledgement: |
* must display the following acknowledgement: |
* This product includes software developed by the University of |
* This product includes software developed by Marc Espie for the OpenBSD |
* California, Berkeley and its contributors. |
* Project. |
* 4. Neither the name of the University nor the names of its contributors |
|
* may be used to endorse or promote products derived from this software |
|
* without specific prior written permission. |
|
* |
* |
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
* THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBSD |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
* PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
* SUCH DAMAGE. |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
*/ |
|
|
#ifndef lint |
|
static char copyright[] = |
|
"@(#) Copyright (c) 1989, 1993, 1994\n\ |
|
The Regents of the University of California. All rights reserved.\n"; |
|
#endif /* not lint */ |
|
|
|
#ifndef lint |
|
#if 0 |
|
static char sccsid[] = "@(#)tsort.c 8.3 (Berkeley) 5/4/95"; |
|
#endif |
|
static char rcsid[] = "$OpenBSD$"; |
|
#endif /* not lint */ |
|
|
|
#include <sys/types.h> |
#include <sys/types.h> |
|
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
#include <db.h> |
|
#include <err.h> |
#include <err.h> |
#include <errno.h> |
#include <limits.h> |
#include <fcntl.h> |
#include <stddef.h> |
|
#include <ohash.h> |
#include <stdio.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
|
#include <sysexits.h> |
#include <unistd.h> |
#include <unistd.h> |
|
|
/* |
/* The complexity of topological sorting is O(e), where e is the |
* Topological sort. Input is a list of pairs of strings separated by |
* size of input. While reading input, vertices have to be identified, |
* white space (spaces, tabs, and/or newlines); strings are written to |
* thus add the complexity of e keys retrieval among v keys using |
* standard output in sorted order, one per line. |
* an appropriate data structure. This program uses open double hashing |
|
* for that purpose. See Knuth for the expected complexity of double |
|
* hashing (Brent variation should probably be used if v << e, as a user |
|
* option). |
* |
* |
* usage: |
* The algorithm used for longest cycle reporting is accurate, but somewhat |
* tsort [-l] [inputfile] |
* expensive. It may need to build all free paths of the graph (a free |
* If no input file is specified, standard input is read. |
* path is a path that never goes twice through the same node), whose |
|
* number can be as high as O(2^e). Usually, the number of free paths is |
|
* much smaller though. This program's author does not believe that a |
|
* significantly better worst-case complexity algorithm exists. |
* |
* |
* Should be compatible with AT&T tsort HOWEVER the output is not identical |
* In case of a hints file, the set of minimal nodes is maintained as a |
* (i.e. for most graphs there is more than one sorted order, and this tsort |
* heap. The resulting complexity is O(e+v log v) for the worst case. |
* usually generates a different one than the AT&T tsort). Also, cycle |
* The average should actually be near O(e). |
* reporting seems to be more accurate in this version (the AT&T tsort |
|
* sometimes says a node is in a cycle when it isn't). |
|
* |
* |
* Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 |
* The simple topological sort algorithm detects cycles. This program |
|
* goes further, breaking cycles through the use of simple heuristics. |
|
* Each cycle break checks the whole set of nodes, hence if c cycle breaks |
|
* are needed, this is an extra cost of O(c v). |
|
* |
|
* Possible heuristics are as follows: |
|
* - break cycle at node with lowest number of predecessors (default case), |
|
* - break longest cycle at node with lowest number of predecessors, |
|
* - break cycle at next node from the hints file. |
|
* |
|
* Except for the hints file case, which sets an explicit constraint on |
|
* which cycle to break, those heuristics locally result in the smallest |
|
* number of broken edges. |
|
* |
|
* Those are admittedly greedy strategies, as is the selection of the next |
|
* node from the hints file amongst equivalent candidates that is used for |
|
* `stable' topological sorting. |
*/ |
*/ |
#define HASHSIZE 53 /* doesn't need to be big */ |
|
#define NF_MARK 0x1 /* marker for cycle detection */ |
|
#define NF_ACYCLIC 0x2 /* this node is cycle free */ |
|
#define NF_NODEST 0x4 /* Unreachable */ |
|
|
|
typedef struct node_str NODE; |
#ifdef __GNUC__ |
|
#define UNUSED __attribute__((unused)) |
|
#else |
|
#define UNUSED |
|
#endif |
|
|
struct node_str { |
struct node; |
NODE **n_prevp; /* pointer to previous node's n_next */ |
|
NODE *n_next; /* next node in graph */ |
/* The set of arcs from a given node is stored as a linked list. */ |
NODE **n_arcs; /* array of arcs to other nodes */ |
struct link { |
int n_narcs; /* number of arcs in n_arcs[] */ |
struct link *next; |
int n_arcsize; /* size of n_arcs[] array */ |
struct node *node; |
int n_refcnt; /* # of arcs pointing to this node */ |
|
int n_flags; /* NF_* */ |
|
char n_name[1]; /* name of this node */ |
|
}; |
}; |
|
|
typedef struct _buf { |
struct node { |
char *b_buf; |
unsigned int refs; /* Number of arcs left, coming into this node . |
int b_bsize; |
* Note that nodes with a null count can't |
} BUF; |
* be part of cycles. */ |
|
struct link *arcs; /* List of forward arcs. */ |
|
|
DB *db; |
unsigned int order; /* Order of nodes according to a hint file. */ |
NODE *graph, **cycle_buf, **longest_cycle; |
|
int debug, longest, quiet; |
|
|
|
void add_arc __P((char *, char *)); |
/* Cycle detection algorithms build a free path of nodes. */ |
int find_cycle __P((NODE *, NODE *, int, int)); |
struct node *from; /* Previous node in the current path. */ |
NODE *get_node __P((char *)); |
|
void *grow_buf __P((void *, int)); |
|
void remove_node __P((NODE *)); |
|
void tsort __P((void)); |
|
void usage __P((void)); |
|
|
|
int |
unsigned int mark; /* Mark processed nodes in cycle discovery. */ |
main(argc, argv) |
struct link *traverse; /* Next link to traverse when backtracking. */ |
int argc; |
char k[1]; /* Name of this node. */ |
char *argv[]; |
}; |
|
|
|
#define HASH_START 9 |
|
|
|
/*
 * A counted array of node pointers.  The same layout serves as a plain
 * stack and as a binary heap, depending on the routine that works on it.
 */
struct array {
	unsigned int	  entries;	/* Number of slots of t[] in use. */
	struct node	**t;		/* The node pointers themselves. */
};
|
|
|
static void nodes_init __P((struct ohash *)); |
|
static struct node *node_lookup __P((struct ohash *, const char *, const char *)); |
|
static void usage __P((void)); |
|
static struct node *new_node __P((const char *, const char *)); |
|
|
|
static void read_pairs __P((FILE *, struct ohash *, int, const char *)); |
|
static void split_nodes __P((struct ohash *, struct array *, struct array *)); |
|
static void insert_arc __P((struct node *, struct node *)); |
|
|
|
#ifdef DEBUG |
|
static void dump_node __P((struct node *)); |
|
static void dump_array __P((struct array *)); |
|
static void dump_hash __P((struct ohash *)); |
|
#endif |
|
static void read_hints __P((FILE *, struct ohash *, const char *)); |
|
static struct node *find_smallest_node __P((struct array *)); |
|
static struct node *find_good_cycle_break __P((struct array *)); |
|
static void print_cycle __P((struct array *)); |
|
static int find_cycle_with __P((struct node *, struct array *)); |
|
static struct node *find_predecessor __P((struct array *, struct node *)); |
|
static unsigned int traverse_node __P((struct node *, unsigned int, struct array *)); |
|
static struct node *find_longest_cycle __P((struct array *, struct array *)); |
|
|
|
static void heap_down __P((struct array *, unsigned int)); |
|
static void heapify __P((struct array *)); |
|
static struct node *dequeue __P((struct array *)); |
|
static void enqueue __P((struct array *, struct node *)); |
|
|
|
|
|
|
|
#define erealloc(n, s) emem(realloc(n, s)) |
|
static void *hash_alloc __P((size_t, void *)); |
|
static void hash_free __P((void *, size_t, void *)); |
|
static void* entry_alloc __P((size_t, void *)); |
|
static void *emalloc __P((size_t)); |
|
static void *emem __P((void *)); |
|
#define DEBUG_TRAVERSE 0 |
|
static struct ohash_info node_info = { |
|
offsetof(struct node, k), NULL, hash_alloc, hash_free, entry_alloc }; |
|
|
|
|
|
int main __P((int, char *[])); |
|
|
|
|
|
/*** |
|
*** Memory handling. |
|
***/ |
|
|
|
static void * |
|
emem(p) |
|
void *p; |
{ |
{ |
register BUF *b; |
if (p) |
register int c, n; |
return p; |
FILE *fp; |
else |
int bsize, ch, nused; |
errx(EX_SOFTWARE, "Memory exhausted"); |
BUF bufs[2]; |
} |
|
|
while ((ch = getopt(argc, argv, "dlq")) != -1) |
static void * |
switch (ch) { |
hash_alloc(s, u) |
case 'd': |
size_t s; |
debug = 1; |
void *u UNUSED; |
break; |
{ |
case 'l': |
return emem(calloc(s, 1)); |
longest = 1; |
} |
break; |
|
case 'q': |
|
quiet = 1; |
|
break; |
|
case '?': |
|
default: |
|
usage(); |
|
} |
|
argc -= optind; |
|
argv += optind; |
|
|
|
switch (argc) { |
static void |
case 0: |
hash_free(p, s, u) |
fp = stdin; |
void *p; |
break; |
size_t s UNUSED; |
case 1: |
void *u UNUSED; |
if ((fp = fopen(*argv, "r")) == NULL) |
{ |
err(1, "%s", *argv); |
free(p); |
break; |
} |
default: |
|
usage(); |
|
} |
|
|
|
for (b = bufs, n = 2; --n >= 0; b++) |
static void * |
b->b_buf = grow_buf(NULL, b->b_bsize = 1024); |
entry_alloc(s, u) |
|
size_t s; |
|
void *u UNUSED; |
|
{ |
|
return emalloc(s); |
|
} |
|
|
/* parse input and build the graph */ |
static void * |
for (n = 0, c = getc(fp);;) { |
emalloc(s) |
while (c != EOF && isspace(c)) |
size_t s; |
c = getc(fp); |
{ |
if (c == EOF) |
return emem(malloc(s)); |
break; |
} |
|
|
nused = 0; |
|
b = &bufs[n]; |
/*** |
bsize = b->b_bsize; |
*** Hash table. |
do { |
***/ |
b->b_buf[nused++] = c; |
|
if (nused == bsize) |
|
b->b_buf = grow_buf(b->b_buf, bsize *= 2); |
|
c = getc(fp); |
|
} while (c != EOF && !isspace(c)); |
|
|
|
b->b_buf[nused] = '\0'; |
/* Inserting and finding nodes in the hash structure. |
b->b_bsize = bsize; |
* We handle interval strings for efficiency wrt fgetln. */ |
if (n) |
static struct node * |
add_arc(bufs[0].b_buf, bufs[1].b_buf); |
new_node(start, end) |
n = !n; |
const char *start; |
} |
const char *end; |
(void)fclose(fp); |
{ |
if (n) |
struct node *n; |
errx(1, "odd data count"); |
|
|
|
/* do the sort */ |
n = ohash_create_entry(&node_info, start, &end); |
tsort(); |
n->from = NULL; |
exit(0); |
n->arcs = NULL; |
|
n->refs = 0; |
|
n->mark = 0; |
|
n->order = 0; |
|
n->traverse = NULL; |
|
return n; |
} |
} |
|
|
/* double the size of oldbuf and return a pointer to the new buffer. */ |
|
void * |
static void |
grow_buf(bp, size) |
nodes_init(h) |
void *bp; |
struct ohash *h; |
int size; |
|
{ |
{ |
if ((bp = realloc(bp, (u_int)size)) == NULL) |
ohash_init(h, HASH_START, &node_info); |
err(1, NULL); |
|
return (bp); |
|
} |
} |
|
|
/* |
static struct node * |
* add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in |
node_lookup(h, start, end) |
* the graph, then add them. |
struct ohash *h; |
*/ |
const char *start; |
void |
const char *end; |
add_arc(s1, s2) |
|
char *s1, *s2; |
|
{ |
{ |
register NODE *n1; |
unsigned int i; |
NODE *n2; |
struct node * n; |
int bsize, i; |
|
|
|
n1 = get_node(s1); |
i = ohash_qlookupi(h, start, &end); |
|
|
if (!strcmp(s1, s2)) |
n = ohash_find(h, i); |
|
if (n == NULL) |
|
n = ohash_insert(h, i, new_node(start, end)); |
|
return n; |
|
} |
|
|
|
#ifdef DEBUG |
|
static void |
|
dump_node(n) |
|
struct node *n; |
|
{ |
|
struct link *l; |
|
|
|
if (n->refs == 0) |
return; |
return; |
|
printf("%s (%u): ", n->k, n->refs); |
|
for (l = n->arcs; l != NULL; l = l->next) |
|
if (n->refs != 0) |
|
printf("%s(%u) ", l->node->k, l->node->refs); |
|
putchar('\n'); |
|
} |
|
|
n2 = get_node(s2); |
static void |
|
dump_array(a) |
|
struct array *a; |
|
{ |
|
unsigned int i; |
|
|
/* |
for (i = 0; i < a->entries; i++) |
* Check if this arc is already here. |
dump_node(a->t[i]); |
*/ |
} |
for (i = 0; i < n1->n_narcs; i++) |
|
if (n1->n_arcs[i] == n2) |
/*
 * Debugging aid: walk every entry of the node hash table h and print it
 * with dump_node().
 *
 * The parameter is a struct ohash, matching the prototype above and the
 * ohash_first()/ohash_next() iterators used here.
 */
static void
dump_hash(h)
	struct ohash *h;
{
	unsigned int i;
	struct node *n;

	for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i))
		dump_node(n);
}
|
#endif |
|
|
|
|
|
/*** |
|
*** Reading data. |
|
***/ |
|
|
|
static void |
|
insert_arc(a, b) |
|
struct node *a, *b; |
|
{ |
|
struct link *l; |
|
|
|
/* Check that this arc is not already present. */ |
|
for (l = a->arcs; l != NULL; l = l->next) { |
|
if (l->node == b) |
return; |
return; |
/* |
|
* Add it. |
|
*/ |
|
if (n1->n_narcs == n1->n_arcsize) { |
|
if (!n1->n_arcsize) |
|
n1->n_arcsize = 10; |
|
bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; |
|
n1->n_arcs = grow_buf(n1->n_arcs, bsize); |
|
n1->n_arcsize = bsize / sizeof(*n1->n_arcs); |
|
} |
} |
n1->n_arcs[n1->n_narcs++] = n2; |
b->refs++; |
++n2->n_refcnt; |
l = emalloc(sizeof(struct link)); |
|
l->node = b; |
|
l->next = a->arcs; |
|
a->arcs = l; |
} |
} |
|
|
/* Find a node in the graph (insert if not found) and return a pointer to it. */ |
static void |
NODE * |
read_pairs(f, h, reverse, name) |
get_node(name) |
FILE *f; |
char *name; |
struct ohash *h; |
|
int reverse; |
|
const char *name; |
{ |
{ |
DBT data, key; |
int toggle; |
NODE *n; |
struct node *a; |
|
size_t size; |
|
char *str; |
|
unsigned int o; |
|
|
if (db == NULL && |
o = 1; |
(db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) |
toggle = 1; |
err(1, "db: %s", name); |
a = NULL; |
|
|
|
while ((str = fgetln(f, &size)) != NULL) { |
|
char *sentinel; |
|
|
key.data = name; |
sentinel = str + size; |
key.size = strlen(name) + 1; |
for (;;) { |
|
char *e; |
|
|
switch ((*db->get)(db, &key, &data, 0)) { |
while (isspace(*str) && str < sentinel) |
case 0: |
str++; |
bcopy(data.data, &n, sizeof(n)); |
if (str == sentinel) |
return (n); |
break; |
case 1: |
for (e = str; !isspace(*e) && e < sentinel; e++) |
break; |
continue; |
default: |
if (toggle) { |
case -1: |
a = node_lookup(h, str, e); |
err(1, "db: %s", name); |
if (a->order == 0) |
|
a->order = o++; |
|
} else { |
|
struct node *b; |
|
|
|
b = node_lookup(h, str, e); |
|
assert(a != NULL); |
|
if (b != a) { |
|
if (reverse) |
|
insert_arc(b, a); |
|
else |
|
insert_arc(a, b); |
|
} |
|
} |
|
toggle = !toggle; |
|
str = e; |
|
} |
} |
} |
|
if (toggle == 0) |
|
errx(EX_DATAERR, "odd number of pairs in %s", name); |
|
if (!feof(f)) |
|
err(EX_IOERR, "error reading %s", name); |
|
} |
|
|
if ((n = malloc(sizeof(NODE) + key.size)) == NULL) |
static void |
err(1, NULL); |
read_hints(f, h, name) |
|
FILE *f; |
|
struct ohash *h; |
|
const char *name; |
|
{ |
|
char *str; |
|
size_t size; |
|
unsigned int i; |
|
|
n->n_narcs = 0; |
i = 1; |
n->n_arcsize = 0; |
|
n->n_arcs = NULL; |
|
n->n_refcnt = 0; |
|
n->n_flags = 0; |
|
bcopy(name, n->n_name, key.size); |
|
|
|
/* Add to linked list. */ |
while ((str = fgetln(f, &size)) != NULL) { |
if ((n->n_next = graph) != NULL) |
char *sentinel; |
graph->n_prevp = &n->n_next; |
|
n->n_prevp = &graph; |
|
graph = n; |
|
|
|
/* Add to hash table. */ |
sentinel = str + size; |
data.data = &n; |
for (;;) { |
data.size = sizeof(n); |
char *e; |
if ((*db->put)(db, &key, &data, 0)) |
struct node *a; |
err(1, "db: %s", name); |
|
return (n); |
while (isspace(*str) && str < sentinel) |
|
str++; |
|
if (str == sentinel) |
|
break; |
|
for (e = str; !isspace(*e) && e < sentinel; e++) |
|
continue; |
|
a = node_lookup(h, str, e); |
|
if (a->order != 0) |
|
errx(EX_DATAERR, |
|
"duplicate node %s in hints file %s", |
|
a->k, name); |
|
else |
|
a->order = i++; |
|
str = e; |
|
} |
|
} |
} |
} |
|
|
|
|
|
/*** |
|
*** Standard heap handling routines. |
|
***/ |
|
|
/* |
static void |
* Clear the NODEST flag from all nodes. |
heap_down(h, i) |
*/ |
struct array *h; |
void |
unsigned int i; |
clear_cycle() |
|
{ |
{ |
NODE *n; |
unsigned int j; |
|
struct node *swap; |
|
|
for (n = graph; n != NULL; n = n->n_next) |
for (; (j=2*i+1) < h->entries; i = j) { |
n->n_flags &= ~NF_NODEST; |
if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order) |
|
j++; |
|
if (h->t[i]->order <= h->t[j]->order) |
|
break; |
|
swap = h->t[i]; |
|
h->t[i] = h->t[j]; |
|
h->t[j] = swap; |
|
} |
} |
} |
|
|
/* do topological sort on graph */ |
static void |
void |
heapify(h) |
tsort() |
struct array *h; |
{ |
{ |
register NODE *n, *next; |
unsigned int i; |
register int cnt, i; |
|
|
|
while (graph != NULL) { |
for (i = h->entries; i != 0;) |
/* |
heap_down(h, --i); |
* Keep getting rid of simple cases until there are none left, |
} |
* if there are any nodes still in the graph, then there is |
|
* a cycle in it. |
|
*/ |
|
do { |
|
for (cnt = 0, n = graph; n != NULL; n = next) { |
|
next = n->n_next; |
|
if (n->n_refcnt == 0) { |
|
remove_node(n); |
|
++cnt; |
|
} |
|
} |
|
} while (graph != NULL && cnt); |
|
|
|
if (graph == NULL) |
#define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] ) |
|
|
|
static struct node * |
|
dequeue(h) |
|
struct array *h; |
|
{ |
|
struct node *n; |
|
|
|
if (h->entries == 0) |
|
n = NULL; |
|
else { |
|
n = h->t[0]; |
|
if (--h->entries != 0) { |
|
h->t[0] = h->t[h->entries]; |
|
heap_down(h, 0); |
|
} |
|
} |
|
return n; |
|
} |
|
|
|
/*
 * Add node n to the candidate set h: through the heap routine enqueue()
 * when hints_flag (looked up in the invoking scope) is set, as a plain
 * push at the end of the array otherwise.
 *
 * There is deliberately no semicolon after while (0): the invocation
 * supplies it, so the macro expands to exactly one statement and can be
 * used as the body of an if that has an else.
 */
#define ENQUEUE(h, n) do {			\
	if (hints_flag)				\
		enqueue((h), (n));		\
	else					\
		(h)->t[(h)->entries++] = (n);	\
} while (0)
|
|
|
static void |
|
enqueue(h, n) |
|
struct array *h; |
|
struct node *n; |
|
{ |
|
unsigned int i, j; |
|
struct node *swap; |
|
|
|
h->t[h->entries++] = n; |
|
for (i = h->entries-1; i > 0; i = j) { |
|
j = (i-1)/2; |
|
if (h->t[j]->order < h->t[i]->order) |
break; |
break; |
|
swap = h->t[j]; |
|
h->t[j] = h->t[i]; |
|
h->t[i] = swap; |
|
} |
|
} |
|
|
if (!cycle_buf) { |
|
/* |
/*** |
* Allocate space for two cycle logs - one to be used |
*** Search through hash array for nodes. |
* as scratch space, the other to save the longest |
***/ |
* cycle. |
|
*/ |
/* Split nodes into unrefed nodes/live nodes. */ |
for (cnt = 0, n = graph; n != NULL; n = n->n_next) |
static void |
++cnt; |
split_nodes(hash, heap, remaining) |
cycle_buf = malloc((u_int)sizeof(NODE *) * cnt); |
struct ohash *hash; |
longest_cycle = malloc((u_int)sizeof(NODE *) * cnt); |
struct array *heap; |
if (cycle_buf == NULL || longest_cycle == NULL) |
struct array *remaining; |
err(1, NULL); |
{ |
|
|
|
struct node *n; |
|
unsigned int i; |
|
|
|
heap->t = emalloc(sizeof(struct node *) * ohash_entries(hash)); |
|
remaining->t = emalloc(sizeof(struct node *) * ohash_entries(hash)); |
|
heap->entries = 0; |
|
remaining->entries = 0; |
|
|
|
for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) { |
|
if (n->refs == 0) |
|
heap->t[heap->entries++] = n; |
|
else |
|
remaining->t[remaining->entries++] = n; |
|
} |
|
} |
|
|
|
/* Good point to break a cycle: live node with as few refs as possible. */ |
|
static struct node * |
|
find_good_cycle_break(h) |
|
struct array *h; |
|
{ |
|
unsigned int i; |
|
unsigned int best; |
|
struct node *u; |
|
|
|
best = UINT_MAX; |
|
u = NULL; |
|
|
|
assert(h->entries != 0); |
|
for (i = 0; i < h->entries; i++) { |
|
struct node *n = h->t[i]; |
|
/* No need to look further. */ |
|
if (n->refs == 1) |
|
return n; |
|
if (n->refs != 0 && n->refs < best) { |
|
best = n->refs; |
|
u = n; |
} |
} |
for (n = graph; n != NULL; n = n->n_next) |
} |
if (!(n->n_flags & NF_ACYCLIC)) |
assert(u != NULL); |
if (cnt = find_cycle(n, n, 0, 0)) { |
return u; |
if (!quiet) { |
} |
warnx("cycle in data"); |
|
for (i = 0; i < cnt; i++) |
/* Retrieve the node with the smallest order. */ |
warnx("%s", |
static struct node * |
longest_cycle[i]->n_name); |
find_smallest_node(h) |
} |
struct array *h; |
remove_node(n); |
{ |
clear_cycle(); |
unsigned int i; |
break; |
unsigned int best; |
} else { |
struct node *u; |
/* to avoid further checks */ |
|
n->n_flags |= NF_ACYCLIC; |
|
clear_cycle(); |
|
} |
|
|
|
if (n == NULL) |
best = UINT_MAX; |
errx(1, "internal error -- could not find cycle"); |
u = NULL; |
|
|
|
assert(h->entries != 0); |
|
for (i = 0; i < h->entries; i++) { |
|
struct node *n = h->t[i]; |
|
if (n->refs != 0 && n->order < best) { |
|
best = n->order; |
|
u = n; |
|
} |
} |
} |
|
assert(u != NULL); |
|
return u; |
} |
} |
|
|
/* print node and remove from graph (does not actually free node) */ |
|
void |
/*** |
remove_node(n) |
*** Graph algorithms. |
register NODE *n; |
***/ |
|
|
|
/* Explore the nodes reachable from i to find a cycle containing it, store |
|
* it in c. This may fail. */ |
|
static int |
|
find_cycle_with(i, c) |
|
struct node *i; |
|
struct array *c; |
{ |
{ |
register NODE **np; |
struct node *n; |
register int i; |
|
|
|
(void)printf("%s\n", n->n_name); |
n = i; |
for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) |
/* XXX Previous cycle findings may have left this pointer non-null. */ |
--(*np)->n_refcnt; |
i->from = NULL; |
n->n_narcs = 0; |
|
*n->n_prevp = n->n_next; |
for (;;) { |
if (n->n_next) |
/* Note that all marks are reversed before this code exits. */ |
n->n_next->n_prevp = n->n_prevp; |
n->mark = 1; |
|
if (n->traverse) |
|
n->traverse = n->traverse->next; |
|
else |
|
n->traverse = n->arcs; |
|
/* Skip over dead nodes. */ |
|
while (n->traverse && n->traverse->node->refs == 0) |
|
n->traverse = n->traverse->next; |
|
if (n->traverse) { |
|
struct node *go = n->traverse->node; |
|
|
|
if (go->mark) { |
|
if (go == i) { |
|
c->entries = 0; |
|
for (; n != NULL; n = n->from) |
|
c->t[c->entries++] = n; |
|
return 1; |
|
} |
|
} else { |
|
go->from = n; |
|
n = go; |
|
} |
|
} else { |
|
n->mark = 0; |
|
n = n->from; |
|
if (n == NULL) |
|
return 0; |
|
} |
|
} |
} |
} |
|
|
|
/* Find a live predecessor of node n. This is a slow routine, as it needs |
|
* to go through the whole array, but it is not needed often. |
|
*/ |
|
static struct node * |
|
find_predecessor(a, n) |
|
struct array *a; |
|
struct node *n; |
|
{ |
|
unsigned int i; |
|
|
/* look for the longest? cycle from node from to node to. */ |
for (i = 0; i < a->entries; i++) { |
int |
struct node *m; |
find_cycle(from, to, longest_len, depth) |
|
NODE *from, *to; |
m = a->t[i]; |
int depth, longest_len; |
if (m->refs != 0) { |
|
struct link *l; |
|
|
|
for (l = m->arcs; l != NULL; l = l->next) |
|
if (l->node == n) |
|
return m; |
|
} |
|
} |
|
assert(1 == 0); |
|
return NULL; |
|
} |
|
|
|
/* Traverse all strongly connected components reachable from node n. |
|
Start numbering them at o. Return the maximum order reached. |
|
Update the largest cycle found so far. |
|
*/ |
|
static unsigned int |
|
traverse_node(n, o, c) |
|
struct node *n; |
|
unsigned int o; |
|
struct array *c; |
{ |
{ |
register NODE **np; |
unsigned int min, max; |
register int i, len; |
|
|
|
/* |
n->from = NULL; |
* avoid infinite loops and ignore portions of the graph known |
min = o; |
* to be acyclic |
max = ++o; |
*/ |
|
if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC)) |
|
return (0); |
|
from->n_flags |= NF_MARK; |
|
|
|
for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { |
for (;;) { |
cycle_buf[depth] = *np; |
n->mark = o; |
if (*np == to) { |
if (DEBUG_TRAVERSE) |
if (depth + 1 > longest_len) { |
printf("%s(%d) ", n->k, n->mark); |
longest_len = depth + 1; |
/* Find next arc to explore. */ |
(void)memcpy((char *)longest_cycle, |
if (n->traverse) |
(char *)cycle_buf, |
n->traverse = n->traverse->next; |
longest_len * sizeof(NODE *)); |
else |
|
n->traverse = n->arcs; |
|
/* Skip over dead nodes. */ |
|
while (n->traverse && n->traverse->node->refs == 0) |
|
n->traverse = n->traverse->next; |
|
/* If arc left. */ |
|
if (n->traverse) { |
|
struct node *go; |
|
|
|
go = n->traverse->node; |
|
/* Optimisation: if go->mark < min, we already |
|
* visited this strongly-connected component in |
|
* a previous pass. Hence, this can yield no new |
|
* cycle. */ |
|
|
|
/* Not part of the current path: go for it. */ |
|
if (go->mark == 0 || go->mark == min) { |
|
go->from = n; |
|
n = go; |
|
o++; |
|
if (o > max) |
|
max = o; |
|
/* Part of the current path: check cycle length. */ |
|
} else if (go->mark > min) { |
|
if (DEBUG_TRAVERSE) |
|
printf("%d\n", o - go->mark + 1); |
|
if (o - go->mark + 1 > c->entries) { |
|
struct node *t; |
|
unsigned int i; |
|
|
|
c->entries = o - go->mark + 1; |
|
i = 0; |
|
c->t[i++] = go; |
|
for (t = n; t != go; t = t->from) |
|
c->t[i++] = t; |
|
} |
} |
} |
|
|
|
/* No arc left: backtrack. */ |
} else { |
} else { |
if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST)) |
n->mark = min; |
continue; |
n = n->from; |
len = find_cycle(*np, to, longest_len, depth + 1); |
if (!n) |
|
return max; |
|
o--; |
|
} |
|
} |
|
} |
|
|
if (debug) |
static void |
(void)printf("%*s %s->%s %d\n", depth, "", |
print_cycle(c) |
from->n_name, to->n_name, len); |
struct array *c; |
|
{ |
|
unsigned int i; |
|
|
if (len == 0) |
/* Printing in reverse order, since cycle discoveries finds reverse |
(*np)->n_flags |= NF_NODEST; |
* edges. */ |
|
for (i = c->entries; i != 0;) { |
|
i--; |
|
warnx("%s", c->t[i]->k); |
|
} |
|
} |
|
|
if (len > longest_len) |
static struct node * |
longest_len = len; |
find_longest_cycle(h, c) |
|
struct array *h; |
|
struct array *c; |
|
{ |
|
unsigned int i; |
|
unsigned int o; |
|
unsigned int best; |
|
struct node *n; |
|
static int notfirst = 0; |
|
|
if (len > 0 && !longest) |
assert(h->entries != 0); |
break; |
|
|
/* No cycle found yet. */ |
|
c->entries = 0; |
|
|
|
/* Reset the set of marks, except the first time around. */ |
|
if (notfirst) { |
|
for (i = 0; i < h->entries; i++) |
|
h->t[i]->mark = 0; |
|
} else |
|
notfirst = 1; |
|
|
|
o = 0; |
|
|
|
/* Traverse the array. Each unmarked, live node heralds a |
|
* new set of strongly connected components. */ |
|
for (i = 0; i < h->entries; i++) { |
|
n = h->t[i]; |
|
if (n->refs != 0 && n->mark == 0) { |
|
/* Each call to traverse_node uses a separate |
|
* interval of numbers to mark nodes. */ |
|
o++; |
|
o = traverse_node(n, o, c); |
} |
} |
} |
} |
from->n_flags &= ~NF_MARK; |
|
return (longest_len); |
assert(c->entries != 0); |
|
n = c->t[0]; |
|
best = n->refs; |
|
for (i = 0; i < c->entries; i++) { |
|
if (c->t[i]->refs < best) { |
|
n = c->t[i]; |
|
best = n->refs; |
|
} |
|
} |
|
return n; |
} |
} |
|
|
void |
|
|
/* Plural suffix for a count: "" for exactly one, "s" otherwise (zero included). */
#define plural(n) ((n) != 1 ? "s" : "")
|
|
|
int |
|
main(argc, argv) |
|
int argc; |
|
char *argv[]; |
|
{ |
|
struct ohash pairs; |
|
int reverse_flag, quiet_flag, long_flag, |
|
warn_flag, hints_flag, verbose_flag; |
|
|
|
reverse_flag = quiet_flag = long_flag = |
|
warn_flag = hints_flag = verbose_flag = 0; |
|
nodes_init(&pairs); |
|
|
|
{ |
|
int c; |
|
|
|
while ((c = getopt(argc, argv, "h:flqrvw")) != -1) { |
|
switch(c) { |
|
case 'h': { |
|
FILE *f; |
|
|
|
f = fopen(optarg, "r"); |
|
if (f == NULL) |
|
err(EX_NOINPUT, "Can't open hint file %s", |
|
optarg); |
|
read_hints(f, &pairs, optarg); |
|
fclose(f); |
|
} |
|
/*FALLTHRU*/ |
|
case 'f': |
|
if (hints_flag == 1) |
|
usage(); |
|
hints_flag = 1; |
|
break; |
|
case 'l': |
|
long_flag = 1; |
|
break; |
|
case 'q': |
|
quiet_flag = 1; |
|
break; |
|
case 'r': |
|
reverse_flag = 1; |
|
break; |
|
case 'v': |
|
verbose_flag = 1; |
|
break; |
|
case 'w': |
|
warn_flag = 1; |
|
break; |
|
default: |
|
usage(); |
|
} |
|
} |
|
|
|
argc -= optind; |
|
argv += optind; |
|
} |
|
|
|
switch(argc) { |
|
case 1: { |
|
FILE *f; |
|
|
|
f = fopen(argv[0], "r"); |
|
if (f == NULL) |
|
err(EX_NOINPUT, "Can't open file %s", argv[1]); |
|
read_pairs(f, &pairs, reverse_flag, argv[1]); |
|
fclose(f); |
|
break; |
|
} |
|
case 0: |
|
read_pairs(stdin, &pairs, reverse_flag, "stdin"); |
|
break; |
|
default: |
|
usage(); |
|
} |
|
|
|
{ |
|
struct array aux; /* Unrefed nodes/cycle reporting. */ |
|
struct array remaining; |
|
unsigned int broken_arcs, broken_cycles; |
|
unsigned int left; |
|
|
|
broken_arcs = 0; |
|
broken_cycles = 0; |
|
|
|
split_nodes(&pairs, &aux, &remaining); |
|
ohash_delete(&pairs); |
|
|
|
if (hints_flag) |
|
heapify(&aux); |
|
|
|
left = remaining.entries + aux.entries; |
|
while (left != 0) { |
|
|
|
/* Standard topological sort. */ |
|
while (aux.entries) { |
|
struct link *l; |
|
struct node *n; |
|
|
|
n = DEQUEUE(&aux); |
|
printf("%s\n", n->k); |
|
left--; |
|
/* We can't free nodes, as we don't know which |
|
* entry we can remove in the hash table. We |
|
* rely on refs == 0 to recognize live nodes. |
|
* Decrease ref count of live nodes, enter new |
|
* candidates into the unrefed list. */ |
|
for (l = n->arcs; l != NULL; l = l->next) |
|
if (l->node->refs != 0 && |
|
--l->node->refs == 0) { |
|
ENQUEUE(&aux, l->node); |
|
} |
|
} |
|
/* There are still cycles to break. */ |
|
if (left != 0) { |
|
struct node *n; |
|
|
|
broken_cycles++; |
|
/* XXX Simple cycle detection and long cycle |
|
* detection are mutually exclusive. */ |
|
|
|
if (long_flag) { |
|
n = find_longest_cycle(&remaining, &aux); |
|
} else { |
|
if (hints_flag) |
|
n = find_smallest_node(&remaining); |
|
else |
|
n = find_good_cycle_break(&remaining); |
|
if (!quiet_flag) { |
|
while (!find_cycle_with(n, &aux)) |
|
n = find_predecessor(&remaining, n); |
|
} |
|
} |
|
|
|
if (!quiet_flag) { |
|
warnx("cycle in data"); |
|
print_cycle(&aux); |
|
} |
|
|
|
if (verbose_flag) |
|
warnx("%u edge%s broken", n->refs, |
|
plural(n->refs)); |
|
broken_arcs += n->refs; |
|
n->refs = 0; |
|
/* Reinitialization, cycle reporting uses aux. */ |
|
aux.t[0] = n; |
|
aux.entries = 1; |
|
} |
|
} |
|
if (verbose_flag && broken_cycles != 0) |
|
warnx("%u cycle%s broken, for a total of %u edge%s", |
|
broken_cycles, plural(broken_cycles), |
|
broken_arcs, plural(broken_arcs)); |
|
if (warn_flag) |
|
exit(broken_cycles < 256 ? broken_cycles : 255); |
|
else |
|
exit(EX_OK); |
|
} |
|
} |
|
|
|
|
|
extern char *__progname; |
|
|
|
static void |
usage() |
usage() |
{ |
{ |
(void)fprintf(stderr, "usage: tsort [-lq] [file]\n"); |
fprintf(stderr, "Usage: %s [-h file] [-flqrvw] [file]\n", __progname); |
exit(1); |
exit(EX_USAGE); |
} |
} |