src/usr.bin/tsort/tsort.c - diff

Return to tsort.c CVS log

Up to [local] / src / usr.bin / tsort

Diff for /src/usr.bin/tsort/tsort.c between version 1.4 and 1.5

-version 1.4, 1997/01/15 23:43:26
+version 1.5, 2001/03/26 22:53:33
 Line 1
 Line 1
 Line 1
- /*      $OpenBSD$       */
+ /* $OpenBSD$ */
- /*      $NetBSD: tsort.c,v 1.11 1996/01/17 20:37:53 mycroft Exp $       */
+ /* ex:ts=8 sw=4:
+  */
  /*
-  * Copyright (c) 1989, 1993, 1994
+  * Copyright (c) 1999-2001 Marc Espie.
-  *      The Regents of the University of California.  All rights reserved.
   *
-  * This code is derived from software contributed to Berkeley by
-  * Michael Rendell of Memorial University of Newfoundland.
-  *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
-Line 18
+Line 15
 Line 18
 Line 15
   *    documentation and/or other materials provided with the distribution.
   * 3. All advertising materials mentioning features or use of this software
   *    must display the following acknowledgement:
-  *      This product includes software developed by the University of
+  * This product includes software developed by Marc Espie for the OpenBSD
-  *      California, Berkeley and its contributors.
+  * Project.
-  * 4. Neither the name of the University nor the names of its contributors
-  *    may be used to endorse or promote products derived from this software
-  *    without specific prior written permission.
   *
-  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
-  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
-  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  * SUCH DAMAGE.
+  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
- #ifndef lint
- static char copyright[] =
- "@(#) Copyright (c) 1989, 1993, 1994\n\
-         The Regents of the University of California.  All rights reserved.\n";
- #endif /* not lint */
- #ifndef lint
- #if 0
- static char sccsid[] = "@(#)tsort.c     8.3 (Berkeley) 5/4/95";
- #endif
- static char rcsid[] = "$OpenBSD$";
- #endif /* not lint */
  #include <sys/types.h>
+ #include <assert.h>
  #include <ctype.h>
- #include <db.h>
  #include <err.h>
- #include <errno.h>
+ #include <limits.h>
- #include <fcntl.h>
+ #include <stddef.h>
+ #include <ohash.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
+ #include <sysexits.h>
  #include <unistd.h>
- /*
+ /* The complexity of topological sorting is O(e), where e is the
-  *  Topological sort.  Input is a list of pairs of strings separated by
+  * size of input.  While reading input, vertices have to be identified,
-  *  white space (spaces, tabs, and/or newlines); strings are written to
+  * thus add the complexity of e key retrievals among v keys using
-  *  standard output in sorted order, one per line.
+  * an appropriate data structure.  This program uses open double hashing
+  * for that purpose.  See Knuth for the expected complexity of double
+  * hashing (Brent variation should probably be used if v << e, as a user
+  * option).
   *
-  *  usage:
+  * The algorithm used for longest cycle reporting is accurate, but somewhat
-  *     tsort [-l] [inputfile]
+  * expensive.  It may need to build all free paths of the graph (a free
-  *  If no input file is specified, standard input is read.
+  * path is a path that never goes twice through the same node), whose
+  * number can be as high as O(2^e).  Usually, the number of free paths is
+  * much smaller though.  This program's author does not believe that a
+  * significantly better worst-case complexity algorithm exists.
   *
-  *  Should be compatable with AT&T tsort HOWEVER the output is not identical
+  * In case of a hints file, the set of minimal nodes is maintained as a
-  *  (i.e. for most graphs there is more than one sorted order, and this tsort
+  * heap.  The resulting complexity is O(e+v log v) for the worst case.
-  *  usually generates a different one then the AT&T tsort).  Also, cycle
+  * The average should actually be near O(e).
-  *  reporting seems to be more accurate in this version (the AT&T tsort
-  *  sometimes says a node is in a cycle when it isn't).
   *
-  *  Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90
+  * The simple topological sort algorithm detects cycles.  This program
+  * goes further, breaking cycles through the use of simple heuristics.
+  * Each cycle break checks the whole set of nodes, hence if c cycle breaks
+  * are needed, this is an extra cost of O(c v).
+  *
+  * Possible heuristics are as follows:
+  * - break cycle at node with lowest number of predecessors (default case),
+  * - break longest cycle at node with lowest number of predecessors,
+  * - break cycle at next node from the hints file.
+  *
+  * Except for the hints file case, which sets an explicit constraint on
+  * which cycle to break, those heuristics locally result in the smallest
+  * number of broken edges.
+  *
+  * Those are admittedly greedy strategies, as is the selection of the next
+  * node from the hints file amongst equivalent candidates that is used for
+  * `stable' topological sorting.
   */
- #define HASHSIZE        53              /* doesn't need to be big */
- #define NF_MARK         0x1             /* marker for cycle detection */
- #define NF_ACYCLIC      0x2             /* this node is cycle free */
- #define NF_NODEST       0x4             /* Unreachable */
- typedef struct node_str NODE;
+ #ifdef __GNUC__
+ #define UNUSED  __attribute__((unused))
+ #else
+ #define UNUSED
+ #endif
- struct node_str {
+ struct node;
-         NODE **n_prevp;                 /* pointer to previous node's n_next */
-         NODE *n_next;                   /* next node in graph */
+ /* The set of arcs from a given node is stored as a linked list.  */
-         NODE **n_arcs;                  /* array of arcs to other nodes */
+ struct link {
-         int n_narcs;                    /* number of arcs in n_arcs[] */
+         struct link *next;
-         int n_arcsize;                  /* size of n_arcs[] array */
+         struct node *node;
-         int n_refcnt;                   /* # of arcs pointing to this node */
-         int n_flags;                    /* NF_* */
-         char n_name[1];                 /* name of this node */
  };
- typedef struct _buf {
+ struct node {
-         char *b_buf;
+         unsigned int refs;      /* Number of arcs left, coming into this node .
-         int b_bsize;
+                                  * Note that nodes with a null count can't
- } BUF;
+                                  * be part of cycles.  */
+         struct link  *arcs;     /* List of forward arcs.  */
- DB *db;
+         unsigned int order;     /* Order of nodes according to a hint file.  */
- NODE *graph, **cycle_buf, **longest_cycle;
- int debug, longest, quiet;
- void     add_arc __P((char *, char *));
+         /* Cycle detection algorithms build a free path of nodes.  */
- int      find_cycle __P((NODE *, NODE *, int, int));
+         struct node  *from;     /* Previous node in the current path.  */
- NODE    *get_node __P((char *));
- void    *grow_buf __P((void *, int));
- void     remove_node __P((NODE *));
- void     tsort __P((void));
- void     usage __P((void));
- int
+         unsigned int mark;      /* Mark processed nodes in cycle discovery.  */
- main(argc, argv)
+         struct link  *traverse; /* Next link to traverse when backtracking.  */
-         int argc;
+         char         k[1];      /* Name of this node.  */
-         char *argv[];
+ };
+ /* Value handed to ohash_init() by nodes_init() as its second argument;
+  * presumably the initial size parameter of the node table -- TODO confirm
+  * against ohash_init(3).  */
+ #define HASH_START 9
+ /* Array of node pointers with an explicit element count.  The same type is
+  * used as a plain stack (see DEQUEUE/ENQUEUE without hints) and as a binary
+  * heap keyed on node->order (see heap_down/enqueue/dequeue).  */
+ struct array {
+         unsigned int entries;   /* Number of valid elements in t[].  */
+         struct node  **t;       /* Storage; split_nodes() allocates it.  */
+ };
+ /* Forward declarations for every file-local routine.  __P is not defined
+  * in this file; presumably it is the BSD prototype wrapper from
+  * <sys/cdefs.h> -- TODO confirm.  */
+ static void nodes_init __P((struct ohash *));
+ static struct node *node_lookup __P((struct ohash *, const char *, const char *));
+ static void usage __P((void));
+ static struct node *new_node __P((const char *, const char *));
+ static void read_pairs __P((FILE *, struct ohash *, int, const char *));
+ static void split_nodes __P((struct ohash *, struct array *, struct array *));
+ static void insert_arc __P((struct node *, struct node *));
+ /* Dump helpers are only compiled when DEBUG is defined.  */
+ #ifdef DEBUG
+ static void dump_node __P((struct node *));
+ static void dump_array __P((struct array *));
+ static void dump_hash __P((struct ohash *));
+ #endif
+ static void read_hints __P((FILE *, struct ohash *, const char *));
+ static struct node *find_smallest_node __P((struct array *));
+ static struct node *find_good_cycle_break __P((struct array *));
+ static void print_cycle __P((struct array *));
+ static int find_cycle_with __P((struct node *, struct array *));
+ static struct node *find_predecessor __P((struct array *, struct node *));
+ static unsigned int traverse_node __P((struct node *, unsigned int, struct array *));
+ static struct node *find_longest_cycle __P((struct array *, struct array *));
+ static void heap_down __P((struct array *, unsigned int));
+ static void heapify __P((struct array *));
+ static struct node *dequeue __P((struct array *));
+ static void enqueue __P((struct array *, struct node *));
+ /* realloc() routed through emem(), which exits with an error message
+  * instead of returning NULL.  */
+ #define erealloc(n, s)  emem(realloc(n, s))
+ static void *hash_alloc __P((size_t, void *));
+ static void hash_free __P((void *, size_t, void *));
+ static void* entry_alloc __P((size_t, void *));
+ static void *emalloc __P((size_t));
+ static void *emem __P((void *));
+ /* Set to non-zero to make traverse_node() print each node as it marks it.  */
+ #define DEBUG_TRAVERSE 0
+ /* Description of the node table handed to the ohash routines: offset of
+  * the key (node name) inside struct node, then the allocation callbacks
+  * defined below.  The meaning of the NULL second field depends on the
+  * layout of struct ohash_info -- TODO confirm against <ohash.h>.  */
+ static struct ohash_info node_info = {
+         offsetof(struct node, k), NULL, hash_alloc, hash_free, entry_alloc };
+ int main __P((int, char *[]));
+ /***
+  *** Memory handling.
+  ***/
+ static void *
+ emem(p)
+         void            *p;
  {
-         register BUF *b;
+         if (p)
-         register int c, n;
+                 return p;
-         FILE *fp;
+         else
-         int bsize, ch, nused;
+                 errx(EX_SOFTWARE, "Memory exhausted");
-         BUF bufs[2];
+ }
-         while ((ch = getopt(argc, argv, "dlq")) != -1)
+ static void *
-                 switch (ch) {
+ hash_alloc(s, u)
-                 case 'd':
+         size_t s;
-                         debug = 1;
+         void *u         UNUSED;
-                         break;
+ {
-                 case 'l':
+         return emem(calloc(s, 1));
-                         longest = 1;
+ }
-                         break;
-                 case 'q':
-                         quiet = 1;
-                         break;
-                 case '?':
-                 default:
-                         usage();
-                 }
-         argc -= optind;
-         argv += optind;
-         switch (argc) {
+ static void
-         case 0:
+ hash_free(p, s, u)
-                 fp = stdin;
+         void *p;
-                 break;
+         size_t s        UNUSED;
-         case 1:
+         void *u         UNUSED;
-                 if ((fp = fopen(*argv, "r")) == NULL)
+ {
-                         err(1, "%s", *argv);
+         free(p);
-                 break;
+ }
-         default:
-                 usage();
-         }
-         for (b = bufs, n = 2; --n >= 0; b++)
+ static void *
-                 b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
+ entry_alloc(s, u)
+         size_t s;
+         void *u         UNUSED;
+ {
+         return emalloc(s);
+ }
-         /* parse input and build the graph */
+ static void *
-         for (n = 0, c = getc(fp);;) {
+ emalloc(s)
-                 while (c != EOF && isspace(c))
+         size_t s;
-                         c = getc(fp);
+ {
-                 if (c == EOF)
+         return emem(malloc(s));
-                         break;
+ }
-                 nused = 0;
-                 b = &bufs[n];
+ /***
-                 bsize = b->b_bsize;
+  *** Hash table.
-                 do {
+  ***/
-                         b->b_buf[nused++] = c;
-                         if (nused == bsize)
-                                 b->b_buf = grow_buf(b->b_buf, bsize *= 2);
-                         c = getc(fp);
-                 } while (c != EOF && !isspace(c));
-                 b->b_buf[nused] = '\0';
+ /* Inserting and finding nodes in the hash structure.
-                 b->b_bsize = bsize;
+  * We handle interval strings for efficiency wrt fgetln.  */
-                 if (n)
+ static struct node *
-                         add_arc(bufs[0].b_buf, bufs[1].b_buf);
+ new_node(start, end)
-                 n = !n;
+         const char      *start;
-         }
+         const char      *end;
-         (void)fclose(fp);
+ {
-         if (n)
+         struct node     *n;
-                 errx(1, "odd data count");
-         /* do the sort */
+         n = ohash_create_entry(&node_info, start, &end);
-         tsort();
+         n->from = NULL;
-         exit(0);
+         n->arcs = NULL;
+         n->refs = 0;
+         n->mark = 0;
+         n->order = 0;
+         n->traverse = NULL;
+         return n;
  }
- /* double the size of oldbuf and return a pointer to the new buffer. */
- void *
+ static void
- grow_buf(bp, size)
+ nodes_init(h)
-         void *bp;
+         struct ohash    *h;
-         int size;
  {
-         if ((bp = realloc(bp, (u_int)size)) == NULL)
+         ohash_init(h, HASH_START, &node_info);
-                 err(1, NULL);
-         return (bp);
  }
- /*
+ static struct node *
-  * add an arc from node s1 to node s2 in the graph.  If s1 or s2 are not in
+ node_lookup(h, start, end)
-  * the graph, then add them.
+         struct ohash    *h;
-  */
+         const char      *start;
- void
+         const char      *end;
- add_arc(s1, s2)
-         char *s1, *s2;
  {
-         register NODE *n1;
+         unsigned int    i;
-         NODE *n2;
+         struct node *   n;
-         int bsize, i;
-         n1 = get_node(s1);
+         i = ohash_qlookupi(h, start, &end);
-         if (!strcmp(s1, s2))
+         n = ohash_find(h, i);
+         if (n == NULL)
+                 n = ohash_insert(h, i, new_node(start, end));
+         return n;
+ }
+ #ifdef DEBUG
+ /* Debug helper: print one live node and its forward arcs on a single line,
+  * as "name (refs): succ(refs) succ(refs) ...".  Nodes whose reference
+  * count is zero are not printed at all.  */
+ static void
+ dump_node(n)
+     struct node         *n;
+ {
+         struct link     *l;
+         if (n->refs == 0)
                  return;
+         printf("%s (%u): ", n->k, n->refs);
+         /* n->refs is known to be non-zero past the early return above, so
+          * every arc is printed; no per-arc re-test of n->refs is needed.  */
+         for (l = n->arcs; l != NULL; l = l->next)
+                 printf("%s(%u) ", l->node->k, l->node->refs);
+         putchar('\n');
+ }
-         n2 = get_node(s2);
+ static void
+ dump_array(a)
+         struct array    *a;
+ {
+         unsigned int    i;
-         /*
+         for (i = 0; i < a->entries; i++)
-          * Check if this arc is already here.
+                 dump_node(a->t[i]);
-          */
+ }
-         for (i = 0; i < n1->n_narcs; i++)
-                 if (n1->n_arcs[i] == n2)
+ /* Debug helper: call dump_node() on every node stored in hash table h, in
+  * the order produced by ohash_first()/ohash_next().  The parameter type
+  * matches the prototype (struct ohash *), as required by those calls.  */
+ static void
+ dump_hash(h)
+         struct ohash    *h;
+ {
+         unsigned int    i;
+         struct node     *n;
+         for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i))
+                 dump_node(n);
+ }
+ #endif
+ /***
+  *** Reading data.
+  ***/
+ static void
+ insert_arc(a, b)
+         struct node     *a, *b;
+ {
+         struct link     *l;
+         /* Check that this arc is not already present.  */
+         for (l = a->arcs; l != NULL; l = l->next) {
+                 if (l->node == b)
                          return;
-         /*
-          * Add it.
-          */
-         if (n1->n_narcs == n1->n_arcsize) {
-                 if (!n1->n_arcsize)
-                         n1->n_arcsize = 10;
-                 bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
-                 n1->n_arcs = grow_buf(n1->n_arcs, bsize);
-                 n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
          }
-         n1->n_arcs[n1->n_narcs++] = n2;
+         b->refs++;
-         ++n2->n_refcnt;
+         l = emalloc(sizeof(struct link));
+         l->node = b;
+         l->next = a->arcs;
+         a->arcs = l;
  }
- /* Find a node in the graph (insert if not found) and return a pointer to it. */
+ /* Read whitespace-separated node names from f and consume them two at a
+  * time.  The first name of a pair is looked up in h (node_lookup creates
+  * it when absent) and receives the next order number if it has none yet.
+  * The second name yields an arc a -> b, or b -> a when reverse is non-zero;
+  * a pair naming the same node twice adds no arc.  A pair may be split
+  * across lines, since the toggle state survives from line to line.
+  *
+  * name is only used in diagnostics.  Exits with EX_DATAERR when the total
+  * number of names is odd, and with EX_IOERR when fgetln() stopped for a
+  * reason other than end of file.  */
+ static void
- NODE *
+ read_pairs(f, h, reverse, name)
- get_node(name)
+         FILE            *f;
-         char *name;
+         struct ohash    *h;
+         int             reverse;
+         const char      *name;
  {
-         DBT data, key;
+         int             toggle;
-         NODE *n;
+         struct node     *a;
+         size_t          size;
+         char            *str;
+         unsigned int    o;
-         if (db == NULL &&
+         o = 1;
-             (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)
+         toggle = 1;
-                 err(1, "db: %s", name);
+         a = NULL;
+         while ((str = fgetln(f, &size)) != NULL) {
+                 char *sentinel;
-         key.data = name;
+                 /* fgetln() lines are not NUL-terminated: [str, sentinel)
+                  * is the only readable range.  */
+                 sentinel = str + size;
-         key.size = strlen(name) + 1;
+                 for (;;) {
+                         char *e;
-         switch ((*db->get)(db, &key, &data, 0)) {
+                         /* Bounds test first, so *sentinel is never read;
+                          * cast because isspace() needs an unsigned char
+                          * value.  */
+                         while (str < sentinel && isspace((unsigned char)*str))
-         case 0:
+                                 str++;
-                 bcopy(data.data, &n, sizeof(n));
+                         if (str == sentinel)
-                 return (n);
+                                 break;
-         case 1:
+                         for (e = str; e < sentinel && !isspace((unsigned char)*e); e++)
-                 break;
+                                 continue;
-         default:
+                         if (toggle) {
-         case -1:
+                                 a = node_lookup(h, str, e);
-                 err(1, "db: %s", name);
+                                 if (a->order == 0)
+                                         a->order = o++;
+                         } else {
+                                 struct node *b;
+                                 b = node_lookup(h, str, e);
+                                 assert(a != NULL);
+                                 if (b != a) {
+                                         if (reverse)
+                                                 insert_arc(b, a);
+                                         else
+                                                 insert_arc(a, b);
+                                 }
+                         }
+                         toggle = !toggle;
+                         str = e;
+                 }
          }
+         if (toggle == 0)
+                 errx(EX_DATAERR, "odd number of pairs in %s", name);
+         if (!feof(f))
+                 err(EX_IOERR, "error reading %s", name);
+ }
-         if ((n = malloc(sizeof(NODE) + key.size)) == NULL)
+ /* Read whitespace-separated node names from hints file f and number them
+  * 1, 2, 3, ... in order of appearance by storing the number in
+  * node->order.  Names not yet in h are created by node_lookup().  A node
+  * whose order is already non-zero is reported as a duplicate and the
+  * program exits with EX_DATAERR; name is only used in that diagnostic.  */
+ static void
-                 err(1, NULL);
+ read_hints(f, h, name)
+         FILE            *f;
+         struct ohash    *h;
+         const char      *name;
+ {
+         char            *str;
+         size_t          size;
+         unsigned int    i;
-         n->n_narcs = 0;
+         i = 1;
-         n->n_arcsize = 0;
-         n->n_arcs = NULL;
-         n->n_refcnt = 0;
-         n->n_flags = 0;
-         bcopy(name, n->n_name, key.size);
-         /* Add to linked list. */
+         while ((str = fgetln(f, &size)) != NULL) {
-         if ((n->n_next = graph) != NULL)
+                 char *sentinel;
-                 graph->n_prevp = &n->n_next;
-         n->n_prevp = &graph;
-         graph = n;
-         /* Add to hash table. */
+                 /* fgetln() lines are not NUL-terminated: [str, sentinel)
+                  * is the only readable range.  */
+                 sentinel = str + size;
-         data.data = &n;
+                 for (;;) {
-         data.size = sizeof(n);
+                         char *e;
-         if ((*db->put)(db, &key, &data, 0))
+                         struct node *a;
-                 err(1, "db: %s", name);
-         return (n);
+                         /* Bounds test first, so *sentinel is never read;
+                          * cast because isspace() needs an unsigned char
+                          * value.  */
+                         while (str < sentinel && isspace((unsigned char)*str))
+                                 str++;
+                         if (str == sentinel)
+                                 break;
+                         for (e = str; e < sentinel && !isspace((unsigned char)*e); e++)
+                                 continue;
+                         a = node_lookup(h, str, e);
+                         if (a->order != 0)
+                                 errx(EX_DATAERR,
+                                         "duplicate node %s in hints file %s",
+                                         a->k, name);
+                         else
+                                 a->order = i++;
+                         str = e;
+                 }
+         }
  }
+ /***
+  *** Standard heap handling routines.
+  ***/
- /*
+ static void
-  * Clear the NODEST flag from all nodes.
+ heap_down(h, i)
-  */
+         struct array    *h;
- void
+         unsigned int    i;
- clear_cycle()
  {
-         NODE *n;
+         unsigned int    j;
+         struct node     *swap;
-         for (n = graph; n != NULL; n = n->n_next)
+         for (; (j=2*i+1) < h->entries; i = j) {
-                 n->n_flags &= ~NF_NODEST;
+                 if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order)
+                         j++;
+                 if (h->t[i]->order <= h->t[j]->order)
+                         break;
+                 swap = h->t[i];
+                 h->t[i] = h->t[j];
+                 h->t[j] = swap;
+         }
  }
- /* do topological sort on graph */
+ static void
- void
+ heapify(h)
- tsort()
+         struct array    *h;
  {
-         register NODE *n, *next;
+         unsigned int    i;
-         register int cnt, i;
-         while (graph != NULL) {
+         for (i = h->entries; i != 0;)
-                 /*
+                 heap_down(h, --i);
-                  * Keep getting rid of simple cases until there are none left,
+ }
-                  * if there are any nodes still in the graph, then there is
-                  * a cycle in it.
-                  */
-                 do {
-                         for (cnt = 0, n = graph; n != NULL; n = next) {
-                                 next = n->n_next;
-                                 if (n->n_refcnt == 0) {
-                                         remove_node(n);
-                                         ++cnt;
-                                 }
-                         }
-                 } while (graph != NULL && cnt);
-                 if (graph == NULL)
+ /* Take the next node out of array h.  When hints_flag is set the array is
+  * a heap and dequeue() is used; otherwise the array is a plain stack and
+  * the last element is popped.  NOTE: the stack branch does not test for an
+  * empty array (dequeue() does), so (h)->entries must be non-zero there.  */
+ #define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] )
+ /* Remove the root of heap h (the node with the smallest order) and return
+  * it; return NULL when the heap holds no entries.  */
+ static struct node *
+ dequeue(h)
+         struct array    *h;
+ {
+         struct node     *top;
+         if (h->entries == 0)
+                 return NULL;
+         top = h->t[0];
+         h->entries--;
+         if (h->entries != 0) {
+                 /* Promote the last element to the root, then sift it
+                  * down to restore heap order.  */
+                 h->t[0] = h->t[h->entries];
+                 heap_down(h, 0);
+         }
+         return top;
+ }
+ #define ENQUEUE(h, n) do {                      \
+         if (hints_flag)                         \
+                 enqueue((h), (n));              \
+         else                                    \
+                 (h)->t[(h)->entries++] = (n);   \
+         } while(0);
+ /* Insert node n into heap h: append it at the end, then move it up while
+  * its parent's order is not strictly smaller than its own.  */
+ static void
+ enqueue(h, n)
+         struct array    *h;
+         struct node     *n;
+ {
+         unsigned int    child, parent;
+         struct node     *tmp;
+         child = h->entries++;
+         h->t[child] = n;
+         while (child > 0) {
+                 parent = (child - 1) / 2;
+                 if (h->t[parent]->order < h->t[child]->order)
                          break;
+                 tmp = h->t[parent];
+                 h->t[parent] = h->t[child];
+                 h->t[child] = tmp;
+                 child = parent;
+         }
+ }
-                 if (!cycle_buf) {
-                         /*
+ /***
-                          * Allocate space for two cycle logs - one to be used
+  *** Search through hash array for nodes.
-                          * as scratch space, the other to save the longest
+  ***/
-                          * cycle.
-                          */
+ /* Split nodes into unrefed nodes/live nodes.  */
-                         for (cnt = 0, n = graph; n != NULL; n = n->n_next)
+ static void
-                                 ++cnt;
+ split_nodes(hash, heap, remaining)
-                         cycle_buf = malloc((u_int)sizeof(NODE *) * cnt);
+         struct ohash    *hash;
-                         longest_cycle = malloc((u_int)sizeof(NODE *) * cnt);
+         struct array    *heap;
-                         if (cycle_buf == NULL || longest_cycle == NULL)
+         struct array    *remaining;
-                                 err(1, NULL);
+ {
+         struct node *n;
+         unsigned int i;
+         heap->t = emalloc(sizeof(struct node *) * ohash_entries(hash));
+         remaining->t = emalloc(sizeof(struct node *) * ohash_entries(hash));
+         heap->entries = 0;
+         remaining->entries = 0;
+         for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) {
+                 if (n->refs == 0)
+                         heap->t[heap->entries++] = n;
+                 else
+                         remaining->t[remaining->entries++] = n;
+         }
+ }
+ /* Good point to break a cycle: live node with as few refs as possible. */
+ static struct node *
+ find_good_cycle_break(h)
+         struct array    *h;
+ {
+         unsigned int    i;
+         unsigned int    best;
+         struct node     *u;
+         best = UINT_MAX;
+         u = NULL;
+         assert(h->entries != 0);
+         for (i = 0; i < h->entries; i++) {
+                 struct node *n = h->t[i];
+                 /* No need to look further. */
+                 if (n->refs == 1)
+                         return n;
+                 if (n->refs != 0 && n->refs < best) {
+                         best = n->refs;
+                         u = n;
                  }
-                 for (n = graph; n != NULL; n = n->n_next)
+         }
-                         if (!(n->n_flags & NF_ACYCLIC))
+         assert(u != NULL);
-                                 if (cnt = find_cycle(n, n, 0, 0)) {
+         return u;
-                                         if (!quiet) {
+ }
-                                                 warnx("cycle in data");
-                                                 for (i = 0; i < cnt; i++)
+ /*  Retrieve the node with the smallest order.  */
-                                                         warnx("%s",
+ static struct node *
-                                                             longest_cycle[i]->n_name);
+ find_smallest_node(h)
-                                         }
+         struct array    *h;
-                                         remove_node(n);
+ {
-                                         clear_cycle();
+         unsigned int    i;
-                                         break;
+         unsigned int    best;
-                                 } else {
+         struct node     *u;
-                                         /* to avoid further checks */
-                                         n->n_flags  |= NF_ACYCLIC;
-                                         clear_cycle();
-                                 }
-                 if (n == NULL)
+         best = UINT_MAX;
-                         errx(1, "internal error -- could not find cycle");
+         u = NULL;
+         assert(h->entries != 0);
+         for (i = 0; i < h->entries; i++) {
+                 struct node *n = h->t[i];
+                 if (n->refs != 0 && n->order < best) {
+                         best = n->order;
+                         u = n;
+                 }
          }
+         assert(u != NULL);
+         return u;
  }
- /* print node and remove from graph (does not actually free node) */
- void
+ /***
- remove_node(n)
+  *** Graph algorithms.
-         register NODE *n;
+  ***/
+ /* Explore the nodes reachable from i to find a cycle containing it, store
+  * it in c.  This may fail.  */
+ static int
+ find_cycle_with(i, c)
+         struct node     *i;
+         struct array    *c;
  {
-         register NODE **np;
+         struct node     *n;
-         register int i;
-         (void)printf("%s\n", n->n_name);
+         n = i;
-         for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++)
+         /* XXX Previous cycle findings may have left this pointer non-null.  */
-                 --(*np)->n_refcnt;
+         i->from = NULL;
-         n->n_narcs = 0;
-         *n->n_prevp = n->n_next;
+         for (;;) {
-         if (n->n_next)
+                 /* Note that all marks are reversed before this code exits.  */
-                 n->n_next->n_prevp = n->n_prevp;
+                 n->mark = 1;
+                 if (n->traverse)
+                         n->traverse = n->traverse->next;
+                 else
+                         n->traverse = n->arcs;
+                 /* Skip over dead nodes.  */
+                 while (n->traverse && n->traverse->node->refs == 0)
+                         n->traverse = n->traverse->next;
+                 if (n->traverse) {
+                         struct node *go = n->traverse->node;
+                         if (go->mark) {
+                                 if (go == i) {
+                                         c->entries = 0;
+                                         for (; n != NULL; n = n->from)
+                                                 c->t[c->entries++] = n;
+                                         return 1;
+                                 }
+                         } else {
+                             go->from = n;
+                             n = go;
+                         }
+                 } else {
+                         n->mark = 0;
+                         n = n->from;
+                         if (n == NULL)
+                                 return 0;
+                 }
+         }
  }
+ /* Find a live predecessor of node n.  This is a slow routine, as it needs
+  * to go through the whole array, but it is not needed often.
+  */
+ static struct node *
+ find_predecessor(a, n)
+         struct array *a;
+         struct node *n;
+ {
+         unsigned int i;
- /* look for the longest? cycle from node from to node to. */
+         for (i = 0; i < a->entries; i++) {
- int
+                 struct node *m;
- find_cycle(from, to, longest_len, depth)
-         NODE *from, *to;
+                 m = a->t[i];
-         int depth, longest_len;
+                 if (m->refs != 0) {
+                         struct link *l;
+                         for (l = m->arcs; l != NULL; l = l->next)
+                                 if (l->node == n)
+                                         return m;
+                 }
+         }
+         assert(1 == 0);
+         return NULL;
+ }
+ /* Traverse all strongly connected components reachable from node n.
+    Start numbering them at o. Return the maximum order reached.
+    Update the largest cycle found so far.
+    The walk is iterative: n->traverse is the cursor into the arcs of
+    each node, n->from links back to the node we arrived from, and
+    n->mark receives the current order number each time around the loop.
+    A node we backtrack out of gets its mark reset to the initial value
+    of o.  Reaching a node whose mark is greater than that initial value
+    closes a cycle of o - go->mark + 1 nodes; when that exceeds
+    c->entries, the cycle is copied into c->t (go first, then the nodes
+    found by following the from links back from n).
+  */
+ static unsigned int
+ traverse_node(n, o, c)
+         struct node     *n;
+         unsigned int    o;
+         struct array    *c;
  {
-         register NODE **np;
+         unsigned int    min, max;
-         register int i, len;
-         /*
+         n->from = NULL;
-          * avoid infinite loops and ignore portions of the graph known
+         min = o;
-          * to be acyclic
+         max = ++o;
-          */
-         if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
-                 return (0);
-         from->n_flags |= NF_MARK;
-         for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
+         for (;;) {
-                 cycle_buf[depth] = *np;
+                 n->mark = o;
-                 if (*np == to) {
+                 if (DEBUG_TRAVERSE)
-                         if (depth + 1 > longest_len) {
+                         printf("%s(%d) ", n->k, n->mark);
-                                 longest_len = depth + 1;
+                 /* Find next arc to explore.  */
-                                 (void)memcpy((char *)longest_cycle,
+                 if (n->traverse)
-                                     (char *)cycle_buf,
+                         n->traverse = n->traverse->next;
-                                     longest_len * sizeof(NODE *));
+                 else
+                         n->traverse = n->arcs;
+                 /* Skip over dead nodes.  */
+                 while (n->traverse && n->traverse->node->refs == 0)
+                         n->traverse = n->traverse->next;
+                 /* If arc left.  */
+                 if (n->traverse) {
+                         struct node     *go;
+                         go = n->traverse->node;
+                         /* Optimisation: if go->mark < min, we already
+                          * visited this strongly-connected component in
+                          * a previous pass.  Hence, this can yield no new
+                          * cycle.  */
+                         /* Not part of the current path: go for it.  */
+                         if (go->mark == 0 || go->mark == min) {
+                                 go->from = n;
+                                 n = go;
+                                 o++;
+                                 if (o > max)
+                                         max = o;
+                         /* Part of the current path: check cycle length.  */
+                         } else if (go->mark > min) {
+                                 if (DEBUG_TRAVERSE)
+                                         printf("%d\n", o - go->mark + 1);
+                                 if (o - go->mark + 1 > c->entries) {
+                                         struct node *t;
+                                         unsigned int i;
+                                         c->entries = o - go->mark + 1;
+                                         i = 0;
+                                         c->t[i++] = go;
+                                         for (t = n; t != go; t = t->from)
+                                                 c->t[i++] = t;
+                                 }
                          }
+                 /* No arc left: backtrack.  */
                  } else {
-                         if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
+                         n->mark = min;
-                                 continue;
+                         n = n->from;
-                         len = find_cycle(*np, to, longest_len, depth + 1);
+                         if (!n)
+                                 return max;
+                         o--;
+                 }
+         }
+ }
-                         if (debug)
+ static void
-                                 (void)printf("%*s %s->%s %d\n", depth, "",
+ print_cycle(c) /* Report the key of every node stored in c->t via warnx().  */
-                                     from->n_name, to->n_name, len);
+         struct array    *c;
+ {
+         unsigned int    i;
-                         if (len == 0)
+         /* Printing in reverse order, since cycle discovery finds reverse
-                                 (*np)->n_flags |= NF_NODEST;
+          * edges.  */
+         for (i = c->entries; i != 0;) {
+                 i--; /* decrement first: i is unsigned and counts down to 0 */
+                 warnx("%s", c->t[i]->k);
+         }
+ }
-                         if (len > longest_len)
+ static struct node *
-                                 longest_len = len;
+ find_longest_cycle(h, c) /* Run traverse_node from every unmarked node of h
+                           * that still has refs, leaving the largest cycle
+                           * it finds in c, and return the node of that
+                           * cycle with the smallest refs count.  */
+         struct array    *h;
+         struct array    *c;
+ {
+         unsigned int    i;
+         unsigned int    o;
+         unsigned int    best;
+         struct node     *n;
+         static int      notfirst = 0; /* becomes 1 during the first call */
-                         if (len > 0 && !longest)
+         assert(h->entries != 0);
-                                 break;
+         /* No cycle found yet.  */
+         c->entries = 0;
+         /* Reset the set of marks, except the first time around.  */
+         if (notfirst) {
+                 for (i = 0; i < h->entries; i++)
+                         h->t[i]->mark = 0;
+         } else
+                 notfirst = 1;
+         o = 0;
+         /* Traverse the array.  Each unmarked, live node heralds a
+          * new set of strongly connected components.  */
+         for (i = 0; i < h->entries; i++) {
+                 n = h->t[i];
+                 if (n->refs != 0 && n->mark == 0) {
+                         /* Each call to traverse_node uses a separate
+                          * interval of numbers to mark nodes.  */
+                         o++;
+                         o = traverse_node(n, o, c);
                  }
          }
-         from->n_flags &= ~NF_MARK;
-         return (longest_len);
+         assert(c->entries != 0);
+         n = c->t[0]; /* candidate; replaced below by any entry with fewer refs */
+         best = n->refs;
+         for (i = 0; i < c->entries; i++) {
+                 if (c->t[i]->refs < best) { /* strict: ties keep the earlier entry */
+                         n = c->t[i];
+                         best = n->refs;
+                 }
+         }
+         return n;
  }
- void
+ /* English plural suffix: "s" when n is greater than 1, "" otherwise.  */
+ #define plural(n) ((n) > 1 ? "s" : "")
+ /* Parse the command line, read the pairs, then print every node in
+  * topological order, breaking a cycle each time the plain sort stalls.
+  *
+  * Options:
+  *   -h file  read hints from file (read_hints), then act like -f
+  *   -f       set hints_flag; giving -h/-f more than once is a usage error
+  *   -l       pick the node to break with find_longest_cycle
+  *   -q       do not report the cycles that get broken
+  *   -r       reverse_flag, handed to read_pairs
+  *   -v       report how many edges and cycles were broken
+  *   -w       exit with the number of broken cycles (capped at 255)
+  *
+  * At most one input file may be named; without one, stdin is read.
+  * Never returns: every path ends in exit(), err() or usage().
+  */
+ int
+ main(argc, argv)
+     int                 argc;
+     char                *argv[];
+ {
+         struct ohash    pairs;
+         int             reverse_flag, quiet_flag, long_flag,
+                             warn_flag, hints_flag, verbose_flag;
+         reverse_flag = quiet_flag = long_flag =
+                 warn_flag = hints_flag = verbose_flag = 0;
+         nodes_init(&pairs);
+         {
+             int c;
+             while ((c = getopt(argc, argv, "h:flqrvw")) != -1) {
+                     switch(c) {
+                     case 'h': {
+                             FILE *f;
+                             f = fopen(optarg, "r");
+                             if (f == NULL)
+                                     err(EX_NOINPUT, "Can't open hint file %s",
+                                         optarg);
+                             read_hints(f, &pairs, optarg);
+                             fclose(f);
+                     }
+                             /*FALLTHRU*/
+                     case 'f':
+                             /* Hints may only be requested once.  */
+                             if (hints_flag == 1)
+                                 usage();
+                             hints_flag = 1;
+                             break;
+                     case 'l':
+                             long_flag = 1;
+                             break;
+                     case 'q':
+                             quiet_flag = 1;
+                             break;
+                     case 'r':
+                             reverse_flag = 1;
+                             break;
+                     case 'v':
+                             verbose_flag = 1;
+                             break;
+                     case 'w':
+                             warn_flag = 1;
+                             break;
+                     default:
+                             usage();
+                     }
+             }
+             argc -= optind;
+             argv += optind;
+         }
+         switch(argc) {
+         case 1: {
+                 FILE *f;
+                 /* argv was advanced past the options, so with argc == 1
+                  * the file name is argv[0]; argv[1] is the terminating
+                  * null pointer and must not be used as a name.  */
+                 f = fopen(argv[0], "r");
+                 if (f == NULL)
+                         err(EX_NOINPUT, "Can't open file %s", argv[0]);
+                 read_pairs(f, &pairs, reverse_flag, argv[0]);
+                 fclose(f);
+                 break;
+         }
+         case 0:
+                 read_pairs(stdin, &pairs, reverse_flag, "stdin");
+                 break;
+         default:
+                 usage();
+         }
+         {
+             struct array        aux;    /* Unrefed nodes/cycle reporting.  */
+             struct array        remaining;
+             unsigned int        broken_arcs, broken_cycles;
+             unsigned int        left;
+             broken_arcs = 0;
+             broken_cycles = 0;
+             split_nodes(&pairs, &aux, &remaining);
+             ohash_delete(&pairs);
+             if (hints_flag)
+                     heapify(&aux);
+             /* Number of nodes that still have to be printed.  */
+             left = remaining.entries + aux.entries;
+             while (left != 0) {
+                     /* Standard topological sort.  */
+                     while (aux.entries) {
+                             struct link *l;
+                             struct node *n;
+                             n = DEQUEUE(&aux);
+                             printf("%s\n", n->k);
+                             left--;
+                             /* We can't free nodes, as we don't know which
+                              * entry we can remove in the hash table.  We
+                              * rely on refs == 0 to recognize live nodes.
+                              * Decrease ref count of live nodes, enter new
+                              * candidates into the unrefed list.  */
+                             for (l = n->arcs; l != NULL; l = l->next)
+                                     if (l->node->refs != 0 &&
+                                         --l->node->refs == 0) {
+                                             ENQUEUE(&aux, l->node);
+                                     }
+                     }
+                     /* There are still cycles to break.  */
+                     if (left != 0) {
+                             struct node *n;
+                             broken_cycles++;
+                             /* XXX Simple cycle detection and long cycle
+                              * detection are mutually exclusive.  */
+                             if (long_flag) {
+                                     n = find_longest_cycle(&remaining, &aux);
+                             } else {
+                                     if (hints_flag)
+                                             n = find_smallest_node(&remaining);
+                                     else
+                                             n = find_good_cycle_break(&remaining);
+                                     /* The cycle itself is only needed for
+                                      * the report: walk back through
+                                      * predecessors until a node that lies
+                                      * on a cycle is found.  */
+                                     if (!quiet_flag) {
+                                             while (!find_cycle_with(n, &aux))
+                                                     n = find_predecessor(&remaining, n);
+                                     }
+                             }
+                             if (!quiet_flag) {
+                                     warnx("cycle in data");
+                                     print_cycle(&aux);
+                             }
+                             if (verbose_flag)
+                                     warnx("%u edge%s broken", n->refs,
+                                         plural(n->refs));
+                             /* Dropping all references to n breaks the
+                              * cycle and makes n the next node to print.  */
+                             broken_arcs += n->refs;
+                             n->refs = 0;
+                             /* Reinitialization, cycle reporting uses aux.  */
+                             aux.t[0] = n;
+                             aux.entries = 1;
+                     }
+             }
+             if (verbose_flag && broken_cycles != 0)
+                     warnx("%u cycle%s broken, for a total of %u edge%s",
+                         broken_cycles, plural(broken_cycles),
+                         broken_arcs, plural(broken_arcs));
+             /* With -w the exit status is the number of broken cycles,
+              * capped at 255.  */
+             if (warn_flag)
+                     exit(broken_cycles < 256 ? broken_cycles : 255);
+             else
+                     exit(EX_OK);
+         }
+ }
+ extern char *__progname; /* program name; defined outside this file */
+ static void /* Print the usage line on stderr and exit with EX_USAGE.  */
  usage()
  {
-         (void)fprintf(stderr, "usage: tsort [-lq] [file]\n");
+         fprintf(stderr, "Usage: %s [-h file] [-flqrvw] [file]\n", __progname);
-         exit(1);
+         exit(EX_USAGE);
  }