version 1.46, 2003/07/31 02:53:57 |
version 1.47, 2003/07/31 20:00:03 |
|
|
{ |
{ |
struct line *p; |
struct line *p; |
int j, h; |
int j, h; |
|
int sz; |
|
|
rewind(fd); |
rewind(fd); |
p = emalloc(3 * sizeof(struct line)); |
sz = 100; |
|
p = emalloc((sz + 3) * sizeof(struct line)); |
for (j = 0; (h = readhash(fd));) { |
for (j = 0; (h = readhash(fd));) { |
p = erealloc(p, (++j + 3) * sizeof(struct line)); |
if (j == sz) { |
p[j].value = h; |
sz = sz * 3 / 2; |
|
p = erealloc(p, (sz + 3) * sizeof(struct line)); |
|
} |
|
p[++j].value = h; |
} |
} |
len[i] = j; |
len[i] = j; |
file[i] = p; |
file[i] = p; |
|
|
return (0); |
return (0); |
} |
} |
|
|
#define HASHMASK (16 - 1) /* for masking out 16 bytes */ |
|
|
|
/* |
/* |
* hashing has the effect of |
* Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. |
* arranging line in 7-bit bytes and then |
|
* summing 1-s complement in 16-bit hunks |
|
*/ |
*/ |
static int |
static int |
readhash(FILE *f) |
readhash(FILE *f) |
{ |
{ |
unsigned int shift; |
int i, t, space; |
int t, space; |
int sum; |
long sum; |
|
|
|
sum = 1; |
sum = 1; |
space = 0; |
space = 0; |
if (!bflag && !wflag) { |
if (!bflag && !wflag) { |
if (iflag) |
if (iflag) |
for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { |
for (i = 0; (t = getc(f)) != '\n'; i++) { |
if (t == EOF) { |
if (t == EOF) { |
if (shift == 0) |
if (i == 0) |
return (0); |
return (0); |
break; |
break; |
} |
} |
sum += (long)chrtran[t] << (shift &= HASHMASK); |
sum = sum * 127 + chrtran[t]; |
} |
} |
else |
else |
for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { |
for (i = 0; (t = getc(f)) != '\n'; i++) { |
if (t == EOF) { |
if (t == EOF) { |
if (shift == 0) |
if (i == 0) |
return (0); |
return (0); |
break; |
break; |
} |
} |
sum += (long)t << (shift &= HASHMASK); |
sum = sum * 127 + t; |
} |
} |
} else { |
} else { |
for (shift = 0;;) { |
for (i = 0;;) { |
switch (t = getc(f)) { |
switch (t = getc(f)) { |
case '\t': |
case '\t': |
case ' ': |
case ' ': |
|
|
continue; |
continue; |
default: |
default: |
if (space && !wflag) { |
if (space && !wflag) { |
shift += 7; |
i++; |
space = 0; |
space = 0; |
} |
} |
sum += (long)chrtran[t] << (shift &= HASHMASK); |
sum = sum * 127 + chrtran[t]; |
shift += 7; |
i++; |
continue; |
continue; |
case EOF: |
case EOF: |
if (shift == 0) |
if (i == 0) |
return (0); |
return (0); |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
case '\n': |
case '\n': |
|
|
break; |
break; |
} |
} |
} |
} |
return (sum); |
/* |
|
* There is a remote possibility that we end up with a zero sum. |
|
* Zero is used as an EOF marker, so return 1 instead. |
|
*/ |
|
return (sum == 0 ? 1 : sum); |
} |
} |
|
|
int |
int |