Annotation of src/usr.bin/sort/append.c, Revision 1.7
1.7 ! millert 1: /* $OpenBSD: append.c,v 1.6 2001/02/04 21:27:00 ericj Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (c) 1993
5: * The Regents of the University of California. All rights reserved.
6: *
7: * This code is derived from software contributed to Berkeley by
8: * Peter McIlroy.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: * 3. All advertising materials mentioning features or use of this software
19: * must display the following acknowledgement:
20: * This product includes software developed by the University of
21: * California, Berkeley and its contributors.
22: * 4. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: */
38:
39: #ifndef lint
40: #if 0
41: static char sccsid[] = "@(#)append.c 8.1 (Berkeley) 6/6/93";
42: #else
1.7 ! millert 43: static char rcsid[] = "$OpenBSD: append.c,v 1.6 2001/02/04 21:27:00 ericj Exp $";
1.1 millert 44: #endif
45: #endif /* not lint */
46:
47: #include "sort.h"
48:
49: #include <stdlib.h>
50: #include <string.h>
51:
52: #define OUTPUT { \
53: if ((n = cpos - ppos) > 1) { \
54: for (; ppos < cpos; ++ppos) \
55: *ppos -= odepth; \
56: ppos -= n; \
57: radixsort((const u_char **)ppos, n, wts1, REC_D); \
58: for (; ppos < cpos; ppos++) { \
59: prec = (RECHEADER *) (*ppos - sizeof(TRECHEADER));\
60: put(prec, fp); \
61: } \
1.2 millert 62: } else \
63: put(prec, fp); \
1.1 millert 64: }
65:
66: /*
67: * copy sorted lines to output; check for uniqueness
68: */
69: void
70: append(keylist, nelem, depth, fp, put, ftbl)
71: u_char **keylist;
72: int nelem;
1.6 ericj 73: int depth;
1.1 millert 74: FILE *fp;
75: void (*put)(RECHEADER *, FILE *);
76: struct field *ftbl;
77: {
1.6 ericj 78: u_char *wts, *wts1;
79: int n, odepth;
80: u_char **cpos, **ppos, **lastkey;
81: u_char *cend, *pend, *start;
82: RECHEADER *crec, *prec;
1.1 millert 83:
1.7 ! millert 84: if (*keylist == NULL)
1.1 millert 85: return;
86: wts1 = wts = ftbl[0].weights;
87: if ((!UNIQUE) && SINGL_FLD) {
88: if (ftbl[0].flags & F && ftbl[0].flags & R)
89: wts1 = Rascii;
90: else if (ftbl[0].flags & F)
91: wts1 = ascii;
92: odepth = depth;
93: }
94: lastkey = keylist + nelem;
95: depth += sizeof(TRECHEADER);
96: if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
97: ppos = keylist;
98: prec = (RECHEADER *) (*ppos - depth);
99: if (UNIQUE)
100: put(prec, fp);
1.6 ericj 101: for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
1.1 millert 102: crec = (RECHEADER *) (*cpos - depth);
103: if (crec->length == prec->length) {
1.6 ericj 104: /*
105: * Set pend and cend so that trailing NUL and
106: * record separator is ignored.
107: */
108: pend = (u_char *)&prec->data + prec->length - 2;
109: cend = (u_char *)&crec->data + crec->length - 2;
1.1 millert 110: for (start = *cpos; cend >= start; cend--) {
111: if (wts[*cend] != wts[*pend])
112: break;
113: pend--;
114: }
115: if (pend + 1 != *ppos) {
1.2 millert 116: if (!UNIQUE)
117: OUTPUT
118: else
1.1 millert 119: put(crec, fp);
120: ppos = cpos;
121: prec = crec;
122: }
123: } else {
1.2 millert 124: if (!UNIQUE)
125: OUTPUT
126: else
1.1 millert 127: put(crec, fp);
128: ppos = cpos;
129: prec = crec;
130: }
131: }
1.2 millert 132: if (!UNIQUE)
1.4 mickey 133: OUTPUT
1.1 millert 134: } else if (UNIQUE) {
135: ppos = keylist;
136: prec = (RECHEADER *) (*ppos - depth);
137: put(prec, fp);
1.6 ericj 138: for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
1.1 millert 139: crec = (RECHEADER *) (*cpos - depth);
140: if (crec->offset == prec->offset) {
1.6 ericj 141: /*
142: * Set pend and cend so that trailing NUL and
143: * record separator is ignored.
144: */
145: pend = (u_char *)&prec->data + prec->offset - 2;
146: cend = (u_char *)&crec->data + crec->offset - 2;
1.1 millert 147: for (start = *cpos; cend >= start; cend--) {
148: if (wts[*cend] != wts[*pend])
149: break;
150: pend--;
151: }
152: if (pend + 1 != *ppos) {
153: ppos = cpos;
154: prec = crec;
155: put(prec, fp);
156: }
157: } else {
158: ppos = cpos;
159: prec = crec;
160: put(prec, fp);
161: }
162: }
163: } else for (cpos = keylist; cpos < lastkey; cpos++) {
164: crec = (RECHEADER *) (*cpos - depth);
165: put(crec, fp);
166: }
167: }
168:
169: /*
170: * output the already sorted eol bin.
171: */
172: void
173: rd_append(binno, infl0, nfiles, outfp, buffer, bufend)
174: u_char *buffer, *bufend;
175: int binno, nfiles;
176: union f_handle infl0;
177: FILE *outfp;
178: {
1.3 millert 179: RECHEADER *rec;
1.2 millert 180:
1.1 millert 181: rec = (RECHEADER *) buffer;
182: if (!getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0)) {
183: putline(rec, outfp);
184: while (getnext(binno, infl0, nfiles, (RECHEADER *) buffer,
185: bufend, 0) == 0) {
186: if (!UNIQUE)
187: putline(rec, outfp);
188: }
189: }
190: }
191:
192: /*
193: * append plain text--used after sorting the biggest bin.
194: */
195: void
196: concat(a, b)
197: FILE *a, *b;
198: {
199: int nread;
200: char buffer[4096];
201:
202: rewind(b);
203: while ((nread = fread(buffer, 1, 4096, b)) > 0)
204: EWRITE(buffer, 1, nread, a);
205: }