version 1.11, 2015/04/01 21:27:18 |
version 1.12, 2015/04/01 21:33:01 |
|
|
void |
void |
tmp_file_atexit(const char *tmp_file) |
tmp_file_atexit(const char *tmp_file) |
{ |
{ |
if (tmp_file) { |
struct CLEANABLE_FILE *item; |
struct CLEANABLE_FILE *item = |
|
sort_malloc(sizeof(struct CLEANABLE_FILE)); |
item = sort_malloc(sizeof(struct CLEANABLE_FILE)); |
item->fn = sort_strdup(tmp_file); |
item->fn = sort_strdup(tmp_file); |
LIST_INSERT_HEAD(&tmp_files, item, files); |
LIST_INSERT_HEAD(&tmp_files, item, files); |
} |
|
} |
} |
|
|
/* |
/* |
|
|
file_is_tmp(const char *fn) |
file_is_tmp(const char *fn) |
{ |
{ |
struct CLEANABLE_FILE *item; |
struct CLEANABLE_FILE *item; |
bool ret = false; |
|
|
|
if (fn) { |
LIST_FOREACH(item, &tmp_files, files) { |
LIST_FOREACH(item, &tmp_files, files) { |
if (item->fn != NULL && strcmp(item->fn, fn) == 0) |
if (item != NULL && item->fn != NULL) |
return true; |
if (strcmp(item->fn, fn) == 0) { |
|
ret = true; |
|
break; |
|
} |
|
} |
|
} |
} |
|
|
return ret; |
return false; |
} |
} |
|
|
/* |
/* |
|
|
void |
void |
file_list_init(struct file_list *fl, bool tmp) |
file_list_init(struct file_list *fl, bool tmp) |
{ |
{ |
if (fl) { |
fl->count = 0; |
fl->count = 0; |
fl->sz = 0; |
fl->sz = 0; |
fl->fns = NULL; |
fl->fns = NULL; |
fl->tmp = tmp; |
fl->tmp = tmp; |
|
} |
|
} |
} |
|
|
/* |
/* |
|
|
void |
void |
file_list_add(struct file_list *fl, char *fn, bool allocate) |
file_list_add(struct file_list *fl, char *fn, bool allocate) |
{ |
{ |
if (fl && fn) { |
if (fl->count >= fl->sz) { |
if (fl->count >= fl->sz || fl->fns == NULL) { |
fl->fns = sort_reallocarray(fl->fns, |
fl->fns = sort_reallocarray(fl->fns, |
fl->sz ? fl->sz : (fl->sz = 1), 2 * sizeof(char *)); |
fl->sz ? fl->sz : (fl->sz = 1), 2 * sizeof(char *)); |
fl->sz *= 2; |
fl->sz *= 2; |
|
} |
|
fl->fns[fl->count] = allocate ? sort_strdup(fn) : fn; |
|
fl->count += 1; |
|
} |
} |
|
fl->fns[fl->count] = allocate ? sort_strdup(fn) : fn; |
|
fl->count += 1; |
} |
} |
|
|
/* |
/* |
|
|
void |
void |
file_list_populate(struct file_list *fl, int argc, char **argv, bool allocate) |
file_list_populate(struct file_list *fl, int argc, char **argv, bool allocate) |
{ |
{ |
if (fl && argv) { |
int i; |
int i; |
|
|
|
for (i = 0; i < argc; i++) |
for (i = 0; i < argc; i++) |
file_list_add(fl, argv[i], allocate); |
file_list_add(fl, argv[i], allocate); |
} |
|
} |
} |
|
|
/* |
/* |
|
|
void |
void |
file_list_clean(struct file_list *fl) |
file_list_clean(struct file_list *fl) |
{ |
{ |
if (fl) { |
if (fl->fns) { |
if (fl->fns) { |
size_t i; |
size_t i; |
|
|
|
for (i = 0; i < fl->count; i++) { |
for (i = 0; i < fl->count; i++) { |
if (fl->fns[i]) { |
if (fl->fns[i]) { |
if (fl->tmp) |
if (fl->tmp) |
unlink(fl->fns[i]); |
unlink(fl->fns[i]); |
sort_free(fl->fns[i]); |
sort_free(fl->fns[i]); |
fl->fns[i] = 0; |
fl->fns[i] = NULL; |
} |
|
} |
} |
sort_free(fl->fns); |
|
fl->fns = NULL; |
|
} |
} |
fl->sz = 0; |
sort_free(fl->fns); |
fl->count = 0; |
fl->fns = NULL; |
fl->tmp = false; |
|
} |
} |
|
fl->sz = 0; |
|
fl->count = 0; |
|
fl->tmp = false; |
} |
} |
|
|
/* |
/* |
|
|
void |
void |
sort_list_init(struct sort_list *l) |
sort_list_init(struct sort_list *l) |
{ |
{ |
if (l) { |
l->count = 0; |
l->count = 0; |
l->size = 0; |
l->size = 0; |
l->memsize = sizeof(struct sort_list); |
l->memsize = sizeof(struct sort_list); |
l->list = NULL; |
l->list = NULL; |
|
} |
|
} |
} |
|
|
/* |
/* |
|
|
void |
void |
sort_list_add(struct sort_list *l, struct bwstring *str) |
sort_list_add(struct sort_list *l, struct bwstring *str) |
{ |
{ |
if (l && str) { |
size_t indx = l->count; |
size_t indx = l->count; |
|
|
|
if ((l->list == NULL) || (indx >= l->size)) { |
if ((l->list == NULL) || (indx >= l->size)) { |
size_t newsize = (l->size + 1) + 1024; |
size_t newsize = (l->size + 1) + 1024; |
|
|
l->list = sort_reallocarray(l->list, newsize, |
l->list = sort_reallocarray(l->list, newsize, |
sizeof(struct sort_list_item *)); |
sizeof(struct sort_list_item *)); |
l->memsize += (newsize - l->size) * |
l->memsize += (newsize - l->size) * |
sizeof(struct sort_list_item *); |
sizeof(struct sort_list_item *); |
l->size = newsize; |
l->size = newsize; |
} |
|
l->list[indx] = sort_list_item_alloc(); |
|
sort_list_item_set(l->list[indx], str); |
|
l->memsize += sort_list_item_size(l->list[indx]); |
|
l->count += 1; |
|
} |
} |
|
l->list[indx] = sort_list_item_alloc(); |
|
sort_list_item_set(l->list[indx], str); |
|
l->memsize += sort_list_item_size(l->list[indx]); |
|
l->count += 1; |
} |
} |
|
|
/* |
/* |
|
|
void |
void |
sort_list_clean(struct sort_list *l) |
sort_list_clean(struct sort_list *l) |
{ |
{ |
if (l) { |
if (l->list) { |
if (l->list) { |
size_t i; |
size_t i; |
|
|
|
for (i = 0; i < l->count; i++) { |
for (i = 0; i < l->count; i++) { |
struct sort_list_item *item; |
struct sort_list_item *item; |
|
|
item = l->list[i]; |
item = l->list[i]; |
|
|
if (item) { |
if (item) { |
sort_list_item_clean(item); |
sort_list_item_clean(item); |
sort_free(item); |
sort_free(item); |
l->list[i] = NULL; |
l->list[i] = NULL; |
} |
|
} |
} |
sort_free(l->list); |
|
l->list = NULL; |
|
} |
} |
l->count = 0; |
sort_free(l->list); |
l->size = 0; |
l->list = NULL; |
l->memsize = sizeof(struct sort_list); |
|
} |
} |
|
l->count = 0; |
|
l->size = 0; |
|
l->memsize = sizeof(struct sort_list); |
} |
} |
|
|
/* |
/* |
|
|
void |
void |
sort_list_dump(struct sort_list *l, const char *fn) |
sort_list_dump(struct sort_list *l, const char *fn) |
{ |
{ |
if (l && fn) { |
FILE *f; |
FILE *f; |
|
|
|
f = openfile(fn, "w"); |
f = openfile(fn, "w"); |
if (f == NULL) |
if (f == NULL) |
err(2, "%s", fn); |
err(2, "%s", fn); |
|
|
if (l->list) { |
if (l->list) { |
size_t i; |
size_t i; |
if (!sort_opts_vals.uflag) { |
|
for (i = 0; i < l->count; ++i) |
if (!sort_opts_vals.uflag) { |
bwsfwrite(l->list[i]->str, f, |
for (i = 0; i < l->count; ++i) |
sort_opts_vals.zflag); |
bwsfwrite(l->list[i]->str, f, |
} else { |
sort_opts_vals.zflag); |
struct sort_list_item *last_printed_item = NULL; |
} else { |
struct sort_list_item *item; |
struct sort_list_item *last_printed_item = NULL; |
for (i = 0; i < l->count; ++i) { |
struct sort_list_item *item; |
item = l->list[i]; |
for (i = 0; i < l->count; ++i) { |
if ((last_printed_item == NULL) || |
item = l->list[i]; |
list_coll(&last_printed_item, &item)) { |
if ((last_printed_item == NULL) || |
bwsfwrite(item->str, f, sort_opts_vals.zflag); |
list_coll(&last_printed_item, &item)) { |
last_printed_item = item; |
bwsfwrite(item->str, f, sort_opts_vals.zflag); |
} |
last_printed_item = item; |
} |
} |
} |
} |
} |
} |
|
|
closefile(f, fn); |
|
} |
} |
|
|
|
closefile(f, fn); |
} |
} |
|
|
/* |
/* |
|
|
fr->bsz += bsz1; |
fr->bsz += bsz1; |
remsz += bsz1; |
remsz += bsz1; |
} else { |
} else { |
if (remsz > 0 && fr->strbeg>0) |
if (remsz > 0 && fr->strbeg > 0) { |
bcopy(fr->buffer + fr->strbeg, |
memmove(fr->buffer, |
fr->buffer, remsz); |
fr->buffer + fr->strbeg, remsz); |
|
} |
fr->strbeg = 0; |
fr->strbeg = 0; |
search_start = remsz; |
search_start = remsz; |
bsz1 = fread(fr->buffer + remsz, 1, |
bsz1 = fread(fr->buffer + remsz, 1, |
|
|
fr->buffer - fr->strbeg); |
fr->buffer - fr->strbeg); |
|
|
fr->strbeg = (strend - fr->buffer) + 1; |
fr->strbeg = (strend - fr->buffer) + 1; |
|
|
} else { |
} else { |
size_t len = 0; |
size_t len = 0; |
|
|
|
|
static void |
static void |
file_reader_clean(struct file_reader *fr) |
file_reader_clean(struct file_reader *fr) |
{ |
{ |
if (fr) { |
if (fr->mmapaddr) |
if (fr->mmapaddr) |
munmap(fr->mmapaddr, fr->mmapsize); |
munmap(fr->mmapaddr, fr->mmapsize); |
|
|
|
if (fr->fd) |
if (fr->fd) |
close(fr->fd); |
close(fr->fd); |
|
|
sort_free(fr->buffer); |
sort_free(fr->buffer); |
|
|
if (fr->file) |
if (fr->file) |
if (fr->file != stdin) |
if (fr->file != stdin) |
closefile(fr->file, fr->fname); |
closefile(fr->file, fr->fname); |
|
|
sort_free(fr->fname); |
sort_free(fr->fname); |
|
|
memset(fr, 0, sizeof(struct file_reader)); |
memset(fr, 0, sizeof(struct file_reader)); |
} |
|
} |
} |
|
|
void |
void |
file_reader_free(struct file_reader *fr) |
file_reader_free(struct file_reader *fr) |
{ |
{ |
if (fr) { |
file_reader_clean(fr); |
file_reader_clean(fr); |
sort_free(fr); |
sort_free(fr); |
|
} |
|
} |
} |
|
|
int |
int |
|
|
static int |
static int |
file_header_cmp(struct file_header *f1, struct file_header *f2) |
file_header_cmp(struct file_header *f1, struct file_header *f2) |
{ |
{ |
|
int ret; |
|
|
if (f1 == f2) |
if (f1 == f2) |
return 0; |
return 0; |
else { |
if (f1->fr == NULL) |
if (f1->fr == NULL) { |
return (f2->fr == NULL) ? 0 : 1; |
return (f2->fr == NULL) ? 0 : 1; |
if (f2->fr == NULL) |
} else if (f2->fr == NULL) |
return -1; |
return -1; |
|
else { |
|
int ret; |
|
|
|
ret = list_coll(&(f1->si), &(f2->si)); |
ret = list_coll(&(f1->si), &(f2->si)); |
if (!ret) |
if (!ret) |
return (f1->file_pos < f2->file_pos) ? -1 : 1; |
return (f1->file_pos < f2->file_pos) ? -1 : 1; |
return ret; |
return ret; |
} |
|
} |
|
} |
} |
|
|
/* |
/* |
|
|
static void |
static void |
file_header_init(struct file_header **fh, const char *fn, size_t file_pos) |
file_header_init(struct file_header **fh, const char *fn, size_t file_pos) |
{ |
{ |
if (fh && fn) { |
struct bwstring *line; |
struct bwstring *line; |
|
|
|
*fh = sort_malloc(sizeof(struct file_header)); |
*fh = sort_malloc(sizeof(struct file_header)); |
(*fh)->file_pos = file_pos; |
(*fh)->file_pos = file_pos; |
(*fh)->fr = file_reader_init(fn); |
(*fh)->fr = file_reader_init(fn); |
if ((*fh)->fr == NULL) { |
if ((*fh)->fr == NULL) { |
err(2, "Cannot open %s for reading", |
err(2, "Cannot open %s for reading", |
strcmp(fn, "-") == 0 ? "stdin" : fn); |
strcmp(fn, "-") == 0 ? "stdin" : fn); |
} |
|
line = file_reader_readline((*fh)->fr); |
|
if (line == NULL) { |
|
file_reader_free((*fh)->fr); |
|
(*fh)->fr = NULL; |
|
(*fh)->si = NULL; |
|
} else { |
|
(*fh)->si = sort_list_item_alloc(); |
|
sort_list_item_set((*fh)->si, line); |
|
} |
|
} |
} |
|
line = file_reader_readline((*fh)->fr); |
|
if (line == NULL) { |
|
file_reader_free((*fh)->fr); |
|
(*fh)->fr = NULL; |
|
(*fh)->si = NULL; |
|
} else { |
|
(*fh)->si = sort_list_item_alloc(); |
|
sort_list_item_set((*fh)->si, line); |
|
} |
} |
} |
|
|
/* |
/* |
|
|
static void |
static void |
file_header_close(struct file_header **fh) |
file_header_close(struct file_header **fh) |
{ |
{ |
if (fh && *fh) { |
if ((*fh)->fr) { |
if ((*fh)->fr) { |
file_reader_free((*fh)->fr); |
file_reader_free((*fh)->fr); |
(*fh)->fr = NULL; |
(*fh)->fr = NULL; |
|
} |
|
if ((*fh)->si) { |
|
sort_list_item_clean((*fh)->si); |
|
sort_free((*fh)->si); |
|
(*fh)->si = NULL; |
|
} |
|
sort_free(*fh); |
|
*fh = NULL; |
|
} |
} |
|
if ((*fh)->si) { |
|
sort_list_item_clean((*fh)->si); |
|
sort_free((*fh)->si); |
|
(*fh)->si = NULL; |
|
} |
|
sort_free(*fh); |
|
*fh = NULL; |
} |
} |
|
|
/* |
/* |
|
|
static void |
static void |
file_header_print(struct file_header *fh, FILE *f_out, struct last_printed *lp) |
file_header_print(struct file_header *fh, FILE *f_out, struct last_printed *lp) |
{ |
{ |
if (fh && fh->fr && f_out && fh->si && fh->si->str) { |
if (sort_opts_vals.uflag) { |
if (sort_opts_vals.uflag) { |
if ((lp->str == NULL) || (str_list_coll(lp->str, &(fh->si)))) { |
if ((lp->str == NULL) || (str_list_coll(lp->str, &(fh->si)))) { |
|
bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag); |
|
if (lp->str) |
|
bwsfree(lp->str); |
|
lp->str = bwsdup(fh->si->str); |
|
} |
|
} else |
|
bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag); |
bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag); |
} |
if (lp->str) |
|
bwsfree(lp->str); |
|
lp->str = bwsdup(fh->si->str); |
|
} |
|
} else |
|
bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag); |
} |
} |
|
|
/* |
/* |
|
|
static void |
static void |
file_header_read_next(struct file_header *fh) |
file_header_read_next(struct file_header *fh) |
{ |
{ |
if (fh && fh->fr) { |
struct bwstring *tmp; |
struct bwstring *tmp; |
|
|
|
tmp = file_reader_readline(fh->fr); |
tmp = file_reader_readline(fh->fr); |
if (tmp == NULL) { |
if (tmp == NULL) { |
file_reader_free(fh->fr); |
file_reader_free(fh->fr); |
fh->fr = NULL; |
fh->fr = NULL; |
if (fh->si) { |
if (fh->si) { |
sort_list_item_clean(fh->si); |
sort_list_item_clean(fh->si); |
sort_free(fh->si); |
sort_free(fh->si); |
fh->si = NULL; |
fh->si = NULL; |
} |
|
} else { |
|
if (fh->si == NULL) |
|
fh->si = sort_list_item_alloc(); |
|
sort_list_item_set(fh->si, tmp); |
|
} |
} |
|
} else { |
|
if (fh->si == NULL) |
|
fh->si = sort_list_item_alloc(); |
|
sort_list_item_set(fh->si, tmp); |
} |
} |
} |
} |
|
|
|
|
static void |
static void |
merge_files_array(size_t argc, char **argv, const char *fn_out) |
merge_files_array(size_t argc, char **argv, const char *fn_out) |
{ |
{ |
if (argv && fn_out) { |
struct file_header **fh; |
struct file_header **fh; |
FILE *f_out; |
FILE *f_out; |
size_t i; |
size_t i; |
|
|
|
f_out = openfile(fn_out, "w"); |
f_out = openfile(fn_out, "w"); |
|
|
if (f_out == NULL) |
if (f_out == NULL) |
err(2, "%s", fn_out); |
err(2, "%s", fn_out); |
|
|
fh = sort_reallocarray(NULL, argc + 1, |
fh = sort_reallocarray(NULL, argc + 1, sizeof(struct file_header *)); |
sizeof(struct file_header *)); |
|
|
|
for (i = 0; i < argc; i++) |
for (i = 0; i < argc; i++) |
file_header_init(fh + i, argv[i], (size_t) i); |
file_header_init(fh + i, argv[i], i); |
|
|
file_headers_merge(argc, fh, f_out); |
file_headers_merge(argc, fh, f_out); |
|
|
for (i = 0; i < argc; i++) |
for (i = 0; i < argc; i++) |
file_header_close(fh + i); |
file_header_close(fh + i); |
|
|
sort_free(fh); |
sort_free(fh); |
|
|
closefile(f_out, fn_out); |
closefile(f_out, fn_out); |
} |
|
} |
} |
|
|
/* |
/* |
|
|
static int |
static int |
shrink_file_list(struct file_list *fl) |
shrink_file_list(struct file_list *fl) |
{ |
{ |
if (fl == NULL || (size_t)fl->count < max_open_files) |
struct file_list new_fl; |
|
size_t indx = 0; |
|
|
|
if (fl->count < max_open_files) |
return 0; |
return 0; |
else { |
|
struct file_list new_fl; |
|
size_t indx = 0; |
|
|
|
file_list_init(&new_fl, true); |
file_list_init(&new_fl, true); |
while (indx < fl->count) { |
while (indx < fl->count) { |
char *fnew; |
char *fnew; |
size_t num; |
size_t num; |
|
|
num = fl->count - indx; |
num = fl->count - indx; |
fnew = new_tmp_file_name(); |
fnew = new_tmp_file_name(); |
|
|
if ((size_t) num >= max_open_files) |
if (num >= max_open_files) |
num = max_open_files - 1; |
num = max_open_files - 1; |
merge_files_array(num, fl->fns + indx, fnew); |
merge_files_array(num, fl->fns + indx, fnew); |
if (fl->tmp) { |
if (fl->tmp) { |
size_t i; |
size_t i; |
|
|
for (i = 0; i < num; i++) |
for (i = 0; i < num; i++) |
unlink(fl->fns[indx + i]); |
unlink(fl->fns[indx + i]); |
} |
|
file_list_add(&new_fl, fnew, false); |
|
indx += num; |
|
} |
} |
fl->tmp = false; /* already taken care of */ |
file_list_add(&new_fl, fnew, false); |
file_list_clean(fl); |
indx += num; |
|
} |
|
fl->tmp = false; /* already taken care of */ |
|
file_list_clean(fl); |
|
|
fl->count = new_fl.count; |
fl->count = new_fl.count; |
fl->fns = new_fl.fns; |
fl->fns = new_fl.fns; |
fl->sz = new_fl.sz; |
fl->sz = new_fl.sz; |
fl->tmp = new_fl.tmp; |
fl->tmp = new_fl.tmp; |
|
|
return 1; |
return 1; |
} |
|
} |
} |
|
|
/* |
/* |
|
|
void |
void |
merge_files(struct file_list *fl, const char *fn_out) |
merge_files(struct file_list *fl, const char *fn_out) |
{ |
{ |
if (fl && fn_out) { |
while (shrink_file_list(fl)) |
while (shrink_file_list(fl)); |
; |
|
|
merge_files_array(fl->count, fl->fns, fn_out); |
merge_files_array(fl->count, fl->fns, fn_out); |
} |
|
} |
} |
|
|
static const char * |
static const char * |
|
|
err(2, "Radix sort cannot be used with these sort options"); |
err(2, "Radix sort cannot be used with these sort options"); |
|
|
/* |
/* |
* to handle stable sort and the unique cases in the |
* To handle stable sort and the unique cases in the |
* right order, we need stable basic algorithm |
* right order, we need to use a stable algorithm. |
*/ |
*/ |
if (sort_opts_vals.sflag) { |
if (sort_opts_vals.sflag) { |
switch (sort_opts_vals.sort_method){ |
switch (sort_opts_vals.sort_method){ |