- Full Source
-
1: /* sort - sort lines of text (with all kinds of options).
2: Copyright (C) 88, 91, 92, 93, 94, 95, 1996 Free Software Foundation
3:
4: This program is free software; you can redistribute it and/or modify
5: it under the terms of the GNU General Public License as published by
6: the Free Software Foundation; either version 2, or (at your option)
7: any later version.
8:
9: This program is distributed in the hope that it will be useful,
10: but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12: GNU General Public License for more details.
13:
14: You should have received a copy of the GNU General Public License
15: along with this program; if not, write to the Free Software
16: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17:
18: Written December 1988 by Mike Haertel.
19: The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20: or (US mail) as Mike Haertel c/o Free Software Foundation. */
21:
22: #include <config.h>
23:
24: /* Get isblank from GNU libc. */
25: #define _GNU_SOURCE
26:
27: #include <sys/types.h>
28: #include <signal.h>
29: #include <stdio.h>
30: #ifndef ENABLE_ASSERTIONS
31: # define NDEBUG 1
32: #endif
33: #include <assert.h>
34: #include "system.h"
35: #include "long-options.h"
36: #include "error.h"
37: #include "xstrtod.h"
38:
39: #ifdef HAVE_LIMITS_H
40: #include <limits.h>
41: #else
42: #ifndef UCHAR_MAX
43: #define UCHAR_MAX 255
44: #endif
45: #endif
46: #ifndef STDC_HEADERS
47: char *malloc ();
48: char *realloc ();
49: void free ();
50: #endif
51:
52: /* Undefine, to avoid warning about redefinition on some systems. */
53: #undef min
54: #define min(a, b) ((a) < (b) ? (a) : (b))
55:
56: #define UCHAR_LIM (UCHAR_MAX + 1)
57: #define UCHAR(c) ((unsigned char) (c))
58:
59: #ifndef DEFAULT_TMPDIR
60: #define DEFAULT_TMPDIR "/tmp"
61: #endif
62:
63: /* Use this as exit status in case of error, not EXIT_FAILURE. This
64: is necessary because EXIT_FAILURE is usually 1 and POSIX requires
65: that sort exit with status 1 IFF invoked with -c and the input is
66: not properly sorted. Any other irregular exit must exit with a
67: status code greater than 1. */
68: #define SORT_FAILURE 2
69:
70: /* The kind of blanks for '-b' to skip in various options. */
71: enum blanktype { bl_start, bl_end, bl_both };
72:
73: /* The character marking end of line. Default to \n. */
74: int eolchar = '\n';
75:
76: /* Lines are held in core as counted strings. */
77: struct line
78: {
79: char *text; /* Text of the line. */
80: int length; /* Length not including final newline. */
81: char *keybeg; /* Start of first key. */
82: char *keylim; /* Limit of first key. */
83: };
84:
85: /* Arrays of lines. */
86: struct lines
87: {
88: struct line *lines; /* Dynamically allocated array of lines. */
89: int used; /* Number of slots used. */
90: int alloc; /* Number of slots allocated. */
91: int limit; /* Max number of slots to allocate. */
92: };
93:
94: /* Input buffers. */
95: struct buffer
96: {
97: char *buf; /* Dynamically allocated buffer. */
98: int used; /* Number of bytes used. */
99: int alloc; /* Number of bytes allocated. */
100: int left; /* Number of bytes left after line parsing. */
101: };
102:
103: struct keyfield
104: {
105: int sword; /* Zero-origin 'word' to start at. */
106: int schar; /* Additional characters to skip. */
107: int skipsblanks; /* Skip leading white space at start. */
108: int eword; /* Zero-origin first word after field. */
109: int echar; /* Additional characters in field. */
110: int skipeblanks; /* Skip trailing white space at finish. */
111: int *ignore; /* Boolean array of characters to ignore. */
112: char *translate; /* Translation applied to characters. */
113: int numeric; /* Flag for numeric comparison. Handle
114: strings of digits with optional decimal
115: point, but no exponential notation. */
116: int general_numeric; /* Flag for general, numeric comparison.
117: Handle numbers in exponential notation. */
118: int month; /* Flag for comparison by month name. */
119: int reverse; /* Reverse the sense of comparison. */
120: struct keyfield *next; /* Next keyfield to try. */
121: };
122:
123: struct month
124: {
125: char *name;
126: int val;
127: };
128:
129: /* The name this program was run with. */
130: char *program_name;
131:
132: /* Table of white space. */
133: static int blanks[UCHAR_LIM];
134:
135: /* Table of non-printing characters. */
136: static int nonprinting[UCHAR_LIM];
137:
138: /* Table of non-dictionary characters (not letters, digits, or blanks). */
139: static int nondictionary[UCHAR_LIM];
140:
141: /* Translation table folding lower case to upper. */
142: static char fold_toupper[UCHAR_LIM];
143:
144: /* Table mapping 3-letter month names to integers.
145: Alphabetic order allows binary search. */
146: static struct month const monthtab[] =
147: {
148: {"APR", 4},
149: {"AUG", 8},
150: {"DEC", 12},
151: {"FEB", 2},
152: {"JAN", 1},
153: {"JUL", 7},
154: {"JUN", 6},
155: {"MAR", 3},
156: {"MAY", 5},
157: {"NOV", 11},
158: {"OCT", 10},
159: {"SEP", 9}
160: };
161:
162: /* During the merge phase, the number of files to merge at once. */
163: #define NMERGE 16
164:
165: /* Initial buffer size for in core sorting. Will not grow unless a
166: line longer than this is seen. */
167: static int sortalloc = 512 * 1024;
168:
169: /* Initial buffer size for in core merge buffers. Bear in mind that
170: up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
171: static int mergealloc = 16 * 1024;
172:
173: /* Guess of average line length. */
174: static int linelength = 30;
175:
176: /* Maximum number of elements for the array(s) of struct line's, in bytes. */
177: #define LINEALLOC (256 * 1024)
178:
179: /* Prefix for temporary file names. */
180: static char *temp_file_prefix;
181:
182: /* Flag to reverse the order of all comparisons. */
183: static int reverse;
184:
185: /* Flag for stable sort. This turns off the last ditch bytewise
186: comparison of lines, and instead leaves lines in the same order
187: they were read if all keys compare equal. */
188: static int stable;
189:
190: /* Tab character separating fields. If NUL, then fields are separated
191: by the empty string between a non-whitespace character and a whitespace
192: character. */
193: static char tab;
194:
195: /* Flag to remove consecutive duplicate lines from the output.
196: Only the last of a sequence of equal lines will be output. */
197: static int unique;
198:
199: /* Nonzero if any of the input files are the standard input. */
200: static int have_read_stdin;
201:
202: /* Lists of key field comparisons to be tried. */
203: static struct keyfield keyhead;
204:
205: static void
206: usage (int status)
207: {
208: if (status != 0)
209: fprintf (stderr, _("Try `%s --help' for more information.\n"),
210: program_name);
211: else
212: {
213: printf (_("\
214: Usage: %s [OPTION]... [FILE]...\n\
215: "),
216: program_name);
217: printf (_("\
218: Write sorted concatenation of all FILE(s) to standard output.\n\
219: \n\
220: +POS1 [-POS2] start a key at POS1, end it before POS2\n\
221: -b ignore leading blanks in sort fields or keys\n\
222: -c check if given files already sorted, do not sort\n\
223: -d consider only [a-zA-Z0-9 ] characters in keys\n\
224: -f fold lower case to upper case characters in keys\n\
225: -g compare according to general numerical value, imply -b\n\
226: -i consider only [\\040-\\0176] characters in keys\n\
227: -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
228: -m merge already sorted files, do not sort\n\
229: -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
230: -n compare according to string numerical value, imply -b\n\
231: -o FILE write result on FILE instead of standard output\n\
232: -r reverse the result of comparisons\n\
233: -s stabilize sort by disabling last resort comparison\n\
234: -t SEP use SEParator instead of non- to whitespace transition\n\
235: -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
236: -u with -c, check for strict ordering;\n\
237: with -m, only output the first of an equal sequence\n\
238: -z end lines with 0 byte, not newline, for find -print0\n\
239: --help display this help and exit\n\
240: --version output version information and exit\n\
241: \n\
242: POS is F[.C][OPTS], where F is the field number and C the character\n\
243: position in the field, both counted from zero. OPTS is made up of one\n\
244: or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
245: for that key. If no key given, use the entire line as key. With no\n\
246: FILE, or when FILE is -, read standard input.\n\
247: ")
248: , DEFAULT_TMPDIR);
249: puts (_("\nReport bugs to textutils-bugs@gnu.ai.mit.edu"));
250: }
251: /* Don't use EXIT_FAILURE here in case it is defined to be 1.
252: POSIX requires that sort return 1 IFF invoked with -c and
253: the input is not properly sorted. */
254: assert (status == 0 || status == SORT_FAILURE);
255: exit (status);
256: }
257:
258: /* The list of temporary files. */
259: static struct tempnode
260: {
261: char *name;
262: struct tempnode *next;
263: } temphead;
264:
265: /* Clean up any remaining temporary files. */
266:
267: static void
268: cleanup (void)
269: {
270: struct tempnode *node;
271:
272: for (node = temphead.next; node; node = node->next)
273: unlink (node->name);
274: }
275:
276: /* Allocate N bytes of memory dynamically, with error checking. */
277:
278: static char *
279: xmalloc (unsigned int n)
280: {
281: char *p;
282:
283: p = malloc (n);
284: if (p == 0)
285: {
286: error (0, 0, _("virtual memory exhausted"));
287: cleanup ();
288: exit (SORT_FAILURE);
289: }
290: return p;
291: }
292:
293: /* Change the size of an allocated block of memory P to N bytes,
294: with error checking.
295: If P is NULL, run xmalloc.
296: If N is 0, run free and return NULL. */
297:
298: static char *
299: xrealloc (char *p, unsigned int n)
300: {
301: if (p == 0)
302: return xmalloc (n);
303: if (n == 0)
304: {
305: free (p);
306: return 0;
307: }
308: p = realloc (p, n);
309: if (p == 0)
310: {
311: error (0, 0, _("virtual memory exhausted"));
312: cleanup ();
313: exit (SORT_FAILURE);
314: }
315: return p;
316: }
317:
318: static FILE *
319: xtmpfopen (const char *file)
320: {
321: FILE *fp;
322: int fd;
323:
324: fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
325: if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
326: {
327: error (0, errno, "%s", file);
328: cleanup ();
329: exit (SORT_FAILURE);
330: }
331:
332: return fp;
333: }
334:
335: static FILE *
336: xfopen (const char *file, const char *how)
337: {
338: FILE *fp;
339:
340: if (strcmp (file, "-") == 0)
341: {
342: fp = stdin;
343: }
344: else
345: {
346: if ((fp = fopen (file, how)) == NULL)
347: {
348: error (0, errno, "%s", file);
349: cleanup ();
350: exit (SORT_FAILURE);
351: }
352: }
353:
354: if (fp == stdin)
355: have_read_stdin = 1;
356: return fp;
357: }
358:
359: static void
360: xfclose (FILE *fp)
361: {
362: if (fp == stdin)
363: {
364: /* Allow reading stdin from tty more than once. */
365: if (feof (fp))
366: clearerr (fp);
367: }
368: else if (fp == stdout)
369: {
370: if (fflush (fp) != 0)
371: {
372: error (0, errno, _("flushing file"));
373: cleanup ();
374: exit (SORT_FAILURE);
375: }
376: }
377: else
378: {
379: if (fclose (fp) != 0)
380: {
381: error (0, errno, _("error closing file"));
382: cleanup ();
383: exit (SORT_FAILURE);
384: }
385: }
386: }
387:
388: static void
389: write_bytes (const char *buf, size_t n_bytes, FILE *fp)
390: {
391: if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
392: {
393: error (0, errno, _("write error"));
394: cleanup ();
395: exit (SORT_FAILURE);
396: }
397: }
398:
399: /* Return a name for a temporary file. */
400:
401: static char *
402: tempname (void)
403: {
404: static unsigned int seq;
405: int len = strlen (temp_file_prefix);
406: char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
407: struct tempnode *node;
408:
409: node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
410: sprintf (name,
411: "%s%ssort%5.5d%5.5d",
412: temp_file_prefix,
413: (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
414: (unsigned int) getpid () & 0xffff, seq);
415:
416: /* Make sure that SEQ's value fits in 5 digits. */
417: ++seq;
418: if (seq >= 100000)
419: seq = 0;
420:
421: node->name = name;
422: node->next = temphead.next;
423: temphead.next = node;
424: return name;
425: }
426:
427: /* Search through the list of temporary files for NAME;
428: remove it if it is found on the list. */
429:
430: static void
431: zaptemp (char *name)
432: {
433: struct tempnode *node, *temp;
434:
435: for (node = &temphead; node->next; node = node->next)
436: if (!strcmp (name, node->next->name))
437: break;
438: if (node->next)
439: {
440: temp = node->next;
441: unlink (temp->name);
442: free (temp->name);
443: node->next = temp->next;
444: free ((char *) temp);
445: }
446: }
447:
448: /* Initialize the character class tables. */
449:
450: static void
451: inittables (void)
452: {
453: int i;
454:
455: for (i = 0; i < UCHAR_LIM; ++i)
456: {
457: if (ISBLANK (i))
458: blanks[i] = 1;
459: if (!ISPRINT (i))
460: nonprinting[i] = 1;
461: if (!ISALNUM (i) && !ISBLANK (i))
462: nondictionary[i] = 1;
463: if (ISLOWER (i))
464: fold_toupper[i] = toupper (i);
465: else
466: fold_toupper[i] = i;
467: }
468: }
469:
470: /* Initialize BUF, allocating ALLOC bytes initially. */
471:
472: static void
473: initbuf (struct buffer *buf, int alloc)
474: {
475: buf->alloc = alloc;
476: buf->buf = xmalloc (buf->alloc);
477: buf->used = buf->left = 0;
478: }
479:
480: /* Fill BUF reading from FP, moving buf->left bytes from the end
481: of buf->buf to the beginning first. If EOF is reached and the
482: file wasn't terminated by a newline, supply one. Return a count
483: of bytes buffered. */
484:
485: static int
486: fillbuf (struct buffer *buf, FILE *fp)
487: {
488: int cc;
489:
490: memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
491: buf->used = buf->left;
492:
493: while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used)))
494: {
495: if (buf->used == buf->alloc)
496: {
497: buf->alloc *= 2;
498: buf->buf = xrealloc (buf->buf, buf->alloc);
499: }
500: cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
501: if (ferror (fp))
502: {
503: error (0, errno, _("read error"));
504: cleanup ();
505: exit (SORT_FAILURE);
506: }
507: buf->used += cc;
508: }
509:
510: if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar)
511: {
512: if (buf->used == buf->alloc)
513: {
514: buf->alloc *= 2;
515: buf->buf = xrealloc (buf->buf, buf->alloc);
516: }
517: buf->buf[buf->used++] = eolchar;
518: }
519:
520: return buf->used;
521: }
522:
523: /* Initialize LINES, allocating space for ALLOC lines initially.
524: LIMIT is the maximum possible number of lines to allocate space
525: for, ever. */
526:
527: static void
528: initlines (struct lines *lines, int alloc, int limit)
529: {
530: lines->alloc = alloc;
531: lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
532: lines->used = 0;
533: lines->limit = limit;
534: }
535:
536: /* Return a pointer to the first character of the field specified
537: by KEY in LINE. */
538:
539: static char *
540: begfield (const struct line *line, const struct keyfield *key)
541: {
542: register char *ptr = line->text, *lim = ptr + line->length;
543: register int sword = key->sword, schar = key->schar;
544:
545: if (tab)
546: while (ptr < lim && sword--)
547: {
548: while (ptr < lim && *ptr != tab)
549: ++ptr;
550: if (ptr < lim)
551: ++ptr;
552: }
553: else
554: while (ptr < lim && sword--)
555: {
556: while (ptr < lim && blanks[UCHAR (*ptr)])
557: ++ptr;
558: while (ptr < lim && !blanks[UCHAR (*ptr)])
559: ++ptr;
560: }
561:
562: if (key->skipsblanks)
563: while (ptr < lim && blanks[UCHAR (*ptr)])
564: ++ptr;
565:
566: if (ptr + schar <= lim)
567: ptr += schar;
568: else
569: ptr = lim;
570:
571: return ptr;
572: }
573:
574: /* Return the limit of (a pointer to the first character after) the field
575: in LINE specified by KEY. */
576:
577: static char *
578: limfield (const struct line *line, const struct keyfield *key)
579: {
580: register char *ptr = line->text, *lim = ptr + line->length;
581: register int eword = key->eword, echar = key->echar;
582:
583: /* Note: from the POSIX spec:
584: The leading field separator itself is included in
585: a field when -t is not used. FIXME: move this comment up... */
586:
587: /* Move PTR past EWORD fields or to one past the last byte on LINE,
588: whichever comes first. If there are more than EWORD fields, leave
589: PTR pointing at the beginning of the field having zero-based index,
590: EWORD. If a delimiter character was specified (via -t), then that
591: `beginning' is the first character following the delimiting TAB.
592: Otherwise, leave PTR pointing at the first `blank' character after
593: the preceding field. */
594: if (tab)
595: while (ptr < lim && eword--)
596: {
597: while (ptr < lim && *ptr != tab)
598: ++ptr;
599: if (ptr < lim && (eword || echar > 0))
600: ++ptr;
601: }
602: else
603: while (ptr < lim && eword--)
604: {
605: while (ptr < lim && blanks[UCHAR (*ptr)])
606: ++ptr;
607: while (ptr < lim && !blanks[UCHAR (*ptr)])
608: ++ptr;
609: }
610:
611: #ifdef POSIX_UNSPECIFIED
612: /* The following block of code makes GNU sort incompatible with
613: standard Unix sort, so it's ifdef'd out for now.
614: The POSIX spec isn't clear on how to interpret this.
615: FIXME: request clarification.
616:
617: From: kwzh@gnu.ai.mit.edu (Karl Heuer)
618: Date: Thu, 30 May 96 12:20:41 -0400
619:
620: [...]I believe I've found another bug in `sort'.
621:
622: $ cat /tmp/sort.in
623: a b c 2 d
624: pq rs 1 t
625: $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
626: a b c 2 d
627: pq rs 1 t
628: $ /bin/sort +0.6 -0.7 </tmp/sort.in
629: pq rs 1 t
630: a b c 2 d
631:
632: Unix sort produced the answer I expected: sort on the single character
633: in column 6. GNU sort produced different results, because it disagrees
634: on the interpretation of the key-end spec "-M.N". Unix sort reads this
635: as "skip M fields, then N characters"; but GNU sort wants it to mean
636: "skip M fields, then either N characters or the rest of the current
637: field, whichever comes first". This extra clause applies only to
638: key-ends, not key-starts.
639: */
640:
641: /* Make LIM point to the end of (one byte past) the current field. */
642: if (tab)
643: {
644: char *newlim;
645: newlim = memchr (ptr, tab, lim - ptr);
646: if (newlim)
647: lim = newlim;
648: }
649: else
650: {
651: char *newlim;
652: newlim = ptr;
653: while (newlim < lim && blanks[UCHAR (*newlim)])
654: ++newlim;
655: while (newlim < lim && !blanks[UCHAR (*newlim)])
656: ++newlim;
657: lim = newlim;
658: }
659: #endif
660:
661: /* If we're skipping leading blanks, don't start counting characters
662: until after skipping past any leading blanks. */
663: if (key->skipsblanks)
664: while (ptr < lim && blanks[UCHAR (*ptr)])
665: ++ptr;
666:
667: /* Advance PTR by ECHAR (if possible), but no further than LIM. */
668: if (ptr + echar <= lim)
669: ptr += echar;
670: else
671: ptr = lim;
672:
673: return ptr;
674: }
675:
676: /* FIXME */
677:
678: void
679: trim_trailing_blanks (const char *a_start, char **a_end)
680: {
681: while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
682: --(*a_end);
683: }
684:
685: /* Find the lines in BUF, storing pointers and lengths in LINES.
686: Also replace newlines in BUF with NULs. */
687:
688: static void
689: findlines (struct buffer *buf, struct lines *lines)
690: {
691: register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
692: struct keyfield *key = keyhead.next;
693:
694: lines->used = 0;
695:
696: while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))
697: && lines->used < lines->limit)
698: {
699: /* There are various places in the code that rely on a NUL
700: being at the end of in-core lines; NULs inside the lines
701: will not cause trouble, though. */
702: *ptr = '\0';
703:
704: if (lines->used == lines->alloc)
705: {
706: lines->alloc *= 2;
707: lines->lines = (struct line *)
708: xrealloc ((char *) lines->lines,
709: lines->alloc * sizeof (struct line));
710: }
711:
712: lines->lines[lines->used].text = beg;
713: lines->lines[lines->used].length = ptr - beg;
714:
715: /* Precompute the position of the first key for efficiency. */
716: if (key)
717: {
718: if (key->eword >= 0)
719: lines->lines[lines->used].keylim =
720: limfield (&lines->lines[lines->used], key);
721: else
722: lines->lines[lines->used].keylim = ptr;
723:
724: if (key->sword >= 0)
725: lines->lines[lines->used].keybeg =
726: begfield (&lines->lines[lines->used], key);
727: else
728: {
729: if (key->skipsblanks)
730: while (blanks[UCHAR (*beg)])
731: ++beg;
732: lines->lines[lines->used].keybeg = beg;
733: }
734: if (key->skipeblanks)
735: {
736: trim_trailing_blanks (lines->lines[lines->used].keybeg,
737: &lines->lines[lines->used].keylim);
738: }
739: }
740: else
741: {
742: lines->lines[lines->used].keybeg = 0;
743: lines->lines[lines->used].keylim = 0;
744: }
745:
746: ++lines->used;
747: beg = ptr + 1;
748: }
749:
750: buf->left = lim - beg;
751: }
752:
753: /* Compare strings A and B containing decimal fractions < 1. Each string
754: should begin with a decimal point followed immediately by the digits
755: of the fraction. Strings not of this form are considered to be zero. */
756:
757: static int
758: fraccompare (register const char *a, register const char *b)
759: {
760: register int tmpa = *a;
761: register int tmpb = *b;
762:
763: if (tmpa == '.' && tmpb == '.')
764: {
765: do
766: tmpa = *++a, tmpb = *++b;
767: while (tmpa == tmpb && ISDIGIT (tmpa));
768: if (ISDIGIT (tmpa) && ISDIGIT (tmpb))
769: return tmpa - tmpb;
770: if (ISDIGIT (tmpa))
771: {
772: while (tmpa == '0')
773: tmpa = *++a;
774: if (ISDIGIT (tmpa))
775: return 1;
776: return 0;
777: }
778: if (ISDIGIT (tmpb))
779: {
780: while (tmpb == '0')
781: tmpb = *++b;
782: if (ISDIGIT (tmpb))
783: return -1;
784: return 0;
785: }
786: return 0;
787: }
788: else if (tmpa == '.')
789: {
790: do
791: tmpa = *++a;
792: while (tmpa == '0');
793: if (ISDIGIT (tmpa))
794: return 1;
795: return 0;
796: }
797: else if (tmpb == '.')
798: {
799: do
800: tmpb = *++b;
801: while (tmpb == '0');
802: if (ISDIGIT (tmpb))
803: return -1;
804: return 0;
805: }
806: return 0;
807: }
808:
809: /* Compare strings A and B as numbers without explicitly converting them to
810: machine numbers. Comparatively slow for short strings, but asymptotically
811: hideously fast. */
812:
813: static int
814: numcompare (register const char *a, register const char *b)
815: {
816: register int tmpa, tmpb, loga, logb, tmp;
817:
818: tmpa = UCHAR (*a);
819: tmpb = UCHAR (*b);
820:
821: while (blanks[tmpa])
822: tmpa = UCHAR (*++a);
823: while (blanks[tmpb])
824: tmpb = UCHAR (*++b);
825:
826: if (tmpa == '-')
827: {
828: do
829: tmpa = *++a;
830: while (tmpa == '0');
831: if (tmpb != '-')
832: {
833: if (tmpa == '.')
834: do
835: tmpa = *++a;
836: while (tmpa == '0');
837: if (ISDIGIT (tmpa))
838: return -1;
839: while (tmpb == '0')
840: tmpb = *++b;
841: if (tmpb == '.')
842: do
843: tmpb = *++b;
844: while (tmpb == '0');
845: if (ISDIGIT (tmpb))
846: return -1;
847: return 0;
848: }
849: do
850: tmpb = *++b;
851: while (tmpb == '0');
852:
853: while (tmpa == tmpb && ISDIGIT (tmpa))
854: tmpa = *++a, tmpb = *++b;
855:
856: if ((tmpa == '.' && !ISDIGIT (tmpb))
857: || (tmpb == '.' && !ISDIGIT (tmpa)))
858: return -fraccompare (a, b);
859:
860: if (ISDIGIT (tmpa))
861: for (loga = 1; ISDIGIT (*++a); ++loga)
862: ;
863: else
864: loga = 0;
865:
866: if (ISDIGIT (tmpb))
867: for (logb = 1; ISDIGIT (*++b); ++logb)
868: ;
869: else
870: logb = 0;
871:
872: if ((tmp = logb - loga) != 0)
873: return tmp;
874:
875: if (!loga)
876: return 0;
877:
878: return tmpb - tmpa;
879: }
880: else if (tmpb == '-')
881: {
882: do
883: tmpb = *++b;
884: while (tmpb == '0');
885: if (tmpb == '.')
886: do
887: tmpb = *++b;
888: while (tmpb == '0');
889: if (ISDIGIT (tmpb))
890: return 1;
891: while (tmpa == '0')
892: tmpa = *++a;
893: if (tmpa == '.')
894: do
895: tmpa = *++a;
896: while (tmpa == '0');
897: if (ISDIGIT (tmpa))
898: return 1;
899: return 0;
900: }
901: else
902: {
903: while (tmpa == '0')
904: tmpa = *++a;
905: while (tmpb == '0')
906: tmpb = *++b;
907:
908: while (tmpa == tmpb && ISDIGIT (tmpa))
909: tmpa = *++a, tmpb = *++b;
910:
911: if ((tmpa == '.' && !ISDIGIT (tmpb))
912: || (tmpb == '.' && !ISDIGIT (tmpa)))
913: return fraccompare (a, b);
914:
915: if (ISDIGIT (tmpa))
916: for (loga = 1; ISDIGIT (*++a); ++loga)
917: ;
918: else
919: loga = 0;
920:
921: if (ISDIGIT (tmpb))
922: for (logb = 1; ISDIGIT (*++b); ++logb)
923: ;
924: else
925: logb = 0;
926:
927: if ((tmp = loga - logb) != 0)
928: return tmp;
929:
930: if (!loga)
931: return 0;
932:
933: return tmpa - tmpb;
934: }
935: }
936:
937: static int
938: general_numcompare (const char *sa, const char *sb)
939: {
940: double a, b;
941: /* FIXME: add option to warn about failed conversions. */
942: /* FIXME: maybe add option to try expensive FP conversion
943: only if A and B can't be compared more cheaply/accurately. */
944: if (xstrtod (sa, NULL, &a))
945: {
946: a = 0;
947: }
948: if (xstrtod (sb, NULL, &b))
949: {
950: b = 0;
951: }
952: return a == b ? 0 : a < b ? -1 : 1;
953: }
954:
955: /* Return an integer <= 12 associated with month name S with length LEN,
956: 0 if the name in S is not recognized. */
957:
958: static int
959: getmonth (const char *s, int len)
960: {
961: char month[4];
962: register int i, lo = 0, hi = 12;
963:
964: while (len > 0 && blanks[UCHAR(*s)])
965: ++s, --len;
966:
967: if (len < 3)
968: return 0;
969:
970: for (i = 0; i < 3; ++i)
971: month[i] = fold_toupper[UCHAR (s[i])];
972: month[3] = '\0';
973:
974: while (hi - lo > 1)
975: if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
976: hi = (lo + hi) / 2;
977: else
978: lo = (lo + hi) / 2;
979: if (!strcmp (month, monthtab[lo].name))
980: return monthtab[lo].val;
981: return 0;
982: }
983:
984: /* Compare two lines A and B trying every key in sequence until there
985: are no more keys or a difference is found. */
986:
987: static int
988: keycompare (const struct line *a, const struct line *b)
989: {
990: register char *texta, *textb, *lima, *limb;
991: register unsigned char *translate;
992: register int *ignore;
993: struct keyfield *key;
994: int diff = 0, iter = 0, lena, lenb;
995:
996: for (key = keyhead.next; key; key = key->next, ++iter)
997: {
998: ignore = key->ignore;
999: translate = (unsigned char *) key->translate;
1000:
1001: /* Find the beginning and limit of each field. */
1002: if (iter || a->keybeg == NULL || b->keybeg == NULL)
1003: {
1004: if (key->eword >= 0)
1005: lima = limfield (a, key), limb = limfield (b, key);
1006: else
1007: lima = a->text + a->length, limb = b->text + b->length;
1008:
1009: if (key->sword >= 0)
1010: texta = begfield (a, key), textb = begfield (b, key);
1011: else
1012: {
1013: texta = a->text, textb = b->text;
1014: if (key->skipsblanks)
1015: {
1016: while (texta < lima && blanks[UCHAR (*texta)])
1017: ++texta;
1018: while (textb < limb && blanks[UCHAR (*textb)])
1019: ++textb;
1020: }
1021: }
1022: }
1023: else
1024: {
1025: /* For the first iteration only, the key positions have
1026: been precomputed for us. */
1027: texta = a->keybeg, lima = a->keylim;
1028: textb = b->keybeg, limb = b->keylim;
1029: }
1030:
1031: /* Find the lengths. */
1032: lena = lima - texta, lenb = limb - textb;
1033: if (lena < 0)
1034: lena = 0;
1035: if (lenb < 0)
1036: lenb = 0;
1037:
1038: if (key->skipeblanks)
1039: {
1040: char *a_end = texta + lena;
1041: char *b_end = textb + lenb;
1042: trim_trailing_blanks (texta, &a_end);
1043: trim_trailing_blanks (textb, &b_end);
1044: lena = a_end - texta;
1045: lenb = b_end - textb;
1046: }
1047:
1048: /* Actually compare the fields. */
1049: if (key->numeric)
1050: {
1051: if (*lima || *limb)
1052: {
1053: char savea = *lima, saveb = *limb;
1054:
1055: *lima = *limb = '\0';
1056: diff = numcompare (texta, textb);
1057: *lima = savea, *limb = saveb;
1058: }
1059: else
1060: diff = numcompare (texta, textb);
1061:
1062: if (diff)
1063: return key->reverse ? -diff : diff;
1064: continue;
1065: }
1066: else if (key->general_numeric)
1067: {
1068: if (*lima || *limb)
1069: {
1070: char savea = *lima, saveb = *limb;
1071:
1072: *lima = *limb = '\0';
1073: diff = general_numcompare (texta, textb);
1074: *lima = savea, *limb = saveb;
1075: }
1076: else
1077: diff = general_numcompare (texta, textb);
1078:
1079: if (diff)
1080: return key->reverse ? -diff : diff;
1081: continue;
1082: }
1083: else if (key->month)
1084: {
1085: diff = getmonth (texta, lena) - getmonth (textb, lenb);
1086: if (diff)
1087: return key->reverse ? -diff : diff;
1088: continue;
1089: }
1090: else if (ignore && translate)
1091:
1092: #define CMP_WITH_IGNORE(A, B) \
1093: do \
1094: { \
1095: while (texta < lima && textb < limb) \
1096: { \
1097: while (texta < lima && ignore[UCHAR (*texta)]) \
1098: ++texta; \
1099: while (textb < limb && ignore[UCHAR (*textb)]) \
1100: ++textb; \
1101: if (texta < lima && textb < limb) \
1102: { \
1103: if ((A) != (B)) \
1104: { \
1105: diff = (A) - (B); \
1106: break; \
1107: } \
1108: ++texta; \
1109: ++textb; \
1110: } \
1111: \
1112: if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1113: diff = -1; \
1114: else if (texta < lima && textb == limb \
1115: && !ignore[UCHAR (*texta)]) \
1116: diff = 1; \
1117: } \
1118: \
1119: if (diff == 0) \
1120: { \
1121: while (texta < lima && ignore[UCHAR (*texta)]) \
1122: ++texta; \
1123: while (textb < limb && ignore[UCHAR (*textb)]) \
1124: ++textb; \
1125: \
1126: if (texta == lima && textb < limb) \
1127: diff = -1; \
1128: else if (texta < lima && textb == limb) \
1129: diff = 1; \
1130: } \
1131: /* Relative lengths are meaningless if characters were ignored. \
1132: Handling this case here avoids what might be an invalid length \
1133: comparison below. */ \
1134: if (diff == 0 && texta == lima && textb == limb) \
1135: return 0; \
1136: } \
1137: while (0)
1138:
1139: CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1140: else if (ignore)
1141: CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb));
1142: else if (translate)
1143: while (texta < lima && textb < limb)
1144: {
1145: if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1146: {
1147: diff = (translate[UCHAR (*--texta)]
1148: - translate[UCHAR (*--textb)]);
1149: break;
1150: }
1151: }
1152: else
1153: diff = memcmp (texta, textb, min (lena, lenb));
1154:
1155: if (diff)
1156: return key->reverse ? -diff : diff;
1157: if ((diff = lena - lenb) != 0)
1158: return key->reverse ? -diff : diff;
1159: }
1160:
1161: return 0;
1162: }
1163:
1164: /* Compare two lines A and B, returning negative, zero, or positive
1165: depending on whether A compares less than, equal to, or greater than B. */
1166:
1167: static int
1168: compare (register const struct line *a, register const struct line *b)
1169: {
1170: int diff, tmpa, tmpb, mini;
1171:
1172: /* First try to compare on the specified keys (if any).
1173: The only two cases with no key at all are unadorned sort,
1174: and unadorned sort -r. */
1175: if (keyhead.next)
1176: {
1177: diff = keycompare (a, b);
1178: if (diff != 0)
1179: return diff;
1180: if (unique || stable)
1181: return 0;
1182: }
1183:
1184: /* If the keys all compare equal (or no keys were specified)
1185: fall through to the default byte-by-byte comparison. */
1186: tmpa = a->length, tmpb = b->length;
1187: mini = min (tmpa, tmpb);
1188: if (mini == 0)
1189: diff = tmpa - tmpb;
1190: else
1191: {
1192: char *ap = a->text, *bp = b->text;
1193:
1194: diff = UCHAR (*ap) - UCHAR (*bp);
1195: if (diff == 0)
1196: {
1197: diff = memcmp (ap, bp, mini);
1198: if (diff == 0)
1199: diff = tmpa - tmpb;
1200: }
1201: }
1202:
1203: return reverse ? -diff : diff;
1204: }
1205:
1206: /* Check that the lines read from the given FP come in order. Return
1207: 1 if they do and 0 if there is a disorder.
1208: FIXME: return number of first out-of-order line if not sorted. */
1209:
1210: static int
1211: checkfp (FILE *fp)
1212: {
1213: struct buffer buf; /* Input buffer. */
1214: struct lines lines; /* Lines scanned from the buffer. */
1215: struct line temp; /* Copy of previous line. */
1216: int cc; /* Character count. */
1217: int alloc, sorted = 1;
1218:
1219: initbuf (&buf, mergealloc);
1220: initlines (&lines, mergealloc / linelength + 1,
1221: LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1222: alloc = linelength;
1223: temp.text = xmalloc (alloc);
1224:
1225: cc = fillbuf (&buf, fp);
1226: if (cc == 0)
1227: goto finish;
1228:
1229: findlines (&buf, &lines);
1230:
1231: while (1)
1232: {
1233: struct line *prev_line; /* Pointer to previous line. */
1234: int cmp; /* Result of calling compare. */
1235: int i;
1236:
1237: /* Compare each line in the buffer with its successor. */
1238: for (i = 0; i < lines.used - 1; ++i)
1239: {
1240: cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1241: if ((unique && cmp >= 0) || (cmp > 0))
1242: {
1243: sorted = 0;
1244: goto finish;
1245: }
1246: }
1247:
1248: /* Save the last line of the buffer and refill the buffer. */
1249: prev_line = lines.lines + (lines.used - 1);
1250: if (prev_line->length + 1 > alloc)
1251: {
1252: do
1253: {
1254: alloc *= 2;
1255: }
1256: while (alloc < prev_line->length + 1);
1257: temp.text = xrealloc (temp.text, alloc);
1258: }
1259: assert (prev_line->length + 1 <= alloc);
1260: memcpy (temp.text, prev_line->text, prev_line->length + 1);
1261: temp.length = prev_line->length;
1262: temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1263: temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1264:
1265: cc = fillbuf (&buf, fp);
1266: if (cc == 0)
1267: break;
1268:
1269: findlines (&buf, &lines);
1270: /* Make sure the line saved from the old buffer contents is
1271: less than or equal to the first line of the new buffer. */
1272: cmp = compare (&temp, &lines.lines[0]);
1273: if ((unique && cmp >= 0) || (cmp > 0))
1274: {
1275: sorted = 0;
1276: break;
1277: }
1278: }
1279:
1280: finish:
1281: xfclose (fp);
1282: free (buf.buf);
1283: free ((char *) lines.lines);
1284: free (temp.text);
1285: return sorted;
1286: }
1287:
1288: /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1289: Close FPS before returning. */
1290:
1291: static void
1292: mergefps (FILE **fps, register int nfps, FILE *ofp)
1293: {
1294: struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1295: struct lines lines[NMERGE]; /* Line tables for each buffer. */
1296: struct line saved; /* Saved line for unique check. */
1297: int savedflag = 0; /* True if there is a saved line. */
1298: int savealloc; /* Size allocated for the saved line. */
1299: int cur[NMERGE]; /* Current line in each line table. */
1300: int ord[NMERGE]; /* Table representing a permutation of fps,
1301: such that lines[ord[0]].lines[cur[ord[0]]]
1302: is the smallest line and will be next
1303: output. */
1304: register int i, j, t;
1305:
1306: #ifdef lint /* Suppress `used before initialized' warning. */
1307: savealloc = 0;
1308: #endif
1309:
1310: /* Allocate space for a saved line if necessary. */
1311: if (unique)
1312: {
1313: savealloc = linelength;
1314: saved.text = xmalloc (savealloc);
1315: }
1316:
1317: /* Read initial lines from each input file. */
1318: for (i = 0; i < nfps; ++i)
1319: {
1320: initbuf (&buffer[i], mergealloc);
1321: /* If a file is empty, eliminate it from future consideration. */
1322: while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1323: {
1324: xfclose (fps[i]);
1325: --nfps;
1326: for (j = i; j < nfps; ++j)
1327: fps[j] = fps[j + 1];
1328: }
1329: if (i == nfps)
1330: free (buffer[i].buf);
1331: else
1332: {
1333: initlines (&lines[i], mergealloc / linelength + 1,
1334: LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1335: findlines (&buffer[i], &lines[i]);
1336: cur[i] = 0;
1337: }
1338: }
1339:
1340: /* Set up the ord table according to comparisons among input lines.
1341: Since this only reorders two items if one is strictly greater than
1342: the other, it is stable. */
1343: for (i = 0; i < nfps; ++i)
1344: ord[i] = i;
1345: for (i = 1; i < nfps; ++i)
1346: if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1347: &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1348: t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1349:
1350: /* Repeatedly output the smallest line until no input remains. */
1351: while (nfps)
1352: {
1353: /* If uniqified output is turned on, output only the first of
1354: an identical series of lines. */
1355: if (unique)
1356: {
1357: if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1358: {
1359: write_bytes (saved.text, saved.length, ofp);
1360: putc (eolchar, ofp);
1361: savedflag = 0;
1362: }
1363: if (!savedflag)
1364: {
1365: if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1366: {
1367: while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1368: savealloc *= 2;
1369: saved.text = xrealloc (saved.text, savealloc);
1370: }
1371: saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1372: memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1373: saved.length + 1);
1374: if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1375: {
1376: saved.keybeg = saved.text +
1377: (lines[ord[0]].lines[cur[ord[0]]].keybeg
1378: - lines[ord[0]].lines[cur[ord[0]]].text);
1379: }
1380: if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1381: {
1382: saved.keylim = saved.text +
1383: (lines[ord[0]].lines[cur[ord[0]]].keylim
1384: - lines[ord[0]].lines[cur[ord[0]]].text);
1385: }
1386: savedflag = 1;
1387: }
1388: }
1389: else
1390: {
1391: write_bytes (lines[ord[0]].lines[cur[ord[0]]].text,
1392: lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1393: putc (eolchar, ofp);
1394: }
1395:
1396: /* Check if we need to read more lines into core. */
1397: if (++cur[ord[0]] == lines[ord[0]].used)
1398: if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1399: {
1400: findlines (&buffer[ord[0]], &lines[ord[0]]);
1401: cur[ord[0]] = 0;
1402: }
1403: else
1404: {
1405: /* We reached EOF on fps[ord[0]]. */
1406: for (i = 1; i < nfps; ++i)
1407: if (ord[i] > ord[0])
1408: --ord[i];
1409: --nfps;
1410: xfclose (fps[ord[0]]);
1411: free (buffer[ord[0]].buf);
1412: free ((char *) lines[ord[0]].lines);
1413: for (i = ord[0]; i < nfps; ++i)
1414: {
1415: fps[i] = fps[i + 1];
1416: buffer[i] = buffer[i + 1];
1417: lines[i] = lines[i + 1];
1418: cur[i] = cur[i + 1];
1419: }
1420: for (i = 0; i < nfps; ++i)
1421: ord[i] = ord[i + 1];
1422: continue;
1423: }
1424:
1425: /* The new line just read in may be larger than other lines
1426: already in core; push it back in the queue until we encounter
1427: a line larger than it. */
1428: for (i = 1; i < nfps; ++i)
1429: {
1430: t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1431: &lines[ord[i]].lines[cur[ord[i]]]);
1432: if (!t)
1433: t = ord[0] - ord[i];
1434: if (t < 0)
1435: break;
1436: }
1437: t = ord[0];
1438: for (j = 1; j < i; ++j)
1439: ord[j - 1] = ord[j];
1440: ord[i - 1] = t;
1441: }
1442:
1443: if (unique && savedflag)
1444: {
1445: write_bytes (saved.text, saved.length, ofp);
1446: putc (eolchar, ofp);
1447: free (saved.text);
1448: }
1449: }
1450:
1451: /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1452:
1453: static void
1454: sortlines (struct line *lines, int nlines, struct line *temp)
1455: {
1456: register struct line *lo, *hi, *t;
1457: register int nlo, nhi;
1458:
1459: if (nlines == 2)
1460: {
1461: if (compare (&lines[0], &lines[1]) > 0)
1462: *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1463: return;
1464: }
1465:
1466: nlo = nlines / 2;
1467: lo = lines;
1468: nhi = nlines - nlo;
1469: hi = lines + nlo;
1470:
1471: if (nlo > 1)
1472: sortlines (lo, nlo, temp);
1473:
1474: if (nhi > 1)
1475: sortlines (hi, nhi, temp);
1476:
1477: t = temp;
1478:
1479: while (nlo && nhi)
1480: if (compare (lo, hi) <= 0)
1481: *t++ = *lo++, --nlo;
1482: else
1483: *t++ = *hi++, --nhi;
1484: while (nlo--)
1485: *t++ = *lo++;
1486:
1487: for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1488: *lo++ = *t++;
1489: }
1490:
1491: /* Check that each of the NFILES FILES is ordered.
1492: Return a count of disordered files. */
1493:
1494: static int
1495: check (char **files, int nfiles)
1496: {
1497: int i, disorders = 0;
1498: FILE *fp;
1499:
1500: for (i = 0; i < nfiles; ++i)
1501: {
1502: fp = xfopen (files[i], "r");
1503: if (!checkfp (fp))
1504: {
1505: fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1506: ++disorders;
1507: }
1508: }
1509: return disorders;
1510: }
1511:
1512: /* Merge NFILES FILES onto OFP. */
1513:
1514: static void
1515: merge (char **files, int nfiles, FILE *ofp)
1516: {
1517: int i, j, t;
1518: char *temp;
1519: FILE *fps[NMERGE], *tfp;
1520:
1521: while (nfiles > NMERGE)
1522: {
1523: t = 0;
1524: for (i = 0; i < nfiles / NMERGE; ++i)
1525: {
1526: for (j = 0; j < NMERGE; ++j)
1527: fps[j] = xfopen (files[i * NMERGE + j], "r");
1528: tfp = xtmpfopen (temp = tempname ());
1529: mergefps (fps, NMERGE, tfp);
1530: xfclose (tfp);
1531: for (j = 0; j < NMERGE; ++j)
1532: zaptemp (files[i * NMERGE + j]);
1533: files[t++] = temp;
1534: }
1535: for (j = 0; j < nfiles % NMERGE; ++j)
1536: fps[j] = xfopen (files[i * NMERGE + j], "r");
1537: tfp = xtmpfopen (temp = tempname ());
1538: mergefps (fps, nfiles % NMERGE, tfp);
1539: xfclose (tfp);
1540: for (j = 0; j < nfiles % NMERGE; ++j)
1541: zaptemp (files[i * NMERGE + j]);
1542: files[t++] = temp;
1543: nfiles = t;
1544: }
1545:
1546: for (i = 0; i < nfiles; ++i)
1547: fps[i] = xfopen (files[i], "r");
1548: mergefps (fps, i, ofp);
1549: for (i = 0; i < nfiles; ++i)
1550: zaptemp (files[i]);
1551: }
1552:
1553: /* Sort NFILES FILES onto OFP. */
1554:
1555: static void
1556: sort (char **files, int nfiles, FILE *ofp)
1557: {
1558: struct buffer buf;
1559: struct lines lines;
1560: struct line *tmp;
1561: int i, ntmp;
1562: FILE *fp, *tfp;
1563: struct tempnode *node;
1564: int n_temp_files = 0;
1565: char **tempfiles;
1566:
1567: initbuf (&buf, sortalloc);
1568: initlines (&lines, sortalloc / linelength + 1,
1569: LINEALLOC / sizeof (struct line));
1570: ntmp = lines.alloc;
1571: tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1572:
1573: while (nfiles--)
1574: {
1575: fp = xfopen (*files++, "r");
1576: while (fillbuf (&buf, fp))
1577: {
1578: findlines (&buf, &lines);
1579: if (lines.used > ntmp)
1580: {
1581: while (lines.used > ntmp)
1582: ntmp *= 2;
1583: tmp = (struct line *)
1584: xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1585: }
1586: sortlines (lines.lines, lines.used, tmp);
1587: if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1588: tfp = ofp;
1589: else
1590: {
1591: ++n_temp_files;
1592: tfp = xtmpfopen (tempname ());
1593: }
1594: for (i = 0; i < lines.used; ++i)
1595: if (!unique || i == 0
1596: || compare (&lines.lines[i], &lines.lines[i - 1]))
1597: {
1598: write_bytes (lines.lines[i].text, lines.lines[i].length, tfp);
1599: putc (eolchar, tfp);
1600: }
1601: if (tfp != ofp)
1602: xfclose (tfp);
1603: }
1604: xfclose (fp);
1605: }
1606:
1607: free (buf.buf);
1608: free ((char *) lines.lines);
1609: free ((char *) tmp);
1610:
1611: if (n_temp_files)
1612: {
1613: tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1614: i = n_temp_files;
1615: for (node = temphead.next; i > 0; node = node->next)
1616: tempfiles[--i] = node->name;
1617: merge (tempfiles, n_temp_files, ofp);
1618: free ((char *) tempfiles);
1619: }
1620: }
1621:
1622: /* Insert key KEY at the end of the list (`keyhead'). */
1623:
1624: static void
1625: insertkey (struct keyfield *key)
1626: {
1627: struct keyfield *k = &keyhead;
1628:
1629: while (k->next)
1630: k = k->next;
1631: k->next = key;
1632: key->next = NULL;
1633: }
1634:
1635: static void
1636: badfieldspec (const char *s)
1637: {
1638: error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s);
1639: }
1640:
1641: /* Handle interrupts and hangups. */
1642:
1643: static void
1644: sighandler (int sig)
1645: {
1646: #ifdef SA_INTERRUPT
1647: struct sigaction sigact;
1648:
1649: sigact.sa_handler = SIG_DFL;
1650: sigemptyset (&sigact.sa_mask);
1651: sigact.sa_flags = 0;
1652: sigaction (sig, &sigact, NULL);
1653: #else /* !SA_INTERRUPT */
1654: signal (sig, SIG_DFL);
1655: #endif /* SA_INTERRUPT */
1656: cleanup ();
1657: kill (getpid (), sig);
1658: }
1659:
1660: /* Set the ordering options for KEY specified in S.
1661: Return the address of the first character in S that
1662: is not a valid ordering option.
1663: BLANKTYPE is the kind of blanks that 'b' should skip. */
1664:
1665: static char *
1666: set_ordering (register const char *s, struct keyfield *key,
1667: enum blanktype blanktype)
1668: {
1669: while (*s)
1670: {
1671: switch (*s)
1672: {
1673: case 'b':
1674: if (blanktype == bl_start || blanktype == bl_both)
1675: key->skipsblanks = 1;
1676: if (blanktype == bl_end || blanktype == bl_both)
1677: key->skipeblanks = 1;
1678: break;
1679: case 'd':
1680: key->ignore = nondictionary;
1681: break;
1682: case 'f':
1683: key->translate = fold_toupper;
1684: break;
1685: case 'g':
1686: key->general_numeric = 1;
1687: break;
1688: case 'i':
1689: key->ignore = nonprinting;
1690: break;
1691: case 'M':
1692: key->month = 1;
1693: break;
1694: case 'n':
1695: key->numeric = 1;
1696: break;
1697: case 'r':
1698: key->reverse = 1;
1699: break;
1700: default:
1701: return (char *) s;
1702: }
1703: ++s;
1704: }
1705: return (char *) s;
1706: }
1707:
1708: static void
1709: key_init (struct keyfield *key)
1710: {
1711: memset (key, 0, sizeof (*key));
1712: key->eword = -1;
1713: }
1714:
1715: int
1716: main (int argc, char **argv)
1717: {
1718: struct keyfield *key = NULL, gkey;
1719: char *s;
1720: int i, t, t2;
1721: int checkonly = 0, mergeonly = 0, nfiles = 0;
1722: char *minus = "-", *outfile = minus, **files, *tmp;
1723: FILE *ofp;
1724: #ifdef SA_INTERRUPT
1725: struct sigaction oldact, newact;
1726: #endif /* SA_INTERRUPT */
1727:
1728: program_name = argv[0];
1729: setlocale (LC_ALL, "");
1730: bindtextdomain (PACKAGE, LOCALEDIR);
1731: textdomain (PACKAGE);
1732:
1733: parse_long_options (argc, argv, "sort", GNU_PACKAGE, VERSION, usage);
1734:
1735: have_read_stdin = 0;
1736: inittables ();
1737:
1738: temp_file_prefix = getenv ("TMPDIR");
1739: if (temp_file_prefix == NULL)
1740: temp_file_prefix = DEFAULT_TMPDIR;
1741:
1742: #ifdef SA_INTERRUPT
1743: newact.sa_handler = sighandler;
1744: sigemptyset (&newact.sa_mask);
1745: newact.sa_flags = 0;
1746:
1747: sigaction (SIGINT, NULL, &oldact);
1748: if (oldact.sa_handler != SIG_IGN)
1749: sigaction (SIGINT, &newact, NULL);
1750: sigaction (SIGHUP, NULL, &oldact);
1751: if (oldact.sa_handler != SIG_IGN)
1752: sigaction (SIGHUP, &newact, NULL);
1753: sigaction (SIGPIPE, NULL, &oldact);
1754: if (oldact.sa_handler != SIG_IGN)
1755: sigaction (SIGPIPE, &newact, NULL);
1756: sigaction (SIGTERM, NULL, &oldact);
1757: if (oldact.sa_handler != SIG_IGN)
1758: sigaction (SIGTERM, &newact, NULL);
1759: #else /* !SA_INTERRUPT */
1760: if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1761: signal (SIGINT, sighandler);
1762: if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1763: signal (SIGHUP, sighandler);
1764: if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1765: signal (SIGPIPE, sighandler);
1766: if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1767: signal (SIGTERM, sighandler);
1768: #endif /* !SA_INTERRUPT */
1769:
1770: gkey.sword = gkey.eword = -1;
1771: gkey.ignore = NULL;
1772: gkey.translate = NULL;
1773: gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1774: gkey.skipsblanks = gkey.skipeblanks = 0;
1775:
1776: files = (char **) xmalloc (sizeof (char *) * argc);
1777:
1778: for (i = 1; i < argc; ++i)
1779: {
1780: if (argv[i][0] == '+')
1781: {
1782: if (key)
1783: insertkey (key);
1784: key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1785: key_init (key);
1786: s = argv[i] + 1;
1787: if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))))
1788: badfieldspec (argv[i]);
1789: for (t = 0; ISDIGIT (*s); ++s)
1790: t = 10 * t + *s - '0';
1791: t2 = 0;
1792: if (*s == '.')
1793: for (++s; ISDIGIT (*s); ++s)
1794: t2 = 10 * t2 + *s - '0';
1795: if (t2 || t)
1796: {
1797: key->sword = t;
1798: key->schar = t2;
1799: }
1800: else
1801: key->sword = -1;
1802: s = set_ordering (s, key, bl_start);
1803: if (*s)
1804: badfieldspec (argv[i]);
1805: }
1806: else if (argv[i][0] == '-' && argv[i][1])
1807: {
1808: s = argv[i] + 1;
1809: if (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1])))
1810: {
1811: if (!key)
1812: {
1813: /* Provoke with `sort -9'. */
1814: error (0, 0, _("when using the old-style +POS and -POS \
1815: key specifiers,\nthe +POS specifier must come first"));
1816: usage (SORT_FAILURE);
1817: }
1818: for (t = 0; ISDIGIT (*s); ++s)
1819: t = t * 10 + *s - '0';
1820: t2 = 0;
1821: if (*s == '.')
1822: for (++s; ISDIGIT (*s); ++s)
1823: t2 = t2 * 10 + *s - '0';
1824: key->eword = t;
1825: key->echar = t2;
1826: s = set_ordering (s, key, bl_end);
1827: if (*s)
1828: badfieldspec (argv[i]);
1829: insertkey (key);
1830: key = NULL;
1831: }
1832: else
1833: while (*s)
1834: {
1835: s = set_ordering (s, &gkey, bl_both);
1836: switch (*s)
1837: {
1838: case '\0':
1839: break;
1840: case 'c':
1841: checkonly = 1;
1842: break;
1843: case 'k':
1844: if (s[1])
1845: ++s;
1846: else
1847: {
1848: if (i == argc - 1)
1849: error (SORT_FAILURE, 0,
1850: _("option `-k' requires an argument"));
1851: else
1852: s = argv[++i];
1853: }
1854: if (key)
1855: insertkey (key);
1856: key = (struct keyfield *)
1857: xmalloc (sizeof (struct keyfield));
1858: key_init (key);
1859: /* Get POS1. */
1860: if (!ISDIGIT (*s))
1861: badfieldspec (argv[i]);
1862: for (t = 0; ISDIGIT (*s); ++s)
1863: t = 10 * t + *s - '0';
1864: if (t == 0)
1865: {
1866: /* Provoke with `sort -k0' */
1867: error (0, 0, _("the starting field number argument \
1868: to the `-k' option must be positive"));
1869: badfieldspec (argv[i]);
1870: }
1871: --t;
1872: t2 = 0;
1873: if (*s == '.')
1874: {
1875: if (!ISDIGIT (s[1]))
1876: {
1877: /* Provoke with `sort -k1.' */
1878: error (0, 0, _("starting field spec has `.' but \
1879: lacks following character offset"));
1880: badfieldspec (argv[i]);
1881: }
1882: for (++s; ISDIGIT (*s); ++s)
1883: t2 = 10 * t2 + *s - '0';
1884: if (t2 == 0)
1885: {
1886: /* Provoke with `sort -k1.0' */
1887: error (0, 0, _("starting field character offset \
1888: argument to the `-k' option\nmust be positive"));
1889: badfieldspec (argv[i]);
1890: }
1891: --t2;
1892: }
1893: if (t2 || t)
1894: {
1895: key->sword = t;
1896: key->schar = t2;
1897: }
1898: else
1899: key->sword = -1;
1900: s = set_ordering (s, key, bl_start);
1901: if (*s == 0)
1902: {
1903: key->eword = -1;
1904: key->echar = 0;
1905: }
1906: else if (*s != ',')
1907: badfieldspec (argv[i]);
1908: else if (*s == ',')
1909: {
1910: /* Skip over comma. */
1911: ++s;
1912: if (*s == 0)
1913: {
1914: /* Provoke with `sort -k1,' */
1915: error (0, 0, _("field specification has `,' but \
1916: lacks following field spec"));
1917: badfieldspec (argv[i]);
1918: }
1919: /* Get POS2. */
1920: for (t = 0; ISDIGIT (*s); ++s)
1921: t = t * 10 + *s - '0';
1922: if (t == 0)
1923: {
1924: /* Provoke with `sort -k1,0' */
1925: error (0, 0, _("ending field number argument \
1926: to the `-k' option must be positive"));
1927: badfieldspec (argv[i]);
1928: }
1929: --t;
1930: t2 = 0;
1931: if (*s == '.')
1932: {
1933: if (!ISDIGIT (s[1]))
1934: {
1935: /* Provoke with `sort -k1,1.' */
1936: error (0, 0, _("ending field spec has `.' \
1937: but lacks following character offset"));
1938: badfieldspec (argv[i]);
1939: }
1940: for (++s; ISDIGIT (*s); ++s)
1941: t2 = t2 * 10 + *s - '0';
1942: }
1943: else
1944: {
1945: /* `-k 2,3' is equivalent to `+1 -3'. */
1946: ++t;
1947: }
1948: key->eword = t;
1949: key->echar = t2;
1950: s = set_ordering (s, key, bl_end);
1951: if (*s)
1952: badfieldspec (argv[i]);
1953: }
1954: insertkey (key);
1955: key = NULL;
1956: goto outer;
1957: case 'm':
1958: mergeonly = 1;
1959: break;
1960: case 'o':
1961: if (s[1])
1962: outfile = s + 1;
1963: else
1964: {
1965: if (i == argc - 1)
1966: error (SORT_FAILURE, 0,
1967: _("option `-o' requires an argument"));
1968: else
1969: outfile = argv[++i];
1970: }
1971: goto outer;
1972: case 's':
1973: stable = 1;
1974: break;
1975: case 't':
1976: if (s[1])
1977: tab = *++s;
1978: else if (i < argc - 1)
1979: {
1980: tab = *argv[++i];
1981: goto outer;
1982: }
1983: else
1984: error (SORT_FAILURE, 0,
1985: _("option `-t' requires an argument"));
1986: break;
1987: case 'T':
1988: if (s[1])
1989: temp_file_prefix = ++s;
1990: else
1991: {
1992: if (i < argc - 1)
1993: temp_file_prefix = argv[++i];
1994: else
1995: error (SORT_FAILURE, 0,
1996: _("option `-T' requires an argument"));
1997: }
1998: goto outer;
1999: /* break; */
2000: case 'u':
2001: unique = 1;
2002: break;
2003: case 'z':
2004: eolchar = 0;
2005: break;
2006: case 'y':
2007: /* Accept and ignore e.g. -y0 for compatibility with
2008: Solaris 2. */
2009: goto outer;
2010: default:
2011: fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2012: argv[0], *s);
2013: usage (SORT_FAILURE);
2014: }
2015: if (*s)
2016: ++s;
2017: }
2018: }
2019: else /* Not an option. */
2020: {
2021: files[nfiles++] = argv[i];
2022: }
2023: outer:;
2024: }
2025:
2026: if (key)
2027: insertkey (key);
2028:
2029: /* Inheritance of global options to individual keys. */
2030: for (key = keyhead.next; key; key = key->next)
2031: if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2032: && !key->skipeblanks && !key->month && !key->numeric
2033: && !key->general_numeric)
2034: {
2035: key->ignore = gkey.ignore;
2036: key->translate = gkey.translate;
2037: key->skipsblanks = gkey.skipsblanks;
2038: key->skipeblanks = gkey.skipeblanks;
2039: key->month = gkey.month;
2040: key->numeric = gkey.numeric;
2041: key->general_numeric = gkey.general_numeric;
2042: key->reverse = gkey.reverse;
2043: }
2044:
2045: if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2046: || gkey.skipeblanks || gkey.month || gkey.numeric
2047: || gkey.general_numeric))
2048: insertkey (&gkey);
2049: reverse = gkey.reverse;
2050:
2051: if (nfiles == 0)
2052: {
2053: nfiles = 1;
2054: files = −
2055: }
2056:
2057: if (checkonly)
2058: {
2059: /* POSIX requires that sort return 1 IFF invoked with -c and the
2060: input is not properly sorted. */
2061: exit (check (files, nfiles) == 0 ? 0 : 1);
2062: }
2063:
2064: if (strcmp (outfile, "-"))
2065: {
2066: struct stat outstat;
2067: if (stat (outfile, &outstat) == 0)
2068: {
2069: /* The following code prevents a race condition when
2070: people use the brain dead shell programming idiom:
2071: cat file | sort -o file
2072: This feature is provided for historical compatibility,
2073: but we strongly discourage ever relying on this in
2074: new shell programs. */
2075:
2076: /* Temporarily copy each input file that might be another name
2077: for the output file. When in doubt (e.g. a pipe), copy. */
2078: for (i = 0; i < nfiles; ++i)
2079: {
2080: char buf[8192];
2081: FILE *fp;
2082: int cc;
2083:
2084: if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2085: {
2086: struct stat instat;
2087: if ((strcmp (files[i], "-")
2088: ? stat (files[i], &instat)
2089: : fstat (STDIN_FILENO, &instat)) != 0)
2090: {
2091: error (0, errno, "%s", files[i]);
2092: cleanup ();
2093: exit (SORT_FAILURE);
2094: }
2095: if (S_ISREG (instat.st_mode)
2096: && (instat.st_ino != outstat.st_ino
2097: || instat.st_dev != outstat.st_dev))
2098: {
2099: /* We know the files are distinct. */
2100: continue;
2101: }
2102: }
2103:
2104: fp = xfopen (files[i], "r");
2105: tmp = tempname ();
2106: ofp = xtmpfopen (tmp);
2107: while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2108: write_bytes (buf, cc, ofp);
2109: if (ferror (fp))
2110: {
2111: error (0, errno, "%s", files[i]);
2112: cleanup ();
2113: exit (SORT_FAILURE);
2114: }
2115: xfclose (ofp);
2116: xfclose (fp);
2117: files[i] = tmp;
2118: }
2119: }
2120: ofp = xfopen (outfile, "w");
2121: }
2122: else
2123: ofp = stdout;
2124:
2125: if (mergeonly)
2126: merge (files, nfiles, ofp);
2127: else
2128: sort (files, nfiles, ofp);
2129: cleanup ();
2130:
2131: /* If we wait for the implicit flush on exit, and the parent process
2132: has closed stdout (e.g., exec >&- in a shell), then the output file
2133: winds up empty. I don't understand why. This is under SunOS,
2134: Solaris, Ultrix, and Irix. This premature fflush makes the output
2135: reappear. --karl@cs.umb.edu */
2136: if (fflush (ofp) < 0)
2137: error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2138:
2139: if (have_read_stdin && fclose (stdin) == EOF)
2140: error (SORT_FAILURE, errno, outfile);
2141: if (ferror (stdout) || fclose (stdout) == EOF)
2142: error (SORT_FAILURE, errno, _("%s: write error"), outfile);
2143:
2144: exit (EXIT_SUCCESS);
2145: }