Full Source

Full Source: 1: /* sort - sort lines of text (with all kinds of options). 2: Copyright (C) 88, 91, 92, 93, 94, 95, 1996 Free Software Foundation 3: 4: This program is free software; you can redistribute it and/or modify 5: it under the terms of the GNU General Public License as published by 6: the Free Software Foundation; either version 2, or (at your option) 7: any later version. 8: 9: This program is distributed in the hope that it will be useful, 10: but WITHOUT ANY WARRANTY; without even the implied warranty of 11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12: GNU General Public License for more details. 13: 14: You should have received a copy of the GNU General Public License 15: along with this program; if not, write to the Free Software 16: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17: 18: Written December 1988 by Mike Haertel. 19: The author may be reached (Email) at the address mike@gnu.ai.mit.edu, 20: or (US mail) as Mike Haertel c/o Free Software Foundation. */ 21: 22: #include <config.h> 23: 24: /* Get isblank from GNU libc. */ 25: #define _GNU_SOURCE 26: 27: #include <sys/types.h> 28: #include <signal.h> 29: #include <stdio.h> 30: #ifndef ENABLE_ASSERTIONS 31: # define NDEBUG 1 32: #endif 33: #include <assert.h> 34: #include "system.h" 35: #include "long-options.h" 36: #include "error.h" 37: #include "xstrtod.h" 38: 39: #ifdef HAVE_LIMITS_H 40: #include <limits.h> 41: #else 42: #ifndef UCHAR_MAX 43: #define UCHAR_MAX 255 44: #endif 45: #endif 46: #ifndef STDC_HEADERS 47: char *malloc (); 48: char *realloc (); 49: void free (); 50: #endif 51: 52: /* Undefine, to avoid warning about redefinition on some systems. */ 53: #undef min 54: #define min(a, b) ((a) < (b) ? (a) : (b)) 55: 56: #define UCHAR_LIM (UCHAR_MAX + 1) 57: #define UCHAR(c) ((unsigned char) (c)) 58: 59: #ifndef DEFAULT_TMPDIR 60: #define DEFAULT_TMPDIR "/tmp" 61: #endif 62: 63: /* Use this as exit status in case of error, not EXIT_FAILURE. This 64: is necessary because EXIT_FAILURE is usually 1 and POSIX requires 65: that sort exit with status 1 IFF invoked with -c and the input is 66: not properly sorted. Any other irregular exit must exit with a 67: status code greater than 1. */ 68: #define SORT_FAILURE 2 69: 70: /* The kind of blanks for '-b' to skip in various options. */ 71: enum blanktype { bl_start, bl_end, bl_both }; 72: 73: /* The character marking end of line. Default to \n. */ 74: int eolchar = '\n'; 75: 76: /* Lines are held in core as counted strings. */ 77: struct line 78: { 79: char *text; /* Text of the line. */ 80: int length; /* Length not including final newline. */ 81: char *keybeg; /* Start of first key. */ 82: char *keylim; /* Limit of first key. */ 83: }; 84: 85: /* Arrays of lines. */ 86: struct lines 87: { 88: struct line *lines; /* Dynamically allocated array of lines. */ 89: int used; /* Number of slots used. */ 90: int alloc; /* Number of slots allocated. */ 91: int limit; /* Max number of slots to allocate. */ 92: }; 93: 94: /* Input buffers. */ 95: struct buffer 96: { 97: char *buf; /* Dynamically allocated buffer. */ 98: int used; /* Number of bytes used. */ 99: int alloc; /* Number of bytes allocated. */ 100: int left; /* Number of bytes left after line parsing. */ 101: }; 102: 103: struct keyfield 104: { 105: int sword; /* Zero-origin 'word' to start at. */ 106: int schar; /* Additional characters to skip. */ 107: int skipsblanks; /* Skip leading white space at start. */ 108: int eword; /* Zero-origin first word after field. */ 109: int echar; /* Additional characters in field. */ 110: int skipeblanks; /* Skip trailing white space at finish. */ 111: int *ignore; /* Boolean array of characters to ignore. */ 112: char *translate; /* Translation applied to characters. */ 113: int numeric; /* Flag for numeric comparison. Handle 114: strings of digits with optional decimal 115: point, but no exponential notation. */ 116: int general_numeric; /* Flag for general, numeric comparison. 117: Handle numbers in exponential notation. */ 118: int month; /* Flag for comparison by month name. */ 119: int reverse; /* Reverse the sense of comparison. */ 120: struct keyfield *next; /* Next keyfield to try. */ 121: }; 122: 123: struct month 124: { 125: char *name; 126: int val; 127: }; 128: 129: /* The name this program was run with. */ 130: char *program_name; 131: 132: /* Table of white space. */ 133: static int blanks[UCHAR_LIM]; 134: 135: /* Table of non-printing characters. */ 136: static int nonprinting[UCHAR_LIM]; 137: 138: /* Table of non-dictionary characters (not letters, digits, or blanks). */ 139: static int nondictionary[UCHAR_LIM]; 140: 141: /* Translation table folding lower case to upper. */ 142: static char fold_toupper[UCHAR_LIM]; 143: 144: /* Table mapping 3-letter month names to integers. 145: Alphabetic order allows binary search. */ 146: static struct month const monthtab[] = 147: { 148: {"APR", 4}, 149: {"AUG", 8}, 150: {"DEC", 12}, 151: {"FEB", 2}, 152: {"JAN", 1}, 153: {"JUL", 7}, 154: {"JUN", 6}, 155: {"MAR", 3}, 156: {"MAY", 5}, 157: {"NOV", 11}, 158: {"OCT", 10}, 159: {"SEP", 9} 160: }; 161: 162: /* During the merge phase, the number of files to merge at once. */ 163: #define NMERGE 16 164: 165: /* Initial buffer size for in core sorting. Will not grow unless a 166: line longer than this is seen. */ 167: static int sortalloc = 512 * 1024; 168: 169: /* Initial buffer size for in core merge buffers. Bear in mind that 170: up to NMERGE * mergealloc bytes may be allocated for merge buffers. */ 171: static int mergealloc = 16 * 1024; 172: 173: /* Guess of average line length. */ 174: static int linelength = 30; 175: 176: /* Maximum number of elements for the array(s) of struct line's, in bytes. */ 177: #define LINEALLOC (256 * 1024) 178: 179: /* Prefix for temporary file names. */ 180: static char *temp_file_prefix; 181: 182: /* Flag to reverse the order of all comparisons. */ 183: static int reverse; 184: 185: /* Flag for stable sort. This turns off the last ditch bytewise 186: comparison of lines, and instead leaves lines in the same order 187: they were read if all keys compare equal. */ 188: static int stable; 189: 190: /* Tab character separating fields. If NUL, then fields are separated 191: by the empty string between a non-whitespace character and a whitespace 192: character. */ 193: static char tab; 194: 195: /* Flag to remove consecutive duplicate lines from the output. 196: Only the last of a sequence of equal lines will be output. */ 197: static int unique; 198: 199: /* Nonzero if any of the input files are the standard input. */ 200: static int have_read_stdin; 201: 202: /* Lists of key field comparisons to be tried. */ 203: static struct keyfield keyhead; 204: 205: static void 206: usage (int status) 207: { 208: if (status != 0) 209: fprintf (stderr, _("Try `%s --help' for more information.\n"), 210: program_name); 211: else 212: { 213: printf (_("\ 214: Usage: %s [OPTION]... [FILE]...\n\ 215: "), 216: program_name); 217: printf (_("\ 218: Write sorted concatenation of all FILE(s) to standard output.\n\ 219: \n\ 220: +POS1 [-POS2] start a key at POS1, end it before POS2\n\ 221: -b ignore leading blanks in sort fields or keys\n\ 222: -c check if given files already sorted, do not sort\n\ 223: -d consider only [a-zA-Z0-9 ] characters in keys\n\ 224: -f fold lower case to upper case characters in keys\n\ 225: -g compare according to general numerical value, imply -b\n\ 226: -i consider only [\\040-\\0176] characters in keys\n\ 227: -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\ 228: -m merge already sorted files, do not sort\n\ 229: -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\ 230: -n compare according to string numerical value, imply -b\n\ 231: -o FILE write result on FILE instead of standard output\n\ 232: -r reverse the result of comparisons\n\ 233: -s stabilize sort by disabling last resort comparison\n\ 234: -t SEP use SEParator instead of non- to whitespace transition\n\ 235: -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\ 236: -u with -c, check for strict ordering;\n\ 237: with -m, only output the first of an equal sequence\n\ 238: -z end lines with 0 byte, not newline, for find -print0\n\ 239: --help display this help and exit\n\ 240: --version output version information and exit\n\ 241: \n\ 242: POS is F[.C][OPTS], where F is the field number and C the character\n\ 243: position in the field, both counted from zero. OPTS is made up of one\n\ 244: or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\ 245: for that key. If no key given, use the entire line as key. With no\n\ 246: FILE, or when FILE is -, read standard input.\n\ 247: ") 248: , DEFAULT_TMPDIR); 249: puts (_("\nReport bugs to textutils-bugs@gnu.ai.mit.edu")); 250: } 251: /* Don't use EXIT_FAILURE here in case it is defined to be 1. 252: POSIX requires that sort return 1 IFF invoked with -c and 253: the input is not properly sorted. */ 254: assert (status == 0 || status == SORT_FAILURE); 255: exit (status); 256: } 257: 258: /* The list of temporary files. */ 259: static struct tempnode 260: { 261: char *name; 262: struct tempnode *next; 263: } temphead; 264: 265: /* Clean up any remaining temporary files. */ 266: 267: static void 268: cleanup (void) 269: { 270: struct tempnode *node; 271: 272: for (node = temphead.next; node; node = node->next) 273: unlink (node->name); 274: } 275: 276: /* Allocate N bytes of memory dynamically, with error checking. */ 277: 278: static char * 279: xmalloc (unsigned int n) 280: { 281: char *p; 282: 283: p = malloc (n); 284: if (p == 0) 285: { 286: error (0, 0, _("virtual memory exhausted")); 287: cleanup (); 288: exit (SORT_FAILURE); 289: } 290: return p; 291: } 292: 293: /* Change the size of an allocated block of memory P to N bytes, 294: with error checking. 295: If P is NULL, run xmalloc. 296: If N is 0, run free and return NULL. */ 297: 298: static char * 299: xrealloc (char *p, unsigned int n) 300: { 301: if (p == 0) 302: return xmalloc (n); 303: if (n == 0) 304: { 305: free (p); 306: return 0; 307: } 308: p = realloc (p, n); 309: if (p == 0) 310: { 311: error (0, 0, _("virtual memory exhausted")); 312: cleanup (); 313: exit (SORT_FAILURE); 314: } 315: return p; 316: } 317: 318: static FILE * 319: xtmpfopen (const char *file) 320: { 321: FILE *fp; 322: int fd; 323: 324: fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600); 325: if (fd < 0 || (fp = fdopen (fd, "w")) == NULL) 326: { 327: error (0, errno, "%s", file); 328: cleanup (); 329: exit (SORT_FAILURE); 330: } 331: 332: return fp; 333: } 334: 335: static FILE * 336: xfopen (const char *file, const char *how) 337: { 338: FILE *fp; 339: 340: if (strcmp (file, "-") == 0) 341: { 342: fp = stdin; 343: } 344: else 345: { 346: if ((fp = fopen (file, how)) == NULL) 347: { 348: error (0, errno, "%s", file); 349: cleanup (); 350: exit (SORT_FAILURE); 351: } 352: } 353: 354: if (fp == stdin) 355: have_read_stdin = 1; 356: return fp; 357: } 358: 359: static void 360: xfclose (FILE *fp) 361: { 362: if (fp == stdin) 363: { 364: /* Allow reading stdin from tty more than once. */ 365: if (feof (fp)) 366: clearerr (fp); 367: } 368: else if (fp == stdout) 369: { 370: if (fflush (fp) != 0) 371: { 372: error (0, errno, _("flushing file")); 373: cleanup (); 374: exit (SORT_FAILURE); 375: } 376: } 377: else 378: { 379: if (fclose (fp) != 0) 380: { 381: error (0, errno, _("error closing file")); 382: cleanup (); 383: exit (SORT_FAILURE); 384: } 385: } 386: } 387: 388: static void 389: write_bytes (const char *buf, size_t n_bytes, FILE *fp) 390: { 391: if (fwrite (buf, 1, n_bytes, fp) != n_bytes) 392: { 393: error (0, errno, _("write error")); 394: cleanup (); 395: exit (SORT_FAILURE); 396: } 397: } 398: 399: /* Return a name for a temporary file. */ 400: 401: static char * 402: tempname (void) 403: { 404: static unsigned int seq; 405: int len = strlen (temp_file_prefix); 406: char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1); 407: struct tempnode *node; 408: 409: node = (struct tempnode *) xmalloc (sizeof (struct tempnode)); 410: sprintf (name, 411: "%s%ssort%5.5d%5.5d", 412: temp_file_prefix, 413: (len && temp_file_prefix[len - 1] != '/') ? "/" : "", 414: (unsigned int) getpid () & 0xffff, seq); 415: 416: /* Make sure that SEQ's value fits in 5 digits. */ 417: ++seq; 418: if (seq >= 100000) 419: seq = 0; 420: 421: node->name = name; 422: node->next = temphead.next; 423: temphead.next = node; 424: return name; 425: } 426: 427: /* Search through the list of temporary files for NAME; 428: remove it if it is found on the list. */ 429: 430: static void 431: zaptemp (char *name) 432: { 433: struct tempnode *node, *temp; 434: 435: for (node = &temphead; node->next; node = node->next) 436: if (!strcmp (name, node->next->name)) 437: break; 438: if (node->next) 439: { 440: temp = node->next; 441: unlink (temp->name); 442: free (temp->name); 443: node->next = temp->next; 444: free ((char *) temp); 445: } 446: } 447: 448: /* Initialize the character class tables. */ 449: 450: static void 451: inittables (void) 452: { 453: int i; 454: 455: for (i = 0; i < UCHAR_LIM; ++i) 456: { 457: if (ISBLANK (i)) 458: blanks[i] = 1; 459: if (!ISPRINT (i)) 460: nonprinting[i] = 1; 461: if (!ISALNUM (i) && !ISBLANK (i)) 462: nondictionary[i] = 1; 463: if (ISLOWER (i)) 464: fold_toupper[i] = toupper (i); 465: else 466: fold_toupper[i] = i; 467: } 468: } 469: 470: /* Initialize BUF, allocating ALLOC bytes initially. */ 471: 472: static void 473: initbuf (struct buffer *buf, int alloc) 474: { 475: buf->alloc = alloc; 476: buf->buf = xmalloc (buf->alloc); 477: buf->used = buf->left = 0; 478: } 479: 480: /* Fill BUF reading from FP, moving buf->left bytes from the end 481: of buf->buf to the beginning first. If EOF is reached and the 482: file wasn't terminated by a newline, supply one. Return a count 483: of bytes buffered. */ 484: 485: static int 486: fillbuf (struct buffer *buf, FILE *fp) 487: { 488: int cc; 489: 490: memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); 491: buf->used = buf->left; 492: 493: while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used))) 494: { 495: if (buf->used == buf->alloc) 496: { 497: buf->alloc *= 2; 498: buf->buf = xrealloc (buf->buf, buf->alloc); 499: } 500: cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp); 501: if (ferror (fp)) 502: { 503: error (0, errno, _("read error")); 504: cleanup (); 505: exit (SORT_FAILURE); 506: } 507: buf->used += cc; 508: } 509: 510: if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar) 511: { 512: if (buf->used == buf->alloc) 513: { 514: buf->alloc *= 2; 515: buf->buf = xrealloc (buf->buf, buf->alloc); 516: } 517: buf->buf[buf->used++] = eolchar; 518: } 519: 520: return buf->used; 521: } 522: 523: /* Initialize LINES, allocating space for ALLOC lines initially. 524: LIMIT is the maximum possible number of lines to allocate space 525: for, ever. */ 526: 527: static void 528: initlines (struct lines *lines, int alloc, int limit) 529: { 530: lines->alloc = alloc; 531: lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line)); 532: lines->used = 0; 533: lines->limit = limit; 534: } 535: 536: /* Return a pointer to the first character of the field specified 537: by KEY in LINE. */ 538: 539: static char * 540: begfield (const struct line *line, const struct keyfield *key) 541: { 542: register char *ptr = line->text, *lim = ptr + line->length; 543: register int sword = key->sword, schar = key->schar; 544: 545: if (tab) 546: while (ptr < lim && sword--) 547: { 548: while (ptr < lim && *ptr != tab) 549: ++ptr; 550: if (ptr < lim) 551: ++ptr; 552: } 553: else 554: while (ptr < lim && sword--) 555: { 556: while (ptr < lim && blanks[UCHAR (*ptr)]) 557: ++ptr; 558: while (ptr < lim && !blanks[UCHAR (*ptr)]) 559: ++ptr; 560: } 561: 562: if (key->skipsblanks) 563: while (ptr < lim && blanks[UCHAR (*ptr)]) 564: ++ptr; 565: 566: if (ptr + schar <= lim) 567: ptr += schar; 568: else 569: ptr = lim; 570: 571: return ptr; 572: } 573: 574: /* Return the limit of (a pointer to the first character after) the field 575: in LINE specified by KEY. */ 576: 577: static char * 578: limfield (const struct line *line, const struct keyfield *key) 579: { 580: register char *ptr = line->text, *lim = ptr + line->length; 581: register int eword = key->eword, echar = key->echar; 582: 583: /* Note: from the POSIX spec: 584: The leading field separator itself is included in 585: a field when -t is not used. FIXME: move this comment up... */ 586: 587: /* Move PTR past EWORD fields or to one past the last byte on LINE, 588: whichever comes first. If there are more than EWORD fields, leave 589: PTR pointing at the beginning of the field having zero-based index, 590: EWORD. If a delimiter character was specified (via -t), then that 591: `beginning' is the first character following the delimiting TAB. 592: Otherwise, leave PTR pointing at the first `blank' character after 593: the preceding field. */ 594: if (tab) 595: while (ptr < lim && eword--) 596: { 597: while (ptr < lim && *ptr != tab) 598: ++ptr; 599: if (ptr < lim && (eword || echar > 0)) 600: ++ptr; 601: } 602: else 603: while (ptr < lim && eword--) 604: { 605: while (ptr < lim && blanks[UCHAR (*ptr)]) 606: ++ptr; 607: while (ptr < lim && !blanks[UCHAR (*ptr)]) 608: ++ptr; 609: } 610: 611: #ifdef POSIX_UNSPECIFIED 612: /* The following block of code makes GNU sort incompatible with 613: standard Unix sort, so it's ifdef'd out for now. 614: The POSIX spec isn't clear on how to interpret this. 615: FIXME: request clarification. 616: 617: From: kwzh@gnu.ai.mit.edu (Karl Heuer) 618: Date: Thu, 30 May 96 12:20:41 -0400 619: 620: [...]I believe I've found another bug in `sort'. 621: 622: $ cat /tmp/sort.in 623: a b c 2 d 624: pq rs 1 t 625: $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in 626: a b c 2 d 627: pq rs 1 t 628: $ /bin/sort +0.6 -0.7 </tmp/sort.in 629: pq rs 1 t 630: a b c 2 d 631: 632: Unix sort produced the answer I expected: sort on the single character 633: in column 6. GNU sort produced different results, because it disagrees 634: on the interpretation of the key-end spec "-M.N". Unix sort reads this 635: as "skip M fields, then N characters"; but GNU sort wants it to mean 636: "skip M fields, then either N characters or the rest of the current 637: field, whichever comes first". This extra clause applies only to 638: key-ends, not key-starts. 639: */ 640: 641: /* Make LIM point to the end of (one byte past) the current field. */ 642: if (tab) 643: { 644: char *newlim; 645: newlim = memchr (ptr, tab, lim - ptr); 646: if (newlim) 647: lim = newlim; 648: } 649: else 650: { 651: char *newlim; 652: newlim = ptr; 653: while (newlim < lim && blanks[UCHAR (*newlim)]) 654: ++newlim; 655: while (newlim < lim && !blanks[UCHAR (*newlim)]) 656: ++newlim; 657: lim = newlim; 658: } 659: #endif 660: 661: /* If we're skipping leading blanks, don't start counting characters 662: until after skipping past any leading blanks. */ 663: if (key->skipsblanks) 664: while (ptr < lim && blanks[UCHAR (*ptr)]) 665: ++ptr; 666: 667: /* Advance PTR by ECHAR (if possible), but no further than LIM. */ 668: if (ptr + echar <= lim) 669: ptr += echar; 670: else 671: ptr = lim; 672: 673: return ptr; 674: } 675: 676: /* FIXME */ 677: 678: void 679: trim_trailing_blanks (const char *a_start, char **a_end) 680: { 681: while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))]) 682: --(*a_end); 683: } 684: 685: /* Find the lines in BUF, storing pointers and lengths in LINES. 686: Also replace newlines in BUF with NULs. */ 687: 688: static void 689: findlines (struct buffer *buf, struct lines *lines) 690: { 691: register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr; 692: struct keyfield *key = keyhead.next; 693: 694: lines->used = 0; 695: 696: while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg)) 697: && lines->used < lines->limit) 698: { 699: /* There are various places in the code that rely on a NUL 700: being at the end of in-core lines; NULs inside the lines 701: will not cause trouble, though. */ 702: *ptr = '\0'; 703: 704: if (lines->used == lines->alloc) 705: { 706: lines->alloc *= 2; 707: lines->lines = (struct line *) 708: xrealloc ((char *) lines->lines, 709: lines->alloc * sizeof (struct line)); 710: } 711: 712: lines->lines[lines->used].text = beg; 713: lines->lines[lines->used].length = ptr - beg; 714: 715: /* Precompute the position of the first key for efficiency. */ 716: if (key) 717: { 718: if (key->eword >= 0) 719: lines->lines[lines->used].keylim = 720: limfield (&lines->lines[lines->used], key); 721: else 722: lines->lines[lines->used].keylim = ptr; 723: 724: if (key->sword >= 0) 725: lines->lines[lines->used].keybeg = 726: begfield (&lines->lines[lines->used], key); 727: else 728: { 729: if (key->skipsblanks) 730: while (blanks[UCHAR (*beg)]) 731: ++beg; 732: lines->lines[lines->used].keybeg = beg; 733: } 734: if (key->skipeblanks) 735: { 736: trim_trailing_blanks (lines->lines[lines->used].keybeg, 737: &lines->lines[lines->used].keylim); 738: } 739: } 740: else 741: { 742: lines->lines[lines->used].keybeg = 0; 743: lines->lines[lines->used].keylim = 0; 744: } 745: 746: ++lines->used; 747: beg = ptr + 1; 748: } 749: 750: buf->left = lim - beg; 751: } 752: 753: /* Compare strings A and B containing decimal fractions < 1. Each string 754: should begin with a decimal point followed immediately by the digits 755: of the fraction. Strings not of this form are considered to be zero. */ 756: 757: static int 758: fraccompare (register const char *a, register const char *b) 759: { 760: register int tmpa = *a; 761: register int tmpb = *b; 762: 763: if (tmpa == '.' && tmpb == '.') 764: { 765: do 766: tmpa = *++a, tmpb = *++b; 767: while (tmpa == tmpb && ISDIGIT (tmpa)); 768: if (ISDIGIT (tmpa) && ISDIGIT (tmpb)) 769: return tmpa - tmpb; 770: if (ISDIGIT (tmpa)) 771: { 772: while (tmpa == '0') 773: tmpa = *++a; 774: if (ISDIGIT (tmpa)) 775: return 1; 776: return 0; 777: } 778: if (ISDIGIT (tmpb)) 779: { 780: while (tmpb == '0') 781: tmpb = *++b; 782: if (ISDIGIT (tmpb)) 783: return -1; 784: return 0; 785: } 786: return 0; 787: } 788: else if (tmpa == '.') 789: { 790: do 791: tmpa = *++a; 792: while (tmpa == '0'); 793: if (ISDIGIT (tmpa)) 794: return 1; 795: return 0; 796: } 797: else if (tmpb == '.') 798: { 799: do 800: tmpb = *++b; 801: while (tmpb == '0'); 802: if (ISDIGIT (tmpb)) 803: return -1; 804: return 0; 805: } 806: return 0; 807: } 808: 809: /* Compare strings A and B as numbers without explicitly converting them to 810: machine numbers. Comparatively slow for short strings, but asymptotically 811: hideously fast. */ 812: 813: static int 814: numcompare (register const char *a, register const char *b) 815: { 816: register int tmpa, tmpb, loga, logb, tmp; 817: 818: tmpa = UCHAR (*a); 819: tmpb = UCHAR (*b); 820: 821: while (blanks[tmpa]) 822: tmpa = UCHAR (*++a); 823: while (blanks[tmpb]) 824: tmpb = UCHAR (*++b); 825: 826: if (tmpa == '-') 827: { 828: do 829: tmpa = *++a; 830: while (tmpa == '0'); 831: if (tmpb != '-') 832: { 833: if (tmpa == '.') 834: do 835: tmpa = *++a; 836: while (tmpa == '0'); 837: if (ISDIGIT (tmpa)) 838: return -1; 839: while (tmpb == '0') 840: tmpb = *++b; 841: if (tmpb == '.') 842: do 843: tmpb = *++b; 844: while (tmpb == '0'); 845: if (ISDIGIT (tmpb)) 846: return -1; 847: return 0; 848: } 849: do 850: tmpb = *++b; 851: while (tmpb == '0'); 852: 853: while (tmpa == tmpb && ISDIGIT (tmpa)) 854: tmpa = *++a, tmpb = *++b; 855: 856: if ((tmpa == '.' && !ISDIGIT (tmpb)) 857: || (tmpb == '.' && !ISDIGIT (tmpa))) 858: return -fraccompare (a, b); 859: 860: if (ISDIGIT (tmpa)) 861: for (loga = 1; ISDIGIT (*++a); ++loga) 862: ; 863: else 864: loga = 0; 865: 866: if (ISDIGIT (tmpb)) 867: for (logb = 1; ISDIGIT (*++b); ++logb) 868: ; 869: else 870: logb = 0; 871: 872: if ((tmp = logb - loga) != 0) 873: return tmp; 874: 875: if (!loga) 876: return 0; 877: 878: return tmpb - tmpa; 879: } 880: else if (tmpb == '-') 881: { 882: do 883: tmpb = *++b; 884: while (tmpb == '0'); 885: if (tmpb == '.') 886: do 887: tmpb = *++b; 888: while (tmpb == '0'); 889: if (ISDIGIT (tmpb)) 890: return 1; 891: while (tmpa == '0') 892: tmpa = *++a; 893: if (tmpa == '.') 894: do 895: tmpa = *++a; 896: while (tmpa == '0'); 897: if (ISDIGIT (tmpa)) 898: return 1; 899: return 0; 900: } 901: else 902: { 903: while (tmpa == '0') 904: tmpa = *++a; 905: while (tmpb == '0') 906: tmpb = *++b; 907: 908: while (tmpa == tmpb && ISDIGIT (tmpa)) 909: tmpa = *++a, tmpb = *++b; 910: 911: if ((tmpa == '.' && !ISDIGIT (tmpb)) 912: || (tmpb == '.' && !ISDIGIT (tmpa))) 913: return fraccompare (a, b); 914: 915: if (ISDIGIT (tmpa)) 916: for (loga = 1; ISDIGIT (*++a); ++loga) 917: ; 918: else 919: loga = 0; 920: 921: if (ISDIGIT (tmpb)) 922: for (logb = 1; ISDIGIT (*++b); ++logb) 923: ; 924: else 925: logb = 0; 926: 927: if ((tmp = loga - logb) != 0) 928: return tmp; 929: 930: if (!loga) 931: return 0; 932: 933: return tmpa - tmpb; 934: } 935: } 936: 937: static int 938: general_numcompare (const char *sa, const char *sb) 939: { 940: double a, b; 941: /* FIXME: add option to warn about failed conversions. */ 942: /* FIXME: maybe add option to try expensive FP conversion 943: only if A and B can't be compared more cheaply/accurately. */ 944: if (xstrtod (sa, NULL, &a)) 945: { 946: a = 0; 947: } 948: if (xstrtod (sb, NULL, &b)) 949: { 950: b = 0; 951: } 952: return a == b ? 0 : a < b ? -1 : 1; 953: } 954: 955: /* Return an integer <= 12 associated with month name S with length LEN, 956: 0 if the name in S is not recognized. */ 957: 958: static int 959: getmonth (const char *s, int len) 960: { 961: char month[4]; 962: register int i, lo = 0, hi = 12; 963: 964: while (len > 0 && blanks[UCHAR(*s)]) 965: ++s, --len; 966: 967: if (len < 3) 968: return 0; 969: 970: for (i = 0; i < 3; ++i) 971: month[i] = fold_toupper[UCHAR (s[i])]; 972: month[3] = '\0'; 973: 974: while (hi - lo > 1) 975: if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0) 976: hi = (lo + hi) / 2; 977: else 978: lo = (lo + hi) / 2; 979: if (!strcmp (month, monthtab[lo].name)) 980: return monthtab[lo].val; 981: return 0; 982: } 983: 984: /* Compare two lines A and B trying every key in sequence until there 985: are no more keys or a difference is found. */ 986: 987: static int 988: keycompare (const struct line *a, const struct line *b) 989: { 990: register char *texta, *textb, *lima, *limb; 991: register unsigned char *translate; 992: register int *ignore; 993: struct keyfield *key; 994: int diff = 0, iter = 0, lena, lenb; 995: 996: for (key = keyhead.next; key; key = key->next, ++iter) 997: { 998: ignore = key->ignore; 999: translate = (unsigned char *) key->translate; 1000: 1001: /* Find the beginning and limit of each field. */ 1002: if (iter || a->keybeg == NULL || b->keybeg == NULL) 1003: { 1004: if (key->eword >= 0) 1005: lima = limfield (a, key), limb = limfield (b, key); 1006: else 1007: lima = a->text + a->length, limb = b->text + b->length; 1008: 1009: if (key->sword >= 0) 1010: texta = begfield (a, key), textb = begfield (b, key); 1011: else 1012: { 1013: texta = a->text, textb = b->text; 1014: if (key->skipsblanks) 1015: { 1016: while (texta < lima && blanks[UCHAR (*texta)]) 1017: ++texta; 1018: while (textb < limb && blanks[UCHAR (*textb)]) 1019: ++textb; 1020: } 1021: } 1022: } 1023: else 1024: { 1025: /* For the first iteration only, the key positions have 1026: been precomputed for us. */ 1027: texta = a->keybeg, lima = a->keylim; 1028: textb = b->keybeg, limb = b->keylim; 1029: } 1030: 1031: /* Find the lengths. */ 1032: lena = lima - texta, lenb = limb - textb; 1033: if (lena < 0) 1034: lena = 0; 1035: if (lenb < 0) 1036: lenb = 0; 1037: 1038: if (key->skipeblanks) 1039: { 1040: char *a_end = texta + lena; 1041: char *b_end = textb + lenb; 1042: trim_trailing_blanks (texta, &a_end); 1043: trim_trailing_blanks (textb, &b_end); 1044: lena = a_end - texta; 1045: lenb = b_end - textb; 1046: } 1047: 1048: /* Actually compare the fields. */ 1049: if (key->numeric) 1050: { 1051: if (*lima || *limb) 1052: { 1053: char savea = *lima, saveb = *limb; 1054: 1055: *lima = *limb = '\0'; 1056: diff = numcompare (texta, textb); 1057: *lima = savea, *limb = saveb; 1058: } 1059: else 1060: diff = numcompare (texta, textb); 1061: 1062: if (diff) 1063: return key->reverse ? -diff : diff; 1064: continue; 1065: } 1066: else if (key->general_numeric) 1067: { 1068: if (*lima || *limb) 1069: { 1070: char savea = *lima, saveb = *limb; 1071: 1072: *lima = *limb = '\0'; 1073: diff = general_numcompare (texta, textb); 1074: *lima = savea, *limb = saveb; 1075: } 1076: else 1077: diff = general_numcompare (texta, textb); 1078: 1079: if (diff) 1080: return key->reverse ? -diff : diff; 1081: continue; 1082: } 1083: else if (key->month) 1084: { 1085: diff = getmonth (texta, lena) - getmonth (textb, lenb); 1086: if (diff) 1087: return key->reverse ? -diff : diff; 1088: continue; 1089: } 1090: else if (ignore && translate) 1091: 1092: #define CMP_WITH_IGNORE(A, B) \ 1093: do \ 1094: { \ 1095: while (texta < lima && textb < limb) \ 1096: { \ 1097: while (texta < lima && ignore[UCHAR (*texta)]) \ 1098: ++texta; \ 1099: while (textb < limb && ignore[UCHAR (*textb)]) \ 1100: ++textb; \ 1101: if (texta < lima && textb < limb) \ 1102: { \ 1103: if ((A) != (B)) \ 1104: { \ 1105: diff = (A) - (B); \ 1106: break; \ 1107: } \ 1108: ++texta; \ 1109: ++textb; \ 1110: } \ 1111: \ 1112: if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \ 1113: diff = -1; \ 1114: else if (texta < lima && textb == limb \ 1115: && !ignore[UCHAR (*texta)]) \ 1116: diff = 1; \ 1117: } \ 1118: \ 1119: if (diff == 0) \ 1120: { \ 1121: while (texta < lima && ignore[UCHAR (*texta)]) \ 1122: ++texta; \ 1123: while (textb < limb && ignore[UCHAR (*textb)]) \ 1124: ++textb; \ 1125: \ 1126: if (texta == lima && textb < limb) \ 1127: diff = -1; \ 1128: else if (texta < lima && textb == limb) \ 1129: diff = 1; \ 1130: } \ 1131: /* Relative lengths are meaningless if characters were ignored. \ 1132: Handling this case here avoids what might be an invalid length \ 1133: comparison below. */ \ 1134: if (diff == 0 && texta == lima && textb == limb) \ 1135: return 0; \ 1136: } \ 1137: while (0) 1138: 1139: CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]); 1140: else if (ignore) 1141: CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb)); 1142: else if (translate) 1143: while (texta < lima && textb < limb) 1144: { 1145: if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)]) 1146: { 1147: diff = (translate[UCHAR (*--texta)] 1148: - translate[UCHAR (*--textb)]); 1149: break; 1150: } 1151: } 1152: else 1153: diff = memcmp (texta, textb, min (lena, lenb)); 1154: 1155: if (diff) 1156: return key->reverse ? -diff : diff; 1157: if ((diff = lena - lenb) != 0) 1158: return key->reverse ? -diff : diff; 1159: } 1160: 1161: return 0; 1162: } 1163: 1164: /* Compare two lines A and B, returning negative, zero, or positive 1165: depending on whether A compares less than, equal to, or greater than B. */ 1166: 1167: static int 1168: compare (register const struct line *a, register const struct line *b) 1169: { 1170: int diff, tmpa, tmpb, mini; 1171: 1172: /* First try to compare on the specified keys (if any). 1173: The only two cases with no key at all are unadorned sort, 1174: and unadorned sort -r. */ 1175: if (keyhead.next) 1176: { 1177: diff = keycompare (a, b); 1178: if (diff != 0) 1179: return diff; 1180: if (unique || stable) 1181: return 0; 1182: } 1183: 1184: /* If the keys all compare equal (or no keys were specified) 1185: fall through to the default byte-by-byte comparison. */ 1186: tmpa = a->length, tmpb = b->length; 1187: mini = min (tmpa, tmpb); 1188: if (mini == 0) 1189: diff = tmpa - tmpb; 1190: else 1191: { 1192: char *ap = a->text, *bp = b->text; 1193: 1194: diff = UCHAR (*ap) - UCHAR (*bp); 1195: if (diff == 0) 1196: { 1197: diff = memcmp (ap, bp, mini); 1198: if (diff == 0) 1199: diff = tmpa - tmpb; 1200: } 1201: } 1202: 1203: return reverse ? -diff : diff; 1204: } 1205: 1206: /* Check that the lines read from the given FP come in order. Return 1207: 1 if they do and 0 if there is a disorder. 1208: FIXME: return number of first out-of-order line if not sorted. */ 1209: 1210: static int 1211: checkfp (FILE *fp) 1212: { 1213: struct buffer buf; /* Input buffer. */ 1214: struct lines lines; /* Lines scanned from the buffer. */ 1215: struct line temp; /* Copy of previous line. */ 1216: int cc; /* Character count. */ 1217: int alloc, sorted = 1; 1218: 1219: initbuf (&buf, mergealloc); 1220: initlines (&lines, mergealloc / linelength + 1, 1221: LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line))); 1222: alloc = linelength; 1223: temp.text = xmalloc (alloc); 1224: 1225: cc = fillbuf (&buf, fp); 1226: if (cc == 0) 1227: goto finish; 1228: 1229: findlines (&buf, &lines); 1230: 1231: while (1) 1232: { 1233: struct line *prev_line; /* Pointer to previous line. */ 1234: int cmp; /* Result of calling compare. */ 1235: int i; 1236: 1237: /* Compare each line in the buffer with its successor. */ 1238: for (i = 0; i < lines.used - 1; ++i) 1239: { 1240: cmp = compare (&lines.lines[i], &lines.lines[i + 1]); 1241: if ((unique && cmp >= 0) || (cmp > 0)) 1242: { 1243: sorted = 0; 1244: goto finish; 1245: } 1246: } 1247: 1248: /* Save the last line of the buffer and refill the buffer. */ 1249: prev_line = lines.lines + (lines.used - 1); 1250: if (prev_line->length + 1 > alloc) 1251: { 1252: do 1253: { 1254: alloc *= 2; 1255: } 1256: while (alloc < prev_line->length + 1); 1257: temp.text = xrealloc (temp.text, alloc); 1258: } 1259: assert (prev_line->length + 1 <= alloc); 1260: memcpy (temp.text, prev_line->text, prev_line->length + 1); 1261: temp.length = prev_line->length; 1262: temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text); 1263: temp.keylim = temp.text + (prev_line->keylim - prev_line->text); 1264: 1265: cc = fillbuf (&buf, fp); 1266: if (cc == 0) 1267: break; 1268: 1269: findlines (&buf, &lines); 1270: /* Make sure the line saved from the old buffer contents is 1271: less than or equal to the first line of the new buffer. */ 1272: cmp = compare (&temp, &lines.lines[0]); 1273: if ((unique && cmp >= 0) || (cmp > 0)) 1274: { 1275: sorted = 0; 1276: break; 1277: } 1278: } 1279: 1280: finish: 1281: xfclose (fp); 1282: free (buf.buf); 1283: free ((char *) lines.lines); 1284: free (temp.text); 1285: return sorted; 1286: } 1287: 1288: /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE. 1289: Close FPS before returning. */ 1290: 1291: static void 1292: mergefps (FILE **fps, register int nfps, FILE *ofp) 1293: { 1294: struct buffer buffer[NMERGE]; /* Input buffers for each file. */ 1295: struct lines lines[NMERGE]; /* Line tables for each buffer. */ 1296: struct line saved; /* Saved line for unique check. */ 1297: int savedflag = 0; /* True if there is a saved line. */ 1298: int savealloc; /* Size allocated for the saved line. */ 1299: int cur[NMERGE]; /* Current line in each line table. */ 1300: int ord[NMERGE]; /* Table representing a permutation of fps, 1301: such that lines[ord[0]].lines[cur[ord[0]]] 1302: is the smallest line and will be next 1303: output. */ 1304: register int i, j, t; 1305: 1306: #ifdef lint /* Suppress `used before initialized' warning. */ 1307: savealloc = 0; 1308: #endif 1309: 1310: /* Allocate space for a saved line if necessary. */ 1311: if (unique) 1312: { 1313: savealloc = linelength; 1314: saved.text = xmalloc (savealloc); 1315: } 1316: 1317: /* Read initial lines from each input file. */ 1318: for (i = 0; i < nfps; ++i) 1319: { 1320: initbuf (&buffer[i], mergealloc); 1321: /* If a file is empty, eliminate it from future consideration. */ 1322: while (i < nfps && !fillbuf (&buffer[i], fps[i])) 1323: { 1324: xfclose (fps[i]); 1325: --nfps; 1326: for (j = i; j < nfps; ++j) 1327: fps[j] = fps[j + 1]; 1328: } 1329: if (i == nfps) 1330: free (buffer[i].buf); 1331: else 1332: { 1333: initlines (&lines[i], mergealloc / linelength + 1, 1334: LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line))); 1335: findlines (&buffer[i], &lines[i]); 1336: cur[i] = 0; 1337: } 1338: } 1339: 1340: /* Set up the ord table according to comparisons among input lines. 1341: Since this only reorders two items if one is strictly greater than 1342: the other, it is stable. */ 1343: for (i = 0; i < nfps; ++i) 1344: ord[i] = i; 1345: for (i = 1; i < nfps; ++i) 1346: if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]], 1347: &lines[ord[i]].lines[cur[ord[i]]]) > 0) 1348: t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; 1349: 1350: /* Repeatedly output the smallest line until no input remains. */ 1351: while (nfps) 1352: { 1353: /* If uniqified output is turned on, output only the first of 1354: an identical series of lines. */ 1355: if (unique) 1356: { 1357: if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]])) 1358: { 1359: write_bytes (saved.text, saved.length, ofp); 1360: putc (eolchar, ofp); 1361: savedflag = 0; 1362: } 1363: if (!savedflag) 1364: { 1365: if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1) 1366: { 1367: while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1) 1368: savealloc *= 2; 1369: saved.text = xrealloc (saved.text, savealloc); 1370: } 1371: saved.length = lines[ord[0]].lines[cur[ord[0]]].length; 1372: memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text, 1373: saved.length + 1); 1374: if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL) 1375: { 1376: saved.keybeg = saved.text + 1377: (lines[ord[0]].lines[cur[ord[0]]].keybeg 1378: - lines[ord[0]].lines[cur[ord[0]]].text); 1379: } 1380: if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL) 1381: { 1382: saved.keylim = saved.text + 1383: (lines[ord[0]].lines[cur[ord[0]]].keylim 1384: - lines[ord[0]].lines[cur[ord[0]]].text); 1385: } 1386: savedflag = 1; 1387: } 1388: } 1389: else 1390: { 1391: write_bytes (lines[ord[0]].lines[cur[ord[0]]].text, 1392: lines[ord[0]].lines[cur[ord[0]]].length, ofp); 1393: putc (eolchar, ofp); 1394: } 1395: 1396: /* Check if we need to read more lines into core. */ 1397: if (++cur[ord[0]] == lines[ord[0]].used) 1398: if (fillbuf (&buffer[ord[0]], fps[ord[0]])) 1399: { 1400: findlines (&buffer[ord[0]], &lines[ord[0]]); 1401: cur[ord[0]] = 0; 1402: } 1403: else 1404: { 1405: /* We reached EOF on fps[ord[0]]. */ 1406: for (i = 1; i < nfps; ++i) 1407: if (ord[i] > ord[0]) 1408: --ord[i]; 1409: --nfps; 1410: xfclose (fps[ord[0]]); 1411: free (buffer[ord[0]].buf); 1412: free ((char *) lines[ord[0]].lines); 1413: for (i = ord[0]; i < nfps; ++i) 1414: { 1415: fps[i] = fps[i + 1]; 1416: buffer[i] = buffer[i + 1]; 1417: lines[i] = lines[i + 1]; 1418: cur[i] = cur[i + 1]; 1419: } 1420: for (i = 0; i < nfps; ++i) 1421: ord[i] = ord[i + 1]; 1422: continue; 1423: } 1424: 1425: /* The new line just read in may be larger than other lines 1426: already in core; push it back in the queue until we encounter 1427: a line larger than it. */ 1428: for (i = 1; i < nfps; ++i) 1429: { 1430: t = compare (&lines[ord[0]].lines[cur[ord[0]]], 1431: &lines[ord[i]].lines[cur[ord[i]]]); 1432: if (!t) 1433: t = ord[0] - ord[i]; 1434: if (t < 0) 1435: break; 1436: } 1437: t = ord[0]; 1438: for (j = 1; j < i; ++j) 1439: ord[j - 1] = ord[j]; 1440: ord[i - 1] = t; 1441: } 1442: 1443: if (unique && savedflag) 1444: { 1445: write_bytes (saved.text, saved.length, ofp); 1446: putc (eolchar, ofp); 1447: free (saved.text); 1448: } 1449: } 1450: 1451: /* Sort the array LINES with NLINES members, using TEMP for temporary space. */ 1452: 1453: static void 1454: sortlines (struct line *lines, int nlines, struct line *temp) 1455: { 1456: register struct line *lo, *hi, *t; 1457: register int nlo, nhi; 1458: 1459: if (nlines == 2) 1460: { 1461: if (compare (&lines[0], &lines[1]) > 0) 1462: *temp = lines[0], lines[0] = lines[1], lines[1] = *temp; 1463: return; 1464: } 1465: 1466: nlo = nlines / 2; 1467: lo = lines; 1468: nhi = nlines - nlo; 1469: hi = lines + nlo; 1470: 1471: if (nlo > 1) 1472: sortlines (lo, nlo, temp); 1473: 1474: if (nhi > 1) 1475: sortlines (hi, nhi, temp); 1476: 1477: t = temp; 1478: 1479: while (nlo && nhi) 1480: if (compare (lo, hi) <= 0) 1481: *t++ = *lo++, --nlo; 1482: else 1483: *t++ = *hi++, --nhi; 1484: while (nlo--) 1485: *t++ = *lo++; 1486: 1487: for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo) 1488: *lo++ = *t++; 1489: } 1490: 1491: /* Check that each of the NFILES FILES is ordered. 1492: Return a count of disordered files. */ 1493: 1494: static int 1495: check (char **files, int nfiles) 1496: { 1497: int i, disorders = 0; 1498: FILE *fp; 1499: 1500: for (i = 0; i < nfiles; ++i) 1501: { 1502: fp = xfopen (files[i], "r"); 1503: if (!checkfp (fp)) 1504: { 1505: fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]); 1506: ++disorders; 1507: } 1508: } 1509: return disorders; 1510: } 1511: 1512: /* Merge NFILES FILES onto OFP. */ 1513: 1514: static void 1515: merge (char **files, int nfiles, FILE *ofp) 1516: { 1517: int i, j, t; 1518: char *temp; 1519: FILE *fps[NMERGE], *tfp; 1520: 1521: while (nfiles > NMERGE) 1522: { 1523: t = 0; 1524: for (i = 0; i < nfiles / NMERGE; ++i) 1525: { 1526: for (j = 0; j < NMERGE; ++j) 1527: fps[j] = xfopen (files[i * NMERGE + j], "r"); 1528: tfp = xtmpfopen (temp = tempname ()); 1529: mergefps (fps, NMERGE, tfp); 1530: xfclose (tfp); 1531: for (j = 0; j < NMERGE; ++j) 1532: zaptemp (files[i * NMERGE + j]); 1533: files[t++] = temp; 1534: } 1535: for (j = 0; j < nfiles % NMERGE; ++j) 1536: fps[j] = xfopen (files[i * NMERGE + j], "r"); 1537: tfp = xtmpfopen (temp = tempname ()); 1538: mergefps (fps, nfiles % NMERGE, tfp); 1539: xfclose (tfp); 1540: for (j = 0; j < nfiles % NMERGE; ++j) 1541: zaptemp (files[i * NMERGE + j]); 1542: files[t++] = temp; 1543: nfiles = t; 1544: } 1545: 1546: for (i = 0; i < nfiles; ++i) 1547: fps[i] = xfopen (files[i], "r"); 1548: mergefps (fps, i, ofp); 1549: for (i = 0; i < nfiles; ++i) 1550: zaptemp (files[i]); 1551: } 1552: 1553: /* Sort NFILES FILES onto OFP. */ 1554: 1555: static void 1556: sort (char **files, int nfiles, FILE *ofp) 1557: { 1558: struct buffer buf; 1559: struct lines lines; 1560: struct line *tmp; 1561: int i, ntmp; 1562: FILE *fp, *tfp; 1563: struct tempnode *node; 1564: int n_temp_files = 0; 1565: char **tempfiles; 1566: 1567: initbuf (&buf, sortalloc); 1568: initlines (&lines, sortalloc / linelength + 1, 1569: LINEALLOC / sizeof (struct line)); 1570: ntmp = lines.alloc; 1571: tmp = (struct line *) xmalloc (ntmp * sizeof (struct line)); 1572: 1573: while (nfiles--) 1574: { 1575: fp = xfopen (*files++, "r"); 1576: while (fillbuf (&buf, fp)) 1577: { 1578: findlines (&buf, &lines); 1579: if (lines.used > ntmp) 1580: { 1581: while (lines.used > ntmp) 1582: ntmp *= 2; 1583: tmp = (struct line *) 1584: xrealloc ((char *) tmp, ntmp * sizeof (struct line)); 1585: } 1586: sortlines (lines.lines, lines.used, tmp); 1587: if (feof (fp) && !nfiles && !n_temp_files && !buf.left) 1588: tfp = ofp; 1589: else 1590: { 1591: ++n_temp_files; 1592: tfp = xtmpfopen (tempname ()); 1593: } 1594: for (i = 0; i < lines.used; ++i) 1595: if (!unique || i == 0 1596: || compare (&lines.lines[i], &lines.lines[i - 1])) 1597: { 1598: write_bytes (lines.lines[i].text, lines.lines[i].length, tfp); 1599: putc (eolchar, tfp); 1600: } 1601: if (tfp != ofp) 1602: xfclose (tfp); 1603: } 1604: xfclose (fp); 1605: } 1606: 1607: free (buf.buf); 1608: free ((char *) lines.lines); 1609: free ((char *) tmp); 1610: 1611: if (n_temp_files) 1612: { 1613: tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *)); 1614: i = n_temp_files; 1615: for (node = temphead.next; i > 0; node = node->next) 1616: tempfiles[--i] = node->name; 1617: merge (tempfiles, n_temp_files, ofp); 1618: free ((char *) tempfiles); 1619: } 1620: } 1621: 1622: /* Insert key KEY at the end of the list (`keyhead'). */ 1623: 1624: static void 1625: insertkey (struct keyfield *key) 1626: { 1627: struct keyfield *k = &keyhead; 1628: 1629: while (k->next) 1630: k = k->next; 1631: k->next = key; 1632: key->next = NULL; 1633: } 1634: 1635: static void 1636: badfieldspec (const char *s) 1637: { 1638: error (SORT_FAILURE, 0, _("invalid field specification `%s'"), s); 1639: } 1640: 1641: /* Handle interrupts and hangups. */ 1642: 1643: static void 1644: sighandler (int sig) 1645: { 1646: #ifdef SA_INTERRUPT 1647: struct sigaction sigact; 1648: 1649: sigact.sa_handler = SIG_DFL; 1650: sigemptyset (&sigact.sa_mask); 1651: sigact.sa_flags = 0; 1652: sigaction (sig, &sigact, NULL); 1653: #else /* !SA_INTERRUPT */ 1654: signal (sig, SIG_DFL); 1655: #endif /* SA_INTERRUPT */ 1656: cleanup (); 1657: kill (getpid (), sig); 1658: } 1659: 1660: /* Set the ordering options for KEY specified in S. 1661: Return the address of the first character in S that 1662: is not a valid ordering option. 1663: BLANKTYPE is the kind of blanks that 'b' should skip. */ 1664: 1665: static char * 1666: set_ordering (register const char *s, struct keyfield *key, 1667: enum blanktype blanktype) 1668: { 1669: while (*s) 1670: { 1671: switch (*s) 1672: { 1673: case 'b': 1674: if (blanktype == bl_start || blanktype == bl_both) 1675: key->skipsblanks = 1; 1676: if (blanktype == bl_end || blanktype == bl_both) 1677: key->skipeblanks = 1; 1678: break; 1679: case 'd': 1680: key->ignore = nondictionary; 1681: break; 1682: case 'f': 1683: key->translate = fold_toupper; 1684: break; 1685: case 'g': 1686: key->general_numeric = 1; 1687: break; 1688: case 'i': 1689: key->ignore = nonprinting; 1690: break; 1691: case 'M': 1692: key->month = 1; 1693: break; 1694: case 'n': 1695: key->numeric = 1; 1696: break; 1697: case 'r': 1698: key->reverse = 1; 1699: break; 1700: default: 1701: return (char *) s; 1702: } 1703: ++s; 1704: } 1705: return (char *) s; 1706: } 1707: 1708: static void 1709: key_init (struct keyfield *key) 1710: { 1711: memset (key, 0, sizeof (*key)); 1712: key->eword = -1; 1713: } 1714: 1715: int 1716: main (int argc, char **argv) 1717: { 1718: struct keyfield *key = NULL, gkey; 1719: char *s; 1720: int i, t, t2; 1721: int checkonly = 0, mergeonly = 0, nfiles = 0; 1722: char *minus = "-", *outfile = minus, **files, *tmp; 1723: FILE *ofp; 1724: #ifdef SA_INTERRUPT 1725: struct sigaction oldact, newact; 1726: #endif /* SA_INTERRUPT */ 1727: 1728: program_name = argv[0]; 1729: setlocale (LC_ALL, ""); 1730: bindtextdomain (PACKAGE, LOCALEDIR); 1731: textdomain (PACKAGE); 1732: 1733: parse_long_options (argc, argv, "sort", GNU_PACKAGE, VERSION, usage); 1734: 1735: have_read_stdin = 0; 1736: inittables (); 1737: 1738: temp_file_prefix = getenv ("TMPDIR"); 1739: if (temp_file_prefix == NULL) 1740: temp_file_prefix = DEFAULT_TMPDIR; 1741: 1742: #ifdef SA_INTERRUPT 1743: newact.sa_handler = sighandler; 1744: sigemptyset (&newact.sa_mask); 1745: newact.sa_flags = 0; 1746: 1747: sigaction (SIGINT, NULL, &oldact); 1748: if (oldact.sa_handler != SIG_IGN) 1749: sigaction (SIGINT, &newact, NULL); 1750: sigaction (SIGHUP, NULL, &oldact); 1751: if (oldact.sa_handler != SIG_IGN) 1752: sigaction (SIGHUP, &newact, NULL); 1753: sigaction (SIGPIPE, NULL, &oldact); 1754: if (oldact.sa_handler != SIG_IGN) 1755: sigaction (SIGPIPE, &newact, NULL); 1756: sigaction (SIGTERM, NULL, &oldact); 1757: if (oldact.sa_handler != SIG_IGN) 1758: sigaction (SIGTERM, &newact, NULL); 1759: #else /* !SA_INTERRUPT */ 1760: if (signal (SIGINT, SIG_IGN) != SIG_IGN) 1761: signal (SIGINT, sighandler); 1762: if (signal (SIGHUP, SIG_IGN) != SIG_IGN) 1763: signal (SIGHUP, sighandler); 1764: if (signal (SIGPIPE, SIG_IGN) != SIG_IGN) 1765: signal (SIGPIPE, sighandler); 1766: if (signal (SIGTERM, SIG_IGN) != SIG_IGN) 1767: signal (SIGTERM, sighandler); 1768: #endif /* !SA_INTERRUPT */ 1769: 1770: gkey.sword = gkey.eword = -1; 1771: gkey.ignore = NULL; 1772: gkey.translate = NULL; 1773: gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0; 1774: gkey.skipsblanks = gkey.skipeblanks = 0; 1775: 1776: files = (char **) xmalloc (sizeof (char *) * argc); 1777: 1778: for (i = 1; i < argc; ++i) 1779: { 1780: if (argv[i][0] == '+') 1781: { 1782: if (key) 1783: insertkey (key); 1784: key = (struct keyfield *) xmalloc (sizeof (struct keyfield)); 1785: key_init (key); 1786: s = argv[i] + 1; 1787: if (! (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1])))) 1788: badfieldspec (argv[i]); 1789: for (t = 0; ISDIGIT (*s); ++s) 1790: t = 10 * t + *s - '0'; 1791: t2 = 0; 1792: if (*s == '.') 1793: for (++s; ISDIGIT (*s); ++s) 1794: t2 = 10 * t2 + *s - '0'; 1795: if (t2 || t) 1796: { 1797: key->sword = t; 1798: key->schar = t2; 1799: } 1800: else 1801: key->sword = -1; 1802: s = set_ordering (s, key, bl_start); 1803: if (*s) 1804: badfieldspec (argv[i]); 1805: } 1806: else if (argv[i][0] == '-' && argv[i][1]) 1807: { 1808: s = argv[i] + 1; 1809: if (ISDIGIT (*s) || (*s == '.' && ISDIGIT (s[1]))) 1810: { 1811: if (!key) 1812: { 1813: /* Provoke with `sort -9'. */ 1814: error (0, 0, _("when using the old-style +POS and -POS \ 1815: key specifiers,\nthe +POS specifier must come first")); 1816: usage (SORT_FAILURE); 1817: } 1818: for (t = 0; ISDIGIT (*s); ++s) 1819: t = t * 10 + *s - '0'; 1820: t2 = 0; 1821: if (*s == '.') 1822: for (++s; ISDIGIT (*s); ++s) 1823: t2 = t2 * 10 + *s - '0'; 1824: key->eword = t; 1825: key->echar = t2; 1826: s = set_ordering (s, key, bl_end); 1827: if (*s) 1828: badfieldspec (argv[i]); 1829: insertkey (key); 1830: key = NULL; 1831: } 1832: else 1833: while (*s) 1834: { 1835: s = set_ordering (s, &gkey, bl_both); 1836: switch (*s) 1837: { 1838: case '\0': 1839: break; 1840: case 'c': 1841: checkonly = 1; 1842: break; 1843: case 'k': 1844: if (s[1]) 1845: ++s; 1846: else 1847: { 1848: if (i == argc - 1) 1849: error (SORT_FAILURE, 0, 1850: _("option `-k' requires an argument")); 1851: else 1852: s = argv[++i]; 1853: } 1854: if (key) 1855: insertkey (key); 1856: key = (struct keyfield *) 1857: xmalloc (sizeof (struct keyfield)); 1858: key_init (key); 1859: /* Get POS1. */ 1860: if (!ISDIGIT (*s)) 1861: badfieldspec (argv[i]); 1862: for (t = 0; ISDIGIT (*s); ++s) 1863: t = 10 * t + *s - '0'; 1864: if (t == 0) 1865: { 1866: /* Provoke with `sort -k0' */ 1867: error (0, 0, _("the starting field number argument \ 1868: to the `-k' option must be positive")); 1869: badfieldspec (argv[i]); 1870: } 1871: --t; 1872: t2 = 0; 1873: if (*s == '.') 1874: { 1875: if (!ISDIGIT (s[1])) 1876: { 1877: /* Provoke with `sort -k1.' */ 1878: error (0, 0, _("starting field spec has `.' but \ 1879: lacks following character offset")); 1880: badfieldspec (argv[i]); 1881: } 1882: for (++s; ISDIGIT (*s); ++s) 1883: t2 = 10 * t2 + *s - '0'; 1884: if (t2 == 0) 1885: { 1886: /* Provoke with `sort -k1.0' */ 1887: error (0, 0, _("starting field character offset \ 1888: argument to the `-k' option\nmust be positive")); 1889: badfieldspec (argv[i]); 1890: } 1891: --t2; 1892: } 1893: if (t2 || t) 1894: { 1895: key->sword = t; 1896: key->schar = t2; 1897: } 1898: else 1899: key->sword = -1; 1900: s = set_ordering (s, key, bl_start); 1901: if (*s == 0) 1902: { 1903: key->eword = -1; 1904: key->echar = 0; 1905: } 1906: else if (*s != ',') 1907: badfieldspec (argv[i]); 1908: else if (*s == ',') 1909: { 1910: /* Skip over comma. */ 1911: ++s; 1912: if (*s == 0) 1913: { 1914: /* Provoke with `sort -k1,' */ 1915: error (0, 0, _("field specification has `,' but \ 1916: lacks following field spec")); 1917: badfieldspec (argv[i]); 1918: } 1919: /* Get POS2. */ 1920: for (t = 0; ISDIGIT (*s); ++s) 1921: t = t * 10 + *s - '0'; 1922: if (t == 0) 1923: { 1924: /* Provoke with `sort -k1,0' */ 1925: error (0, 0, _("ending field number argument \ 1926: to the `-k' option must be positive")); 1927: badfieldspec (argv[i]); 1928: } 1929: --t; 1930: t2 = 0; 1931: if (*s == '.') 1932: { 1933: if (!ISDIGIT (s[1])) 1934: { 1935: /* Provoke with `sort -k1,1.' */ 1936: error (0, 0, _("ending field spec has `.' \ 1937: but lacks following character offset")); 1938: badfieldspec (argv[i]); 1939: } 1940: for (++s; ISDIGIT (*s); ++s) 1941: t2 = t2 * 10 + *s - '0'; 1942: } 1943: else 1944: { 1945: /* `-k 2,3' is equivalent to `+1 -3'. */ 1946: ++t; 1947: } 1948: key->eword = t; 1949: key->echar = t2; 1950: s = set_ordering (s, key, bl_end); 1951: if (*s) 1952: badfieldspec (argv[i]); 1953: } 1954: insertkey (key); 1955: key = NULL; 1956: goto outer; 1957: case 'm': 1958: mergeonly = 1; 1959: break; 1960: case 'o': 1961: if (s[1]) 1962: outfile = s + 1; 1963: else 1964: { 1965: if (i == argc - 1) 1966: error (SORT_FAILURE, 0, 1967: _("option `-o' requires an argument")); 1968: else 1969: outfile = argv[++i]; 1970: } 1971: goto outer; 1972: case 's': 1973: stable = 1; 1974: break; 1975: case 't': 1976: if (s[1]) 1977: tab = *++s; 1978: else if (i < argc - 1) 1979: { 1980: tab = *argv[++i]; 1981: goto outer; 1982: } 1983: else 1984: error (SORT_FAILURE, 0, 1985: _("option `-t' requires an argument")); 1986: break; 1987: case 'T': 1988: if (s[1]) 1989: temp_file_prefix = ++s; 1990: else 1991: { 1992: if (i < argc - 1) 1993: temp_file_prefix = argv[++i]; 1994: else 1995: error (SORT_FAILURE, 0, 1996: _("option `-T' requires an argument")); 1997: } 1998: goto outer; 1999: /* break; */ 2000: case 'u': 2001: unique = 1; 2002: break; 2003: case 'z': 2004: eolchar = 0; 2005: break; 2006: case 'y': 2007: /* Accept and ignore e.g. -y0 for compatibility with 2008: Solaris 2. */ 2009: goto outer; 2010: default: 2011: fprintf (stderr, _("%s: unrecognized option `-%c'\n"), 2012: argv[0], *s); 2013: usage (SORT_FAILURE); 2014: } 2015: if (*s) 2016: ++s; 2017: } 2018: } 2019: else /* Not an option. */ 2020: { 2021: files[nfiles++] = argv[i]; 2022: } 2023: outer:; 2024: } 2025: 2026: if (key) 2027: insertkey (key); 2028: 2029: /* Inheritance of global options to individual keys. */ 2030: for (key = keyhead.next; key; key = key->next) 2031: if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse 2032: && !key->skipeblanks && !key->month && !key->numeric 2033: && !key->general_numeric) 2034: { 2035: key->ignore = gkey.ignore; 2036: key->translate = gkey.translate; 2037: key->skipsblanks = gkey.skipsblanks; 2038: key->skipeblanks = gkey.skipeblanks; 2039: key->month = gkey.month; 2040: key->numeric = gkey.numeric; 2041: key->general_numeric = gkey.general_numeric; 2042: key->reverse = gkey.reverse; 2043: } 2044: 2045: if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks 2046: || gkey.skipeblanks || gkey.month || gkey.numeric 2047: || gkey.general_numeric)) 2048: insertkey (&gkey); 2049: reverse = gkey.reverse; 2050: 2051: if (nfiles == 0) 2052: { 2053: nfiles = 1; 2054: files = − 2055: } 2056: 2057: if (checkonly) 2058: { 2059: /* POSIX requires that sort return 1 IFF invoked with -c and the 2060: input is not properly sorted. */ 2061: exit (check (files, nfiles) == 0 ? 0 : 1); 2062: } 2063: 2064: if (strcmp (outfile, "-")) 2065: { 2066: struct stat outstat; 2067: if (stat (outfile, &outstat) == 0) 2068: { 2069: /* The following code prevents a race condition when 2070: people use the brain dead shell programming idiom: 2071: cat file | sort -o file 2072: This feature is provided for historical compatibility, 2073: but we strongly discourage ever relying on this in 2074: new shell programs. */ 2075: 2076: /* Temporarily copy each input file that might be another name 2077: for the output file. When in doubt (e.g. a pipe), copy. */ 2078: for (i = 0; i < nfiles; ++i) 2079: { 2080: char buf[8192]; 2081: FILE *fp; 2082: int cc; 2083: 2084: if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i])) 2085: { 2086: struct stat instat; 2087: if ((strcmp (files[i], "-") 2088: ? stat (files[i], &instat) 2089: : fstat (STDIN_FILENO, &instat)) != 0) 2090: { 2091: error (0, errno, "%s", files[i]); 2092: cleanup (); 2093: exit (SORT_FAILURE); 2094: } 2095: if (S_ISREG (instat.st_mode) 2096: && (instat.st_ino != outstat.st_ino 2097: || instat.st_dev != outstat.st_dev)) 2098: { 2099: /* We know the files are distinct. */ 2100: continue; 2101: } 2102: } 2103: 2104: fp = xfopen (files[i], "r"); 2105: tmp = tempname (); 2106: ofp = xtmpfopen (tmp); 2107: while ((cc = fread (buf, 1, sizeof buf, fp)) > 0) 2108: write_bytes (buf, cc, ofp); 2109: if (ferror (fp)) 2110: { 2111: error (0, errno, "%s", files[i]); 2112: cleanup (); 2113: exit (SORT_FAILURE); 2114: } 2115: xfclose (ofp); 2116: xfclose (fp); 2117: files[i] = tmp; 2118: } 2119: } 2120: ofp = xfopen (outfile, "w"); 2121: } 2122: else 2123: ofp = stdout; 2124: 2125: if (mergeonly) 2126: merge (files, nfiles, ofp); 2127: else 2128: sort (files, nfiles, ofp); 2129: cleanup (); 2130: 2131: /* If we wait for the implicit flush on exit, and the parent process 2132: has closed stdout (e.g., exec >&- in a shell), then the output file 2133: winds up empty. I don't understand why. This is under SunOS, 2134: Solaris, Ultrix, and Irix. This premature fflush makes the output 2135: reappear. --karl@cs.umb.edu */ 2136: if (fflush (ofp) < 0) 2137: error (SORT_FAILURE, errno, _("%s: write error"), outfile); 2138: 2139: if (have_read_stdin && fclose (stdin) == EOF) 2140: error (SORT_FAILURE, errno, outfile); 2141: if (ferror (stdout) || fclose (stdout) == EOF) 2142: error (SORT_FAILURE, errno, _("%s: write error"), outfile); 2143: 2144: exit (EXIT_SUCCESS); 2145: }