Define and Extract Fields
Define and Extract Fields
Support the definition of single fields.
Notes
There are two forms of field definition.
The first form uses a field separator character
(normally tab) to divide a line into fields (TabSep).
The second form uses white space transitions to
divide a line into fields (WhiteSep).
The second form is used if the field separator
character (the variable `tab' tested in the code below)
is the null character ('\0').
- Declare keyfield{} structure
-
Structure declaration
- Segment Source
- 103: struct keyfield
104: {
105: int sword; /* Zero-origin 'word' to start at. */
106: int schar; /* Additional characters to skip. */
107: int skipsblanks; /* Skip leading white space at start. */
108: int eword; /* Zero-origin first word after field. */
109: int echar; /* Additional characters in field. */
110: int skipeblanks; /* Skip trailing white space at finish. */
111: int *ignore; /* Boolean array of characters to ignore. */
112: char *translate; /* Translation applied to characters. */
113: int numeric; /* Flag for numeric comparison. Handle
114: strings of digits with optional decimal
115: point, but no exponential notation. */
116: int general_numeric; /* Flag for general, numeric comparison.
117: Handle numbers in exponential notation. */
118: int month; /* Flag for comparison by month name. */
119: int reverse; /* Reverse the sense of comparison. */
120: struct keyfield *next; /* Next keyfield to try. */
121: };
122:
- Define begfield()
-
Function definition
- Segment Source
-
536: /* Return a pointer to the first character of the field specified
537: by KEY in LINE. */
538:
539: static char *
540: begfield (const struct line *line, const struct keyfield *key)
541: {
542: register char *ptr = line->text, *lim = ptr + line->length;
543: register int sword = key->sword, schar = key->schar;
544:
545: if (tab)
546: while (ptr < lim && sword--)
547: {
548: while (ptr < lim && *ptr != tab)
549: ++ptr;
550: if (ptr < lim)
551: ++ptr;
552: }
553: else
554: while (ptr < lim && sword--)
555: {
556: while (ptr < lim && blanks[UCHAR (*ptr)])
557: ++ptr;
558: while (ptr < lim && !blanks[UCHAR (*ptr)])
559: ++ptr;
560: }
561:
562: if (key->skipsblanks)
563: while (ptr < lim && blanks[UCHAR (*ptr)])
564: ++ptr;
565:
566: if (ptr + schar <= lim)
567: ptr += schar;
568: else
569: ptr = lim;
570:
571: return ptr;
572: }
573:
- Define limfield()
-
Function definition
- Segment Source
-
574: /* Return the limit of (a pointer to the first character after) the field
575: in LINE specified by KEY. */
576:
577: static char *
578: limfield (const struct line *line, const struct keyfield *key)
579: {
580: register char *ptr = line->text, *lim = ptr + line->length;
581: register int eword = key->eword, echar = key->echar;
582:
583: /* Note: from the POSIX spec:
584: The leading field separator itself is included in
585: a field when -t is not used. FIXME: move this comment up... */
586:
587: /* Move PTR past EWORD fields or to one past the last byte on LINE,
588: whichever comes first. If there are more than EWORD fields, leave
589: PTR pointing at the beginning of the field having zero-based index,
590: EWORD. If a delimiter character was specified (via -t), then that
591: `beginning' is the first character following the delimiting TAB.
592: Otherwise, leave PTR pointing at the first `blank' character after
593: the preceding field. */
594: if (tab)
595: while (ptr < lim && eword--)
596: {
597: while (ptr < lim && *ptr != tab)
598: ++ptr;
599: if (ptr < lim && (eword || echar > 0))
600: ++ptr;
601: }
602: else
603: while (ptr < lim && eword--)
604: {
605: while (ptr < lim && blanks[UCHAR (*ptr)])
606: ++ptr;
607: while (ptr < lim && !blanks[UCHAR (*ptr)])
608: ++ptr;
609: }
610:
611: #ifdef POSIX_UNSPECIFIED
612: /* The following block of code makes GNU sort incompatible with
613: standard Unix sort, so it's ifdef'd out for now.
614: The POSIX spec isn't clear on how to interpret this.
615: FIXME: request clarification.
616:
617: From: kwzh@gnu.ai.mit.edu (Karl Heuer)
618: Date: Thu, 30 May 96 12:20:41 -0400
619:
620: [...]I believe I've found another bug in `sort'.
621:
622: $ cat /tmp/sort.in
623: a b c 2 d
624: pq rs 1 t
625: $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in
626: a b c 2 d
627: pq rs 1 t
628: $ /bin/sort +0.6 -0.7 </tmp/sort.in
629: pq rs 1 t
630: a b c 2 d
631:
632: Unix sort produced the answer I expected: sort on the single character
633: in column 6. GNU sort produced different results, because it disagrees
634: on the interpretation of the key-end spec "-M.N". Unix sort reads this
635: as "skip M fields, then N characters"; but GNU sort wants it to mean
636: "skip M fields, then either N characters or the rest of the current
637: field, whichever comes first". This extra clause applies only to
638: key-ends, not key-starts.
639: */
640:
641: /* Make LIM point to the end of (one byte past) the current field. */
642: if (tab)
643: {
644: char *newlim;
645: newlim = memchr (ptr, tab, lim - ptr);
646: if (newlim)
647: lim = newlim;
648: }
649: else
650: {
651: char *newlim;
652: newlim = ptr;
653: while (newlim < lim && blanks[UCHAR (*newlim)])
654: ++newlim;
655: while (newlim < lim && !blanks[UCHAR (*newlim)])
656: ++newlim;
657: lim = newlim;
658: }
659: #endif
660:
661: /* If we're skipping leading blanks, don't start counting characters
662: until after skipping past any leading blanks. */
663: if (key->skipsblanks)
664: while (ptr < lim && blanks[UCHAR (*ptr)])
665: ++ptr;
666:
667: /* Advance PTR by ECHAR (if possible), but no further than LIM. */
668: if (ptr + echar <= lim)
669: ptr += echar;
670: else
671: ptr = lim;
672:
673: return ptr;
674: }
675:
- Extract next field
- Code insertion
- Segment Source
-
1004: if (key->eword >= 0)
1005: lima = limfield (a, key), limb = limfield (b, key);
1006: else
1007: lima = a->text + a->length, limb = b->text + b->length;
1008:
1009: if (key->sword >= 0)
1010: texta = begfield (a, key), textb = begfield (b, key);
1011: else
1012: {
1013: texta = a->text, textb = b->text;
1014: if (key->skipsblanks)
1015: {
1016: while (texta < lima && blanks[UCHAR (*texta)])
1017: ++texta;
1018: while (textb < limb && blanks[UCHAR (*textb)])
1019: ++textb;
1020: }
1021: }
- Define key_init()
-
Function definition
- Notes
-
Virtually all uses of key_init() are via an
inlined (and unrealized) FieldMgmt function
we can call "genNewField()".
- Segment Source
- 1708: static void
1709: key_init (struct keyfield *key)
1710: {
1711: memset (key, 0, sizeof (*key));
1712: key->eword = -1;
1713: }
1714: