Xv6 with picoc & Linkage editor  v1.0
The project delineate mutual cohesion between c library, linkage editor ( linker), interpreter and operating system by porting the same on xv6 kernel
 All Data Structures
46_grep.c
00001 /*
00002  * The  information  in  this  document  is  subject  to  change
00003  * without  notice  and  should not be construed as a commitment
00004  * by Digital Equipment Corporation or by DECUS.
00005  *
00006  * Neither Digital Equipment Corporation, DECUS, nor the authors
00007  * assume any responsibility for the use or reliability of  this
00008  * document or the described software.
00009  *
00010  *      Copyright (C) 1980, DECUS
00011  *
00012  * General permission to copy or modify, but not for profit,  is
00013  * hereby  granted,  provided that the above copyright notice is
00014  * included and reference made to  the  fact  that  reproduction
00015  * privileges were granted by DECUS.
00016  */
00017 #include <stdio.h>
00018 
00019 /*
00020  * grep
00021  *
00022  * Runs on the Decus compiler or on vms, On vms, define as:
00023  *      grep :== "$disk:[account]grep"      (native)
00024  *      grep :== "$disk:[account]grep grep" (Decus)
00025  * See below for more information.
00026  */
00027 
00028 #if 0
00029 char    *documentation[] = {
00030 "grep searches a file for a given pattern.  Execute by",
00031 "   grep [flags] regular_expression file_list\n",
00032 "Flags are single characters preceeded by '-':",
00033 "   -c      Only a count of matching lines is printed",
00034 "   -f      Print file name for matching lines switch, see below",
00035 "   -n      Each line is preceeded by its line number",
00036 "   -v      Only print non-matching lines\n",
00037 "The file_list is a list of files (wildcards are acceptable on RSX modes).",
00038 "\nThe file name is normally printed if there is a file given.",
00039 "The -f flag reverses this action (print name no file, not if more).\n",
00040 0 };
00041 
00042 char    *patdoc[] = {
00043 "The regular_expression defines the pattern to search for.  Upper- and",
00044 "lower-case are always ignored.  Blank lines never match.  The expression",
00045 "should be quoted to prevent file-name translation.",
00046 "x      An ordinary character (not mentioned below) matches that character.",
00047 "'\\'    The backslash quotes any character.  \"\\$\" matches a dollar-sign.",
00048 "'^'    A circumflex at the beginning of an expression matches the",
00049 "       beginning of a line.",
00050 "'$'    A dollar-sign at the end of an expression matches the end of a line.",
00051 "'.'    A period matches any character except \"new-line\".",
00052 "':a'   A colon matches a class of characters described by the following",
00053 "':d'     character.  \":a\" matches any alphabetic, \":d\" matches digits,",
00054 "':n'     \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
00055 "': '     other control characters, such as new-line.",
00056 "'*'    An expression followed by an asterisk matches zero or more",
00057 "       occurrances of that expression: \"fo*\" matches \"f\", \"fo\"",
00058 "       \"foo\", etc.",
00059 "'+'    An expression followed by a plus sign matches one or more",
00060 "       occurrances of that expression: \"fo+\" matches \"fo\", etc.",
00061 "'-'    An expression followed by a minus sign optionally matches",
00062 "       the expression.",
00063 "'[]'   A string enclosed in square brackets matches any character in",
00064 "       that string, but no others.  If the first character in the",
00065 "       string is a circumflex, the expression matches any character",
00066 "       except \"new-line\" and the characters in the string.  For",
00067 "       example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
00068 "       matches \"abc\" but not \"axb\".  A range of characters may be",
00069 "       specified by two characters separated by \"-\".  Note that,",
00070 "       [a-z] matches alphabetics, while [z-a] never matches.",
00071 "The concatenation of regular expressions is a regular expression.",
00072 0};
00073 #endif
00074 
00075 #define LMAX    512
00076 #define PMAX    256
00077 
00078 #define CHAR    1
00079 #define BOL     2
00080 #define EOL     3
00081 #define ANY     4
00082 #define CLASS   5
00083 #define NCLASS  6
00084 #define STAR    7
00085 #define PLUS    8
00086 #define MINUS   9
00087 #define ALPHA   10
00088 #define DIGIT   11
00089 #define NALPHA  12
00090 #define PUNCT   13
00091 #define RANGE   14
00092 #define ENDPAT  15
00093 
00094 int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;
00095 
00096 char *pp, lbuf[LMAX], pbuf[PMAX];
00097 
00098 char *cclass();
00099 char *pmatch();
00100 
00101 
00102 /*** Display a file name *******************************/
00103 void file(char *s)
00104 {
00105    printf("File %s:\n", s);
00106 }
00107 
00108 /*** Report unopenable file ****************************/
00109 void cant(char *s)
00110 {
00111    fprintf(stderr, "%s: cannot open\n", s);
00112 }
00113 
00114 /*** Give good help ************************************/
00115 void help(char **hp)
00116 {
00117    char   **dp;
00118 
00119    for (dp = hp; *dp; ++dp)
00120       printf("%s\n", *dp);
00121 }
00122 
00123 /*** Display usage summary *****************************/
00124 void usage(char *s)
00125 {
00126    fprintf(stderr, "?GREP-E-%s\n", s);
00127    fprintf(stderr,
00128       "Usage: grep [-cfnv] pattern [file ...].  grep ? for help\n");
00129    exit(1);
00130 }
00131 
00132 /*** Compile the pattern into global pbuf[] ************/
00133 void compile(char *source)
00134 {
00135    char  *s;         /* Source string pointer     */
00136    char  *lp;        /* Last pattern pointer      */
00137    int   c;          /* Current character         */
00138    int            o;          /* Temp                      */
00139    char           *spp;       /* Save beginning of pattern */
00140 
00141    s = source;
00142    if (debug)
00143       printf("Pattern = \"%s\"\n", s);
00144    pp = pbuf;
00145    while (c = *s++) {
00146       /*
00147        * STAR, PLUS and MINUS are special.
00148        */
00149       if (c == '*' || c == '+' || c == '-') {
00150          if (pp == pbuf ||
00151               (o=pp[-1]) == BOL ||
00152               o == EOL ||
00153               o == STAR ||
00154               o == PLUS ||
00155               o == MINUS)
00156             badpat("Illegal occurrance op.", source, s);
00157          store(ENDPAT);
00158          store(ENDPAT);
00159          spp = pp;               /* Save pattern end     */
00160          while (--pp > lp)       /* Move pattern down    */
00161             *pp = pp[-1];        /* one byte             */
00162          *pp =   (c == '*') ? STAR :
00163             (c == '-') ? MINUS : PLUS;
00164          pp = spp;               /* Restore pattern end  */
00165          continue;
00166       }
00167       /*
00168        * All the rest.
00169        */
00170       lp = pp;         /* Remember start       */
00171       switch(c) {
00172 
00173       case '^':
00174          store(BOL);
00175          break;
00176 
00177       case '$':
00178          store(EOL);
00179          break;
00180 
00181       case '.':
00182          store(ANY);
00183          break;
00184 
00185       case '[':
00186          s = cclass(source, s);
00187          break;
00188 
00189       case ':':
00190          if (*s) {
00191             switch(tolower(c = *s++)) {
00192 
00193             case 'a':
00194             case 'A':
00195                store(ALPHA);
00196                break;
00197 
00198             case 'd':
00199             case 'D':
00200                store(DIGIT);
00201                break;
00202 
00203             case 'n':
00204             case 'N':
00205                store(NALPHA);
00206                break;
00207 
00208             case ' ':
00209                store(PUNCT);
00210                break;
00211 
00212             default:
00213                badpat("Unknown : type", source, s);
00214 
00215             }
00216             break;
00217          }
00218          else    badpat("No : type", source, s);
00219 
00220       case '\\':
00221          if (*s)
00222             c = *s++;
00223 
00224       default:
00225          store(CHAR);
00226          store(tolower(c));
00227       }
00228    }
00229    store(ENDPAT);
00230    store(0);                /* Terminate string     */
00231    if (debug) {
00232       for (lp = pbuf; lp < pp;) {
00233          if ((c = (*lp++ & 0377)) < ' ')
00234             printf("\\%o ", c);
00235          else    printf("%c ", c);
00236         }
00237         printf("\n");
00238    }
00239 }
00240 
00241 /*** Compile a class (within []) ***********************/
00242 char *cclass(char *source, char *src)
00243 /* char       *source;   // Pattern start -- for error msg. */
00244 /* char       *src;      // Class start */
00245 {
00246    char   *s;        /* Source pointer    */
00247    char   *cp;       /* Pattern start     */
00248    int    c;         /* Current character */
00249    int             o;         /* Temp              */
00250 
00251    s = src;
00252    o = CLASS;
00253    if (*s == '^') {
00254       ++s;
00255       o = NCLASS;
00256    }
00257    store(o);
00258    cp = pp;
00259    store(0);                          /* Byte count      */
00260    while ((c = *s++) && c!=']') {
00261       if (c == '\\') {                /* Store quoted char    */
00262          if ((c = *s++) == '\0')      /* Gotta get something  */
00263             badpat("Class terminates badly", source, s);
00264          else    store(tolower(c));
00265       }
00266       else if (c == '-' &&
00267             (pp - cp) > 1 && *s != ']' && *s != '\0') {
00268          c = pp[-1];             /* Range start     */
00269          pp[-1] = RANGE;         /* Range signal    */
00270          store(c);               /* Re-store start  */
00271          c = *s++;               /* Get end char and*/
00272          store(tolower(c));      /* Store it        */
00273       }
00274       else {
00275          store(tolower(c));      /* Store normal char */
00276       }
00277    }
00278    if (c != ']')
00279       badpat("Unterminated class", source, s);
00280    if ((c = (pp - cp)) >= 256)
00281       badpat("Class too large", source, s);
00282    if (c == 0)
00283       badpat("Empty class", source, s);
00284    *cp = c;
00285    return(s);
00286 }
00287 
00288 /*** Store an entry in the pattern buffer **************/
00289 void store(int op)
00290 {
00291    if (pp >= &pbuf[PMAX])
00292       error("Pattern too complex\n");
00293    *pp++ = op;
00294 }
00295 
00296 /*** Report a bad pattern specification ****************/
00297 void badpat(char *message, char *source, char *stop)
00298 /* char  *message;       // Error message */
00299 /* char  *source;        // Pattern start */
00300 /* char  *stop;          // Pattern end   */
00301 {
00302    fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
00303    fprintf(stderr, "-GREP-E-Stopped at byte %d, '%c'\n",
00304          stop-source, stop[-1]);
00305    error("?GREP-E-Bad pattern\n");
00306 }
00307 
00308 /*** Scan the file for the pattern in pbuf[] ***********/
00309 void grep(FILE *fp, char *fn)
00310 /* FILE       *fp;       // File to process            */
00311 /* char       *fn;       // File name (for -f option)  */
00312 {
00313    int lno, count, m;
00314 
00315    lno = 0;
00316    count = 0;
00317    while (fgets(lbuf, LMAX, fp)) {
00318       ++lno;
00319       m = match();
00320       if ((m && !vflag) || (!m && vflag)) {
00321          ++count;
00322          if (!cflag) {
00323             if (fflag && fn) {
00324                file(fn);
00325                fn = 0;
00326             }
00327             if (nflag)
00328                printf("%d\t", lno);
00329             printf("%s\n", lbuf);
00330          }
00331       }
00332    }
00333    if (cflag) {
00334       if (fflag && fn)
00335          file(fn);
00336       printf("%d\n", count);
00337    }
00338 }
00339 
00340 /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
00341 void match()
00342 {
00343    char   *l;        /* Line pointer       */
00344 
00345    for (l = lbuf; *l; ++l) {
00346       if (pmatch(l, pbuf))
00347          return(1);
00348    }
00349    return(0);
00350 }
00351 
00352 /*** Match partial line with pattern *******************/
00353 char *pmatch(char *line, char *pattern)
00354 /* char               *line;     // (partial) line to match      */
00355 /* char               *pattern;  // (partial) pattern to match   */
00356 {
00357    char   *l;        /* Current line pointer         */
00358    char   *p;        /* Current pattern pointer      */
00359    char   c;         /* Current character            */
00360    char            *e;        /* End for STAR and PLUS match  */
00361    int             op;        /* Pattern operation            */
00362    int             n;         /* Class counter                */
00363    char            *are;      /* Start of STAR match          */
00364 
00365    l = line;
00366    if (debug > 1)
00367       printf("pmatch(\"%s\")\n", line);
00368    p = pattern;
00369    while ((op = *p++) != ENDPAT) {
00370       if (debug > 1)
00371          printf("byte[%d] = 0%o, '%c', op = 0%o\n",
00372                l-line, *l, *l, op);
00373       switch(op) {
00374 
00375       case CHAR:
00376          if (tolower(*l++) != *p++)
00377             return(0);
00378          break;
00379 
00380       case BOL:
00381          if (l != lbuf)
00382             return(0);
00383          break;
00384 
00385       case EOL:
00386          if (*l != '\0')
00387             return(0);
00388          break;
00389 
00390       case ANY:
00391          if (*l++ == '\0')
00392             return(0);
00393          break;
00394 
00395       case DIGIT:
00396          if ((c = *l++) < '0' || (c > '9'))
00397             return(0);
00398          break;
00399 
00400       case ALPHA:
00401          c = tolower(*l++);
00402          if (c < 'a' || c > 'z')
00403             return(0);
00404          break;
00405 
00406       case NALPHA:
00407          c = tolower(*l++);
00408          if (c >= 'a' && c <= 'z')
00409             break;
00410          else if (c < '0' || c > '9')
00411             return(0);
00412          break;
00413 
00414       case PUNCT:
00415          c = *l++;
00416          if (c == 0 || c > ' ')
00417             return(0);
00418          break;
00419 
00420       case CLASS:
00421       case NCLASS:
00422          c = tolower(*l++);
00423          n = *p++ & 0377;
00424          do {
00425             if (*p == RANGE) {
00426                p += 3;
00427                n -= 2;
00428                if (c >= p[-2] && c <= p[-1])
00429                   break;
00430             }
00431             else if (c == *p++)
00432                break;
00433          } while (--n > 1);
00434          if ((op == CLASS) == (n <= 1))
00435             return(0);
00436          if (op == CLASS)
00437             p += n - 2;
00438          break;
00439 
00440       case MINUS:
00441          e = pmatch(l, p);       /* Look for a match    */
00442          while (*p++ != ENDPAT); /* Skip over pattern   */
00443          if (e)                  /* Got a match?        */
00444             l = e;               /* Yes, update string  */
00445          break;                  /* Always succeeds     */
00446 
00447       case PLUS:                 /* One or more ...     */
00448          if ((l = pmatch(l, p)) == 0)
00449             return(0);           /* Gotta have a match  */
00450       case STAR:                 /* Zero or more ...    */
00451          are = l;                /* Remember line start */
00452          while (*l && (e = pmatch(l, p)))
00453             l = e;               /* Get longest match   */
00454          while (*p++ != ENDPAT); /* Skip over pattern   */
00455          while (l >= are) {      /* Try to match rest   */
00456             if (e = pmatch(l, p))
00457                return(e);
00458             --l;                 /* Nope, try earlier   */
00459          }
00460          return(0);              /* Nothing else worked */
00461 
00462       default:
00463          printf("Bad op code %d\n", op);
00464          error("Cannot happen -- match\n");
00465       }
00466    }
00467    return(l);
00468 }
00469 
00470 /*** Report an error ***********************************/
00471 void error(char *s)
00472 {
00473    fprintf(stderr, "%s", s);
00474    exit(1);
00475 }
00476 
00477 /*** Main program - parse arguments & grep *************/
00478 int main(int argc, char **argv)
00479 {
00480    char   *p;
00481    int    c, i;
00482    int             gotpattern;
00483 
00484    FILE            *f;
00485 
00486    if (argc <= 1)
00487       usage("No arguments");
00488    if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
00489       help(documentation);
00490       help(patdoc);
00491       return;
00492       }
00493    nfile = argc-1;
00494    gotpattern = 0;
00495    for (i=1; i < argc; ++i) {
00496       p = argv[i];
00497       if (*p == '-') {
00498          ++p;
00499          while (c = *p++) {
00500             switch(tolower(c)) {
00501 
00502             case '?':
00503                help(documentation);
00504                break;
00505 
00506             case 'C':
00507             case 'c':
00508                ++cflag;
00509                break;
00510 
00511             case 'D':
00512             case 'd':
00513                ++debug;
00514                break;
00515 
00516             case 'F':
00517             case 'f':
00518                ++fflag;
00519                break;
00520 
00521             case 'n':
00522             case 'N':
00523                ++nflag;
00524                break;
00525 
00526             case 'v':
00527             case 'V':
00528                ++vflag;
00529                break;
00530 
00531             default:
00532                usage("Unknown flag");
00533             }
00534          }
00535          argv[i] = 0;
00536          --nfile;
00537       } else if (!gotpattern) {
00538          compile(p);
00539          argv[i] = 0;
00540          ++gotpattern;
00541          --nfile;
00542       }
00543    }
00544    if (!gotpattern)
00545       usage("No pattern");
00546    if (nfile == 0)
00547       grep(stdin, 0);
00548    else {
00549       fflag = fflag ^ (nfile > 0);
00550       for (i=1; i < argc; ++i) {
00551          if (p = argv[i]) {
00552             if ((f=fopen(p, "r")) == NULL)
00553                cant(p);
00554             else {
00555                grep(f, p);
00556                fclose(f);
00557             }
00558          }
00559       }
00560    }
00561 }
00562 
 All Data Structures