Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members  

searchword.c

Go to the documentation of this file.
00001 
00015 #include "CGrepLib.h"
00016 
/* Number of errors allowed in the search; main() passes this value as the
   last argument of CGrep_SearchWord and echoes it in its status message. */
00017 #define ERRORS 0
00018 
/* Write the one-line command synopsis to standard output. */
void usage()
{
  fputs("usage: searchword word file.hwz\n", stdout);
}
00023 
00024 int main(int argc, char **argv)
00025 {
00026   struct stat sbuf;
00027   int input_sz, scanned = 0;
00028   FILE *input_file;
00029   char *input_buf;
00030   int *position = NULL;
00031   int nocc = 0;
00032   char *body;
00033   int tlen, blen;
00034   Console console;
00035 
00036   if ( argc < 2 ) { /* check for at least 2 arguments */
00037     usage();
00038     exit(0);
00039   }
00040 
00041   /* check for file readability */
00042   stat(argv[argc-1],&sbuf);
00043   if ( (input_file = fopen(argv[argc-1], "r")) == NULL ) {
00044     fprintf(stderr, "cannot open %s, exiting.\n", argv[argc-1]);
00045     exit(-1);
00046   }
00047   input_sz = (int)sbuf.st_size;
00048   if (input_sz == 0){ /* check for file size */
00049     fprintf(stderr,"Fatal Error: Input file empty\n");
00050     exit(-1); 
00051   } 
00052 
00053   /* mmap the compressed file */
00054   input_buf = (char *) mmap(NULL,input_sz,PROT_READ,MAP_SHARED,
00055                             fileno(input_file),0);
00056   if ( input_buf == MAP_FAILED ) {
00057     perror("mmap");
00058     exit(-1);
00059   }
00060 
00061   printf("searching for \"%s\" allowing %d errors...\n", argv[argc-2],
00062          ERRORS);
00063 
00064   /* invoke CGrep_SearchWord. The function returns an array of positions
00065      of matching codeword within the body of the compressed file. The
00066      last parameters is the number of errors allowed in the search.
00067      (defined at the top of this file)
00068   */
00069   position = CGrep_SearchWord(&nocc, argv[argc-2], input_buf, input_sz,
00070                               ERRORS);
00071   
00072   /* check if some results were found */
00073   if ( position == NULL ) {
00074     puts("No matches found.");
00075     return(0);
00076   }
00077   /* otherwise, go ahead and decompress the compressed file around
00078      each match */
00079   /* Get the compressed body and console from the compressed file */
00080   HDecompress_getBodyAndConsole(input_buf, input_sz, &tlen, &body, &blen,
00081                                 &console);
00082   for ( scanned = 0; scanned < nocc; scanned++ ) { /* for each match */
00083     char *prev = NULL, *next = NULL;
00084     int prevLen = 0, nextLen = 0, decoded;
00085     printf("at position %d: ", position[scanned]);
00086     /* decompress 3 words to the left of the match */
00087     /* the first byte of the compressed file is != 0 if the file is compressed
00088        without spaces, 0 otherwise */
00089     if ( *input_buf ) { /* spaceless or with spaces? */
00090       HDecompress_previousBlock_tokens_spaceless(body+position[scanned],
00091                                                  position[scanned],
00092                                                  3, &prev, &prevLen, &decoded,
00093                                                  &console);
00094     } else {
00095       HDecompress_previousBlock_tokens(body+position[scanned],
00096                                        position[scanned],
00097                                        3, &prev, &prevLen, &decoded,
00098                                        &console);
00099     }
00100     /* print the previous tokens */
00101     if ( prevLen ) {
00102       prev[prevLen] = '\0'; /* null-terminate the string */
00103       printf(prev);
00104       if ( *input_buf && isalnum(prev[prevLen-1]) ) { // add a space
00105         putchar(' ');
00106       }
00107     }
00108     /* now, decompress 4 words beginning with the match */
00109     /* the first byte of the compressed file is != 0 if the file is compressed
00110        without spaces, 0 otherwise */
00111     if ( *input_buf ) { /* spaceless or with spaces? */
00112       HDecompress_nextBlock_tokens_spaceless(body+position[scanned],
00113                                              input_sz-position[scanned],
00114                                              4, &next, &nextLen, &decoded,
00115                                              &console);
00116     } else {
00117       HDecompress_nextBlock_tokens(body+position[scanned],
00118                                    input_sz-position[scanned],
00119                                    3, &next, &nextLen, &decoded,
00120                                    &console);
00121     }
00122     /* print the match and the next 3 tokens */
00123     if ( nextLen ) {
00124       next[nextLen] = '\0'; /* null-terminate the string */
00125       puts(next);
00126     }
00127     fflush(stdout); /* flush the buffers */
00128     if ( prev ) free(prev); /* frees the buffers */
00129     if ( next ) free(next);
00130   }
00131 
00132   printf("(%d occurrences)\n", nocc);
00133   
00134   return(0);
00135 }

Generated on Mon Mar 31 14:44:31 2003 by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002