#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include "CGrepLib.h"
Go to the source code of this file.
Functions | |
int * | CGrep_SearchPattern (int *nres, const char *ctext, size_t ctext_sz, const char *pattern, char **options) |
int * | CGrep_SearchSubstring (int *nres, const char *pattern, const char *ctext, size_t ctext_len, int errors) |
int * | CGrep_SearchWord (int *nres, const char *word, const char *ctext, size_t ctext_len, int errors) |
proximity_hit_t * | CGrep_SearchProximity (int *nres, const char *ctext, size_t ctext_sz, int prox_window, char **patterns, char ***options) |
int | CGrep_GetMatchingCW (MyHash_table *ht, char *filter, char **options, int npattern, const Console *c) |
int * | CGrep_GetCWOccurrences (int *nocc, const MyHash_table *ht, const char *filter, const char *cbody, size_t cbody_len) |
const char * | CGrep_GetNextCWOccurrence (int *len, MyHash_table *ht, const char *filter, const char *cbody, size_t remaining) |
proximity_hit_t * | CGrep_GetOccurrencesProximitySpaceless (int *nocc, int prox_window, int npatterns, const MyHash_table *ht, const char filter[], const MyHash_table *separators, const char sepFilter[], const char *cbody, size_t cbody_len) |
proximity_hit_t * | CGrep_GetOccurrencesProximity (int *nocc, int prox_window, int npatterns, const MyHash_table *ht, const char filter[], const MyHash_table *separators, const char *sepFilter, const char *cbody, size_t cbody_len, const Hash_node *nl) |
MyHash_node * | CGrep_CheckIfIsPattern (const char *cbody, int cw_len, const MyHash_table *ht, MyHash_node *hn, const char filter[]) |
char * | CGrep_escapeStringConfigurable (const char *s, size_t len, char min, char max, const char *exceptions) |
char * | CGrep_escapeString (const char *s, size_t len) |
void | MyHashtable_init (MyHash_table *ht, int n) |
void | MyHashtable_clear (MyHash_table *ht) |
int | MyHashtable_func (const char *s, int len, const MyHash_table *ht) |
MyHash_node * | MyHashtable_search (const char *s, int slen, const MyHash_table *ht) |
int | MyHashtable_insert (const char *s, int slen, int npattern, MyHash_table *ht) |
For more details please have a look at the html documentation.
This file is licensed under LGPL terms (see file LICENSE)
Definition in file CGrepLib.c.
|
Value: { \ prevpos = currpos; \ do { \ currpos++; \ } while ( (currpos<endpos) && ((*currpos & 0x80) == 0) ); \ cw_len = currpos - prevpos; \ } Definition at line 689 of file CGrepLib.c. |
|
Checks whether a codeword matches one of the patterns being sought. ht must hold the codewords relative to the patterns. Because the same codeword can represent a word that matches more than one pattern, the function can be called repeatedly. If the parameter hn is NULL, the first match is returned. If hn holds the value previously returned, the next one is returned.
Definition at line 1033 of file CGrepLib.c. |
|
"Default" version of CGrep_escapeStringConfigurable, using the defaults CGREP_MIN_PRINTABLE_CHAR (32), CGREP_MAX_PRINTABLE_CHAR (126), and CGREP_NONPRINTABLE_CHARS ("[]"). Definition at line 1106 of file CGrepLib.c. |
|
Utility function: given a string, it copies it to another allocated string where some of the characters are escaped as: [<hex value>], where <hex value> is the hexadecimal value of the escaped character. The characters to escape are selectable by range and list: all characters whose value is smaller than min, greater than max, or equal to one in exceptions are escaped.
Definition at line 1068 of file CGrepLib.c. |
|
Looks for the codewords contained within the hashtable in the compressed body. Returns the list of positions (in the compressed body) of the matches.
Definition at line 607 of file CGrepLib.c. |
|
Fills the hashtable ht with the codewords corresponding to all the words in the Dictionary obtained from Console c matching the search pattern as specified in agrep's options. 'Systems' agrep, I/O via file. The dictionary content is saved to '/tmp/agrep.tmp.PID.n', and the output of agrep to '/tmp/agrep.out.PID.n'. Temporary files are unlink'ed after the execution. "-n" or other flags that modify agrep's output format MUST NOT be within the options, and the buffer must contain one word per line. options[0] must be agrep's executable name, and the array must be null-terminated. Along with the hashtable (which must be initialized), if filter is not NULL, it is interpreted as an array of 256 unsigned char to fill with 1's in correspondence to the first byte of every matching codeword found.
Definition at line 580 of file CGrepLib.c. |
|
Looks for the codewords contained in the hashtable ht in the compressed body cbody. Returns the position of the first match found.
Definition at line 661 of file CGrepLib.c. |
|
This function is the same as CGrep_GetOccurrencesProximitySpaceless, but is intended for files compressed with spaces. Has an additional argument: a pointer to the hashnode relative to the newline entry in the dictionary (NULL if there is no newline). Definition at line 861 of file CGrepLib.c. |
|
returns an array of proximity_hit_t, proximity hits for the patterns over the compressed body of a spaceless-compressed text. The array returned is malloc'ed; each proximity_hit_t entry is composed of: * byte_position, the position in the compressed body of the beginning of the matching window * start_position, the rank of the first word of the matching window * end_position, the rank of the last word in the matching window * positions, an array of positions in the cbody of the match for each pattern * ranks, an array of ranks giving the rank of each pattern
Definition at line 734 of file CGrepLib.c. |
|
Search for a pattern on a compressed text using agrep. Agrep is invoked over the dictionary in order to find out tokens that match against the query string. The dictionary is in the format of a token per line: when using options to agrep or regexp, you must remember that things like: "^p.*" will match all WORDS (not lines) beginning with the letter 'p'. This is because agrep is invoked over the dictionary, that actually contains a token per line. Once the list of matching tokens is found, their corresponding codewords are inserted into a hashtable, and the compressed file is scanned looking for such codewords.
Definition at line 85 of file CGrepLib.c. |
|
Performs a proximity search, returning an array of proximity_hit_t's. Up to CGREP_MAX_PATTERNS (10) patterns are allowed, with all of agrep's options (regexp search, approximated search, case sensitiveness/insensitiveness, etc.). In case of non exact search, it may happen that a word matches multiple words. In this case, if a set of words matches the search in multiple ways, it is returned only once (the arrays positions and ranks in the corresponding proximity_hit_t will hold values for the first such match found).
Definition at line 262 of file CGrepLib.c. |
|
Returns an array of positions of the pattern in the compressed body as a substring, allowing for at most 'errors' errors (where 'errors' is the value of the errors parameter). The array returned is malloc'ed. This function is a 'shortcut' invocation for CGrep_SearchPattern.
Definition at line 152 of file CGrepLib.c. |
|
Returns the array of positions of the word in the compressed body, with at most 'errors' errors (where 'errors' is the value of the errors parameter). The array returned is malloc'ed. This function is a 'shortcut' invocation for CGrep_SearchPattern.
Definition at line 183 of file CGrepLib.c. |
|
Frees all elements of a hashtable. After this call, ht is an empty, uninitialized MyHash_table.
Definition at line 1145 of file CGrepLib.c. |
|
Computes the hash value for the given string.
Definition at line 1167 of file CGrepLib.c. |
|
Initialize the hash table according to the number of estimated tokens; the load factor is set to 0.1.
Definition at line 1124 of file CGrepLib.c. |
|
Inserts the token in the hash table and returns 1 if new, 0 otherwise; it also updates the counter of occurrences for that token.
Definition at line 1211 of file CGrepLib.c. |
|
Searches for the given string in the passed hash table (returns NULL if not found).
Definition at line 1186 of file CGrepLib.c. |