Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members  

HuffwordLib.h

Go to the documentation of this file.
00001 
00011 #include <stdio.h>
00012 #include <stdlib.h>
00013 #include <string.h>
00014 #include <sys/mman.h>
00015 #include <sys/stat.h>
00016 #include <unistd.h>
00017 #include <errno.h>
00018 #include <ctype.h>
00019 #include <math.h>
00020 
00021 
00022 /* -------------------------------------------------------- */
00023 /* ---------------  Pool of data structures --------------- */
00024 /* -------------------------------------------------------- */
00025 
00026 
00027 
00032 typedef struct Hash_node {  /* One node: a string, counters/codeword fields, and a link to another node */
00033   char  *str;          /* character data for this node; presumably the token text -- TODO confirm whether NUL-terminated */
00034   int len_str;         /* presumably the length of str in bytes */
00035   int count_occ;       /* presumably the number of occurrences counted for str */
00036   int codeword;        /* codeword stored as an int; whether tagged or plain form is not visible here */
00037   int cw_len;          /* presumably the length of codeword; unit (bits or bytes) not visible here -- TODO confirm */
00038   struct Hash_node *next;    /* pointer to another Hash_node; presumably the next entry of a collision chain */
00039 } Hash_node;
00040 
00041 typedef Hash_node **Hash_nodeptr_array; /* pointer to pointer-to-Hash_node; the typedef hides two levels of indirection. Used for HHash_table.table and for the tree/work_area parameters of the Hufftree_* prototypes */
00042 
00047 typedef struct {  /* Hash table handle; the HHashtable_* prototypes take a pointer to it */
00048   int size;             /* presumably the number of slots in table -- TODO confirm */
00049   int card;             /* presumably the number of entries currently stored (cardinality) -- TODO confirm */
00050   Hash_nodeptr_array table;   /* Hash_node ** holding the node pointers */
00051 } HHash_table;
00052 
00053 
00054 
00061 typedef struct {  /* Token dictionary; the HDictionary_* prototypes take a pointer to it */
00062   char *content;    /* character buffer; presumably all tokens stored back to back -- TODO confirm */
00063   int length;       /* presumably the length of content in bytes */
00064   int num_tokens;   /* presumably the number of tokens held in content */
00065   int *start_pos;   /* int array; presumably the starting offset of each token inside content -- TODO confirm */
00066 } Dictionary;
00067 
00068 
00069 
00071 typedef struct {  /* Canonical-code bookkeeping; the HCanonical_* prototypes take a pointer to it */
00072   int firstcw[5];   /* fixed array of 5 ints; presumably the first codeword for each codeword length -- TODO confirm indexing */
00073   int offsetcw[5];  /* fixed array of 5 ints; presumably an offset per codeword length -- TODO confirm */
00074   int max_cwlen;    /* presumably the longest codeword length in use; relation to the array size 5 is not visible here */
00075   int num_tokens;   /* presumably the number of coded tokens */
00076 } Canonical;
00077 
00078 
00079 
00081 typedef struct {  /* Two int arrays plus a count; the HJumpers_* prototypes take a pointer to it */
00082   int number;             /* presumably the number of entries in each offset array -- TODO confirm */
00083   int *text_offsets;      /* int array; presumably positions in the uncompressed text */
00084   int *ctext_offsets;     /* int array; presumably the matching positions in the compressed text */
00085 } Jumpers;
00086 
00087 
00089 typedef struct {  /* Bundles the four structures below, embedded by value (not by pointer) */
00090   HHash_table hashtable;           /* embedded hash table */
00091   Dictionary dictionary;          /* embedded token dictionary */
00092   Canonical canoinfo;   /* embedded canonical-code information */
00093   Jumpers jumpers;       /* embedded jumpers structure */
00094 } Console;
00095 
00096 
00097 
00098 /* ------------------------------------------------------ */
00099 /* -----------  Text parsing and compression ------------ */
00100 /* ------------------------------------------------------ */
00101 
00102 
00103 void HParse_text(char *text, int text_len, HHash_table *ht);  /* takes a text buffer with explicit length and a hash table; presumably records the tokens of text in ht -- TODO confirm */
00104 
00105 void Huffw_compress(char *text, int tlen, int jump_value, char **cstring, int *clen, int Verbose);  /* cstring/clen are presumably out-parameters; who allocates and frees *cstring is not visible here */
00106 void Huffw_decompress(char *cstring, int clen, char **text, int *tlen, int Verbose);  /* text/tlen are presumably out-parameters; who allocates and frees *text is not visible here */
00107 void Huffw_PrintInfo(Console *console, int ctext_len, int text_len, int cstring_len, 
00108                      int rule, int Verbose);  /* returns nothing; meaning of 'rule' is not visible here */
00109 
00110 void Huffw_spaceless_compress(char *text, int tlen, char **cstring, int *clen, int Verbose);  /* same parameter list as Huffw_compress minus jump_value */
00111 void Huffw_spaceless_decompress(char *cstring, int clen, char **text, int *tlen, int Verbose);  /* same parameter list as Huffw_decompress */
00112 
00113 
00114 void HCompress_getBody(char *text, int text_len, HHash_table *ht, char **ctext, int *ctext_len);  /* ctext/ctext_len are presumably out-parameters -- TODO confirm ownership of *ctext */
00115 void HCompress_getBodyAndConsole(char *text, int tlen, int jump_value, char **body, int *blen, 
00116                                      Console *console);  /* body/blen presumably out-parameters; console is passed by pointer, presumably filled in */
00117 void HCompress_getString(char **ctext, int *ctext_len, int text_len,char *body, 
00118                              int blen, Console *console);  /* note: the presumed outputs (ctext, ctext_len) come first here, unlike the sibling prototypes */
00119 void HCompress_contractSpaces(char *body, int blen, char **body_spaceless, int *blen_spaceless, 
00120                                   Console *console);  /* body_spaceless/blen_spaceless presumably out-parameters */
00121 
00122 
00123 void HDecompress_getBodyAndConsole(char *ctext, int ctext_len, int *text_len, char **body, 
00124                                        int *blen, Console *console);  /* text_len, body and blen are pointers, presumably out-parameters */
00125 void HDecompress_expandSpaces(char *body_spaceless, int blen_spaceless, char **body, 
00126                                   int *blen, Console *console);  /* parameter list mirrors HCompress_contractSpaces with input and output swapped */
00127 void HDecompress_nextBlock_bytes(char *ctext, int num_bytes, char **text, int *text_len, 
00128                                      Console *console);  /* text/text_len presumably out-parameters; ownership of *text not visible here */
00129 void HDecompress_nextBlock_bytes_spaceless(char *ctext, int num_bytes, char **text, 
00130                                                int *text_len, Console *console);  /* same parameter list as HDecompress_nextBlock_bytes */
00131 void HDecompress_previousBlock_bytes(char *ctext, int num_bytes, char **text, int *text_len, 
00132                                          Console *console);  /* same parameter list as HDecompress_nextBlock_bytes */
00133 void HDecompress_previousBlock_bytes_spaceless(char *ctext, int num_bytes, char **text, 
00134                                                    int *text_len, Console *console);  /* same parameter list as HDecompress_nextBlock_bytes */
00135 void HDecompress_nextBlock_tokens(char *ctext, int byte_left, int num_obj, char **text, 
00136                                       int *text_len, int *decoded, Console *console);  /* NOTE(review): parameter is spelled byte_left here but bytes_left in the _spaceless variant */
00137 void HDecompress_nextBlock_tokens_spaceless(char *ctext, int bytes_left, int num_obj, 
00138                                                 char **text, int *text_len, int *decoded,
00139                                                 Console *console);  /* 'decoded' is an int pointer, presumably receiving the number of tokens decoded -- TODO confirm */
00140 void HDecompress_previousBlock_tokens(char *ctext, int byte_left, int num_obj, char **text, 
00141                                           int *text_len, int *decoded, Console *console);  /* same parameter list as HDecompress_nextBlock_tokens */
00142 void HDecompress_previousBlock_tokens_spaceless(char *ctext, int bytes_left, int num_obj, 
00143                                                     char **text, int *text_len, int *decoded,
00144                                                     Console *console);  /* same parameter list as HDecompress_nextBlock_tokens_spaceless */
00145 
00146 
00147 
00148 
00149 /* ---------------------------------------------------- */
00150 /* -------------------  Hash Table    ----------------- */
00151 /* ---------------------------------------------------- */
00152 
00153 
00154 void HHashtable_init(HHash_table *ht, int estimated);  /* 'estimated' is presumably the expected number of entries -- TODO confirm */
00155 int HHashtable_func(char *s, int len, HHash_table *ht);  /* returns an int for the string (s, len); presumably the slot index in ht -- TODO confirm */
00156 Hash_node *HHashtable_search(char *str, int len, HHash_table *ht);  /* returns a Hash_node pointer; the not-found convention is not visible here */
00157 int HHashtable_insert(char *str, int str_len, HHash_table *ht);   /* returns an int whose meaning is not visible here */
00158 void HHashtable_clear(HHash_table *ht);  /* whether this frees the nodes or only resets them is not visible here */
00159 void HHashtable_fromdict(Dictionary *dict, Canonical *cano, HHash_table *ht);  /* presumably fills ht from dict and cano, judging by the name */
00160 void HHashtable_print(HHash_table *ht);
00161 
00162 
00163 /* ------------------------------------------------------------ */
00164 /* ---------------------  Huffman Tree   ---------------------- */
00165 /* ------------------------------------------------------------ */
00166 
00167 void Hufftree_createLeaves(Hash_nodeptr_array tree, int num_leaves, HHash_table *ht);  /* tree is a Hash_node ** array; presumably filled with num_leaves nodes taken from ht */
00168 int Hufftree_fromLeaves(Hash_nodeptr_array work_area, int *tot_nodes, int HT_leaves);  /* returns an int (meaning not visible here); tot_nodes is presumably an out-parameter */
00169 int Hufftree_computeCwLen(Hash_nodeptr_array tree, int root, int tree_size);  /* 'root' is an int, presumably an index into tree; return value meaning not visible here */
00170 void Hufftree_build(Hash_nodeptr_array *tree_array_ptr, HHash_table *ht);  /* takes a pointer to the array pointer, so it can presumably allocate or replace the array */
00171 void Hufftree_print(Canonical *cano, Hash_nodeptr_array tree_array, int Verbose);
00172 
00173 
00174 
00175 /* ------------------------------------------------------ */
00176 /* -----------------  Codeword operations --------------- */
00177 /* ------------------------------------------------------ */
00178 
00179 void HCodeword_PlainFromTagged(int taggedcw, int taggedcwlen,int *plaincw, int *plaincwlen);  /* plaincw/plaincwlen are int pointers, presumably out-parameters */
00180 void HCodeword_PlainFromTokenrank(int token_rank,Canonical *cano,int *plaincw,int *plaincwlen); /* same presumed outputs as above, derived from a token rank and cano */
00181 void HCodeword_TaggedFromPlain(int plaincw, int plaincwlen,int *taggedcw, int *taggedcwlen); /* parameter list mirrors HCodeword_PlainFromTagged */
00182 void HCodeword_TaggedFromTokenrank(int token_rank,Canonical *cano,int *taggedcw, int *taggedcwlen); /* parameter list mirrors HCodeword_PlainFromTokenrank */
00183 void HCodeword_TaggedFromToken(char *token, int token_len, HHash_table *ht,int *taggedcw, int *taggedcwlen); /* token is given with an explicit length */
00184 void HCodeword_TaggedGetNext(char *s, int num_byte_left, int *taggedcw, int *lencw);  /* presumably reads a codeword from s, bounded by num_byte_left -- TODO confirm */
00185 int HCodeword_TaggedGetNextLength(char *s, int num_byte_left);  /* returns an int, presumably the length of the next codeword */
00186 void HCodeword_TaggedGetPrevious(char *s, int num_byte_left, int *taggedcw, int *lencw);  /* same parameter list as HCodeword_TaggedGetNext */
00187 void HCodeword_tostring(int taggedcw, int taggedcwlen, char **s); /* s is a char **, presumably an out-parameter; allocation and ownership not visible here */
00188 
00189 void HToken_RankFromPlainCw(int plaincw, int plaincwlen, Canonical *cano,int *token_rank); /* token_rank is an int pointer, presumably the out-parameter */
00190 void HToken_RankFromTaggedCw(int taggedcw, int taggedcwlen, Canonical *cano,int *token_rank); /* same shape as above, for a tagged codeword */
00191 void HToken_fromTaggedCw(int taggedcw, int taggedcwlen,Canonical *cano, Dictionary *dict,
00192                              char **token, int *token_len);  /* token/token_len presumably out-parameters; whether *token points into dict is not visible here */
00193 int HToken_decompressNext(char *s, int num_byte_left, char **token, int *lentoken, int *lencw, 
00194                          Console *console);  /* returns an int whose meaning is not visible here */
00195 int HToken_decompressPrevious(char *s, int num_byte_left, char **token, int *lentoken, int *lencw, 
00196                              Console *console);  /* same parameter list as HToken_decompressNext */
00197 
00198 void HToken_getLengthNext(char *s, int num_char_left, int *len);  /* NOTE(review): uses num_char_left where sibling prototypes use num_byte_left */
00199 
00200 
00201 /* ------------------------------------------------------ */
00202 /* ---------------  Dictionary operations --------------- */
00203 /* ------------------------------------------------------ */
00204 
00205 void HDictionary_fromstring(char *s, int slen, int stokens, Dictionary *dict);  /* presumably fills dict from a buffer s of length slen holding stokens tokens -- TODO confirm */
00206 void HDictionary_fromtree(Hash_nodeptr_array tree, int num_tokens, Dictionary *dict);  /* presumably fills dict from num_tokens entries of tree */
00207 void HDictionary_print(Dictionary *dict, HHash_table *ht, int Verbose);
00208 
00209 /* -------------------------------------------------------- */
00210 /* ----------------  Jumpers data structure --------------- */
00211 /* -------------------------------------------------------- */
00212 
00213 void HJumpers_fromds(char *text, int tlen, HHash_table *ht,int jump_value, Jumpers *jumpers);  /* presumably fills jumpers; the meaning of jump_value is not visible here */
00214 void HJumpers_tostring(char *s, Jumpers *jumpers);  /* no buffer-length parameter: the required size of s is not visible here */
00215 void HJumpers_fromstring(char *s, Jumpers *jumpers);  /* no length parameter for s; the expected format is not visible here */
00216 void HGet_charpos_from_bytepos(char *ctext, int ctext_len, int bytepos, int *textpos, Console *console);  /* textpos is an int pointer, presumably the out-parameter */
00217 void HGet_bytepos_from_charpos(char *ctext, int ctext_len, int textpos, int *bytepos, Console *console);  /* bytepos is an int pointer, presumably the out-parameter */
00218 
00219 /* ---------------------------------------------------------- */
00220 /* ----------------  Canonical data structure --------------- */
00221 /* ---------------------------------------------------------- */
00222 
00223 
00224 void HCanonical_fromtree(Hash_nodeptr_array tree_array, Canonical *cano, HHash_table *ht);  /* presumably fills cano from tree_array; the role of ht is not visible here */
00225 void HCanonical_fromstring(char *s, Canonical *cano);  /* no length parameter for s; the expected format is not visible here */
00226 void HCanonical_tostring(char *s, Canonical *cano);  /* no buffer-length parameter: the required size of s is not visible here */
00227 
00228 
00229 
00230 
00231 
00232 /* ---------------------------------------------------- */
00233 /* -----------------  Basic routines    --------------- */
00234 /* ---------------------------------------------------- */
00235 
00236 
00238 void HInt_tostring(char *s, int i);  /* no buffer-length parameter: the number of bytes written to s is not visible here */
00239 
00240 
00242 int HInt_fromstring(char *s); /* returns an int read from s; the encoding (text or raw bytes) is not visible here */
00243 
00245 void HPrint_string(char *s, int l);  /* takes an explicit length l, so s is presumably not required to be NUL-terminated */
00246 
00247 /* Used in the construction of the Huffman tree */
00248 int HSort_for_freq(const void *va, const void *vb);  /* signature matches the comparator type taken by qsort */
00249 int HSort_for_cwlen(const void *va, const void *vb);  /* signature matches the comparator type taken by qsort */
00250 
00251 

Generated on Mon Mar 31 14:44:30 2003 by doxygen 1.2.14 written by Dimitri van Heesch, © 1997-2002