00001
00011 #include <stdio.h>
00012 #include <stdlib.h>
00013 #include <string.h>
00014 #include <sys/mman.h>
00015 #include <sys/stat.h>
00016 #include <unistd.h>
00017 #include <errno.h>
00018 #include <ctype.h>
00019 #include <math.h>
00020
00021
00022
00023
00024
00025
00026
00027
/*
 * Hash_node: one record carrying a string, an occurrence counter and
 * codeword fields.  The `next` member points to another Hash_node, so
 * records can be linked into a singly linked list.
 * NOTE(review): presumably one node per distinct token, chained per hash
 * bucket; the Hufftree_* prototypes in this file also take arrays of
 * Hash_node pointers as a "tree" -- confirm exact usage in the
 * implementation.
 */
00032 typedef struct Hash_node {
00033 char *str;  /* string pointer; presumably the token text (ownership and NUL-termination not shown -- TODO confirm) */
00034 int len_str;  /* presumably the length of str in bytes -- TODO confirm */
00035 int count_occ;  /* presumably the number of occurrences counted for this string -- TODO confirm */
00036 int codeword;  /* codeword value stored as an int; encoding (plain vs. tagged) not shown here */
00037 int cw_len;  /* presumably the codeword length; unit (bits or bytes) not shown -- TODO confirm */
00038 struct Hash_node *next;  /* link to another Hash_node */
00039 } Hash_node;
00040
/*
 * Hash_nodeptr_array: alias for `Hash_node **`.  In this file it is the
 * type of HHash_table.table and of the tree / work-area parameters of the
 * Hufftree_* functions.  Note that the typedef hides two levels of
 * indirection.
 */
00041 typedef Hash_node **Hash_nodeptr_array;
00042
/*
 * HHash_table: a table of Hash_node pointers plus two integer counters.
 * NOTE(review): the meaning of `size` and `card` is inferred from their
 * names only -- verify against HHashtable_init / HHashtable_insert.
 */
00047 typedef struct {
00048 int size;  /* presumably the number of slots in `table` -- TODO confirm */
00049 int card;  /* presumably the number of entries currently stored (cardinality) -- TODO confirm */
00050 Hash_nodeptr_array table;  /* array of Hash_node pointers */
00051 } HHash_table;
00052
00053
00054
/*
 * Dictionary: a character buffer with its length, a token count and an
 * array of integer positions.
 * NOTE(review): presumably `content` holds all tokens back to back and
 * start_pos[i] is the offset of token i inside it -- confirm against
 * HDictionary_fromstring / HDictionary_fromtree.
 */
00061 typedef struct {
00062 char *content;  /* character buffer; layout not shown here */
00063 int length;  /* presumably the length of `content` in bytes -- TODO confirm */
00064 int num_tokens;  /* presumably the number of tokens described by start_pos -- TODO confirm */
00065 int *start_pos;  /* integer array; presumably per-token start offsets into `content` -- TODO confirm */
00066 } Dictionary;
00067
00068
00069
/*
 * Canonical: two fixed-size integer tables of 5 entries each plus two
 * scalar fields.
 * NOTE(review): the names suggest canonical-code bookkeeping (first
 * codeword and token-rank offset per codeword length); how the tables are
 * indexed and why the bound is 5 is not visible here -- confirm in
 * HCanonical_fromtree before relying on this.
 */
00071 typedef struct {
00072 int firstcw[5];  /* presumably the first codeword for each codeword length -- TODO confirm indexing */
00073 int offsetcw[5];  /* presumably an offset associated with each codeword length -- TODO confirm */
00074 int max_cwlen;  /* presumably the largest codeword length in use -- TODO confirm it stays within the 5-entry tables */
00075 int num_tokens;  /* presumably the total number of coded tokens -- TODO confirm */
00076 } Canonical;
00077
00078
00079
/*
 * Jumpers: a count plus two integer arrays.
 * NOTE(review): presumably a table of synchronization points pairing
 * positions in the plain text with positions in the compressed text, with
 * `number` entries in each array -- confirm against HJumpers_fromds.
 */
00081 typedef struct {
00082 int number;  /* presumably the number of entries in both arrays -- TODO confirm */
00083 int *text_offsets;  /* integer array; presumably offsets into the uncompressed text -- TODO confirm */
00084 int *ctext_offsets;  /* integer array; presumably offsets into the compressed text -- TODO confirm */
00085 } Jumpers;
00086
00087
/*
 * Console: aggregate that embeds, by value, one hash table, one
 * dictionary, one Canonical record and one Jumpers record.  It is the
 * context object passed (by pointer) to most HCompress_*, HDecompress_*
 * and HToken_* functions declared in this file.
 */
00089 typedef struct {
00090 HHash_table hashtable;  /* embedded hash table */
00091 Dictionary dictionary;  /* embedded dictionary */
00092 Canonical canoinfo;  /* embedded Canonical record */
00093 Jumpers jumpers;  /* embedded Jumpers record */
00094 } Console;
00095
00096
00097
00098
00099
00100
00101
00102
/*
 * HParse_text: takes a text buffer with its length and a hash table.
 * NOTE(review): presumably tokenizes `text` and records the tokens in
 * `ht` -- implementation not visible here, confirm.
 */
00103 void HParse_text(char *text, int text_len, HHash_table *ht);
00104
/*
 * Top-level compression entry points.
 * The char-pointer-pointer and int-pointer parameters (cstring/clen,
 * text/tlen) are presumably outputs filled in by the callee; who
 * allocates and who frees is not visible here -- TODO confirm.
 * `Verbose` is an int flag; its accepted values are not shown.
 */
/* Presumably compresses text[0..tlen) into *cstring / *clen; the role of jump_value is not shown (see Jumpers) -- TODO confirm. */
00105 void Huffw_compress(char *text, int tlen, int jump_value, char **cstring, int *clen, int Verbose);
/* Presumably the inverse of Huffw_compress: cstring[0..clen) into *text / *tlen -- TODO confirm. */
00106 void Huffw_decompress(char *cstring, int clen, char **text, int *tlen, int Verbose);
/* Presumably prints statistics about a Console and the given lengths; the meaning of `rule` is not shown -- TODO confirm. */
00107 void Huffw_PrintInfo(Console *console, int ctext_len, int text_len, int cstring_len,
00108 int rule, int Verbose);
00109
/*
 * "Spaceless" variants of the top-level entry points.  Unlike
 * Huffw_compress, the compress variant takes no jump_value parameter.
 * NOTE(review): presumably these apply a spaceless-words model (single
 * separators left implicit) -- confirm in the implementation.
 */
00110 void Huffw_spaceless_compress(char *text, int tlen, char **cstring, int *clen, int Verbose);
00111 void Huffw_spaceless_decompress(char *cstring, int clen, char **text, int *tlen, int Verbose);
00112
00113
/*
 * Compression building blocks.  Parameters of type char** / int* are
 * presumably outputs; allocation and ownership are not visible here --
 * TODO confirm.
 */
/* Presumably encodes text[0..text_len) using the codewords stored in ht, producing *ctext / *ctext_len -- TODO confirm. */
00114 void HCompress_getBody(char *text, int text_len, HHash_table *ht, char **ctext, int *ctext_len);
/* Presumably produces both the compressed body and the Console describing it -- TODO confirm. */
00115 void HCompress_getBodyAndConsole(char *text, int tlen, int jump_value, char **body, int *blen,
00116 Console *console);
/* Presumably assembles the final compressed string from the body and the Console -- TODO confirm layout. */
00117 void HCompress_getString(char **ctext, int *ctext_len, int text_len,char *body,
00118 int blen, Console *console);
/* Presumably rewrites `body` into a spaceless form returned via body_spaceless / blen_spaceless -- TODO confirm. */
00119 void HCompress_contractSpaces(char *body, int blen, char **body_spaceless, int *blen_spaceless,
00120 Console *console);
00121
00122
/*
 * Decompression building blocks.  Parameters of type char** / int* are
 * presumably outputs; allocation and ownership are not visible here --
 * TODO confirm.
 *
 * The block decoders come in a 2 x 2 x 2 family:
 *   next / previous       -- presumably decode forward from / backward to
 *                            the position `ctext` (TODO confirm);
 *   bytes / tokens        -- amount to decode given as num_bytes, or as
 *                            num_obj with a byte budget;
 *   plain / _spaceless    -- presumably matching the two compress modes.
 * NOTE(review): the byte-budget parameter is spelled `byte_left` in the
 * non-spaceless token variants and `bytes_left` in the spaceless ones;
 * harmless in a prototype, but worth unifying.
 */
/* Presumably splits a compressed string into its body and rebuilds the Console; also reports the original text length via text_len -- TODO confirm. */
00123 void HDecompress_getBodyAndConsole(char *ctext, int ctext_len, int *text_len, char **body,
00124 int *blen, Console *console);
/* Presumably the inverse of HCompress_contractSpaces -- TODO confirm. */
00125 void HDecompress_expandSpaces(char *body_spaceless, int blen_spaceless, char **body,
00126 int *blen, Console *console);
00127 void HDecompress_nextBlock_bytes(char *ctext, int num_bytes, char **text, int *text_len,
00128 Console *console);
00129 void HDecompress_nextBlock_bytes_spaceless(char *ctext, int num_bytes, char **text,
00130 int *text_len, Console *console);
00131 void HDecompress_previousBlock_bytes(char *ctext, int num_bytes, char **text, int *text_len,
00132 Console *console);
00133 void HDecompress_previousBlock_bytes_spaceless(char *ctext, int num_bytes, char **text,
00134 int *text_len, Console *console);
/* `decoded` is an int pointer; presumably receives the number of objects actually decoded -- TODO confirm. */
00135 void HDecompress_nextBlock_tokens(char *ctext, int byte_left, int num_obj, char **text,
00136 int *text_len, int *decoded, Console *console);
00137 void HDecompress_nextBlock_tokens_spaceless(char *ctext, int bytes_left, int num_obj,
00138 char **text, int *text_len, int *decoded,
00139 Console *console);
00140 void HDecompress_previousBlock_tokens(char *ctext, int byte_left, int num_obj, char **text,
00141 int *text_len, int *decoded, Console *console);
00142 void HDecompress_previousBlock_tokens_spaceless(char *ctext, int bytes_left, int num_obj,
00143 char **text, int *text_len, int *decoded,
00144 Console *console);
00145
00146
00147
00148
00149
00150
00151
00152
00153
/*
 * Hash table operations on HHash_table.  Strings are passed as a pointer
 * plus an explicit length.  Return-value conventions of the int-returning
 * functions are not visible here -- TODO confirm before relying on them.
 */
/* Presumably sizes and initializes ht from an estimated element count -- TODO confirm. */
00154 void HHashtable_init(HHash_table *ht, int estimated);
/* Presumably the hash function: maps s[0..len) to a slot index for ht -- TODO confirm range. */
00155 int HHashtable_func(char *s, int len, HHash_table *ht);
/* Returns a Hash_node pointer; presumably NULL when the string is absent -- TODO confirm. */
00156 Hash_node *HHashtable_search(char *str, int len, HHash_table *ht);
/* Presumably inserts the string or bumps its occurrence count; meaning of the int result not shown -- TODO confirm. */
00157 int HHashtable_insert(char *str, int str_len, HHash_table *ht);
/* Presumably releases or resets the table contents -- TODO confirm which. */
00158 void HHashtable_clear(HHash_table *ht);
/* Presumably rebuilds ht from a Dictionary and Canonical record (decoder side) -- TODO confirm. */
00159 void HHashtable_fromdict(Dictionary *dict, Canonical *cano, HHash_table *ht);
/* Presumably dumps the table for debugging -- TODO confirm output destination. */
00160 void HHashtable_print(HHash_table *ht);
00161
00162
00163
00164
00165
00166
/*
 * Huffman tree construction.  The tree is represented as a
 * Hash_nodeptr_array (array of Hash_node pointers).  Meanings of the int
 * return values are not visible here -- TODO confirm.
 */
/* Presumably fills `tree` with num_leaves leaf nodes taken from ht -- TODO confirm. */
00167 void Hufftree_createLeaves(Hash_nodeptr_array tree, int num_leaves, HHash_table *ht);
/* Presumably builds internal nodes over HT_leaves leaves, reporting the node total via tot_nodes -- TODO confirm what is returned. */
00168 int Hufftree_fromLeaves(Hash_nodeptr_array work_area, int *tot_nodes, int HT_leaves);
/* Presumably assigns codeword lengths by walking the tree from `root` -- TODO confirm what is returned. */
00169 int Hufftree_computeCwLen(Hash_nodeptr_array tree, int root, int tree_size);
/* Takes a pointer to a Hash_nodeptr_array; presumably allocates and returns the tree through it -- TODO confirm ownership. */
00170 void Hufftree_build(Hash_nodeptr_array *tree_array_ptr, HHash_table *ht);
/* Presumably prints the tree / code table for debugging -- TODO confirm. */
00171 void Hufftree_print(Canonical *cano, Hash_nodeptr_array tree_array, int Verbose);
00172
00173
00174
00175
00176
00177
00178
/*
 * Codeword conversions.  The names distinguish "plain" and "tagged"
 * codewords and a "token rank"; each function returns its result through
 * a pair of int pointers (value, length).
 * NOTE(review): presumably "tagged" codewords carry a marker bit per byte
 * so codeword boundaries can be found in the byte stream, and "plain" is
 * the same codeword without markers -- confirm in the implementation.
 */
00179 void HCodeword_PlainFromTagged(int taggedcw, int taggedcwlen,int *plaincw, int *plaincwlen);
00180 void HCodeword_PlainFromTokenrank(int token_rank,Canonical *cano,int *plaincw,int *plaincwlen);
00181 void HCodeword_TaggedFromPlain(int plaincw, int plaincwlen,int *taggedcw, int *taggedcwlen);
00182 void HCodeword_TaggedFromTokenrank(int token_rank,Canonical *cano,int *taggedcw, int *taggedcwlen);
/* Looks the token up via the hash table argument; behavior for an unknown token is not shown -- TODO confirm. */
00183 void HCodeword_TaggedFromToken(char *token, int token_len, HHash_table *ht,int *taggedcw, int *taggedcwlen);
/* Presumably reads the codeword starting at s, with num_byte_left bytes available -- TODO confirm. */
00184 void HCodeword_TaggedGetNext(char *s, int num_byte_left, int *taggedcw, int *lencw);
/* Presumably returns only the length of the codeword starting at s -- TODO confirm. */
00185 int HCodeword_TaggedGetNextLength(char *s, int num_byte_left);
/* Presumably reads the codeword that ends just before s (backward scan) -- TODO confirm. */
00186 void HCodeword_TaggedGetPrevious(char *s, int num_byte_left, int *taggedcw, int *lencw);
/* Presumably serializes the codeword into *s; who allocates *s is not shown -- TODO confirm. */
00187 void HCodeword_tostring(int taggedcw, int taggedcwlen, char **s);
00188
/*
 * Token lookups from codewords.  Results are returned through pointer
 * parameters; meanings of the int return values of the decompress
 * functions are not visible here -- TODO confirm.
 */
00189 void HToken_RankFromPlainCw(int plaincw, int plaincwlen, Canonical *cano,int *token_rank);
00190 void HToken_RankFromTaggedCw(int taggedcw, int taggedcwlen, Canonical *cano,int *token_rank);
/* Presumably resolves a tagged codeword to its token text via cano and dict; whether *token aliases dict storage is not shown -- TODO confirm. */
00191 void HToken_fromTaggedCw(int taggedcw, int taggedcwlen,Canonical *cano, Dictionary *dict,
00192 char **token, int *token_len);
/* Presumably decodes the token whose codeword starts at s, reporting token, its length and the codeword length -- TODO confirm. */
00193 int HToken_decompressNext(char *s, int num_byte_left, char **token, int *lentoken, int *lencw,
00194 Console *console);
/* Presumably the backward counterpart of HToken_decompressNext -- TODO confirm. */
00195 int HToken_decompressPrevious(char *s, int num_byte_left, char **token, int *lentoken, int *lencw,
00196 Console *console);
00197
/*
 * HToken_getLengthNext: takes a character pointer, a remaining-character
 * count and an int output pointer.
 * NOTE(review): presumably reports in *len the length of the next token
 * of uncompressed text starting at s -- TODO confirm.
 */
00198 void HToken_getLengthNext(char *s, int num_char_left, int *len);
00199
00200
00201
00202
00203
00204
/*
 * Dictionary construction and printing.
 */
/* Presumably builds dict from a serialized buffer s[0..slen) holding stokens tokens; whether dict aliases s is not shown -- TODO confirm. */
00205 void HDictionary_fromstring(char *s, int slen, int stokens, Dictionary *dict);
/* Presumably builds dict from the first num_tokens entries of a Huffman tree array -- TODO confirm ordering. */
00206 void HDictionary_fromtree(Hash_nodeptr_array tree, int num_tokens, Dictionary *dict);
/* Presumably dumps the dictionary for debugging -- TODO confirm. */
00207 void HDictionary_print(Dictionary *dict, HHash_table *ht, int Verbose);
00208
00209
00210
00211
00212
/*
 * Jumpers construction / serialization and position mapping.
 */
/* Presumably builds the Jumpers table for `text`, with jump_value controlling its spacing -- TODO confirm. */
00213 void HJumpers_fromds(char *text, int tlen, HHash_table *ht,int jump_value, Jumpers *jumpers);
/* Writes into a caller-supplied char buffer; the required buffer size is not expressed in the signature -- TODO confirm. */
00214 void HJumpers_tostring(char *s, Jumpers *jumpers);
/* Presumably the inverse of HJumpers_tostring -- TODO confirm. */
00215 void HJumpers_fromstring(char *s, Jumpers *jumpers);
/* Presumably maps a byte position in the compressed text to a character position in the original text (result in *textpos) -- TODO confirm. */
00216 void HGet_charpos_from_bytepos(char *ctext, int ctext_len, int bytepos, int *textpos, Console *console);
/* Presumably the reverse mapping (result in *bytepos) -- TODO confirm. */
00217 void HGet_bytepos_from_charpos(char *ctext, int ctext_len, int textpos, int *bytepos, Console *console);
00218
00219
00220
00221
00222
00223
/*
 * Canonical record construction and (de)serialization.
 */
/* Presumably derives the Canonical tables from a built Huffman tree -- TODO confirm. */
00224 void HCanonical_fromtree(Hash_nodeptr_array tree_array, Canonical *cano, HHash_table *ht);
/* Presumably reads a Canonical record from a serialized buffer -- TODO confirm format. */
00225 void HCanonical_fromstring(char *s, Canonical *cano);
/* Writes into a caller-supplied char buffer; the required buffer size is not expressed in the signature -- TODO confirm. */
00226 void HCanonical_tostring(char *s, Canonical *cano);
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
/*
 * HInt_tostring: takes a char buffer and an int.
 * NOTE(review): presumably serializes i into s; the number of bytes
 * written and the byte order are not visible here -- TODO confirm.
 */
00238 void HInt_tostring(char *s, int i);
00239
00240
/*
 * HInt_fromstring: takes a char buffer and returns an int.
 * NOTE(review): presumably the inverse of HInt_tostring; the number of
 * bytes read is not visible here -- TODO confirm.
 */
00242 int HInt_fromstring(char *s);
00243
/*
 * HPrint_string: takes a char pointer and an int.
 * NOTE(review): presumably prints the l characters starting at s
 * (useful for buffers that are not NUL-terminated) -- TODO confirm.
 */
00245 void HPrint_string(char *s, int l);
00246
00247
/*
 * Comparison callbacks with the (const void *, const void *) -> int
 * signature that qsort() expects.
 * NOTE(review): presumably they order Hash_node entries by occurrence
 * count and by codeword length respectively; the element type behind the
 * void pointers and the sort direction are not visible here -- TODO
 * confirm.
 */
00248 int HSort_for_freq(const void *va, const void *vb);
00249 int HSort_for_cwlen(const void *va, const void *vb);
00250
00251