Context algorithm
Semi-predictive context algorithm implementation
 All Data Structures Files Functions Variables Typedefs Macros Pages
decoder.c
Go to the documentation of this file.
1 /* Copyright 2013 Jorge Merlino
2 
3  This file is part of Context.
4 
5  Context is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  Context is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with Context. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <assert.h>
22 #include <math.h>
23 #include "decoder.h"
24 #include "debug.h"
25 #include "alpha.h"
26 #include "spacedef.h"
27 #include "stack.h"
28 #include "see.h"
29 #include "reset.h"
30 #include "arithmetic/coder.h"
31 #include "arithmetic/bitio.h"
32 
40 static void fixParents (decoderTree_t tree, BOOL *deletedChars) {
41  Uint i; /*, numEscapes;*/
42  decoderTree_t parTree = tree;
43  BOOL newChars;
44 
45  while (!isRootDecoderTree(parTree)) {
46  parTree = parTree->parent->origin;
47  newChars = False;
48 
49  for (i=0; i<parTree->totalSyms && !newChars; i++) {
50  newChars |= parTree->count[i] > 1;
51  }
52 
53  if (!newChars) { /* there are no new symbols */
54  DEBUGCODE(printf("fixing %p\n", (void *)parTree));
55  parTree->totalCount = 0;
56  for (i=0; i<parTree->totalSyms; i++) {
57  if (deletedChars[alphaindex[parTree->symbols[i]]]) {
58  parTree->count[i] = 0;
59  }
60  else {
61  parTree->totalCount += parTree->count[i];
62  }
63  }
64 
65  parTree->totalCount *= 2;
66  }
67  else break;
68  }
69 }
70 
71 static void rescale (decoderTree_t tree) {
72  BOOL *charFlags, needToFix = False;
73  Uint numEscapes, i;
74  DEBUGCODE(printf("rescalo %p\n", (void *)tree));
75 
76  numEscapes = tree->totalCount;
77  tree->totalCount = 0;
78  CALLOC(charFlags, BOOL, alphasize);
79 
80  for (i=0; i<tree->totalSyms; i++) {
81  if (tree->count[i] > 0) {
82  numEscapes -= tree->count[i];
83  tree->count[i] = tree->count[i] >> 1;
84  if (tree->count[i] == 0) {
85  charFlags[alphaindex[tree->symbols[i]]] = True;
86  needToFix = True;
87  }
88  else {
89  tree->totalCount += tree->count[i];
90  }
91  }
92  }
93 
94  if (needToFix) {
95  fixParents(tree, charFlags);
96  }
97  FREE(charFlags);
98  numEscapes = numEscapes >> 1;
99  if (numEscapes == 0) numEscapes = 1;
100  tree->totalCount += numEscapes;
101 }
102 
103 #ifdef DEBUG
104 
105 static void printStats (decoderTree_t origTree, Uint *maskedChars, Uint i) {
106  int j;
107 
108  printf ("## ");
109  for(j=0; j<origTree->totalSyms; j++) {
110  printf("count[%d] = %ld %s", origTree->symbols[j], origTree->count[j],
111  (maskedChars[alphaindex[origTree->symbols[j]]] == i+1 ? "(masked) " : ""));
112  }
113  printf("Total: %ld %ld (%p)\n", origTree->totalCount, origTree->totalSyms, (void *)origTree);
114 }
115 
116 #endif
117 
118 static void addNodes (Uchar * text, Uchar sym, decoderTree_t * tree, decoderTree_t * prevTree, Uint i, Uint * zPrevLeft, Uint * zPrevRight) {
119  decoderTree_t sNext, child = NULL, newChild, new, newLeaf;
120  Uint zLeft = 1, zRight = 0, uSize, uLeft, zNextLeft, j, k, b;
121  BOOL end;
122 
123  text[i] = sym;
124  sNext = (*tree)->transitions[alphaindex[sym]];
125  new = NULL;
126 
127  /* calculo u */
128  if (!isRootDecoderTree(sNext) && sNext->right >= (*tree)->right + 1) {
129  /* u is empty */
130  if (*zPrevLeft <= *zPrevRight) {
131  child = sNext->children[alphaindex[(*(*prevTree)->text)[*zPrevLeft]]];
132  if (child) {
133  *zPrevLeft = child->left;
134  zLeft = child->left;
135  zRight = child->left + *zPrevRight - *zPrevLeft + 1;
136  }
137  else {
138  zLeft = 1;
139  zRight = 0;
140  }
141  }
142  else {
143  zLeft = 1;
144  zRight = 0;
145  }
146  }
147  else {
148  if (isRootDecoderTree(sNext)) {
149  uSize = (*tree)->right + 1;
150  uLeft = 0;
151  child = sNext->children[alphaindex[sym]];
152  }
153  else {
154  uSize = (*tree)->right - sNext->right;
155  uLeft = sNext->right + 1; /* points to tree->text */
156  child = sNext->children[alphaindex[(*(*tree)->text)[sNext->right]]];
157  }
158 
159  if (child) {
160  j = 0;
161  end = False;
162  zNextLeft = child->left; /* will be zPrevLeft when the do-while loop ends */
163  do {
164  for (k=child->left+1; j < uSize && k <= child->right && (*(*tree)->text)[uLeft+j] == (*child->text)[k]; j++, k++);
165  if (j == uSize) { /* all u belongs to WORD T */
166  if (k > child->right) { /* sz already is a node = child */
167  new = child;
168  }
169  else {
170  if ((*zPrevLeft <= *zPrevRight) &&
171  ((*(*prevTree)->text)[*zPrevLeft] == (*child->text)[k])) { /* head z belongs to WORD T */
172  if (k == child->right) {
173  new = child;
174  }
175  else {
176  zLeft = child->left;
177  zRight = k + *zPrevRight - *zPrevLeft;
178  }
179  }
180  else {
181  zLeft = child->left;
182  zRight = k - 1;
183  }
184  }
185  end = True;
186  }
187  else {
188  if (k > child->right) { /* standing in a node */
189  newChild = child->children[alphaindex[(*(*tree)->text)[uLeft+j]]];
190  if (newChild) {
191  sNext = child;
192  child = newChild;
193  j++;
194  }
195  else {
196  new = child;
197  end = True;
198  }
199  }
200  else { /* fell off the tree -> found z */
201  zLeft = child->left;
202  zRight = k - 1;
203  end = True;
204  }
205  }
206  } while (!end);
207  *zPrevLeft = zNextLeft;
208  }
209  else { /* z empty */
210  zLeft = 1;
211  zRight = 0;
212  }
213  }
214 
215  /* insert z if necessary */
216  if (new == NULL) {
217  if (zLeft <= zRight) {
218  new = initDecoderTree(False);
219  DEBUGCODE(printf("New node4: %p\n", (void *)new));
220  new->internal = child->internal;
221  new->internalFSM = child->internalFSM;
222  new->left = (!isRootDecoderTree(sNext) ? sNext->right + 1 : 0);
223  new->right = new->left + zRight - zLeft;
224 
225  new->children[alphaindex[(*child->text)[new->right+1]]] = child;
226  child->left = new->right + 1;
227  child->parent = new;
228  new->text = child->text;
229 
230  new->parent = sNext;
231  if (isRootDecoderTree(sNext)) {
232  sNext->children[alphaindex[sym]] = new;
233  }
234  else {
235  sNext->children[alphaindex[(*(*tree)->text)[sNext->right]]] = new;
236  }
237 
238  new->totalSyms = child->totalSyms;
239  new->totalCount = 0;
240 
241  memcpy(new->transitions, sNext->transitions, alphasize * sizeof(decoderTree_t));
242  for (j=0; j < new->totalSyms; j++) {
243  new->count[j] = (child->count[j] == 0 ? 0 : 1);
244  new->totalCount += new->count[j];
245  new->symbols[j] = child->symbols[j];
246  }
247  new->totalCount *= 2; /* symbols and escapes */
248 
249  if (sNext->internal) {
250  new->origin = new;
251  }
252  else {
253  new->origin = child->origin;
254  }
255 
256  verifyDecoder((isRootDecoderTree(sNext) ? sNext : sNext->tail), new);
257  }
258  else {
259  new = sNext;
260  *zPrevLeft = new->right + 1; /* empty */
261  }
262  }
263 
264  *prevTree = new;
265  *zPrevRight = new->right;
266 
267  /* get b */
268  b = -1; /* lambda */
269  if (new->internalFSM) {
270  if (!isRootDecoderTree(new) && new->right < i) {
271  b = text[i - new->right - 1];
272  }
273  else if (isRootDecoderTree(new)) {
274  b = text[i];
275  }
276  }
277 
278  if (b != -1) {
279  /* insert b */
280  newLeaf = initDecoderTree(False);
281  DEBUGCODE(printf("New node5: %p\n", (void *)newLeaf));
282  newLeaf->internal = False;
283  newLeaf->internalFSM = False;
284  newLeaf->left = newLeaf->right = new->right + 1;
285  newLeaf->parent = new;
286  new->children[alphaindex[b]] = newLeaf;
287 
288  MALLOC(newLeaf->text, Uchar *, 1);
289  MALLOC(*newLeaf->text,Uchar, newLeaf->right + 1);
290  if (newLeaf->right > 0) {
291  memcpy(*newLeaf->text, *new->text, newLeaf->left);
292  }
293  (*newLeaf->text)[newLeaf->left] = b;
294 
295  memcpy(newLeaf->transitions, new->transitions, alphasize * sizeof(decoderTree_t));
296 
297  if (new->internal) {
298  newLeaf->origin = newLeaf;
299  }
300  else {
301  newLeaf->origin = new->origin;
302  }
303 
304  verifyDecoder((isRootDecoderTree(new) ? new : new->tail), newLeaf);
305  *tree = newLeaf;
306  }
307  else {
308  *tree = new;
309  }
310 }
311 
312 void decode (decoderTree_t tree, const Uint textlen, FILE *compressedFile, FILE *output, const BOOL useSee) {
313  Uint i, j, k, length, count, numMasked, allCount, low, *maskedChars, state;
314  Uint zPrevLeft = 1, zPrevRight = 0;
315  Uchar sym = 0;
316  SYMBOL s;
317  decoderTree_t origTree, prevTree = NULL;
318  BOOL found, escape;
319  Uchar * text;
320 
321  printf("MAX_COUNT: %ld\n", MAX_COUNT);
322  MALLOC(text, Uchar, textlen);
323  CALLOC(maskedChars, Uint, alphasize);
324 
325  if (useSee) {
326  initSee();
327  }
328 
329  for (i=0; i<textlen; i++) {
330  origTree = tree->origin;
331  numMasked = 0;
332  DEBUGCODE(printf("index: %ld\n", i));
333 
334  if (origTree->totalCount > MAX_COUNT && origTree->used) {
335  rescale (origTree);
336  }
337 
338  DEBUGCODE(printStats(origTree, maskedChars, i));
339  length = 0;
340  found = False;
341  do {
342  escape = False;
343  if (origTree->totalSyms > numMasked && origTree->totalCount > 0) { /* found a parent with more data */
344  low = allCount = 0;
345  for (j=0; j < origTree->totalSyms; j++) {
346  allCount += origTree->count[j];
347  if (maskedChars[alphaindex[origTree->symbols[j]]] != i+1) {
348  low += origTree->count[j];
349  }
350  }
351 
352  if (low > 0 && useSee) {
353  state = getSeeStateDecoder(origTree, allCount, i, numMasked, text, alphasize);
354  if (state != -1) {
355  s.scale = See[state][1];
356  count = get_current_count(&s);
357  if (count < See[state][0]) { /* escape */
358  s.low_count = 0;
359  s.high_count = See[state][0];
360  DEBUGCODE(printf("--scale: %d diff: %d symbol: %d\n", s.scale, s.high_count - s.low_count, -1));
361  remove_symbol_from_stream(compressedFile, &s);
362  escape = True;
363 
364  for(j=0; j < origTree->totalSyms; j++) {
365  if (origTree->count[j] > 0 && maskedChars[alphaindex[origTree->symbols[j]]] != i+1) {
366  maskedChars[alphaindex[origTree->symbols[j]]] = i+1;
367  numMasked++;
368  }
369  }
370  }
371  else {
372  s.low_count = See[state][0];
373  s.high_count = See[state][1];
374  DEBUGCODE(printf("--scale: %d diff: %d symbol: %d\n", s.scale, s.high_count - s.low_count, -2));
375  remove_symbol_from_stream(compressedFile, &s);
376  s.scale = low;
377  }
378 
379  updateSee(state, escape, alphasize);
380  }
381  else {
382  s.scale = low + origTree->totalCount - allCount; /* low + escapes */
383  }
384  }
385  else {
386  s.scale = low + origTree->totalCount - allCount; /* low + escapes */
387  }
388 
389  if (!escape) {
390  count = get_current_count(&s);
391 
392  for (j=0; maskedChars[alphaindex[origTree->symbols[j]]] == i+1; j++);
393  s.high_count = origTree->count[j];
394  for (j++; (j < origTree->totalSyms) && (s.high_count <= count); j++) {
395  if (maskedChars[alphaindex[origTree->symbols[j]]] != i+1) {
396  s.high_count += origTree->count[j];
397  }
398  }
399 
400  if ((j == origTree->totalSyms) && (s.high_count <= count)) { /* not found */
401  s.low_count = s.high_count;
402  s.high_count = s.scale;
403  DEBUGCODE(printf("--scale: %d diff: %d symbol: %d\n", s.scale, s.high_count - s.low_count, -1));
404 
405  for(j--; j != -1; j--) {
406  if (origTree->count[j] > 0 && maskedChars[alphaindex[origTree->symbols[j]]] != i+1) {
407  maskedChars[alphaindex[origTree->symbols[j]]] = i+1;
408  numMasked++;
409  }
410  }
411  }
412  else { /* found */
413  found = True;
414  j--;
415  s.low_count = s.high_count - origTree->count[j] ;
416  sym = origTree->symbols[j];
417  putc(sym, output);
418  origTree->used = True;
419  DEBUGCODE(printf("++scale: %d diff: %d symbol: %d\n", s.scale, s.high_count - s.low_count, sym));
420  origTree->totalCount+=2;
421  origTree->count[j]+=2;
422  }
423  remove_symbol_from_stream(compressedFile, &s);
424  }
425  }
426  else {
427  DEBUGCODE(printf("--escape\n"));
428  }
429 
430  if (isRootDecoderTree(origTree) && !found) {
431  found = True;
432  for (j=0, count=0; j<alphasize; j++) {
433  if (maskedChars[j] != i+1) {
434  count++;
435  }
436  }
437  s.scale = count;
438  count = get_current_count(&s);
439 
440  s.high_count=0;
441  for (j=0; (j < alphasize) && (s.high_count <= count); j++) {
442  if (maskedChars[j] != i+1) {
443  s.high_count ++;
444  }
445  }
446  s.low_count = s.high_count - 1;
447  sym = characters[j-1];
448  remove_symbol_from_stream(compressedFile, &s);
449  putc(sym, output);
450 
451  origTree->totalCount += 2; /* symbol and escape */
452 
453  for (k=0; (k < origTree->totalSyms) && (origTree->symbols[k] != sym); k++);
454  if (k == origTree->totalSyms) {
455  origTree->totalSyms++;
456  }
457  origTree->count[k] = 1;
458  origTree->symbols[k] = sym;
459 
460  DEBUGCODE(printf("++scale: %d diff: %d symbol: %d\n", s.scale, s.high_count - s.low_count, sym));
461  }
462 
463  if (!found) {
464  origTree = origTree->parent->origin;
465  length++;
466 
467  if (origTree->totalCount > MAX_COUNT && origTree->used) {
468  rescale (origTree);
469  }
470  DEBUGCODE(printStats(origTree, maskedChars, i));
471 
472  /* search for a parent with more information */
473  while (!isRootDecoderTree(origTree) && origTree->totalSyms == numMasked) {
474  DEBUGCODE(printf("--escape\n"));
475  origTree = origTree->parent->origin;
476  length++;
477 
478  if (origTree->totalCount > MAX_COUNT && origTree->used) {
479  rescale (origTree);
480  }
481  DEBUGCODE(printStats(origTree, maskedChars, i));
482  DEBUGCODE(printf("--escape\n"));
483  }
484  }
485  }
486  while (!found);
487 
488  origTree = tree->origin;
489  for (j=0; j<length; j++) {
490  origTree->totalCount += 2; /* symbol and escape */
491  for (k=0; (k < origTree->totalSyms) && (origTree->symbols[k] != sym); k++);
492  if (k == origTree->totalSyms) {
493  origTree->totalSyms++;
494  }
495 
496  origTree->count[k] = 1;
497  origTree->symbols[k] = sym;
498  assert(origTree->parent);
499  origTree = origTree->parent->origin;
500  }
501 
502  DEBUGCODE(printf("\n"));
503  addNodes(text, sym, &tree, &prevTree, i, &zPrevLeft, &zPrevRight);
504  } /* for */
505  FREE(maskedChars);
506  FREE(text);
507 }
struct decoderTree ** children
List of pointers to all the children of this node.
Definition: decoderTree.h:32
Uint totalCount
&lt; Total number of symbols occuring at this state.
Definition: decoderTree.h:26
Uchar ** text
&lt; Flag that indicates if this node is an internal node of the FSM closure of T(x) ...
Definition: decoderTree.h:43
Uint textlen
Length of the input text in the encoder.
Definition: text.h:23
unsigned short int scale
Definition: coder.h:24
Definition: coder.h:21
static void rescale(decoderTree_t tree)
Definition: decoder.c:71
struct decoderTree * tail
Pointer to the node whose label is the tail of this one.
Definition: decoderTree.h:32
struct decoderTree * parent
Pointer to the parent of this node.
Definition: decoderTree.h:32
#define True
True boolean constant.
Definition: types.h:108
Uint alphasize
Alphabet size.
Definition: alpha.h:25
unsigned char Uchar
Unsigned char type.
Definition: types.h:48
Uint right
Index of the rightmost character of the label of this node.
Definition: decoderTree.h:26
BOOL internalFSM
&lt; Flag that indicates if this node is an internal node of T(x)
Definition: decoderTree.h:38
struct decoderTree * origin
Pointer to the original node this one descends from.
Definition: decoderTree.h:32
Uint left
Index of the leftmost character of the label of this node.
Definition: decoderTree.h:26
Uchar * text
Input text to the encoder.
Definition: text.h:26
BOOL isRootDecoderTree(decoderTree_t tree)
Indicates if the parameter node is the root of the tree.
Definition: decoderTree.c:717
Uint totalSyms
Definition: decoderTree.h:26
#define FREE(P)
Frees a block of memory and makes the pointer NULL.
Definition: spacedef.h:58
static unsigned short int low
Definition: coder.c:27
#define CALLOC(S, T, N)
Allocs a new block of memory and sets all its bits to zero.
Definition: spacedef.h:72
unsigned long Uint
Unsigned int type.
Definition: types.h:54
#define BOOL
Boolean data type.
Definition: types.h:92
unsigned short int low_count
Definition: coder.h:22
void verifyDecoder(const decoderTree_t root, decoderTree_t node)
Verify*, only called by the decoder routine.
Definition: decoderTree.c:418
BOOL used
Flag that indicates if this node has been used to decode eny symbols.
Definition: decoderTree.h:38
Uint MAX_COUNT
Reset threshold.
Definition: reset.c:22
Uchar characters[UCHAR_MAX+1]
Characters in text in alphabetical order.
Definition: alpha.h:28
#define DEBUGCODE(S)
Definition: debug.h:32
struct decoderTree ** transitions
List of FSM transitions from this state.
Definition: decoderTree.h:32
#define False
False boolean constant.
Definition: types.h:100
#define MALLOC(S, T, N)
Definition: spacedef.h:81
Uint alphaindex[UCHAR_MAX+1]
Index of each alphabet character.
Definition: alpha.h:31
decoderTree_t initDecoderTree(BOOL useMalloc)
Creates and initializes a new decoder tree structure instance.
Definition: decoderTree.c:597
Uint See[1<< 14][2]
SEE table structure.
Definition: see.h:27
unsigned short int high_count
Definition: coder.h:23
void initSee()
Initalizes the SEE table.
Definition: see.c:139
Uint * count
Counts of the number of occurrences of each symbol in this state.
Definition: decoderTree.h:26
static void fixParents(decoderTree_t tree, BOOL *deletedChars)
Remove symbols from the statistics of the ancestors of this node in case that is necessary.
Definition: decoder.c:40
void remove_symbol_from_stream(FILE *stream, SYMBOL *s)
Definition: coder.c:161
static void addNodes(Uchar *text, Uchar sym, decoderTree_t *tree, decoderTree_t *prevTree, Uint i, Uint *zPrevLeft, Uint *zPrevRight)
Definition: decoder.c:118
BOOL internal
Definition: decoderTree.h:38
void updateSee(Uint state, BOOL escape, Uint alphasize)
Updates the SEE table.
Definition: see.c:120
Decoder context tree structure.
Definition: decoderTree.h:25
short int get_current_count(SYMBOL *s)
Definition: coder.c:124
Uchar * symbols
Definition: decoderTree.h:44
int getSeeStateDecoder(decoderTree_t tree, Uint allCount, Uint pos, Uint numMasked, const Uchar *text, Uint alphasize)
Returns the contents of the SEE table for the encoder.
Definition: see.c:82
void decode(decoderTree_t tree, const Uint textlen, FILE *compressedFile, FILE *output, const BOOL useSee)
Decodes the compressed file data to the output file (for non-binary alphabet).
Definition: decoder.c:312