Context algorithm
Semi-predictive context algorithm implementation
 All Data Structures Files Functions Variables Typedefs Macros Pages
fsmTree.c File Reference
#include <assert.h>
#include "fsmTree.h"
#include "alpha.h"
#include "text.h"
#include "spacedef.h"
#include "debug.h"
#include "arithmetic/coder.h"
#include "arithmetic/bitio.h"

Go to the source code of this file.

Macros

#define ROOT   -1
 Flag to indicate a node is the root. More...
 
#define obstack_chunk_alloc   malloc
 
#define obstack_chunk_free   free
 

Functions

static void fastCanonize (fsmTree_t tree, Uint xLeft, Uint xRight, fsmTree_t *r, Uint *uLeft, Uint *uRight, Uint *vLeft, Uint *vRight)
 Calculates the canonical decomposition of a string in a faster way. More...
 
static void canonize (fsmTree_t tree, Uint xLeft, Uint xRight, fsmTree_t *r, Uint *uLeft, Uint *uRight, Uint *vLeft, Uint *vRight)
 Calculates the canonical decomposition of a string. More...
 
static fsmTree_t insert (fsmTree_t r, Uint uLeft, Uint uRight, Uint vLeft, Uint vRight)
 Inserts new nodes in the FSM closure of the tree. More...
 
static void verify (const fsmTree_t root, fsmTree_t node, BOOL fast)
 Verifies that the tail of root is in the tree, adding it if necessary and continuing recursively on the children of root. More...
 
static void propagateTransitions (fsmTree_t *transitions, fsmTree_t tree)
 Adds to the FSM a set of transitions originating from tree if they were not defined by verify. More...
 
static BOOL writeEncoder (BOOL internal, FILE *file)
 Auxiliary function to write a tree node to a file using an arithmetic encoder. More...
 
static BOOL writeFsmTreeRec (const fsmTree_t tree, Uint offset, FILE *file)
 Auxiliary recursive function to write a tree to a file using an arithemtic encoder. More...
 
static Ushort getInternalNodeCount (const fsmTree_t tree, const Uint offset)
 Counts the number of internal nodes in a full tree. More...
 
static void copyStatisticsRec (const fsmTree_t orig, const int offsetOrig, fsmTree_t dest, const int offsetDest, const Uchar *text2)
 
fsmTree_t initFsmTree ()
 Creates and initializes a new fsm tree structure instance. More...
 
void freeFsmTree (fsmTree_t tree)
 Deletes a fsm tree structure instance. More...
 
void addSymbol (fsmTree_t tree, const Uchar sym)
 Adds a new symbol to this tree node. More...
 
void makeFsm (fsmTree_t tree)
 Calculates the FSM closure of this tree. More...
 
void writeFsmTree (const fsmTree_t tree, FILE *file)
 Writes this tree into a file. More...
 
BOOL isRootFsmTree (const fsmTree_t tree)
 Indicates if the parameter node is the root of the tree. More...
 
Uint getHeight (const fsmTree_t tree)
 
void printContext (fsmTree_t tree)
 
static int compareTreesRec (const fsmTree_t treeA, const fsmTree_t treeB, int level)
 
void compareTrees (const fsmTree_t treeA, const fsmTree_t treeB)
 
void copyStatistics (const fsmTree_t orig, fsmTree_t dest, const Uchar *text2)
 

Variables

static short pos
 Current position in the buffer. More...
 
static Ushort internalNodes
 Number of internal nodes in the tree. More...
 
static Uint totalNodes
 Total number of nodes in the tree. More...
 

Macro Definition Documentation

#define obstack_chunk_alloc   malloc

Definition at line 31 of file fsmTree.c.

#define obstack_chunk_free   free

Definition at line 32 of file fsmTree.c.

#define ROOT   -1

Flag to indicate a node is the root.

Definition at line 29 of file fsmTree.c.

Function Documentation

void addSymbol ( fsmTree_t  tree,
const Uchar  sym 
)

Adds a new symbol to this tree node.

Definition at line 543 of file fsmTree.c.

static void canonize ( fsmTree_t  tree,
Uint  xLeft,
Uint  xRight,
fsmTree_t r,
Uint uLeft,
Uint uRight,
Uint vLeft,
Uint vRight 
)
static

Calculates the canonical decomposition of a string.

Parameters
[in]treenode from where to start the search.
[in]xLeftleft index in the input data of the string to canonize.
[in]xRightright index in the input data of the string to canonize.
[out]rnode whose label is the longest prefix of the string in the tree.
[out]uLeftindex of the leftmost character of the string u in the input data. ru is the longest prefix of the string which is a word of the tree.
[out]uRightindex of the rightmost character of the string u in the input data. ru is the longest prefix of the string which is a word of the tree.
[out]vLeftindex of the leftmost character of the remaining part of the string (after ru).
[out]vRightindex of the rightmost character of the remaining part of the string (after ru).

Definition at line 105 of file fsmTree.c.

void compareTrees ( const fsmTree_t  treeA,
const fsmTree_t  treeB 
)

Definition at line 659 of file fsmTree.c.

static int compareTreesRec ( const fsmTree_t  treeA,
const fsmTree_t  treeB,
int  level 
)
static

Definition at line 637 of file fsmTree.c.

void copyStatistics ( const fsmTree_t  orig,
fsmTree_t  dest,
const Uchar text2 
)

Definition at line 664 of file fsmTree.c.

static void copyStatisticsRec ( const fsmTree_t  orig,
const int  offsetOrig,
fsmTree_t  dest,
const int  offsetDest,
const Uchar text2 
)
static

Definition at line 394 of file fsmTree.c.

static void fastCanonize ( fsmTree_t  tree,
Uint  xLeft,
Uint  xRight,
fsmTree_t r,
Uint uLeft,
Uint uRight,
Uint vLeft,
Uint vRight 
)
static

Calculates the canonical decomposition of a string in a faster way.

It is only possible to use this variant in some special cases.

Parameters
[in]treenode from where to start the search.
[in]xLeftleft index in the input data of the string to canonize.
[in]xRightright index in the input data of the string to canonize.
[out]rnode whose label is the longest prefix of the string in the tree.
[out]uLeftindex of the leftmost character of the string v in the input data. ru is the longest prefix of the string which is a word of the tree.
[out]uRightindex of the leftmost character of the string v in the input data. ru is the longest prefix of the string which is a word of the tree.
[out]vLeftindex of the leftmost character of the remaining part of the string (after ru).
[out]vRightindex of the rightmost character of the remaining part of the string (after ru).

Definition at line 56 of file fsmTree.c.

void freeFsmTree ( fsmTree_t  tree)

Deletes a fsm tree structure instance.

Parameters
[in]treetree to delete.

Definition at line 530 of file fsmTree.c.

Uint getHeight ( const fsmTree_t  tree)

Definition at line 606 of file fsmTree.c.

static Ushort getInternalNodeCount ( const fsmTree_t  tree,
const Uint  offset 
)
static

Counts the number of internal nodes in a full tree.

If it is not full counts also the nodes needed to make it full.

Parameters
[in]treetree to count nodes from.
[in]offsetindex of the node label string current being processed. This is needed in order to simulate a full tree.

Definition at line 375 of file fsmTree.c.

fsmTree_t initFsmTree ( )

Creates and initializes a new fsm tree structure instance.

Returns
a new fsm tree insance.

Definition at line 487 of file fsmTree.c.

static fsmTree_t insert ( fsmTree_t  r,
Uint  uLeft,
Uint  uRight,
Uint  vLeft,
Uint  vRight 
)
static

Inserts new nodes in the FSM closure of the tree.

Inserts nodes ru and ruv doing edge splits if necessary.

Parameters
[in]rparent of the new node to be added.
[in]uLeftindex of the leftmost character of the u string.
[in]uRightindex of the rightmost character of the u string.
[in]vLeftindex of the leftmost character of the v string.
[in]vRightindex of the rightmost character of the v string.
Returns
a pointer to the new added node.

Definition at line 159 of file fsmTree.c.

BOOL isRootFsmTree ( const fsmTree_t  tree)

Indicates if the parameter node is the root of the tree.

Parameters
[in]treetree node.
Returns
True if the node is the root, False otherwise.

Definition at line 602 of file fsmTree.c.

void makeFsm ( fsmTree_t  tree)

Calculates the FSM closure of this tree.

Parameters
[in]treetree to process.

Definition at line 553 of file fsmTree.c.

void printContext ( fsmTree_t  tree)

Definition at line 619 of file fsmTree.c.

static void propagateTransitions ( fsmTree_t transitions,
fsmTree_t  tree 
)
static

Adds to the FSM a set of transitions originating from tree if they were not defined by verify.

Parameters
[in]transitionsset of transitions.
[in]treeorigin node of the transitions.

Definition at line 264 of file fsmTree.c.

static void verify ( const fsmTree_t  root,
fsmTree_t  node,
BOOL  fast 
)
static

Verifies that the tail of root is in the tree, adding it if necessary and continuing recursively on the children of root.

Parameters
[in]rootnode to use as the start point in the search of canonize.
[in]nodenode to verify.
[in]fastflag to indicate that it is possible to use fastCanonize in this invocation of verify.

Definition at line 216 of file fsmTree.c.

static BOOL writeEncoder ( BOOL  internal,
FILE *  file 
)
static

Auxiliary function to write a tree node to a file using an arithmetic encoder.

Parameters
[in]internalflag indicating if the node to write is internal or a leaf.
[in]filefile where the tree is written.
Returns
True if there is no need to write any more data to the file.

Definition at line 286 of file fsmTree.c.

void writeFsmTree ( const fsmTree_t  tree,
FILE *  file 
)

Writes this tree into a file.

Parameters
[in]treetree to write.
[in]fileoutput file to write the data.

Definition at line 572 of file fsmTree.c.

static BOOL writeFsmTreeRec ( const fsmTree_t  tree,
Uint  offset,
FILE *  file 
)
static

Auxiliary recursive function to write a tree to a file using an arithemtic encoder.

Parameters
[in]treenode to write.
[in]offsetindex of the node label string current being processed. This is needed in order to write a full tree.
[in]filefile where the tree is written.

Definition at line 324 of file fsmTree.c.

Variable Documentation

Ushort internalNodes
static

Number of internal nodes in the tree.

Equals the number of ones in the natural code file.

Definition at line 38 of file fsmTree.c.

short pos
static

Current position in the buffer.

Definition at line 35 of file fsmTree.c.

Uint totalNodes
static

Total number of nodes in the tree.

Definition at line 41 of file fsmTree.c.