Context algorithm
Semi-predictive context algorithm implementation
|
#include <assert.h>
#include "fsmTree.h"
#include "alpha.h"
#include "text.h"
#include "spacedef.h"
#include "debug.h"
#include "arithmetic/coder.h"
#include "arithmetic/bitio.h"
Go to the source code of this file.
Macros | |
#define | ROOT -1 |
Flag to indicate a node is the root. More... | |
#define | obstack_chunk_alloc malloc |
#define | obstack_chunk_free free |
Functions | |
static void | fastCanonize (fsmTree_t tree, Uint xLeft, Uint xRight, fsmTree_t *r, Uint *uLeft, Uint *uRight, Uint *vLeft, Uint *vRight) |
Calculates the canonical decomposition of a string in a faster way. More... | |
static void | canonize (fsmTree_t tree, Uint xLeft, Uint xRight, fsmTree_t *r, Uint *uLeft, Uint *uRight, Uint *vLeft, Uint *vRight) |
Calculates the canonical decomposition of a string. More... | |
static fsmTree_t | insert (fsmTree_t r, Uint uLeft, Uint uRight, Uint vLeft, Uint vRight) |
Inserts new nodes in the FSM closure of the tree. More... | |
static void | verify (const fsmTree_t root, fsmTree_t node, BOOL fast) |
Verifies that the tail of root is in the tree, adding it if necessary and continuing recursively on the children of root. More... | |
static void | propagateTransitions (fsmTree_t *transitions, fsmTree_t tree) |
Adds to the FSM a set of transitions originating from tree if they were not defined by verify. More... | |
static BOOL | writeEncoder (BOOL internal, FILE *file) |
Auxiliary function to write a tree node to a file using an arithmetic encoder. More... | |
static BOOL | writeFsmTreeRec (const fsmTree_t tree, Uint offset, FILE *file) |
Auxiliary recursive function to write a tree to a file using an arithemtic encoder. More... | |
static Ushort | getInternalNodeCount (const fsmTree_t tree, const Uint offset) |
Counts the number of internal nodes in a full tree. More... | |
static void | copyStatisticsRec (const fsmTree_t orig, const int offsetOrig, fsmTree_t dest, const int offsetDest, const Uchar *text2) |
fsmTree_t | initFsmTree () |
Creates and initializes a new fsm tree structure instance. More... | |
void | freeFsmTree (fsmTree_t tree) |
Deletes a fsm tree structure instance. More... | |
void | addSymbol (fsmTree_t tree, const Uchar sym) |
Adds a new symbol to this tree node. More... | |
void | makeFsm (fsmTree_t tree) |
Calculates the FSM closure of this tree. More... | |
void | writeFsmTree (const fsmTree_t tree, FILE *file) |
Writes this tree into a file. More... | |
BOOL | isRootFsmTree (const fsmTree_t tree) |
Indicates if the parameter node is the root of the tree. More... | |
Uint | getHeight (const fsmTree_t tree) |
void | printContext (fsmTree_t tree) |
static int | compareTreesRec (const fsmTree_t treeA, const fsmTree_t treeB, int level) |
void | compareTrees (const fsmTree_t treeA, const fsmTree_t treeB) |
void | copyStatistics (const fsmTree_t orig, fsmTree_t dest, const Uchar *text2) |
Variables | |
static short | pos |
Current position in the buffer. More... | |
static Ushort | internalNodes |
Number of internal nodes in the tree. More... | |
static Uint | totalNodes |
Total number of nodes in the tree. More... | |
|
static |
Calculates the canonical decomposition of a string.
[in] | tree | node from where to start the search. |
[in] | xLeft | left index in the input data of the string to canonize. |
[in] | xRight | right index in the input data of the string to canonize. |
[out] | r | node whose label is the longest prefix of the string in the tree. |
[out] | uLeft | index of the leftmost character of the string u in the input data. ru is the longest prefix of the string which is a word of the tree. |
[out] | uRight | index of the rightmost character of the string u in the input data. ru is the longest prefix of the string which is a word of the tree. |
[out] | vLeft | index of the leftmost character of the remaining part of the string (after ru). |
[out] | vRight | index of the rightmost character of the remaining part of the string (after ru). |
|
static |
Calculates the canonical decomposition of a string in a faster way.
It is only possible to use this variant in some special cases.
[in] | tree | node from where to start the search. |
[in] | xLeft | left index in the input data of the string to canonize. |
[in] | xRight | right index in the input data of the string to canonize. |
[out] | r | node whose label is the longest prefix of the string in the tree. |
[out] | uLeft | index of the leftmost character of the string v in the input data. ru is the longest prefix of the string which is a word of the tree. |
[out] | uRight | index of the leftmost character of the string v in the input data. ru is the longest prefix of the string which is a word of the tree. |
[out] | vLeft | index of the leftmost character of the remaining part of the string (after ru). |
[out] | vRight | index of the rightmost character of the remaining part of the string (after ru). |
void freeFsmTree | ( | fsmTree_t | tree | ) |
Counts the number of internal nodes in a full tree.
If it is not full counts also the nodes needed to make it full.
[in] | tree | tree to count nodes from. |
[in] | offset | index of the node label string current being processed. This is needed in order to simulate a full tree. |
fsmTree_t initFsmTree | ( | ) |
Inserts new nodes in the FSM closure of the tree.
Inserts nodes ru and ruv doing edge splits if necessary.
[in] | r | parent of the new node to be added. |
[in] | uLeft | index of the leftmost character of the u string. |
[in] | uRight | index of the rightmost character of the u string. |
[in] | vLeft | index of the leftmost character of the v string. |
[in] | vRight | index of the rightmost character of the v string. |
void makeFsm | ( | fsmTree_t | tree | ) |
Verifies that the tail of root is in the tree, adding it if necessary and continuing recursively on the children of root.
[in] | root | node to use as the start point in the search of canonize. |
[in] | node | node to verify. |
[in] | fast | flag to indicate that it is possible to use fastCanonize in this invocation of verify. |
Auxiliary function to write a tree node to a file using an arithmetic encoder.
[in] | internal | flag indicating if the node to write is internal or a leaf. |
[in] | file | file where the tree is written. |
void writeFsmTree | ( | const fsmTree_t | tree, |
FILE * | file | ||
) |
Auxiliary recursive function to write a tree to a file using an arithemtic encoder.
[in] | tree | node to write. |
[in] | offset | index of the node label string current being processed. This is needed in order to write a full tree. |
[in] | file | file where the tree is written. |
|
static |