Context algorithm
Semi-predictive context algorithm implementation
 All Data Structures Files Functions Variables Typedefs Macros Pages
gammaFunc.c
Go to the documentation of this file.
1 /* Copyright 2013 Jorge Merlino
2 
3  This file is part of Context.
4 
5  Context is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  Context is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with Context. If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include "gammaTable.h"
20 #include "gammaFunc.h"
21 #include "debug.h"
22 #include "reset.h"
23 #include <math.h> /* for log */
24 
25 #ifdef DEBUG
26 
28 static int gammaHits=0;
29 
31 static int gammaMisses=0;
32 
33 #endif
34 
36 #define LNPI_2 M_LNPI / 2
37 
39 static double alphasizeLog = 0;
40 
42 static double alphaEntropy = 0;
43 
49 static double evalLogGamma(int n)
50 {
51  int index = n-2;
52 
53  if (index >= 0 && index < TBL_SIZE) {
54  DEBUGCODE(gammaHits++);
55  return logGammaTbl[index];
56  }
57  else if (n == 1) {
58  DEBUGCODE(gammaHits++);
59  return 0;
60  }
61  else {
62  DEBUGCODE(gammaMisses++);
63  return gsl_sf_lngamma(n);
64  }
65 }
66 
67 
73 static double evalLogGamma2(int n)
74 {
75  int index = n-1;
76 
77  if (index >= 0 && index < TBL_SIZE) {
78  DEBUGCODE(gammaHits++);
79  return logGammaTbl2[index];
80  }
81  else if (n == 0) {
82  DEBUGCODE(gammaHits++);
83  return LNPI_2; /* Log (gamma (1/2)) */
84  }
85  else {
86  DEBUGCODE(gammaMisses++);
87  return gsl_sf_lngamma(n + 0.5);
88  }
89 }
90 
91 
96 double kt (statistics_t stats) {
97  Uint i, ns = 0, alpha_2 = alphasize / 2; /* integer division */
98  double sum = 0, gamma1, gamma2, gamma3;
99 
100  for(i=0; i<alphasize; i++) {
101  sum += evalLogGamma2(stats->count[i]);
102  ns += stats->count[i];
103  }
104 
105  if (alphasize % 2 == 0) {
106  gamma1 = evalLogGamma(ns + alpha_2);
107  gamma2 = alpha_2 * M_LNPI;
108  gamma3 = evalLogGamma(alpha_2);
109  }
110  else {
111  gamma1 = evalLogGamma2(ns + alpha_2);
112  gamma2 = (alpha_2 + 0.5) * M_LNPI;
113  gamma3 = evalLogGamma2(alpha_2);
114  }
115 
116  return (gamma1 + gamma2 - gamma3 - sum) / M_LN2;
117 }
118 
123 double nodeCost (statistics_t stats) {
124  Uint ns = 0, i;
125  double sum = 0;
126  double loghalf = -0.6931471805599452862;
127  double x;
128 
129  for (i=0; i<stats->symbolCount; i++) {
130  ns += stats->count[stats->symbols[i]];
131  if (stats->count[stats->symbols[i]] > 0) {
132  sum += evalLogGamma2(stats->count[stats->symbols[i]]-1);
133  }
134  }
135  x = (evalLogGamma(ns) - ((stats->symbolCount - 1) * loghalf) - evalLogGamma(stats->symbolCount) - sum + (stats->symbolCount * 0.5 * M_LNPI)) / M_LN2;
136  return x;
137 }
138 
143 double escapeCost (statistics_t stats, Uint * distinct) {
144  Uint ns = 0, i;
145  double sum = 0;
146  double loghalf = -0.6931471805599452862;
147 
148  for (i=0; i<stats->symbolCount; i++) {
149  ns += distinct[stats->symbols[i]];
150  if (distinct[stats->symbols[i]] > 0) {
151  sum += evalLogGamma2(distinct[stats->symbols[i]]-1);
152  }
153  }
154  return (evalLogGamma(ns) - ((stats->symbolCount - 1) * loghalf) - evalLogGamma(stats->symbolCount) - sum + (stats->symbolCount * 0.5 * M_LNPI)) / M_LN2;
155 }
156 
160 double log2Alpha () {
161  if (alphasizeLog == 0) {
162  alphasizeLog = gsl_sf_log(alphasize) / M_LN2;
163  }
164  return alphasizeLog;
165 }
166 
167 
171 double hAlpha () {
172  if (alphaEntropy == 0) {
173  double invAlpha = (double)1 / alphasize;
174  alphaEntropy = (invAlpha * log2Alpha()) - ((1 - invAlpha) * gsl_sf_log(1 - invAlpha) / M_LN2);
175  DEBUGCODE(printf(">>>> %e\n", alphaEntropy));
176  }
177  return alphaEntropy;
178 }
179 
180 #ifdef DEBUG
181 
185 int getHits() {
186  return gammaHits;
187 }
188 
189 
193 int getMisses() {
194  return gammaMisses;
195 }
196 
197 #endif
static double alphasizeLog
Log2 of the alphabet size.
Definition: gammaFunc.c:39
Uint * count
List with the number of occurrences of each character in this state.
Definition: statistics.h:26
double escapeCost(statistics_t stats, Uint *distinct)
Calculates the cost of escapes in a node.
Definition: gammaFunc.c:143
double kt(statistics_t stats)
Calculates the Krichevsky-Trofimov probability assignment.
Definition: gammaFunc.c:96
static double evalLogGamma(int n)
Calculates Ln (gamma(n)).
Definition: gammaFunc.c:49
double hAlpha()
Returns the binary entropy of 1/alphasize.
Definition: gammaFunc.c:171
double log2Alpha()
Returns the log2 of the alphabet size.
Definition: gammaFunc.c:160
Uint alphasize
Alphabet size.
Definition: alpha.h:25
unsigned long Uint
Unsigned int type.
Definition: types.h:54
Uint symbolCount
Number of occuring symbols.
Definition: statistics.h:28
#define TBL_SIZE
Size of the lookup tables.
Definition: gammaTable.h:23
#define LNPI_2
Ln of PI/2.
Definition: gammaFunc.c:36
static double alphaEntropy
Binary entropy of 1/alphasize.
Definition: gammaFunc.c:42
double logGammaTbl2[TBL_SIZE]
logGammaTbl[i] = ln(gamma(i + 1 + 1/2)).
Definition: gammaTable.h:2030
static double evalLogGamma2(int n)
Calculates Ln (gamma(n + 1/2)).
Definition: gammaFunc.c:73
Structure that saves statistics for each tree node in the encoder.
Definition: statistics.h:25
#define DEBUGCODE(S)
Definition: debug.h:32
Uchar * symbols
List of occuring symbols.
Definition: statistics.h:27
double logGammaTbl[TBL_SIZE]
logGammaTbl[i] = ln(gamma(i+2)).
Definition: gammaTable.h:26
double nodeCost(statistics_t stats)
Calculates the cost of a node.
Definition: gammaFunc.c:123