presage 0.9.1
ARPAPredictor.h
Go to the documentation of this file.
1
2/******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25#ifndef PRESAGE_ARPAPREDICTOR
26#define PRESAGE_ARPAPREDICTOR
27
28#include "predictor.h"
29#include "../core/logger.h"
30#include "../core/progress.h"
31#include "../core/dispatcher.h"
32
33#include <assert.h>
34#include <fstream>
35#include <iomanip>
36
37
/// Comparison functor that orders floats from largest to smallest.
class cmp {
public:
    /// @return true when f1 is strictly greater than f2, false otherwise
    ///         (including when the two values are equal).
    bool operator()(const float& f1, const float& f2) const {
        return f1 > f2;
    }
};
44
/// Pair of floats attached to an n-gram entry.
/// NOTE(review): the names suggest a log-probability and a log back-off
/// weight as found in ARPA language-model files -- confirm against the loader.
class ARPAData
{
public:
    /// Stores lp in logProb and la in logAlfa.
    ARPAData(float lp, float la) : logProb(lp), logAlfa(la) {}

    float logProb;
    float logAlfa;
};
53
/// Key made of three integer codes, compared lexicographically
/// (key1 first, then key2, then key3).
class TrigramKey
{
public:
    /// Stores wd1, wd2 and wd3 in key1, key2 and key3 respectively.
    TrigramKey(int wd1, int wd2, int wd3) : key1(wd1), key2(wd2), key3(wd3) {}

    /// Strict lexicographic "less than": the first differing component decides.
    bool operator<(const TrigramKey &right) const
    {
        if (key1 != right.key1)
            return key1 < right.key1;

        if (key2 != right.key2)
            return key2 < right.key2;

        return key3 < right.key3;
    }

    /// True when all three components match.
    bool operator==(const TrigramKey &right) const
    {
        return key1 == right.key1 && key2 == right.key2 && key3 == right.key3;
    }

    int key1;
    int key2;
    int key3;
};
83
/// Key made of two integer codes, compared lexicographically
/// (key1 first, then key2).
class BigramKey
{
public:
    /// Stores wd1 and wd2 in key1 and key2 respectively.
    BigramKey(int wd1, int wd2) : key1(wd1), key2(wd2) {}

    /// Strict lexicographic "less than": the first differing component decides.
    bool operator<(const BigramKey &right) const
    {
        if (key1 != right.key1)
            return key1 < right.key1;

        return key2 < right.key2;
    }

    /// True when both components match.
    /// The right-hand side is a BigramKey; the earlier signature took a
    /// TrigramKey, which made BigramKey-to-BigramKey equality uncompilable.
    bool operator==(const BigramKey &right) const
    {
        return key1 == right.key1 && key2 == right.key2;
    }

    int key1;
    int key2;
};
108
// Predictor that derives from both Predictor and Observer. Its private state
// is two vocabulary maps (word -> code, code -> word) and three n-gram tables
// keyed by int, BigramKey and TrigramKey.
// NOTE(review): this listing has gaps -- the embedded numbering jumps
// 114 -> 117, 135 -> 137 and 155 -> 169, so some declarations (presumably the
// constructor/destructor and further members) are not shown here. The member
// index later in this page names `dispatcher`, `unigramProg`, `bigramProg`
// and `trigramProg`, none of which appear below. Confirm against the real header.
112class ARPAPredictor : public Predictor, public Observer {
113
114public:
117
// Generate prediction (wording taken from the member index of this page).
// Takes a size and an array of C-string filters; does not modify the object.
118 virtual Prediction predict(const size_t size, const char** filter) const;
119
// Receives a vector of strings named `change`; semantics are defined in the
// implementation file, not visible here.
120 virtual void learn(const std::vector<std::string>& change);
121
// Callback taking the Observable that is being reported.
// NOTE(review): presumably the Observer interface hook -- confirm.
122 virtual void update (const Observable* variable);
123
// Setters that each take their new value as a string.
124 void set_vocab_filename (const std::string& value);
125 void set_arpa_filename (const std::string& value);
126 void set_timeout (const std::string& value);
127
128private:
// Upper-case string members.
// NOTE(review): these look like configuration-variable names matching the
// setters above -- confirm in the constructor.
129 std::string LOGGER;
130 std::string ARPAFILENAME;
131 std::string VOCABFILENAME;
132 std::string TIMEOUT;
133
// File names held as strings (listing line 136 is not shown).
134 std::string arpaFilename;
135 std::string vocabFilename;
137
// Vocabulary lookup in both directions: string -> int and int -> string.
138 std::map<std::string,int> vocabCode;
139 std::map<int,std::string> vocabDecode;
140
// n-gram tables: unigrams and bigrams map to ARPAData, trigrams to a single float.
141 std::map<int,ARPAData> unigramMap;
142 std::map<BigramKey,ARPAData>bigramMap;
143 std::map<TrigramKey,float>trigramMap;
144
// Private helpers; their behaviour is defined in the implementation file.
145 void loadVocabulary();
146 void createARPATable();
147 bool matchesPrefixAndFilter(std::string , std::string , const char** ) const;
148
// Each helper takes one string by value.
// NOTE(review): presumably one line of the ARPA file per call -- confirm.
149 void addUnigram(std::string);
150 void addBigram(std::string);
151 void addTrigram(std::string);
152
// Const helpers returning a float from two or three int arguments.
// NOTE(review): the ints are presumably vocabulary codes -- confirm.
153 inline float computeTrigramBackoff(int,int,int) const;
154 inline float computeBigramBackoff(int,int) const;
155
159
163
167
169};
170
171#endif // PRESAGE_ARPAPREDICTOR
ARPAData(float lp, float la)
Definition: ARPAPredictor.h:49
float logAlfa
Definition: ARPAPredictor.h:51
float logProb
Definition: ARPAPredictor.h:50
virtual Prediction predict(const size_t size, const char **filter) const
Generate prediction.
void addBigram(std::string)
ProgressBar< char > * bigramProg
void addUnigram(std::string)
std::string vocabFilename
std::map< std::string, int > vocabCode
void createARPATable()
std::map< TrigramKey, float > trigramMap
void set_arpa_filename(const std::string &value)
virtual void update(const Observable *variable)
float computeBigramBackoff(int, int) const
std::string VOCABFILENAME
Dispatcher< ARPAPredictor > dispatcher
std::map< int, std::string > vocabDecode
ProgressBar< char > * unigramProg
bool matchesPrefixAndFilter(std::string, std::string, const char **) const
float computeTrigramBackoff(int, int, int) const
ProgressBar< char > * trigramProg
virtual void learn(const std::vector< std::string > &change)
std::string arpaFilename
std::string ARPAFILENAME
void set_vocab_filename(const std::string &value)
void addTrigram(std::string)
void set_timeout(const std::string &value)
std::map< int, ARPAData > unigramMap
std::map< BigramKey, ARPAData > bigramMap
std::string TIMEOUT
std::string LOGGER
void loadVocabulary()
bool operator<(const BigramKey &right) const
Definition: ARPAPredictor.h:89
bool operator==(const TrigramKey &right) const
BigramKey(int wd1, int wd2)
Definition: ARPAPredictor.h:87
Tracks user interaction and context.
TrigramKey(int wd1, int wd2, int wd3)
Definition: ARPAPredictor.h:57
bool operator<(const TrigramKey &right) const
Definition: ARPAPredictor.h:59
bool operator==(const TrigramKey &right) const
Definition: ARPAPredictor.h:75
bool operator()(const float &f1, const float &f2) const
Definition: ARPAPredictor.h:40