PocketSphinx
0.6
Main Page
Data Structures
Files
File List
Globals
fsg_lextree.h
1
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
/* ====================================================================
3
* Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4
* reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
*
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
*
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in
15
* the documentation and/or other materials provided with the
16
* distribution.
17
*
18
*
19
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
*
31
* ====================================================================
32
*
33
*/
34
/*
35
* fsg_lextree.h -- The collection of all the lextrees for the entire FSM.
36
*
37
* **********************************************
38
* CMU ARPA Speech Project
39
*
40
* Copyright (c) 2004 Carnegie Mellon University.
41
* ALL RIGHTS RESERVED.
42
* **********************************************
43
*
44
* HISTORY
45
*
46
* $Log: fsg_lextree.h,v $
47
* Revision 1.1.1.1 2006/05/23 18:45:02 dhuggins
48
* re-importation
49
*
50
* Revision 1.1 2004/07/16 00:57:12 egouvea
51
* Added Ravi's implementation of FSG support.
52
*
53
* Revision 1.3 2004/06/23 20:32:16 rkm
54
* *** empty log message ***
55
*
56
* Revision 1.2 2004/05/27 14:22:57 rkm
57
* FSG cross-word triphones completed (but for single-phone words)
58
*
59
* Revision 1.1.1.1 2004/03/01 14:30:31 rkm
60
*
61
*
62
* Revision 1.1 2004/02/23 15:53:45 rkm
63
* Renamed from fst to fsg
64
*
65
* Revision 1.2 2004/02/19 21:16:54 rkm
66
* Added fsg_search.{c,h}
67
*
68
* Revision 1.1 2004/02/18 15:02:34 rkm
69
* Added fsg_lextree.{c,h}
70
*
71
*
72
* 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
73
* Started.
74
*/
75
76
77
#ifndef __S2_FSG_LEXTREE_H__
78
#define __S2_FSG_LEXTREE_H__
79
80
/* SphinxBase headers. */
81
#include <sphinxbase/cmd_ln.h>
82
#include <sphinxbase/fsg_model.h>
83
84
/* Local headers. */
85
#include "
hmm.h
"
86
#include "
dict.h
"
87
#include "
dict2pid.h
"
88
89
/*
90
* **HACK-ALERT**!! Compile-time constant determining the size of the
91
* bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.)
92
* But it makes memory allocation simpler and more efficient.
93
*/
94
#define FSG_PNODE_CTXT_BVSZ 2
95
96
typedef
struct
{
97
uint32 bv[FSG_PNODE_CTXT_BVSZ];
98
}
fsg_pnode_ctxt_t
;
99
100
101
/*
102
* All transitions (words) out of any given FSG state represented are by a
103
* phonetic prefix lextree (except for epsilon or null transitions; they
104
* are not part of the lextree). Lextree leaf nodes represent individual
105
* FSG transitions, so no sharing is allowed at the leaf nodes. The FSG
106
* transition probs are distributed along the lextree: the prob at a node
107
* is the max of the probs of all leaf nodes (and, hence, FSG transitions)
108
* reachable from that node.
109
*
110
* To conserve memory, the underlying HMMs with state-level information are
111
* allocated only as needed. Root and leaf nodes must also account for all
112
* the possible phonetic contexts, with an independent HMM for each distinct
113
* context.
114
*/
115
typedef
struct
fsg_pnode_s
{
116
/*
117
* If this is not a leaf node, the first successor (child) node. Otherwise
118
* the parent FSG transition for which this is the leaf node (for figuring
119
* the FSG destination state, and word emitted by the transition). A node
120
* may have several children. The succ ptr gives just the first; the rest
121
* are linked via the sibling ptr below.
122
*/
123
union
{
124
struct
fsg_pnode_s
*succ;
125
fsg_link_t *fsglink;
126
} next;
127
128
/*
129
* For simplicity of memory management (i.e., freeing the pnodes), all
130
* pnodes allocated for all transitions out of a state are maintained in a
131
* linear linked list through the alloc_next pointer.
132
*/
133
struct
fsg_pnode_s
*alloc_next;
134
135
/*
136
* The next node that is also a child of the parent of this node; NULL if
137
* none.
138
*/
139
struct
fsg_pnode_s
*sibling;
140
141
/*
142
* The transition (log) probability to be incurred upon transitioning to
143
* this node. (Transition probabilities are really associated with the
144
* transitions. But a lextree node has exactly one incoming transition.
145
* Hence, the prob can be associated with the node.)
146
* This is a logs2(prob) value, and includes the language weight.
147
*/
148
int32 logs2prob;
149
150
/*
151
* The root and leaf positions associated with any transition have to deal
152
* with multiple phonetic contexts. However, different contexts may result
153
* in the same SSID (senone-seq ID), and can share a single pnode with that
154
* SSID. But the pnode should track the set of context CI phones that share
155
* it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For
156
* simplicity of implementation, its size is a compile-time constant for
157
* now.) Single phone words would need a 2-D array of context, but that's
158
* too expensive. For now, they simply use SIL as right context, so only
159
* the left context is properly modelled.
160
* (For word-internal phones, this field is unused, of course.)
161
*/
162
fsg_pnode_ctxt_t
ctxt;
163
164
uint16 ci_ext;
/* This node's CIphone as viewed externally (context) */
165
uint8 ppos;
/* Phoneme position in pronunciation */
166
uint8 leaf;
/* Whether this is a leaf node */
167
168
/* HMM-state-level stuff here */
169
hmm_context_t
*ctx;
170
hmm_t
hmm;
171
}
fsg_pnode_t
;
172
173
/* Access macros */
174
#define fsg_pnode_leaf(p) ((p)->leaf)
175
#define fsg_pnode_logs2prob(p) ((p)->logs2prob)
176
#define fsg_pnode_succ(p) ((p)->next.succ)
177
#define fsg_pnode_fsglink(p) ((p)->next.fsglink)
178
#define fsg_pnode_sibling(p) ((p)->sibling)
179
#define fsg_pnode_hmmptr(p) (&((p)->hmm))
180
#define fsg_pnode_ci_ext(p) ((p)->ci_ext)
181
#define fsg_pnode_ppos(p) ((p)->ppos)
182
#define fsg_pnode_leaf(p) ((p)->leaf)
183
#define fsg_pnode_ctxt(p) ((p)->ctxt)
184
185
#define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f)))
186
187
/*
188
* The following is macroized because its called very frequently
189
* ::: uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub);
190
*/
191
/*
192
* Subtract bitvector sub from bitvector src (src updated with the result).
193
* Return 0 if result is all 0, non-zero otherwise.
194
*/
195
196
#if (FSG_PNODE_CTXT_BVSZ == 1)
197
#define FSG_PNODE_CTXT_SUB(src,sub) \
198
((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0]))
199
#elif (FSG_PNODE_CTXT_BVSZ == 2)
200
#define FSG_PNODE_CTXT_SUB(src,sub) \
201
(((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
202
((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])))
203
#elif (FSG_PNODE_CTXT_BVSZ == 4)
204
#define FSG_PNODE_CTXT_SUB(src,sub) \
205
(((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
206
((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])) | \
207
((src)->bv[2] = (~((sub)->bv[2]) & (src)->bv[2])) | \
208
((src)->bv[3] = (~((sub)->bv[3]) & (src)->bv[3])))
209
#else
210
#define FSG_PNODE_CTXT_SUB(src,sub) fsg_pnode_ctxt_sub_generic((src),(sub))
211
#endif
212
216
typedef
struct
fsg_lextree_s
{
217
fsg_model_t *
fsg
;
218
hmm_context_t
*
ctx
;
219
dict_t
*
dict
;
220
dict2pid_t
*
d2p
;
221
bin_mdef_t
*
mdef
;
223
/*
224
* Left and right CIphone sets for each state.
225
* Left context CIphones for a state S: If word W transitions into S, W's
226
* final CIphone is in S's {lc}. Words transitioning out of S must consider
227
* these left context CIphones.
228
* Similarly, right contexts for state S: If word W transitions out of S,
229
* W's first CIphone is in S's {rc}. Words transitioning into S must consider
230
* these right contexts.
231
*
232
* NOTE: Words may transition into and out of S INDIRECTLY, with intermediate
233
* null transitions.
234
* NOTE: Single-phone words are difficult; only SILENCE right context is
235
* modelled for them.
236
* NOTE: Non-silence filler phones aren't included in these sets. Filler
237
* words don't use context, and present the SILENCE phone as context to
238
* adjacent words.
239
*/
240
int16 **
lc
;
241
int16 **
rc
;
243
fsg_pnode_t
**root;
/* root[s] = lextree representing all transitions
244
out of state s. Note that the "tree" for each
245
state is actually a collection of trees, linked
246
via fsg_pnode_t.sibling (root[s]->sibling) */
247
fsg_pnode_t
**alloc_head;
/* alloc_head[s] = head of linear list of all
248
pnodes allocated for state s */
249
int32 n_pnode;
/* #HMM nodes in search structure */
250
int32 wip;
251
int32 pip;
252
}
fsg_lextree_t
;
253
254
/* Access macros */
255
#define fsg_lextree_root(lt,s) ((lt)->root[s])
256
#define fsg_lextree_n_pnode(lt) ((lt)->n_pnode)
257
261
fsg_lextree_t
*
fsg_lextree_init
(fsg_model_t *fsg,
dict_t
*dict,
262
dict2pid_t
*d2p,
263
bin_mdef_t
*mdef,
hmm_context_t
*ctx,
264
int32 wip, int32 pip);
265
269
void
fsg_lextree_free
(
fsg_lextree_t
*fsg);
270
274
void
fsg_lextree_dump
(
fsg_lextree_t
*fsg, FILE *fh);
275
279
void
fsg_psubtree_pnode_deactivate
(
fsg_pnode_t
*pnode);
280
284
void
fsg_pnode_add_all_ctxt
(
fsg_pnode_ctxt_t
*ctxt);
285
289
uint32
fsg_pnode_ctxt_sub_generic
(
fsg_pnode_ctxt_t
*src,
fsg_pnode_ctxt_t
*sub);
290
291
#endif
src
libpocketsphinx
fsg_lextree.h
Generated on Sun Jul 7 2013 12:25:59 for PocketSphinx by
1.8.4