summaryrefslogtreecommitdiff
path: root/media/pocketsphinx/src/mdef.h
blob: b0a7ced17597feef3032d386ecdc40b21583f1d4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
 * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced 
 * Research Projects Agency and the National Science Foundation of the 
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */

/*
 * mdef.h -- HMM model definition: base (CI) phones and triphones
 *
 * **********************************************
 * CMU ARPA Speech Project
 *
 * Copyright (c) 1999 Carnegie Mellon University.
 * ALL RIGHTS RESERVED.
 * **********************************************
 */


#ifndef __MDEF_H__
#define __MDEF_H__


/* System headers. */
#include <stdio.h>

/* SphinxBase headers. */
#include <sphinxbase/hash_table.h>

#ifdef __cplusplus
extern "C" {
#endif

/** \file mdef.h
 * \brief Model definition 
 */

/** \enum word_posn_t
 * \brief Position of a phone within a word.
 *
 * The numeric value of every enumerator is fixed explicitly.
 */
typedef enum {
    /** Internal phone of word */
    WORD_POSN_INTERNAL = 0,
    /** Beginning phone of word */
    WORD_POSN_BEGIN = 1,
    /** Ending phone of word */
    WORD_POSN_END = 2,
    /** Single phone word (i.e. both begin and end) */
    WORD_POSN_SINGLE = 3,
    /** Undefined value, used for initial conditions, etc */
    WORD_POSN_UNDEFINED = 4
} word_posn_t;
#define N_WORD_POSN	4	/**< Total number of defined word positions, i.e. every word_posn_t value except WORD_POSN_UNDEFINED */
#define WPOS_NAME	"ibesu"	/**< One printable character per word_posn_t value, in enum order (internal, begin, end, single, undefined) */
#define S3_SILENCE_CIPHONE "SIL" /**< Hard-coded silence CI phone name */

/**
   \struct ciphone_t
   \brief Information kept for one context-independent (CI) phone.
*/
typedef struct {
    char *name;                 /**< The name of the CI phone */
    int32 filler;		/**< Whether this is a filler phone (read through
				   mdef_is_fillerphone()); if so, it can be substituted by
				   the silence phone in left or right context position */
} ciphone_t;

/**
 * \struct phone_t
 * \brief Description of one phone: either a base (CI) phone or a triphone.
 *
 * Base phones are stored in the same table as triphones; for a base phone
 * the lc, rc and wpos members are non-existent (carry no meaning).
 */
typedef struct {
    /** State sequence (or senone sequence) ID, considering the
        n_emit_state senone-ids are a unit.  The senone sequences
        themselves are in a separate table */
    int32 ssid;

    /** Transition matrix id */
    int32 tmat;

    /** Base ciphone */
    int16 ci;

    /** Left context ciphone */
    int16 lc;

    /** Right context ciphone */
    int16 rc;

    /** Word position */
    word_posn_t wpos;
} phone_t;

/**
 * \struct ph_rc_t
 * \brief One node of a singly linked list of right contexts for a parent <wpos,ci,lc>.
 *
 * Together with ph_lc_t this forms the structures needed for mapping
 * <ci,lc,rc,wpos> into a phone id (pid).  (See mdef_t.wpos_ci_lclist.)
 * (lc = left context; rc = right context.)
 * NOTE: Both ph_rc_t and ph_lc_t are FOR INTERNAL USE ONLY.
 */
typedef struct ph_rc_s {
    int16 rc;			/**< Specific rc for a parent <wpos,ci,lc> */
    int32 pid;			/**< Triphone id for above rc instance */
    struct ph_rc_s *next;	/**< Next rc entry for same parent <wpos,ci,lc> */
} ph_rc_t;

/**
 * \struct ph_lc_t
 * \brief One node of a singly linked list of left contexts for a parent <wpos,ci>.
 *
 * Each node carries the list of right contexts (ph_rc_t) known for its
 * <wpos,ci,lc> combination.  FOR INTERNAL USE ONLY.
 */

typedef struct ph_lc_s {
    int16 lc;			/**< Specific lc for a parent <wpos,ci> */
    ph_rc_t *rclist;		/**< rc list for above lc instance */
    struct ph_lc_s *next;	/**< Next lc entry for same parent <wpos,ci> */
} ph_lc_t;


/**
   \struct mdef_t
   \brief The main model definition structure: the phone inventory, the
   senone sequences and the tables used to map phone contexts to phone ids.
*/
typedef struct {
    int32 n_ciphone;		/**< number basephones actually present */
    int32 n_phone;		/**< number basephones + number triphones actually present */
    int32 n_emit_state;		/**< number emitting states per phone */
    int32 n_ci_sen;		/**< number CI senones; these come first in the senone numbering */
    int32 n_sen;		/**< number senones (CI+CD) */
    int32 n_tmat;		/**< number transition matrices */
    
    hash_table_t *ciphone_ht;	/**< Hash table for mapping ciphone strings to ids */
    ciphone_t *ciphone;		/**< CI-phone information for all ciphones */
    phone_t *phone;		/**< Information for all ciphones and triphones */
    uint16 **sseq;		/**< Unique state (or senone) sequences in this model, shared
                                   among all phones/triphones; accessed as
                                   sseq[ssid][state position] (see mdef_sseq2sen) */
    int32 n_sseq;		/**< No. of unique senone sequences in this model */
    
    int16 *cd2cisen;		/**< Parent CI-senone id for each senone; the first
				   n_ci_sen are identity mappings; the CD-senones are
				   contiguous for each parent CI-phone */
    int16 *sen2cimap;		/**< Parent CI-phone for each senone (CI or CD) */
    
    int16 sil;			/**< Ciphone id of the silence phone (presumably the phone
				   named by S3_SILENCE_CIPHONE -- confirm in mdef.c) */
    
    ph_lc_t ***wpos_ci_lclist;	/**< wpos_ci_lclist[wpos][ci] = head of the linked list of
                                   left contexts (ph_lc_t) for <wpos,ci>.  The rclist
                                   member of each list entry is the list of right
                                   contexts for that <wpos,ci,lc>.  Only entries for
                                   the known triphones are created to conserve space.
                                   (NOTE: FOR INTERNAL USE ONLY.) */
} mdef_t;

/*
 * Access macros; not meant for arbitrary use.
 *
 * In every macro `m` is a pointer to an mdef_t.  `p` is a phone id (a
 * ciphone id for mdef_is_fillerphone), `ss` a senone-sequence id and
 * `pos` a state position inside that sequence.
 */

/* Scalar members of the model definition. */
#define mdef_n_ciphone(m)             ((m)->n_ciphone)
#define mdef_n_phone(m)               ((m)->n_phone)
#define mdef_n_sseq(m)                ((m)->n_sseq)
#define mdef_n_emit_state(m)          ((m)->n_emit_state)
#define mdef_n_sen(m)                 ((m)->n_sen)
#define mdef_n_tmat(m)                ((m)->n_tmat)
#define mdef_silphone(m)              ((m)->sil)

/* Whole mapping tables (the pointer itself, not an element). */
#define mdef_sen2cimap(m)             ((m)->sen2cimap)
#define mdef_cd2cisen(m)              ((m)->cd2cisen)

/* Per-phone lookups. */
#define mdef_is_fillerphone(m, p)     ((m)->ciphone[(p)].filler)
#define mdef_pid2ssid(m, p)           ((m)->phone[(p)].ssid)
#define mdef_pid2tmatid(m, p)         ((m)->phone[(p)].tmat)
#define mdef_pid2ci(m, p)             ((m)->phone[(p)].ci)

/* Senone id at position `pos` of senone sequence `ss`. */
#define mdef_sseq2sen(m, ss, pos)     ((m)->sseq[(ss)][(pos)])

/**
 * Build the model definition structure (mdef_t) from the given model
 * definition file.
 * The result should be treated as a READ-ONLY structure by callers.
 * NOTE(review): the value returned on failure is not stated in this header;
 * confirm against the implementation.  The result is presumably released
 * with mdef_free().
 * @return pointer to the model definition structure created.
 */
mdef_t *mdef_init (char *mdeffile, /**< In: Model definition file */
		   int breport     /**< In: whether to report the progress or not */
    );


/** 
    Look up the id of a CI phone from its string name.
    NOTE(review): the value returned for an unknown name is not stated in
    this header; confirm against the implementation.
    @return ciphone id for the given ciphone string name 
*/
int mdef_ciphone_id(mdef_t *m,		/**< In: Model structure being queried */
                    char *ciphone	/**< In: ciphone name for which the id is wanted */
    );

/** 
    Get the string name of a CI phone from its ciphone id.
    @return READ-ONLY ciphone string name for the given ciphone id; the
    caller must not modify it.
*/
const char *mdef_ciphone_str(mdef_t *m,	/**< In: Model structure being queried */
                             int ci	/**< In: ciphone id for which name wanted */
    );

/** 
    Decide whether the given phone id refers to a CI (base) phone.
    @return 1 if given triphone argument is a ciphone, 0 if not, -1 if error 
*/
int mdef_is_ciphone (mdef_t *m,		/**< In: Model structure being queried */
                     int p		/**< In: triphone id being queried */
    );

/**
   Decide whether the given senone belongs to a CI phone, i.e. is a CI senone.
   @return 1 if the given senone is a CI senone (presumably 0 otherwise; the
   other return values are not stated in this header)
*/  
int mdef_is_cisenone(mdef_t *m,               /**< In: Model structure being queried */
                     int s		        /**< In: senone id being queried */
    );

/** 
    Find the phone id for the given base phone, left and right context
    phones, and word position.
    @return phone id for the given constituents if found, else BAD_S3PID 
*/
int mdef_phone_id (mdef_t *m,		/**< In: Model structure being queried */
                   int b,		/**< In: base ciphone id */
                   int l,		/**< In: left context ciphone id */
                   int r,		/**< In: right context ciphone id */
                   word_posn_t pos	/**< In: Word position */
    );

/**
 * Create a phone string for the given phone (base or triphone) id in the given buf.
 * NOTE(review): no buffer length is passed, so the caller must supply a
 * buffer that is large enough; the required size is not stated in this
 * header -- confirm against the implementation.
 * @return 0 if successful, -1 if error.
 */
int mdef_phone_str(mdef_t *m,		/**< In: Model structure being queried */
                   int pid,		/**< In: phone id being queried */
                   char *buf		/**< Out: On return, buf has the string */
    );

/**
 * Compare the underlying HMMs for two given phones (i.e., compare the two transition
 * matrix IDs and the individual state (senone) IDs).
 * Note that the "different" result is -1, not an ordering as with strcmp().
 * @return 0 iff the HMMs are identical, -1 otherwise.
 */
int mdef_hmm_cmp (mdef_t *m,	/**< In: Model being queried */
                  int p1, 	/**< In: One of the two triphones being compared */
                  int p2	/**< In: The other of the two triphones being compared */
    );

/** Report the parameters of the given model definition.
    NOTE(review): where the report is written is not visible from this header. */
void mdef_report(mdef_t *m /**<  In: model definition structure */
    );

/** Free a list of left-context entries (ph_lc_t).
    Going by the name, this presumably releases every entry reachable from
    the given one -- confirm against the implementation. */
void mdef_free_recursive_lc (ph_lc_t *lc /**< In: A list of left context */
    );
/** Free a list of right-context entries (ph_rc_t).
    Going by the name, this presumably releases every entry reachable from
    the given one -- confirm against the implementation. */
void mdef_free_recursive_rc (ph_rc_t *rc /**< In: A list of right context */
    );

/** Free an mdef_t.
    NOTE(review): presumably the counterpart of mdef_init(); which members
    are released is not visible from this header. */
void mdef_free (mdef_t *mdef /**< In : The model definition*/
    );


#ifdef __cplusplus
}
#endif

#endif