corpus.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * corpus.h -- Corpus-file related misc functions.
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log$
49  * Revision 1.1 2006/04/05 20:27:30 dhdfu
50  * A Great Reorganization of header files and executables
51  *
52  * Revision 1.13 2006/02/22 19:49:25 arthchan2003
53  * Merged from SPHINX3_5_2_RCI_IRII:
54  * 1, Add structure utt_res_t, this is an utterance-based resource
55  * structure. Add basic operation such as free and report.
56  * 2, Modify the structure of the loop in ctl_corpus to make it not so
57  * clunky. Tested with make check .
58  * 3, Completely removed ctl_process_dyn_lm, it is a product of code
59  * duplication (alright, it is written by me......)
60  * 4, Fixed doc-dox.
61  *
62  * Revision 1.12.4.3 2005/07/27 23:19:11 arthchan2003
63  * 1, Added utt_res_t structure and its methods. 2, Changed the function pointer prototype. 3, Removed the lm and mllr set process out of ctl_process
64  *
65  * Revision 1.12.4.2 2005/07/26 03:14:17 arthchan2003
66  * Removed ctl_process_dyn_lm. One of my sins.
67  *
68  * Revision 1.12.4.1 2005/07/05 06:25:40 arthchan2003
69  * Fixed dox-doc.
70  *
71  * Revision 1.12 2005/06/21 20:44:34 arthchan2003
72  * 1, Fixed doxygen documentation, 2, Add the $ keyword.
73  *
74  * Revision 1.4 2005/06/18 20:05:23 archan
75  * Sphinx3 to s3.generic: Set lm correctly in dag.c and astar.c. Same changes should also be applied to decode_anytopo.
76  *
77  * Revision 1.3 2005/03/30 01:22:46 archan
78  * Fixed mistakes in last updates. Add
79  *
80  *
81  * 09-Dec-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
82  * Added ctl_process_utt ().
83  *
84  * 01-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
85  * Updated ctl_infile() spec to include a check for an already existing file extension.
86  *
87  * 23-Mar-1998 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
88  * Added a general purpose data argument to ctl_process() and its function
89  * argument func.
90  *
91  * 22-Nov-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
92  * Added an optional validation function argument and an optional
93  * duplicate-resolution function argument to both corpus_load_headid() and
94  * corpus_load_tailid().
95  *
96  * 25-Oct-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
97  * Created.
98  */
99 
100 
101 #ifndef _S3_CORPUS_H_
102 #define _S3_CORPUS_H_
103 
104 #include <stdio.h>
105 
106 #include <hash_table.h>
107 #include <profile.h>
108 #include <s3types.h>
109 
110 
111 
115 #ifdef __cplusplus
116 extern "C" {
117 #endif
118 #if 0
119 } /* Fool Emacs into not indenting things. */
120 #endif
121 
/**
 * Utterance-based resource record (described in the file history as an
 * "utterance-based resource structure").
 *
 * All five members are plain character pointers.  This header does not
 * state who owns the strings they point to.
 */
typedef struct {
    char *uttfile;      /* presumably the utterance file name -- confirm */
    char *lmname;       /* presumably the language-model name for the utterance -- confirm */
    char *fsgname;      /* presumably the FSG name for the utterance -- confirm */
    char *regmatname;   /* presumably the MLLR regression-matrix name -- confirm */
    char *cb2mllrname;  /* presumably the codebook-to-MLLR-class map name -- confirm */
} utt_res_t;
140 
/*
 * Field setters for utt_res_t.
 *
 * Each macro stores the pointer `name` into the matching member of the
 * structure that `ur` points to.  Only the pointer is assigned; the string
 * is not copied, so it must outlive the structure's use of it.
 *
 * Both arguments and the whole expansion are parenthesized so that the
 * macros behave like ordinary function calls: `ur` may be any pointer
 * expression (e.g. `&res` or `base + i`), and the result (the assigned
 * pointer) may be used inside a larger expression.  Each argument is
 * evaluated exactly once.
 */
#define utt_res_set_uttfile(ur, name)     ((ur)->uttfile = (name))
#define utt_res_set_lmname(ur, name)      ((ur)->lmname = (name))
#define utt_res_set_fsgname(ur, name)     ((ur)->fsgname = (name))
#define utt_res_set_regmatname(ur, name)  ((ur)->regmatname = (name))
#define utt_res_set_cb2mllrname(ur, name) ((ur)->cb2mllrname = (name))
146 
148 utt_res_t* new_utt_res(void);
149 
151 void free_utt_res(
152  utt_res_t* ur
153  );
154 
156 void report_utt_res(
157  utt_res_t *ur
158  );
159 
169 typedef struct {
170  hash_table_t *ht;
171  int32 n;
172  char **str;
173 } corpus_t;
174 
175 
204 corpus_t *corpus_load_headid (const char *file,
205  int32 (*validate)(char *str),
206  int32 (*dup_resolve)(char *s1, char *s2));
207 
211 corpus_t *corpus_load_tailid (const char *file,
212  int32 (*validate)(char *str),
213  int32 (*dup_resolve)(char *s1, char *s2));
214 
219 char *corpus_lookup (corpus_t *corp, const char *id);
220 
221 
231 int32 ctl_read_entry (FILE *fp,
232  char *uttfile,
233  int32 *sf,
234  int32 *ef,
236  char *uttid
238  );
239 
240 
251 ptmr_t ctl_process (const char *ctlfile,
252  const char *ctllmfile,
253  const char *ctlmllrfile,
254  int32 nskip,
255  int32 count,
256  void (*func) (void *kb, utt_res_t *ur, int32 sf, int32 ef, char *uttid),
259  void *kb
261  );
262 
263 
273 ptmr_t ctl_process_utt (const char *uttfile,
274  int32 count,
275  void (*func) (void *kb, utt_res_t *ur, int32 sf, int32 ef, char *uttid),
277  void *kb);
278 
/**
 * Build an input file name.
 *
 * file is the only non-const argument, so it is presumably the output
 * buffer that receives the name built from dir, utt and ext.  Its required
 * capacity is not visible from this header.  Per the file history, the
 * spec includes a check for an already existing file extension.
 *
 * @param file presumably receives the resulting file name.
 * @param dir  presumably the directory to prefix -- confirm.
 * @param ext  presumably the file extension to append -- confirm.
 * @param utt  presumably the utterance file spec from the control file.
 */
void ctl_infile(char *file, const char *dir, const char *ext, const char *utt);
293 
/**
 * Build an output file name.
 *
 * file is the only non-const argument, so it is presumably the output
 * buffer that receives the name built from the other arguments.  Its
 * required capacity is not visible from this header.
 *
 * @param file  presumably receives the resulting file name.
 * @param dir   presumably the output directory -- confirm.
 * @param ext   presumably the file extension to append -- confirm.
 * @param utt   presumably the utterance file spec from the control file.
 * @param uttid presumably the utterance ID -- confirm how it is combined
 *              with utt.
 */
void ctl_outfile(char *file, const char *dir, const char *ext, const char *utt, const char *uttid);
310 
311 #if 0
312 { /* Stop indent from complaining */
313 #endif
314 #ifdef __cplusplus
315 }
316 #endif
317 
318 #endif