PocketSphinx  0.6
pocketsphinx.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers. */
39 #include <stdio.h>
40 #include <assert.h>
41 
42 /* SphinxBase headers. */
43 #include <sphinxbase/err.h>
44 #include <sphinxbase/strfuncs.h>
45 #include <sphinxbase/filename.h>
46 #include <sphinxbase/pio.h>
47 
48 /* Local headers. */
49 #include "cmdln_macro.h"
50 #include "pocketsphinx_internal.h"
51 #include "ps_lattice_internal.h"
52 #include "phone_loop_search.h"
53 #include "fsg_search_internal.h"
54 #include "ngram_search.h"
55 #include "ngram_search_fwdtree.h"
56 #include "ngram_search_fwdflat.h"
57 
58 static const arg_t ps_args_def[] = {
59  POCKETSPHINX_OPTIONS,
60  CMDLN_EMPTY_OPTION
61 };
62 
63 /* I'm not sure what the portable way to do this is. */
/* Report whether `path` can be opened for reading.  Returns 1 when
 * fopen() succeeds (the handle is closed again immediately), 0 otherwise. */
static int
file_exists(const char *path)
{
    FILE *fh = fopen(path, "rb");

    if (fh == NULL)
        return 0;
    fclose(fh);
    return 1;
}
73 
/* Report whether `path` contains a readable "mdef" file, i.e. whether
 * "<path>/mdef" can be opened.  Returns 1 if so, 0 otherwise. */
static int
hmmdir_exists(const char *path)
{
    char *mdef_path = string_join(path, "/mdef", NULL);
    FILE *fh = fopen(mdef_path, "rb");
    int found = (fh != NULL);

    if (found)
        fclose(fh);
    ckd_free(mdef_path);
    return found;
}
85 
86 static void
87 ps_add_file(ps_decoder_t *ps, const char *arg,
88  const char *hmmdir, const char *file)
89 {
90  char *tmp = string_join(hmmdir, "/", file, NULL);
91 
92  if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp))
93  cmd_ln_set_str_r(ps->config, arg, tmp);
94  ckd_free(tmp);
95 }
96 
97 static void
98 ps_init_defaults(ps_decoder_t *ps)
99 {
100  char const *hmmdir, *lmfile, *dictfile;
101 
102  /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
103 #ifdef __ADSPBLACKFIN__
104  E_INFO("Will not use mmap() on uClinux/Blackfin.");
105  cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
106 #endif
107 
108 #ifdef MODELDIR
109  /* Set default acoustic and language models. */
110  hmmdir = cmd_ln_str_r(ps->config, "-hmm");
111  lmfile = cmd_ln_str_r(ps->config, "-lm");
112  dictfile = cmd_ln_str_r(ps->config, "-dict");
113  if (hmmdir == NULL && hmmdir_exists(MODELDIR "/hmm/en_US/hub4wsj_sc_8k")) {
114  hmmdir = MODELDIR "/hmm/en_US/hub4wsj_sc_8k";
115  cmd_ln_set_str_r(ps->config, "-hmm", hmmdir);
116  }
117  if (lmfile == NULL && !cmd_ln_str_r(ps->config, "-fsg")
118  && !cmd_ln_str_r(ps->config, "-jsgf")
119  && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) {
120  lmfile = MODELDIR "/lm/en_US/hub4.5000.DMP";
121  cmd_ln_set_str_r(ps->config, "-lm", lmfile);
122  }
123  if (dictfile == NULL && file_exists(MODELDIR "/lm/en_US/cmu07a.dic")) {
124  dictfile = MODELDIR "/lm/en_US/cmu07a.dic";
125  cmd_ln_set_str_r(ps->config, "-dict", dictfile);
126  }
127 
128  /* Expand acoustic and language model filenames relative to installation path. */
129  if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
130  char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL);
131  if (hmmdir_exists(tmphmm)) {
132  cmd_ln_set_str_r(ps->config, "-hmm", tmphmm);
133  } else {
134  E_ERROR("Failed to find mdef file inside the model folder specified with -hmm '%s'\n", hmmdir);
135  }
136  ckd_free(tmphmm);
137  }
138  if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
139  char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL);
140  cmd_ln_set_str_r(ps->config, "-lm", tmplm);
141  ckd_free(tmplm);
142  }
143  if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
144  char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL);
145  cmd_ln_set_str_r(ps->config, "-dict", tmpdict);
146  ckd_free(tmpdict);
147  }
148 #endif
149 
150  /* Get acoustic model filenames and add them to the command-line */
151  if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
152  ps_add_file(ps, "-mdef", hmmdir, "mdef");
153  ps_add_file(ps, "-mean", hmmdir, "means");
154  ps_add_file(ps, "-var", hmmdir, "variances");
155  ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
156  ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
157  ps_add_file(ps, "-sendump", hmmdir, "sendump");
158  ps_add_file(ps, "-fdict", hmmdir, "noisedict");
159  ps_add_file(ps, "-lda", hmmdir, "feature_transform");
160  ps_add_file(ps, "-featparams", hmmdir, "feat.params");
161  ps_add_file(ps, "-senmgau", hmmdir, "senmgau");
162  }
163 }
164 
165 static void
166 ps_free_searches(ps_decoder_t *ps)
167 {
168  gnode_t *gn;
169 
170  if (ps->searches == NULL)
171  return;
172 
173  for (gn = ps->searches; gn; gn = gnode_next(gn))
174  ps_search_free(gnode_ptr(gn));
175  glist_free(ps->searches);
176  ps->searches = NULL;
177  ps->search = NULL;
178 }
179 
180 static ps_search_t *
181 ps_find_search(ps_decoder_t *ps, char const *name)
182 {
183  gnode_t *gn;
184 
185  for (gn = ps->searches; gn; gn = gnode_next(gn)) {
186  if (0 == strcmp(ps_search_name(gnode_ptr(gn)), name))
187  return (ps_search_t *)gnode_ptr(gn);
188  }
189  return NULL;
190 }
191 
192 int
193 ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
194 {
195  char const *lmfile, *lmctl = NULL;
196 
197  if (config && config != ps->config) {
198  cmd_ln_free_r(ps->config);
199  ps->config = config;
200  }
201 #ifndef _WIN32_WCE
202  /* Set up logging. */
203  if (cmd_ln_str_r(ps->config, "-logfn"))
204  err_set_logfile(cmd_ln_str_r(ps->config, "-logfn"));
205 #endif
206  err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
207  ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
208  ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
209  ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");
210 
211  /* Fill in some default arguments. */
212  ps_init_defaults(ps);
213 
214  /* Free old searches (do this before other reinit) */
215  ps_free_searches(ps);
216 
217  /* Free old acmod. */
218  acmod_free(ps->acmod);
219  ps->acmod = NULL;
220 
221  /* Free old dictionary (must be done after the two things above) */
222  dict_free(ps->dict);
223  ps->dict = NULL;
224 
225 
226  /* Logmath computation (used in acmod and search) */
227  if (ps->lmath == NULL
228  || (logmath_get_base(ps->lmath) !=
229  (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
230  if (ps->lmath)
231  logmath_free(ps->lmath);
232  ps->lmath = logmath_init
233  ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
234  cmd_ln_boolean_r(ps->config, "-bestpath"));
235  }
236 
237  /* Acoustic model (this is basically everything that
238  * uttproc.c, senscr.c, and others used to do) */
239  if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
240  return -1;
241  /* Make the acmod's feature buffer growable if we are doing two-pass search. */
242  if (cmd_ln_boolean_r(ps->config, "-fwdflat")
243  && cmd_ln_boolean_r(ps->config, "-fwdtree"))
244  acmod_set_grow(ps->acmod, TRUE);
245 
246  if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) {
247  /* Initialize an auxiliary phone loop search, which will run in
248  * "parallel" with FSG or N-Gram search. */
249  if ((ps->phone_loop = phone_loop_search_init(ps->config,
250  ps->acmod, ps->dict)) == NULL)
251  return -1;
252  ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
253  }
254 
255  /* Dictionary and triphone mappings (depends on acmod). */
256  /* FIXME: pass config, change arguments, implement LTS, etc. */
257  if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
258  return -1;
259 
260  /* Determine whether we are starting out in FSG or N-Gram search mode. */
261  if (cmd_ln_str_r(ps->config, "-fsg") || cmd_ln_str_r(ps->config, "-jsgf")) {
262  ps_search_t *fsgs;
263 
264  if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
265  return -1;
266  if ((fsgs = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
267  return -1;
268  fsgs->pls = ps->phone_loop;
269  ps->searches = glist_add_ptr(ps->searches, fsgs);
270  ps->search = fsgs;
271  }
272  else if ((lmfile = cmd_ln_str_r(ps->config, "-lm"))
273  || (lmctl = cmd_ln_str_r(ps->config, "-lmctl"))) {
274  ps_search_t *ngs;
275 
276  if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
277  return -1;
278  if ((ngs = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL)
279  return -1;
280  ngs->pls = ps->phone_loop;
281  ps->searches = glist_add_ptr(ps->searches, ngs);
282  ps->search = ngs;
283  }
284  /* Otherwise, we will initialize the search whenever the user
285  * decides to load an FSG or a language model. */
286  else {
287  if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
288  return -1;
289  }
290 
291  /* Initialize performance timer. */
292  ps->perf.name = "decode";
293  ptmr_init(&ps->perf);
294 
295  return 0;
296 }
297 
298 ps_decoder_t *
299 ps_init(cmd_ln_t *config)
300 {
301  ps_decoder_t *ps;
302 
303  ps = ckd_calloc(1, sizeof(*ps));
304  ps->refcount = 1;
305  if (ps_reinit(ps, config) < 0) {
306  ps_free(ps);
307  return NULL;
308  }
309  return ps;
310 }
311 
312 arg_t const *
313 ps_args(void)
314 {
315  return ps_args_def;
316 }
317 
318 ps_decoder_t *
320 {
321  ++ps->refcount;
322  return ps;
323 }
324 
325 int
327 {
328  gnode_t *gn;
329 
330  if (ps == NULL)
331  return 0;
332  if (--ps->refcount > 0)
333  return ps->refcount;
334  for (gn = ps->searches; gn; gn = gnode_next(gn))
335  ps_search_free(gnode_ptr(gn));
336  glist_free(ps->searches);
337  dict_free(ps->dict);
338  dict2pid_free(ps->d2p);
339  acmod_free(ps->acmod);
340  logmath_free(ps->lmath);
341  cmd_ln_free_r(ps->config);
342  ckd_free(ps->uttid);
343  ckd_free(ps);
344  return 0;
345 }
346 
347 char const *
349 {
350  return ps->uttid;
351 }
352 
353 cmd_ln_t *
355 {
356  return ps->config;
357 }
358 
359 logmath_t *
361 {
362  return ps->lmath;
363 }
364 
365 fe_t *
367 {
368  return ps->acmod->fe;
369 }
370 
371 feat_t *
373 {
374  return ps->acmod->fcb;
375 }
376 
377 ps_mllr_t *
379 {
380  return acmod_update_mllr(ps->acmod, mllr);
381 }
382 
383 ngram_model_t *
385 {
386  if (ps->search == NULL
387  || 0 != strcmp(ps_search_name(ps->search), "ngram"))
388  return NULL;
389  return ((ngram_search_t *)ps->search)->lmset;
390 }
391 
392 ngram_model_t *
393 ps_update_lmset(ps_decoder_t *ps, ngram_model_t *lmset)
394 {
395  ngram_search_t *ngs;
396  ps_search_t *search;
397 
398  /* Look for N-Gram search. */
399  search = ps_find_search(ps, "ngram");
400  if (search == NULL) {
401  /* Initialize N-Gram search. */
402  search = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
403  if (search == NULL)
404  return NULL;
405  search->pls = ps->phone_loop;
406  ps->searches = glist_add_ptr(ps->searches, search);
407  ngs = (ngram_search_t *)search;
408  }
409  else {
410  ngs = (ngram_search_t *)search;
411  /* Free any previous lmset if this is a new one. */
412  if (ngs->lmset != NULL && ngs->lmset != lmset)
413  ngram_model_free(ngs->lmset);
414  ngs->lmset = lmset;
415  /* Tell N-Gram search to update its view of the world. */
416  if (ps_search_reinit(search, ps->dict, ps->d2p) < 0)
417  return NULL;
418  }
419  ps->search = search;
420  return ngs->lmset;
421 }
422 
423 fsg_set_t *
425 {
426  if (ps->search == NULL
427  || 0 != strcmp(ps_search_name(ps->search), "fsg"))
428  return NULL;
429  return (fsg_set_t *)ps->search;
430 }
431 
432 fsg_set_t *
434 {
435  ps_search_t *search;
436 
437  /* Look for FSG search. */
438  search = ps_find_search(ps, "fsg");
439  if (search == NULL) {
440  /* Initialize FSG search. */
441  search = fsg_search_init(ps->config,
442  ps->acmod, ps->dict, ps->d2p);
443  search->pls = ps->phone_loop;
444  ps->searches = glist_add_ptr(ps->searches, search);
445  }
446  else {
447  /* Tell FSG search to update its view of the world. */
448  if (ps_search_reinit(search, ps->dict, ps->d2p) < 0)
449  return NULL;
450  }
451  ps->search = search;
452  return (fsg_set_t *)search;
453 }
454 
455 int
456 ps_load_dict(ps_decoder_t *ps, char const *dictfile,
457  char const *fdictfile, char const *format)
458 {
459  cmd_ln_t *newconfig;
460  dict2pid_t *d2p;
461  dict_t *dict;
462  gnode_t *gn;
463  int rv;
464 
465  /* Create a new scratch config to load this dict (so existing one
466  * won't be affected if it fails) */
467  newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
468  cmd_ln_set_boolean_r(newconfig, "-dictcase",
469  cmd_ln_boolean_r(ps->config, "-dictcase"));
470  cmd_ln_set_str_r(newconfig, "-dict", dictfile);
471  if (fdictfile)
472  cmd_ln_set_str_r(newconfig, "-fdict", fdictfile);
473  else
474  cmd_ln_set_str_r(newconfig, "-fdict",
475  cmd_ln_str_r(ps->config, "-fdict"));
476 
477  /* Try to load it. */
478  if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
479  cmd_ln_free_r(newconfig);
480  return -1;
481  }
482 
483  /* Reinit the dict2pid. */
484  if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
485  cmd_ln_free_r(newconfig);
486  return -1;
487  }
488 
489  /* Success! Update the existing config to reflect new dicts and
490  * drop everything into place. */
491  cmd_ln_free_r(newconfig);
492  cmd_ln_set_str_r(ps->config, "-dict", dictfile);
493  if (fdictfile)
494  cmd_ln_set_str_r(ps->config, "-fdict", fdictfile);
495  dict_free(ps->dict);
496  ps->dict = dict;
497  dict2pid_free(ps->d2p);
498  ps->d2p = d2p;
499 
500  /* And tell all searches to reconfigure themselves. */
501  for (gn = ps->searches; gn; gn = gnode_next(gn)) {
502  ps_search_t *search = gnode_ptr(gn);
503  if ((rv = ps_search_reinit(search, dict, d2p)) < 0)
504  return rv;
505  }
506 
507  return 0;
508 }
509 
510 int
511 ps_save_dict(ps_decoder_t *ps, char const *dictfile,
512  char const *format)
513 {
514  return dict_write(ps->dict, dictfile, format);
515 }
516 
517 int
519  char const *word,
520  char const *phones,
521  int update)
522 {
523  int32 wid, lmwid;
524  ngram_model_t *lmset;
525  s3cipid_t *pron;
526  char **phonestr, *tmp;
527  int np, i, rv;
528 
529  /* Parse phones into an array of phone IDs. */
530  tmp = ckd_salloc(phones);
531  np = str2words(tmp, NULL, 0);
532  phonestr = ckd_calloc(np, sizeof(*phonestr));
533  str2words(tmp, phonestr, np);
534  pron = ckd_calloc(np, sizeof(*pron));
535  for (i = 0; i < np; ++i) {
536  pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
537  if (pron[i] == -1) {
538  E_ERROR("Unknown phone %s in phone string %s\n",
539  phonestr[i], tmp);
540  ckd_free(phonestr);
541  ckd_free(tmp);
542  ckd_free(pron);
543  return -1;
544  }
545  }
546  /* No longer needed. */
547  ckd_free(phonestr);
548  ckd_free(tmp);
549 
550  /* Add it to the dictionary. */
551  if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
552  ckd_free(pron);
553  return -1;
554  }
555  /* No longer needed. */
556  ckd_free(pron);
557 
558  /* Now we also have to add it to dict2pid. */
559  dict2pid_add_word(ps->d2p, wid);
560 
561  if ((lmset = ps_get_lmset(ps)) != NULL) {
562  /* Add it to the LM set (meaning, the current LM). In a perfect
563  * world, this would result in the same WID, but because of the
564  * weird way that word IDs are handled, it doesn't. */
565  if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
566  == NGRAM_INVALID_WID)
567  return -1;
568  }
569 
570  /* Rebuild the widmap and search tree if requested. */
571  if (update) {
572  if ((rv = ps_search_reinit(ps->search, ps->dict, ps->d2p) < 0))
573  return rv;
574  }
575  return wid;
576 }
577 
578 int
579 ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
580  char const *uttid, long maxsamps)
581 {
582  long total, pos;
583 
584  ps_start_utt(ps, uttid);
585  /* If this file is seekable or maxsamps is specified, then decode
586  * the whole thing at once. */
587  if (maxsamps != -1 || (pos = ftell(rawfh)) >= 0) {
588  int16 *data;
589 
590  if (maxsamps == -1) {
591  long endpos;
592  fseek(rawfh, 0, SEEK_END);
593  endpos = ftell(rawfh);
594  fseek(rawfh, pos, SEEK_SET);
595  maxsamps = endpos - pos;
596  }
597  data = ckd_calloc(maxsamps, sizeof(*data));
598  total = fread(data, sizeof(*data), maxsamps, rawfh);
599  ps_process_raw(ps, data, total, FALSE, TRUE);
600  ckd_free(data);
601  }
602  else {
603  /* Otherwise decode it in a stream. */
604  total = 0;
605  while (!feof(rawfh)) {
606  int16 data[256];
607  size_t nread;
608 
609  nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
610  ps_process_raw(ps, data, nread, FALSE, FALSE);
611  total += nread;
612  }
613  }
614  ps_end_utt(ps);
615  return total;
616 }
617 
618 int
619 ps_start_utt(ps_decoder_t *ps, char const *uttid)
620 {
621  int rv;
622 
623  if (ps->search == NULL) {
624  E_ERROR("No search module is selected, did you forget to "
625  "specify a language model or grammar?\n");
626  return -1;
627  }
628 
629  ptmr_reset(&ps->perf);
630  ptmr_start(&ps->perf);
631 
632  if (uttid) {
633  ckd_free(ps->uttid);
634  ps->uttid = ckd_salloc(uttid);
635  }
636  else {
637  char nuttid[16];
638  ckd_free(ps->uttid);
639  sprintf(nuttid, "%09u", ps->uttno);
640  ps->uttid = ckd_salloc(nuttid);
641  ++ps->uttno;
642  }
643  /* Remove any residual word lattice and hypothesis. */
644  ps_lattice_free(ps->search->dag);
645  ps->search->dag = NULL;
646  ps->search->last_link = NULL;
647  ps->search->post = 0;
648  ckd_free(ps->search->hyp_str);
649  ps->search->hyp_str = NULL;
650 
651  if ((rv = acmod_start_utt(ps->acmod)) < 0)
652  return rv;
653 
654  /* Start logging features and audio if requested. */
655  if (ps->mfclogdir) {
656  char *logfn = string_join(ps->mfclogdir, "/",
657  ps->uttid, ".mfc", NULL);
658  FILE *mfcfh;
659  E_INFO("Writing MFCC log file: %s\n", logfn);
660  if ((mfcfh = fopen(logfn, "wb")) == NULL) {
661  E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn);
662  ckd_free(logfn);
663  return -1;
664  }
665  ckd_free(logfn);
666  acmod_set_mfcfh(ps->acmod, mfcfh);
667  }
668  if (ps->rawlogdir) {
669  char *logfn = string_join(ps->rawlogdir, "/",
670  ps->uttid, ".raw", NULL);
671  FILE *rawfh;
672  E_INFO("Writing raw audio log file: %s\n", logfn);
673  if ((rawfh = fopen(logfn, "wb")) == NULL) {
674  E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn);
675  ckd_free(logfn);
676  return -1;
677  }
678  ckd_free(logfn);
679  acmod_set_rawfh(ps->acmod, rawfh);
680  }
681  if (ps->senlogdir) {
682  char *logfn = string_join(ps->senlogdir, "/",
683  ps->uttid, ".sen", NULL);
684  FILE *senfh;
685  E_INFO("Writing senone score log file: %s\n", logfn);
686  if ((senfh = fopen(logfn, "wb")) == NULL) {
687  E_ERROR_SYSTEM("Failed to open senone score log file %s", logfn);
688  ckd_free(logfn);
689  return -1;
690  }
691  ckd_free(logfn);
692  acmod_set_senfh(ps->acmod, senfh);
693  }
694 
695  /* Start auxiliary phone loop search. */
696  if (ps->phone_loop)
697  ps_search_start(ps->phone_loop);
698 
699  return ps_search_start(ps->search);
700 }
701 
702 static int
703 ps_search_forward(ps_decoder_t *ps)
704 {
705  int nfr;
706 
707  nfr = 0;
708  while (ps->acmod->n_feat_frame > 0) {
709  int k;
710  if (ps->phone_loop)
711  if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0)
712  return k;
713  if (ps->acmod->output_frame >= ps->pl_window)
714  if ((k = ps_search_step(ps->search,
715  ps->acmod->output_frame - ps->pl_window)) < 0)
716  return k;
717  acmod_advance(ps->acmod);
718  ++ps->n_frame;
719  ++nfr;
720  }
721  return nfr;
722 }
723 
724 int
726  char const *uttid)
727 {
728  int nfr, n_searchfr;
729 
730  ps_start_utt(ps, uttid);
731  n_searchfr = 0;
732  acmod_set_insenfh(ps->acmod, senfh);
733  while ((nfr = acmod_read_scores(ps->acmod)) > 0) {
734  if ((nfr = ps_search_forward(ps)) < 0) {
735  ps_end_utt(ps);
736  return nfr;
737  }
738  n_searchfr += nfr;
739  }
740  ps_end_utt(ps);
741  acmod_set_insenfh(ps->acmod, NULL);
742 
743  return n_searchfr;
744 }
745 
746 int
748  int16 const *data,
749  size_t n_samples,
750  int no_search,
751  int full_utt)
752 {
753  int n_searchfr = 0;
754 
755  if (no_search)
756  acmod_set_grow(ps->acmod, TRUE);
757 
758  while (n_samples) {
759  int nfr;
760 
761  /* Process some data into features. */
762  if ((nfr = acmod_process_raw(ps->acmod, &data,
763  &n_samples, full_utt)) < 0)
764  return nfr;
765 
766  /* Score and search as much data as possible */
767  if (no_search)
768  continue;
769  if ((nfr = ps_search_forward(ps)) < 0)
770  return nfr;
771  n_searchfr += nfr;
772  }
773 
774  return n_searchfr;
775 }
776 
777 int
779  mfcc_t **data,
780  int32 n_frames,
781  int no_search,
782  int full_utt)
783 {
784  int n_searchfr = 0;
785 
786  if (no_search)
787  acmod_set_grow(ps->acmod, TRUE);
788 
789  while (n_frames) {
790  int nfr;
791 
792  /* Process some data into features. */
793  if ((nfr = acmod_process_cep(ps->acmod, &data,
794  &n_frames, full_utt)) < 0)
795  return nfr;
796 
797  /* Score and search as much data as possible */
798  if (no_search)
799  continue;
800  if ((nfr = ps_search_forward(ps)) < 0)
801  return nfr;
802  n_searchfr += nfr;
803  }
804 
805  return n_searchfr;
806 }
807 
808 int
810 {
811  int rv, i;
812 
813  acmod_end_utt(ps->acmod);
814 
815  /* Search any remaining frames. */
816  if ((rv = ps_search_forward(ps)) < 0) {
817  ptmr_stop(&ps->perf);
818  return rv;
819  }
820  /* Finish phone loop search. */
821  if (ps->phone_loop) {
822  if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
823  ptmr_stop(&ps->perf);
824  return rv;
825  }
826  }
827  /* Search any frames remaining in the lookahead window. */
828  for (i = ps->acmod->output_frame - ps->pl_window;
829  i < ps->acmod->output_frame; ++i)
830  ps_search_step(ps->search, i);
831  /* Finish main search. */
832  if ((rv = ps_search_finish(ps->search)) < 0) {
833  ptmr_stop(&ps->perf);
834  return rv;
835  }
836  ptmr_stop(&ps->perf);
837 
838  /* Log a backtrace if requested. */
839  if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
840  char const *uttid, *hyp;
841  ps_seg_t *seg;
842  int32 score;
843 
844  hyp = ps_get_hyp(ps, &score, &uttid);
845  E_INFO("%s: %s (%d)\n", uttid, hyp, score);
846  E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
847  "word", "start", "end", "pprob", "ascr", "lscr", "lback");
848  for (seg = ps_seg_iter(ps, &score); seg;
849  seg = ps_seg_next(seg)) {
850  char const *word;
851  int sf, ef;
852  int32 post, lscr, ascr, lback;
853 
854  word = ps_seg_word(seg);
855  ps_seg_frames(seg, &sf, &ef);
856  post = ps_seg_prob(seg, &ascr, &lscr, &lback);
857  E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
858  word, sf, ef, logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
859  }
860  }
861  return rv;
862 }
863 
864 char const *
865 ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
866 {
867  char const *hyp;
868 
869  ptmr_start(&ps->perf);
870  hyp = ps_search_hyp(ps->search, out_best_score);
871  if (out_uttid)
872  *out_uttid = ps->uttid;
873  ptmr_stop(&ps->perf);
874  return hyp;
875 }
876 
877 int32
878 ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
879 {
880  int32 prob;
881 
882  ptmr_start(&ps->perf);
883  prob = ps_search_prob(ps->search);
884  if (out_uttid)
885  *out_uttid = ps->uttid;
886  ptmr_stop(&ps->perf);
887  return prob;
888 }
889 
890 ps_seg_t *
891 ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
892 {
893  ps_seg_t *itor;
894 
895  ptmr_start(&ps->perf);
896  itor = ps_search_seg_iter(ps->search, out_best_score);
897  ptmr_stop(&ps->perf);
898  return itor;
899 }
900 
901 ps_seg_t *
903 {
904  return ps_search_seg_next(seg);
905 }
906 
907 char const *
909 {
910  return seg->word;
911 }
912 
913 void
914 ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
915 {
916  if (out_sf) *out_sf = seg->sf;
917  if (out_ef) *out_ef = seg->ef;
918 }
919 
920 int32
921 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
922 {
923  if (out_ascr) *out_ascr = seg->ascr;
924  if (out_lscr) *out_lscr = seg->lscr;
925  if (out_lback) *out_lback = seg->lback;
926  return seg->prob;
927 }
928 
929 void
931 {
932  ps_search_seg_free(seg);
933 }
934 
935 ps_lattice_t *
937 {
938  return ps_search_lattice(ps->search);
939 }
940 
941 ps_nbest_t *
942 ps_nbest(ps_decoder_t *ps, int sf, int ef,
943  char const *ctx1, char const *ctx2)
944 {
945  ps_lattice_t *dag;
946  ngram_model_t *lmset;
947  ps_astar_t *nbest;
948  float32 lwf;
949  int32 w1, w2;
950 
951  if (ps->search == NULL)
952  return NULL;
953  if ((dag = ps_get_lattice(ps)) == NULL)
954  return NULL;
955 
956  /* FIXME: This is all quite specific to N-Gram search. Either we
957  * should make N-best a method for each search module or it needs
958  * to be abstracted to work for N-Gram and FSG. */
959  if (0 != strcmp(ps_search_name(ps->search), "ngram")) {
960  lmset = NULL;
961  lwf = 1.0f;
962  }
963  else {
964  lmset = ((ngram_search_t *)ps->search)->lmset;
965  lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
966  }
967 
968  w1 = ctx1 ? dict_wordid(ps_search_dict(ps->search), ctx1) : -1;
969  w2 = ctx2 ? dict_wordid(ps_search_dict(ps->search), ctx2) : -1;
970  nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2);
971 
972  return (ps_nbest_t *)nbest;
973 }
974 
975 void
977 {
978  ps_astar_finish(nbest);
979 }
980 
981 ps_nbest_t *
983 {
984  ps_latpath_t *next;
985 
986  next = ps_astar_next(nbest);
987  if (next == NULL) {
988  ps_nbest_free(nbest);
989  return NULL;
990  }
991  return nbest;
992 }
993 
994 char const *
995 ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
996 {
997  if (nbest->top == NULL)
998  return NULL;
999  if (out_score) *out_score = nbest->top->score;
1000  return ps_astar_hyp(nbest, nbest->top);
1001 }
1002 
1003 ps_seg_t *
1004 ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
1005 {
1006  if (nbest->top == NULL)
1007  return NULL;
1008  if (out_score) *out_score = nbest->top->score;
1009  return ps_astar_seg_iter(nbest, nbest->top, 1.0);
1010 }
1011 
1012 int
1014 {
1015  return ps->acmod->output_frame + 1;
1016 }
1017 
1018 void
1019 ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
1020  double *out_ncpu, double *out_nwall)
1021 {
1022  int32 frate;
1023 
1024  frate = cmd_ln_int32_r(ps->config, "-frate");
1025  *out_nspeech = (double)ps->acmod->output_frame / frate;
1026  *out_ncpu = ps->perf.t_cpu;
1027  *out_nwall = ps->perf.t_elapsed;
1028 }
1029 
1030 void
1031 ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
1032  double *out_ncpu, double *out_nwall)
1033 {
1034  int32 frate;
1035 
1036  frate = cmd_ln_int32_r(ps->config, "-frate");
1037  *out_nspeech = (double)ps->n_frame / frate;
1038  *out_ncpu = ps->perf.t_tot_cpu;
1039  *out_nwall = ps->perf.t_tot_elapsed;
1040 }
1041 
1042 void
1043 ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
1044  cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
1045  dict2pid_t *d2p)
1046 {
1047  search->vt = vt;
1048  search->config = config;
1049  search->acmod = acmod;
1050  if (d2p)
1051  search->d2p = dict2pid_retain(d2p);
1052  else
1053  search->d2p = NULL;
1054  if (dict) {
1055  search->dict = dict_retain(dict);
1056  search->start_wid = dict_startwid(dict);
1057  search->finish_wid = dict_finishwid(dict);
1058  search->silence_wid = dict_silwid(dict);
1059  search->n_words = dict_size(dict);
1060  }
1061  else {
1062  search->dict = NULL;
1063  search->start_wid = search->finish_wid = search->silence_wid = -1;
1064  search->n_words = 0;
1065  }
1066 }
1067 
1068 void
1069 ps_search_base_reinit(ps_search_t *search, dict_t *dict,
1070  dict2pid_t *d2p)
1071 {
1072  dict_free(search->dict);
1073  dict2pid_free(search->d2p);
1074  /* FIXME: _retain() should just return NULL if passed NULL. */
1075  if (dict) {
1076  search->dict = dict_retain(dict);
1077  search->start_wid = dict_startwid(dict);
1078  search->finish_wid = dict_finishwid(dict);
1079  search->silence_wid = dict_silwid(dict);
1080  search->n_words = dict_size(dict);
1081  }
1082  else {
1083  search->dict = NULL;
1084  search->start_wid = search->finish_wid = search->silence_wid = -1;
1085  search->n_words = 0;
1086  }
1087  if (d2p)
1088  search->d2p = dict2pid_retain(d2p);
1089  else
1090  search->d2p = NULL;
1091 }
1092 
1093 
1094 void
1095 ps_search_deinit(ps_search_t *search)
1096 {
1097  /* FIXME: We will have refcounting on acmod, config, etc, at which
1098  * point we will free them here too. */
1099  dict_free(search->dict);
1100  dict2pid_free(search->d2p);
1101  ckd_free(search->hyp_str);
1102  ps_lattice_free(search->dag);
1103 }