49 #include <sphinxbase/prim_type.h>
50 #include <sphinxbase/err.h>
51 #include <sphinxbase/cmd_ln.h>
52 #include <sphinxbase/strfuncs.h>
53 #include <sphinxbase/byteorder.h>
54 #include <sphinxbase/feat.h>
55 #include <sphinxbase/bio.h>
58 #include "cmdln_macro.h"
60 #include "s2_semi_mgau.h"
65 static const arg_t feat_defn[] = {
66 waveform_to_cepstral_command_line_macro(),
67 cepstral_to_feature_command_line_macro(),
/* Fallback for builds that do not define WORDS_BIGENDIAN: it is given the
 * value 1 here, which makes every `if (!WORDS_BIGENDIAN)` test further down
 * in this file evaluate to false.
 * NOTE(review): presumably the build configuration is expected to define it
 * as 0 on little-endian targets; if it merely leaves it undefined there, the
 * code guarded by those tests never runs -- confirm against the build setup. */
71 #ifndef WORDS_BIGENDIAN
72 #define WORDS_BIGENDIAN 1
75 static int32 acmod_process_mfcbuf(
acmod_t *acmod);
80 char const *mdeffn, *tmatfn, *mllrfn;
83 if ((mdeffn = cmd_ln_str_r(acmod->
config,
"-mdef")) == NULL) {
84 E_ERROR(
"Acoustic model definition is not specified neither with -mdef option nor with -hmm\n");
88 if ((acmod->
mdef = bin_mdef_read(acmod->
config, mdeffn)) == NULL) {
89 E_ERROR(
"Failed to read acoustic model definition from %s\n", mdeffn);
94 if ((tmatfn = cmd_ln_str_r(acmod->
config,
"-tmat")) == NULL) {
95 E_ERROR(
"No tmat file specified\n");
98 acmod->
tmat = tmat_init(tmatfn, acmod->
lmath,
99 cmd_ln_float32_r(acmod->
config,
"-tmatfloor"),
103 if ((cmd_ln_str_r(acmod->
config,
"-mean") == NULL)
104 || (cmd_ln_str_r(acmod->
config,
"-var") == NULL)
105 || (cmd_ln_str_r(acmod->
config,
"-tmat") == NULL)) {
106 E_ERROR(
"No mean/var/tmat files specified\n");
110 if (cmd_ln_str_r(acmod->
config,
"-senmgau")) {
111 E_INFO(
"Using general multi-stream GMM computation\n");
113 if (acmod->
mgau == NULL)
117 E_INFO(
"Attempting to use SCHMM computation module\n");
118 if ((acmod->
mgau = s2_semi_mgau_init(acmod)) == NULL) {
119 E_INFO(
"Attempting to use PTHMM computation module\n");
120 if ((acmod->
mgau = ptm_mgau_init(acmod, acmod->
mdef)) == NULL) {
121 E_INFO(
"Falling back to general multi-stream GMM computation\n");
123 if (acmod->
mgau == NULL)
130 if ((mllrfn = cmd_ln_str_r(acmod->
config,
"-mllr"))) {
141 acmod_init_feat(
acmod_t *acmod)
144 feat_init(cmd_ln_str_r(acmod->
config,
"-feat"),
145 cmn_type_from_str(cmd_ln_str_r(acmod->
config,
"-cmn")),
146 cmd_ln_boolean_r(acmod->
config,
"-varnorm"),
147 agc_type_from_str(cmd_ln_str_r(acmod->
config,
"-agc")),
148 1, cmd_ln_int32_r(acmod->
config,
"-ceplen"));
149 if (acmod->
fcb == NULL)
152 if (cmd_ln_str_r(acmod->
config,
"-lda")) {
153 E_INFO(
"Reading linear feature transformation from %s\n",
154 cmd_ln_str_r(acmod->
config,
"-lda"));
155 if (feat_read_lda(acmod->
fcb,
156 cmd_ln_str_r(acmod->
config,
"-lda"),
157 cmd_ln_int32_r(acmod->
config,
"-ldadim")) < 0)
161 if (cmd_ln_str_r(acmod->
config,
"-svspec")) {
163 E_INFO(
"Using subvector specification %s\n",
164 cmd_ln_str_r(acmod->
config,
"-svspec"));
165 if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->
config,
"-svspec"))) == NULL)
167 if ((feat_set_subvecs(acmod->
fcb, subvecs)) < 0)
171 if (cmd_ln_exists_r(acmod->
config,
"-agcthresh")
172 && 0 != strcmp(cmd_ln_str_r(acmod->
config,
"-agc"),
"none")) {
173 agc_set_threshold(acmod->
fcb->agc_struct,
174 cmd_ln_float32_r(acmod->
config,
"-agcthresh"));
177 if (acmod->
fcb->cmn_struct
178 && cmd_ln_exists_r(acmod->
config,
"-cmninit")) {
179 char *c, *cc, *vallist;
182 vallist = ckd_salloc(cmd_ln_str_r(acmod->
config,
"-cmninit"));
185 while (nvals < acmod->fcb->cmn_struct->veclen
186 && (cc = strchr(c,
',')) != NULL) {
188 acmod->
fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
192 if (nvals < acmod->fcb->cmn_struct->veclen && *c !=
'\0') {
193 acmod->
fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
201 acmod_fe_mismatch(
acmod_t *acmod, fe_t *fe)
204 if (cmd_ln_int32_r(acmod->
config,
"-ceplen") != fe_get_output_size(fe)) {
205 E_ERROR(
"Configured feature length %d doesn't match feature extraction output size %d\n",
206 cmd_ln_int32_r(acmod->
config,
"-ceplen"),
207 fe_get_output_size(fe));
216 acmod_feat_mismatch(
acmod_t *acmod, feat_t *fcb)
219 if (0 != strcmp(cmd_ln_str_r(acmod->
config,
"-feat"), feat_name(fcb)))
222 if (cmd_ln_int32_r(acmod->
config,
"-ceplen") != feat_cepsize(fcb))
229 acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
232 char const *featparams;
234 acmod = ckd_calloc(1,
sizeof(*acmod));
236 acmod->
lmath = lmath;
240 if ((featparams = cmd_ln_str_r(acmod->
config,
"-featparams"))) {
241 if (cmd_ln_parse_file_r(acmod->
config, feat_defn, featparams, FALSE) != NULL) {
242 E_INFO(
"Parsed model-specific feature parameters from %s\n", featparams);
248 if (acmod_fe_mismatch(acmod, fe))
255 cmd_ln_retain(config);
256 acmod->
fe = fe_init_auto_r(config);
257 if (acmod->
fe == NULL)
259 if (acmod_fe_mismatch(acmod, acmod->
fe))
263 if (acmod_feat_mismatch(acmod, fcb))
270 if (acmod_init_feat(acmod) < 0)
275 if (acmod_init_am(acmod) < 0)
298 acmod->
compallsen = cmd_ln_boolean_r(config,
"-compallsen");
312 feat_free(acmod->
fcb);
316 ckd_free_2d((
void **)acmod->
mfc_buf);
321 fclose(acmod->
mfcfh);
323 fclose(acmod->
rawfh);
325 fclose(acmod->
senfh);
333 bin_mdef_free(acmod->
mdef);
335 tmat_free(acmod->
tmat);
337 ps_mgau_free(acmod->
mgau);
350 ps_mgau_transform(acmod->
mgau, mllr);
358 char nsenstr[64], logbasestr[64];
360 sprintf(nsenstr,
"%d", bin_mdef_n_sen(acmod->
mdef));
361 sprintf(logbasestr,
"%f", logmath_get_base(acmod->
lmath));
362 return bio_writehdr(logfh,
364 "mdef_file", cmd_ln_str_r(acmod->
config,
"-mdef"),
366 "logbase", logbasestr, NULL);
373 fclose(acmod->
senfh);
374 acmod->
senfh = logfh;
386 fclose(acmod->
mfcfh);
387 acmod->
mfcfh = logfh;
388 fwrite(&rv, 4, 1, acmod->
mfcfh);
396 fclose(acmod->
rawfh);
397 acmod->
rawfh = logfh;
402 acmod_grow_feat_buf(
acmod_t *acmod,
int nfr)
404 mfcc_t ***new_feat_buf;
406 new_feat_buf = feat_array_alloc(acmod->
fcb, nfr);
408 memcpy(new_feat_buf[0][0], acmod->
feat_buf[0][0],
410 * feat_dimension(acmod->
fcb)
428 acmod_grow_feat_buf(acmod, 128);
436 fe_start_utt(acmod->
fe);
460 fe_end_utt(acmod->
fe, acmod->
mfc_buf[inptr], &nfr);
464 nfr = acmod_process_mfcbuf(acmod);
468 outlen = (ftell(acmod->
mfcfh) - 4) / 4;
469 if (!WORDS_BIGENDIAN)
472 if ((rv = fseek(acmod->
mfcfh, 0, SEEK_SET)) == 0) {
473 fwrite(&outlen, 4, 1, acmod->
mfcfh);
475 fclose(acmod->
mfcfh);
479 fclose(acmod->
rawfh);
484 fclose(acmod->
senfh);
493 mfcc_t **cep,
int n_frames)
496 int32 *ptr = (int32 *)cep[0];
498 n = n_frames * feat_cepsize(acmod->
fcb);
500 if (!WORDS_BIGENDIAN) {
501 for (i = 0; i < (n *
sizeof(mfcc_t)); ++i) {
506 if (fwrite(cep[0],
sizeof(mfcc_t), n, acmod->
mfcfh) != n) {
507 E_ERROR_SYSTEM(
"Failed to write %d values to log file", n);
511 if (!WORDS_BIGENDIAN) {
512 for (i = 0; i < (n *
sizeof(mfcc_t)); ++i) {
520 acmod_process_full_cep(
acmod_t *acmod,
528 acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
533 acmod->
feat_buf = feat_array_alloc(acmod->
fcb, *inout_n_frames);
539 nfr = feat_s2mfc2feat_live(acmod->
fcb, *inout_cep, inout_n_frames,
543 *inout_cep += *inout_n_frames;
549 acmod_process_full_raw(
acmod_t *acmod,
550 int16
const **inout_raw,
551 size_t *inout_n_samps)
558 fwrite(*inout_raw, 2, *inout_n_samps, acmod->
rawfh);
560 if (fe_process_frames(acmod->
fe, NULL, inout_n_samps, NULL, &nfr) < 0)
564 acmod->
mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->
fe),
570 fe_start_utt(acmod->
fe);
571 if (fe_process_frames(acmod->
fe, inout_raw, inout_n_samps,
574 fe_end_utt(acmod->
fe, acmod->
mfc_buf[nfr], &ntail);
578 nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
587 acmod_process_mfcbuf(
acmod_t *acmod)
596 int saved_state = acmod->
state;
609 acmod->
state = saved_state;
621 int16
const **inout_raw,
622 size_t *inout_n_samps,
629 return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
633 if (inout_n_samps && *inout_n_samps) {
634 int16
const *prev_audio_inptr = *inout_raw;
645 if (fe_process_frames(acmod->
fe, inout_raw, inout_n_samps,
646 acmod->
mfc_buf + inptr, &ncep1) < 0)
650 fwrite(prev_audio_inptr, 2,
651 *inout_raw - prev_audio_inptr,
653 prev_audio_inptr = *inout_raw;
669 assert(inptr + ncep <= acmod->n_mfc_alloc);
670 if (fe_process_frames(acmod->
fe, inout_raw, inout_n_samps,
671 acmod->
mfc_buf + inptr, &ncep) < 0)
675 fwrite(prev_audio_inptr, 2,
676 *inout_raw - prev_audio_inptr, acmod->
rawfh);
677 prev_audio_inptr = *inout_raw;
685 return acmod_process_mfcbuf(acmod);
694 int32 nfeat, ncep, inptr;
699 return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
703 acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
706 orig_n_frames = ncep = nfeat = *inout_n_frames;
710 nfeat += feat_window_size(acmod->
fcb);
712 nfeat -= feat_window_size(acmod->
fcb);
719 acmod_grow_feat_buf(acmod, acmod->
n_feat_alloc + nfeat);
738 int saved_state = acmod->
state;
743 nfeat = feat_s2mfc2feat_live(acmod->
fcb, *inout_cep,
756 *inout_n_frames -= ncep1;
760 acmod->
state = saved_state;
763 nfeat = feat_s2mfc2feat_live(acmod->
fcb, *inout_cep,
773 *inout_n_frames -= ncep;
777 return orig_n_frames - *inout_n_frames;
802 for (i = 0; i < feat_dimension1(acmod->
fcb); ++i)
804 feat[i], feat_dimension2(acmod->
fcb, i) *
sizeof(**feat));
/* Read the header of the senone input file (acmod->insenfh) with
 * bio_readhdr() and check the "n_sen" and "logbase" entries against the
 * loaded model definition and log-math base, reporting mismatches through
 * E_ERROR.  The name/value arrays obtained from bio_readhdr() are released
 * with bio_hdrarg_free(). */
812 acmod_read_senfh_header(
acmod_t *acmod)
818 if (bio_readhdr(acmod->
insenfh, &name, &val, &swap) < 0)
/* Walk the NULL-terminated list of header argument names. */
820 for (i = 0; name[i] != NULL; ++i) {
821 if (!strcmp(name[i],
"n_sen")) {
822 if (atoi(val[i]) != bin_mdef_n_sen(acmod->
mdef)) {
823 E_ERROR(
"Number of senones in senone file (%d) does not match mdef (%d)\n",
824 atoi(val[i]), bin_mdef_n_sen(acmod->
mdef));
828 if (!strcmp(name[i],
"logbase")) {
/* NOTE(review): the operand is a floating-point difference (atof() returns
 * double).  If this resolves to the integer abs() from <stdlib.h>, the value
 * is converted to int first, so any difference smaller than 1.0 becomes 0 and
 * the 0.001 tolerance never triggers; fabs() looks like what was intended --
 * confirm which abs is in scope here. */
829 if (abs(atof(val[i]) - logmath_get_base(acmod->
lmath)) > 0.001) {
830 E_ERROR(
"Logbase in senone file (%f) does not match acmod (%f)\n",
831 atof(val[i]), logmath_get_base(acmod->
lmath));
/* bio_hdrarg_free() appears twice in the visible lines; presumably one call
 * sits on an error path and one on the normal path -- the surrounding control
 * flow is not visible here. */
837 bio_hdrarg_free(name, val);
840 bio_hdrarg_free(name, val);
854 return acmod_read_senfh_header(acmod);
862 E_ERROR(
"Circular feature buffer cannot be rewound (output frame %d, alloc %d)\n",
893 int16
const *senscr, FILE *senfh)
908 n_active2 = n_active;
909 if (fwrite(&n_active2, 2, 1, senfh) != 1)
911 if (n_active == bin_mdef_n_sen(acmod->
mdef)) {
912 if (fwrite(senscr, 2, n_active, senfh) != n_active)
917 if (fwrite(active, 1, n_active, senfh) != n_active)
919 for (i = n = 0; i < n_active; ++i) {
921 if (fwrite(senscr + n, 2, 1, senfh) != 1)
927 E_ERROR_SYSTEM(
"Failed to write frame to senone file");
/* Read one frame of senone scores from a senone file: a 2-byte active count
 * is read into n_active, 2-byte scores are read into acmod->senone_scores,
 * and a failure is reported with E_ERROR_SYSTEM. */
935 acmod_read_scores_internal(
acmod_t *acmod)
/* NOTE(review): fread() returns the number of items read as a size_t, so for
 * a one-item read the result is 0 or 1 and can never be negative.  The `< 0`
 * tests on this read and on the score read below therefore cannot detect EOF
 * or a short read; comparing against the requested item count (1) would. */
950 if ((rv = fread(&n_active, 2, 1, senfh)) < 0)
972 for (j = n + 1; j < sen; ++j)
974 if ((rv = fread(acmod->
senone_scores + sen, 2, 1, senfh)) < 0)
981 while (n < bin_mdef_n_sen(acmod->
mdef))
986 E_ERROR_SYSTEM(
"Failed to read frame from senone file");
1007 if ((rv = acmod_read_scores_internal(acmod)) != 1)
1014 E_DEBUG(1,(
"Frame %d has %d active states\n",
1027 calc_frame_idx(
acmod_t *acmod,
int *inout_frame_idx)
1032 if (inout_frame_idx == NULL)
1034 else if (*inout_frame_idx < 0)
1035 frame_idx = acmod->
output_frame + 1 + *inout_frame_idx;
1037 frame_idx = *inout_frame_idx;
1043 calc_feat_idx(
acmod_t *acmod,
int frame_idx)
1045 int n_backfr, feat_idx;
1048 if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) {
1049 E_ERROR(
"Frame %d outside queue of %d frames, %d alloc (%d > %d), cannot score\n",
1066 int frame_idx, feat_idx;
1069 frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1072 if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1075 if (inout_frame_idx)
1076 *inout_frame_idx = frame_idx;
1084 int frame_idx, feat_idx;
1087 frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1093 if (inout_frame_idx)
1094 *inout_frame_idx = frame_idx;
1099 if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1105 if (acmod_read_scores_internal(acmod) < 0)
1113 ps_mgau_frame_eval(acmod->
mgau,
1122 if (inout_frame_idx)
1123 *inout_frame_idx = frame_idx;
1133 E_DEBUG(1,(
"Frame %d has %d active states\n", frame_idx, acmod->
n_senone_active));
1146 for (i = 0; i < bin_mdef_n_sen(acmod->
mdef); ++i) {
1149 *out_best_senid = i;
1158 if (*senscr < best) {
1160 *out_best_senid = i;
/* Set bit hmm_mpx_senid(h,i) in (a)->senone_active_vec, but only when
 * hmm_mpx_ssid(h,i) is not BAD_SSID.
 * NOTE(review): this expands to a bare `if` statement with no
 * do { } while (0) wrapper, so placing it directly before an `else` would
 * bind that `else` to the macro's `if`.  The uses visible in this file are
 * standalone statements. */
1177 #define MPX_BITVEC_SET(a,h,i) \
1178 if (hmm_mpx_ssid(h,i) != BAD_SSID) \
1179 bitvec_set((a)->senone_active_vec, hmm_mpx_senid(h,i))
/* Unconditionally set bit hmm_nonmpx_senid(h,i) in (a)->senone_active_vec. */
1180 #define NONMPX_BITVEC_SET(a,h,i) \
1181 bitvec_set((a)->senone_active_vec, \
1182 hmm_nonmpx_senid(h,i))
1191 if (hmm_is_mpx(hmm)) {
1192 switch (hmm_n_emit_state(hmm)) {
1194 MPX_BITVEC_SET(acmod, hmm, 4);
1195 MPX_BITVEC_SET(acmod, hmm, 3);
1197 MPX_BITVEC_SET(acmod, hmm, 2);
1198 MPX_BITVEC_SET(acmod, hmm, 1);
1199 MPX_BITVEC_SET(acmod, hmm, 0);
1202 for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1203 MPX_BITVEC_SET(acmod, hmm, i);
1208 switch (hmm_n_emit_state(hmm)) {
1210 NONMPX_BITVEC_SET(acmod, hmm, 4);
1211 NONMPX_BITVEC_SET(acmod, hmm, 3);
1213 NONMPX_BITVEC_SET(acmod, hmm, 2);
1214 NONMPX_BITVEC_SET(acmod, hmm, 1);
1215 NONMPX_BITVEC_SET(acmod, hmm, 0);
1218 for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1219 NONMPX_BITVEC_SET(acmod, hmm, i);
1228 int32 w, l, n,
b, total_dists, total_words, extra_bits;
1231 total_dists = bin_mdef_n_sen(acmod->
mdef);
1236 total_words = total_dists / BITVEC_BITS;
1237 extra_bits = total_dists % BITVEC_BITS;
1242 for (b = 0; b < BITVEC_BITS; ++
b) {
1243 if (*flagptr & (1UL << b)) {
1244 int32 sen = w * BITVEC_BITS +
b;
1245 int32 delta = sen - l;
1248 while (delta > 255) {
1258 for (b = 0; b < extra_bits; ++
b) {
1259 if (*flagptr & (1UL << b)) {
1260 int32 sen = w * BITVEC_BITS +
b;
1261 int32 delta = sen - l;
1264 while (delta > 255) {
1274 E_DEBUG(1, (
"acmod_flags2list: %d active in frame %d\n",