SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * feat.c -- Feature vector description and cepstra->feature computation. 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1996 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.22 2006/02/23 03:59:40 arthchan2003 00050 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc. 00051 * 00052 * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003 00053 * Free stuffs in cmn and feat corectly. 00054 * 00055 * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003 00056 * Add message to show the directory which the feature is searched for. 00057 * 00058 * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003 00059 * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point. 00060 * 00061 * Revision 1.21 2005/06/22 03:29:35 arthchan2003 00062 * Makefile.am s for all subdirectory of libs3decoder/ 00063 * 00064 * Revision 1.4 2005/04/21 23:50:26 archan 00065 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used. 00066 * 00067 * Revision 1.3 2005/03/30 01:22:46 archan 00068 * Fixed mistakes in last updates. Add 00069 * 00070 * 00071 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) 00072 * Adding feat_free() to free allocated memory 00073 * 00074 * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University 00075 * Modified feat_s2mfc2feat_block() to handle empty buffers at 00076 * the end of an utterance 00077 * 00078 * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University 00079 * Added feat_s2mfc2feat_block() to allow feature computation 00080 * from sequences of blocks of cepstral vectors 00081 * 00082 * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00083 * Major changes to accommodate arbitrary feature input types. Added 00084 * feat_read(), moved various cep2feat functions from other files into 00085 * this one. Also, made this module object-oriented with the feat_t type. 00086 * Changed definition of s2mfc_read to let the caller manage MFC buffers. 00087 * 00088 * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00089 * Added unistd.h include. 00090 * 00091 * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00092 * Added check for sf argument to s2mfc_read being within file size. 00093 * 00094 * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00095 * Added sf, ef parameters to s2mfc_read(). 00096 * 00097 * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00098 * Added feat_cepsize(). 00099 * Added different feature-handling (s2_4x, s3_1x39 at this point). 00100 * Moved feature-dependent functions to feature-dependent files. 00101 * 00102 * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00103 * Moved constant declarations from feat.h into here. 00104 * 00105 * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00106 * Created. 00107 */ 00108 00109 00110 /* 00111 * This module encapsulates different feature streams used by the Sphinx group. New 00112 * stream types can be added by augmenting feat_init() and providing an accompanying 00113 * compute_feat function. It also provides a "generic" feature vector definition for 00114 * handling "arbitrary" speech input feature types (see the last section in feat_init()). 00115 * In this case the speech input data should already be feature vectors; no computation, 00116 * such as MFC->feature conversion, is available or needed. 00117 */ 00118 00119 #include <assert.h> 00120 #include <string.h> 00121 #ifdef HAVE_CONFIG_H 00122 #include <config.h> 00123 #endif 00124 00125 #ifdef _MSC_VER 00126 #pragma warning (disable: 4244 4996) 00127 #endif 00128 00129 #include "sphinxbase/fe.h" 00130 #include "sphinxbase/feat.h" 00131 #include "sphinxbase/bio.h" 00132 #include "sphinxbase/pio.h" 00133 #include "sphinxbase/cmn.h" 00134 #include "sphinxbase/agc.h" 00135 #include "sphinxbase/err.h" 00136 #include "sphinxbase/ckd_alloc.h" 00137 #include "sphinxbase/prim_type.h" 00138 #include "sphinxbase/glist.h" 00139 00140 #define FEAT_VERSION "1.0" 00141 #define FEAT_DCEP_WIN 2 00142 00143 #ifdef DUMP_FEATURES 00144 static void 00145 cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text) 00146 { 00147 int32 i, j; 00148 00149 E_INFO("%s\n", text); 00150 for (i = 0; i < nfr; i++) { 00151 for (j = 0; j < fcb->cepsize; j++) { 00152 fprintf(stderr, "%f ", MFCC2FLOAT(mfc[i][j])); 00153 } 00154 fprintf(stderr, "\n"); 00155 } 00156 } 00157 static void 00158 feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text) 00159 { 00160 E_INFO("%s\n", text); 00161 feat_print(fcb, feat, nfr, stderr); 00162 } 00163 #else /* !DUMP_FEATURES */ 00164 #define cep_dump_dbg(fcb,mfc,nfr,text) 00165 #define feat_print_dbg(fcb,mfc,nfr,text) 00166 #endif 00167 00168 int32 ** 00169 parse_subvecs(char const *str) 00170 { 00171 char const *strp; 00172 int32 n, n2, l; 00173 glist_t dimlist; /* List of dimensions in one subvector */ 00174 glist_t veclist; /* List of dimlists (subvectors) */ 00175 int32 **subvec; 00176 gnode_t *gn, *gn2; 00177 00178 veclist = NULL; 00179 00180 strp = str; 00181 for (;;) { 00182 dimlist = NULL; 00183 00184 for (;;) { 00185 if (sscanf(strp, "%d%n", &n, &l) != 1) 00186 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str, 00187 strp - str); 00188 strp += l; 00189 00190 if (*strp == '-') { 00191 strp++; 00192 00193 if (sscanf(strp, "%d%n", &n2, &l) != 1) 00194 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str, 00195 strp - str); 00196 strp += l; 00197 } 00198 else 00199 n2 = n; 00200 00201 if ((n < 0) || (n > n2)) 00202 E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str, 00203 strp - str); 00204 00205 for (; n <= n2; n++) { 00206 gnode_t *gn; 00207 for (gn = dimlist; gn; gn = gnode_next(gn)) 00208 if (gnode_int32(gn) == n) 00209 break; 00210 if (gn != NULL) 00211 E_FATAL("'%s': Duplicate dimension ending @pos %d\n", 00212 str, strp - str); 00213 00214 dimlist = glist_add_int32(dimlist, n); 00215 } 00216 00217 if ((*strp == '\0') || (*strp == '/')) 00218 break; 00219 00220 if (*strp != ',') 00221 E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str); 00222 00223 strp++; 00224 } 00225 00226 veclist = glist_add_ptr(veclist, (void *) dimlist); 00227 00228 if (*strp == '\0') 00229 break; 00230 00231 assert(*strp == '/'); 00232 strp++; 00233 } 00234 00235 /* Convert the glists to arrays; remember the glists are in reverse order of the input! */ 00236 n = glist_count(veclist); /* #Subvectors */ 00237 subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *)); /* +1 for sentinel */ 00238 subvec[n] = NULL; /* sentinel */ 00239 00240 for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) { 00241 gn2 = (glist_t) gnode_ptr(gn); 00242 00243 n2 = glist_count(gn2); /* Length of this subvector */ 00244 if (n2 <= 0) 00245 E_FATAL("'%s': 0-length subvector\n", str); 00246 00247 subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32)); /* +1 for sentinel */ 00248 subvec[n][n2] = -1; /* sentinel */ 00249 00250 for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2) 00251 subvec[n][n2] = gnode_int32(gn2); 00252 assert((n2 < 0) && (!gn2)); 00253 } 00254 assert((n < 0) && (!gn)); 00255 00256 /* Free the glists */ 00257 for (gn = veclist; gn; gn = gnode_next(gn)) { 00258 gn2 = (glist_t) gnode_ptr(gn); 00259 glist_free(gn2); 00260 } 00261 glist_free(veclist); 00262 00263 return subvec; 00264 } 00265 00266 void 00267 subvecs_free(int32 **subvecs) 00268 { 00269 int32 **sv; 00270 00271 for (sv = subvecs; sv && *sv; ++sv) 00272 ckd_free(*sv); 00273 ckd_free(subvecs); 00274 } 00275 00276 int 00277 feat_set_subvecs(feat_t *fcb, int32 **subvecs) 00278 { 00279 int32 **sv; 00280 int32 n_sv, n_dim, i; 00281 00282 if (subvecs == NULL) { 00283 subvecs_free(fcb->subvecs); 00284 ckd_free(fcb->sv_buf); 00285 ckd_free(fcb->sv_len); 00286 fcb->n_sv = 0; 00287 fcb->subvecs = NULL; 00288 fcb->sv_len = NULL; 00289 fcb->sv_buf = NULL; 00290 fcb->sv_dim = 0; 00291 return 0; 00292 } 00293 00294 if (fcb->n_stream != 1) { 00295 E_ERROR("Subvector specifications require single-stream features!"); 00296 return -1; 00297 } 00298 00299 n_sv = 0; 00300 n_dim = 0; 00301 for (sv = subvecs; sv && *sv; ++sv) { 00302 int32 *d; 00303 00304 for (d = *sv; d && *d != -1; ++d) { 00305 ++n_dim; 00306 } 00307 ++n_sv; 00308 } 00309 if (n_dim > feat_dimension(fcb)) { 00310 E_ERROR("Total dimensionality of subvector specification %d " 00311 "> feature dimensionality %d\n", n_dim, feat_dimension(fcb)); 00312 return -1; 00313 } 00314 00315 fcb->n_sv = n_sv; 00316 fcb->subvecs = subvecs; 00317 fcb->sv_len = ckd_calloc(n_sv, sizeof(*fcb->sv_len)); 00318 fcb->sv_buf = ckd_calloc(n_dim, sizeof(*fcb->sv_buf)); 00319 fcb->sv_dim = n_dim; 00320 for (i = 0; i < n_sv; ++i) { 00321 int32 *d; 00322 for (d = subvecs[i]; d && *d != -1; ++d) { 00323 ++fcb->sv_len[i]; 00324 } 00325 } 00326 00327 return 0; 00328 } 00329 00333 static void 00334 feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr) 00335 { 00336 uint32 i; 00337 00338 if (fcb->subvecs == NULL) 00339 return; 00340 for (i = 0; i < nfr; ++i) { 00341 mfcc_t *out; 00342 int32 j; 00343 00344 out = fcb->sv_buf; 00345 for (j = 0; j < fcb->n_sv; ++j) { 00346 int32 *d; 00347 for (d = fcb->subvecs[j]; d && *d != -1; ++d) { 00348 *out++ = inout_feat[i][0][*d]; 00349 } 00350 } 00351 memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf)); 00352 } 00353 } 00354 00355 mfcc_t *** 00356 feat_array_alloc(feat_t * fcb, int32 nfr) 00357 { 00358 int32 i, j, k; 00359 mfcc_t *data, *d, ***feat; 00360 00361 assert(fcb); 00362 assert(nfr > 0); 00363 assert(feat_dimension(fcb) > 0); 00364 00365 /* Make sure to use the dimensionality of the features *before* 00366 LDA and subvector projection. */ 00367 k = 0; 00368 for (i = 0; i < fcb->n_stream; ++i) 00369 k += fcb->stream_len[i]; 00370 assert(k >= feat_dimension(fcb)); 00371 assert(k >= fcb->sv_dim); 00372 00373 feat = 00374 (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *)); 00375 data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t)); 00376 00377 for (i = 0; i < nfr; i++) { 00378 d = data + i * k; 00379 for (j = 0; j < feat_dimension1(fcb); j++) { 00380 feat[i][j] = d; 00381 d += feat_dimension2(fcb, j); 00382 } 00383 } 00384 00385 return feat; 00386 } 00387 00388 mfcc_t *** 00389 feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr) 00390 { 00391 int32 i, k, cf; 00392 mfcc_t*** new_feat; 00393 00394 assert(fcb); 00395 assert(nfr > 0); 00396 assert(ofr > 0); 00397 assert(feat_dimension(fcb) > 0); 00398 00399 /* Make sure to use the dimensionality of the features *before* 00400 LDA and subvector projection. */ 00401 k = 0; 00402 for (i = 0; i < fcb->n_stream; ++i) 00403 k += fcb->stream_len[i]; 00404 assert(k >= feat_dimension(fcb)); 00405 assert(k >= fcb->sv_dim); 00406 00407 new_feat = feat_array_alloc(fcb, nfr); 00408 00409 cf = (nfr < ofr) ? nfr : ofr; 00410 memcpy(new_feat[0][0], old_feat[0][0], cf * k * sizeof(mfcc_t)); 00411 00412 feat_array_free(old_feat); 00413 00414 return new_feat; 00415 } 00416 00417 void 00418 feat_array_free(mfcc_t ***feat) 00419 { 00420 ckd_free(feat[0][0]); 00421 ckd_free_2d((void **)feat); 00422 } 00423 00424 static void 00425 feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00426 { 00427 mfcc_t *f; 00428 mfcc_t *w, *_w; 00429 mfcc_t *w1, *w_1, *_w1, *_w_1; 00430 mfcc_t d1, d2; 00431 int32 i, j; 00432 00433 assert(fcb); 00434 assert(feat_cepsize(fcb) == 13); 00435 assert(feat_n_stream(fcb) == 4); 00436 assert(feat_stream_len(fcb, 0) == 12); 00437 assert(feat_stream_len(fcb, 1) == 24); 00438 assert(feat_stream_len(fcb, 2) == 3); 00439 assert(feat_stream_len(fcb, 3) == 12); 00440 assert(feat_window_size(fcb) == 4); 00441 00442 /* CEP; skip C0 */ 00443 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t)); 00444 00445 /* 00446 * DCEP(SHORT): mfc[2] - mfc[-2] 00447 * DCEP(LONG): mfc[4] - mfc[-4] 00448 */ 00449 w = mfc[2] + 1; /* +1 to skip C0 */ 00450 _w = mfc[-2] + 1; 00451 00452 f = feat[1]; 00453 for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */ 00454 f[i] = w[i] - _w[i]; 00455 00456 w = mfc[4] + 1; /* +1 to skip C0 */ 00457 _w = mfc[-4] + 1; 00458 00459 for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++) /* Long-term */ 00460 f[i] = w[j] - _w[j]; 00461 00462 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */ 00463 w1 = mfc[3] + 1; /* Final +1 to skip C0 */ 00464 _w1 = mfc[-1] + 1; 00465 w_1 = mfc[1] + 1; 00466 _w_1 = mfc[-3] + 1; 00467 00468 f = feat[3]; 00469 for (i = 0; i < feat_cepsize(fcb) - 1; i++) { 00470 d1 = w1[i] - _w1[i]; 00471 d2 = w_1[i] - _w_1[i]; 00472 00473 f[i] = d1 - d2; 00474 } 00475 00476 /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */ 00477 f = feat[2]; 00478 f[0] = mfc[0][0]; 00479 f[1] = mfc[2][0] - mfc[-2][0]; 00480 00481 d1 = mfc[3][0] - mfc[-1][0]; 00482 d2 = mfc[1][0] - mfc[-3][0]; 00483 f[2] = d1 - d2; 00484 } 00485 00486 00487 static void 00488 feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00489 { 00490 mfcc_t *f; 00491 mfcc_t *w, *_w; 00492 mfcc_t *w1, *w_1, *_w1, *_w_1; 00493 mfcc_t d1, d2; 00494 int32 i; 00495 00496 assert(fcb); 00497 assert(feat_cepsize(fcb) == 13); 00498 assert(feat_n_stream(fcb) == 1); 00499 assert(feat_stream_len(fcb, 0) == 39); 00500 assert(feat_window_size(fcb) == 3); 00501 00502 /* CEP; skip C0 */ 00503 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t)); 00504 /* 00505 * DCEP: mfc[2] - mfc[-2]; 00506 */ 00507 f = feat[0] + feat_cepsize(fcb) - 1; 00508 w = mfc[2] + 1; /* +1 to skip C0 */ 00509 _w = mfc[-2] + 1; 00510 00511 for (i = 0; i < feat_cepsize(fcb) - 1; i++) 00512 f[i] = w[i] - _w[i]; 00513 00514 /* POW: C0, DC0, D2C0 */ 00515 f += feat_cepsize(fcb) - 1; 00516 00517 f[0] = mfc[0][0]; 00518 f[1] = mfc[2][0] - mfc[-2][0]; 00519 00520 d1 = mfc[3][0] - mfc[-1][0]; 00521 d2 = mfc[1][0] - mfc[-3][0]; 00522 f[2] = d1 - d2; 00523 00524 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */ 00525 f += 3; 00526 00527 w1 = mfc[3] + 1; /* Final +1 to skip C0 */ 00528 _w1 = mfc[-1] + 1; 00529 w_1 = mfc[1] + 1; 00530 _w_1 = mfc[-3] + 1; 00531 00532 for (i = 0; i < feat_cepsize(fcb) - 1; i++) { 00533 d1 = w1[i] - _w1[i]; 00534 d2 = w_1[i] - _w_1[i]; 00535 00536 f[i] = d1 - d2; 00537 } 00538 } 00539 00540 00541 static void 00542 feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00543 { 00544 assert(fcb); 00545 assert(feat_n_stream(fcb) == 1); 00546 assert(feat_window_size(fcb) == 0); 00547 00548 /* CEP */ 00549 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); 00550 } 00551 00552 static void 00553 feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00554 { 00555 mfcc_t *f; 00556 mfcc_t *w, *_w; 00557 int32 i; 00558 00559 assert(fcb); 00560 assert(feat_n_stream(fcb) == 1); 00561 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2); 00562 assert(feat_window_size(fcb) == 2); 00563 00564 /* CEP */ 00565 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); 00566 00567 /* 00568 * DCEP: mfc[2] - mfc[-2]; 00569 */ 00570 f = feat[0] + feat_cepsize(fcb); 00571 w = mfc[2]; 00572 _w = mfc[-2]; 00573 00574 for (i = 0; i < feat_cepsize(fcb); i++) 00575 f[i] = w[i] - _w[i]; 00576 } 00577 00578 static void 00579 feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00580 { 00581 mfcc_t *f; 00582 mfcc_t *w, *_w; 00583 mfcc_t *w1, *w_1, *_w1, *_w_1; 00584 mfcc_t d1, d2; 00585 int32 i; 00586 00587 assert(fcb); 00588 assert(feat_n_stream(fcb) == 1); 00589 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3); 00590 assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1); 00591 00592 /* CEP */ 00593 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); 00594 00595 /* 00596 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN; 00597 */ 00598 f = feat[0] + feat_cepsize(fcb); 00599 w = mfc[FEAT_DCEP_WIN]; 00600 _w = mfc[-FEAT_DCEP_WIN]; 00601 00602 for (i = 0; i < feat_cepsize(fcb); i++) 00603 f[i] = w[i] - _w[i]; 00604 00605 /* 00606 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), 00607 * where w = FEAT_DCEP_WIN 00608 */ 00609 f += feat_cepsize(fcb); 00610 00611 w1 = mfc[FEAT_DCEP_WIN + 1]; 00612 _w1 = mfc[-FEAT_DCEP_WIN + 1]; 00613 w_1 = mfc[FEAT_DCEP_WIN - 1]; 00614 _w_1 = mfc[-FEAT_DCEP_WIN - 1]; 00615 00616 for (i = 0; i < feat_cepsize(fcb); i++) { 00617 d1 = w1[i] - _w1[i]; 00618 d2 = w_1[i] - _w_1[i]; 00619 00620 f[i] = d1 - d2; 00621 } 00622 } 00623 00624 static void 00625 feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00626 { 00627 mfcc_t *f; 00628 mfcc_t *w, *_w; 00629 mfcc_t *w1, *w_1, *_w1, *_w_1; 00630 mfcc_t d1, d2; 00631 int32 i; 00632 00633 assert(fcb); 00634 assert(feat_n_stream(fcb) == 1); 00635 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4); 00636 assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2); 00637 00638 /* CEP */ 00639 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); 00640 00641 /* 00642 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN; 00643 */ 00644 f = feat[0] + feat_cepsize(fcb); 00645 w = mfc[FEAT_DCEP_WIN]; 00646 _w = mfc[-FEAT_DCEP_WIN]; 00647 00648 for (i = 0; i < feat_cepsize(fcb); i++) 00649 f[i] = w[i] - _w[i]; 00650 00651 /* 00652 * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2; 00653 */ 00654 f += feat_cepsize(fcb); 00655 w = mfc[FEAT_DCEP_WIN * 2]; 00656 _w = mfc[-FEAT_DCEP_WIN * 2]; 00657 00658 for (i = 0; i < feat_cepsize(fcb); i++) 00659 f[i] = w[i] - _w[i]; 00660 00661 /* 00662 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), 00663 * where w = FEAT_DCEP_WIN 00664 */ 00665 f += feat_cepsize(fcb); 00666 00667 w1 = mfc[FEAT_DCEP_WIN + 1]; 00668 _w1 = mfc[-FEAT_DCEP_WIN + 1]; 00669 w_1 = mfc[FEAT_DCEP_WIN - 1]; 00670 _w_1 = mfc[-FEAT_DCEP_WIN - 1]; 00671 00672 for (i = 0; i < feat_cepsize(fcb); i++) { 00673 d1 = w1[i] - _w1[i]; 00674 d2 = w_1[i] - _w_1[i]; 00675 00676 f[i] = d1 - d2; 00677 } 00678 } 00679 00680 static void 00681 feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00682 { 00683 int32 win, i, j; 00684 00685 win = feat_window_size(fcb); 00686 00687 /* Concatenate input features */ 00688 for (i = -win; i <= win; ++i) { 00689 uint32 spos = 0; 00690 00691 for (j = 0; j < feat_n_stream(fcb); ++j) { 00692 uint32 stream_len; 00693 00694 /* Unscale the stream length by the window. */ 00695 stream_len = feat_stream_len(fcb, j) / (2 * win + 1); 00696 memcpy(feat[j] + ((i + win) * stream_len), 00697 mfc[i] + spos, 00698 stream_len * sizeof(mfcc_t)); 00699 spos += stream_len; 00700 } 00701 } 00702 } 00703 00704 feat_t * 00705 feat_init(char const *type, cmn_type_t cmn, int32 varnorm, 00706 agc_type_t agc, int32 breport, int32 cepsize) 00707 { 00708 feat_t *fcb; 00709 00710 if (cepsize == 0) 00711 cepsize = 13; 00712 if (breport) 00713 E_INFO 00714 ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n", 00715 type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]); 00716 00717 fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t)); 00718 fcb->refcount = 1; 00719 fcb->name = (char *) ckd_salloc(type); 00720 if (strcmp(type, "s2_4x") == 0) { 00721 /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */ 00722 if (cepsize != 13) { 00723 E_ERROR("s2_4x features require cepsize == 13\n"); 00724 ckd_free(fcb); 00725 return NULL; 00726 } 00727 fcb->cepsize = 13; 00728 fcb->n_stream = 4; 00729 fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32)); 00730 fcb->stream_len[0] = 12; 00731 fcb->stream_len[1] = 24; 00732 fcb->stream_len[2] = 3; 00733 fcb->stream_len[3] = 12; 00734 fcb->out_dim = 51; 00735 fcb->window_size = 4; 00736 fcb->compute_feat = feat_s2_4x_cep2feat; 00737 } 00738 else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) { 00739 /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */ 00740 if (cepsize != 13) { 00741 E_ERROR("s2_4x features require cepsize == 13\n"); 00742 ckd_free(fcb); 00743 return NULL; 00744 } 00745 fcb->cepsize = 13; 00746 fcb->n_stream = 1; 00747 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); 00748 fcb->stream_len[0] = 39; 00749 fcb->out_dim = 39; 00750 fcb->window_size = 3; 00751 fcb->compute_feat = feat_s3_1x39_cep2feat; 00752 } 00753 else if (strncmp(type, "1s_c_d_dd", 9) == 0) { 00754 fcb->cepsize = cepsize; 00755 fcb->n_stream = 1; 00756 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); 00757 fcb->stream_len[0] = cepsize * 3; 00758 fcb->out_dim = cepsize * 3; 00759 fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */ 00760 fcb->compute_feat = feat_1s_c_d_dd_cep2feat; 00761 } 00762 else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) { 00763 fcb->cepsize = cepsize; 00764 fcb->n_stream = 1; 00765 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); 00766 fcb->stream_len[0] = cepsize * 4; 00767 fcb->out_dim = cepsize * 4; 00768 fcb->window_size = FEAT_DCEP_WIN * 2; 00769 fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat; 00770 } 00771 else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) { 00772 /* 1-stream cep/dcep */ 00773 fcb->cepsize = cepsize; 00774 fcb->n_stream = 1; 00775 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); 00776 fcb->stream_len[0] = feat_cepsize(fcb) * 2; 00777 fcb->out_dim = fcb->stream_len[0]; 00778 fcb->window_size = 2; 00779 fcb->compute_feat = feat_s3_cep_dcep; 00780 } 00781 else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) { 00782 /* 1-stream cep */ 00783 fcb->cepsize = cepsize; 00784 fcb->n_stream = 1; 00785 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); 00786 fcb->stream_len[0] = feat_cepsize(fcb); 00787 fcb->out_dim = fcb->stream_len[0]; 00788 fcb->window_size = 0; 00789 fcb->compute_feat = feat_s3_cep; 00790 } 00791 else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) { 00792 /* 1-stream cep with frames concatenated, so called cepwin features */ 00793 if (strncmp(type, "1s_3c", 5) == 0) 00794 fcb->window_size = 3; 00795 else 00796 fcb->window_size = 4; 00797 00798 fcb->cepsize = cepsize; 00799 fcb->n_stream = 1; 00800 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); 00801 fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1); 00802 fcb->out_dim = fcb->stream_len[0]; 00803 fcb->compute_feat = feat_copy; 00804 } 00805 else { 00806 int32 i, l, k; 00807 char *strp; 00808 char *mtype = ckd_salloc(type); 00809 char *wd = ckd_salloc(type); 00810 /* 00811 * Generic definition: Format should be %d,%d,%d,...,%d (i.e., 00812 * comma separated list of feature stream widths; #items = 00813 * #streams). An optional window size (frames will be 00814 * concatenated) is also allowed, which can be specified with 00815 * a colon after the list of feature streams. 00816 */ 00817 l = strlen(mtype); 00818 k = 0; 00819 for (i = 1; i < l - 1; i++) { 00820 if (mtype[i] == ',') { 00821 mtype[i] = ' '; 00822 k++; 00823 } 00824 else if (mtype[i] == ':') { 00825 mtype[i] = '\0'; 00826 fcb->window_size = atoi(mtype + i + 1); 00827 break; 00828 } 00829 } 00830 k++; /* Presumably there are (#commas+1) streams */ 00831 fcb->n_stream = k; 00832 fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32)); 00833 00834 /* Scan individual feature stream lengths */ 00835 strp = mtype; 00836 i = 0; 00837 fcb->out_dim = 0; 00838 fcb->cepsize = 0; 00839 while (sscanf(strp, "%s%n", wd, &l) == 1) { 00840 strp += l; 00841 if ((i >= fcb->n_stream) 00842 || (sscanf(wd, "%d", &(fcb->stream_len[i])) != 1) 00843 || (fcb->stream_len[i] <= 0)) 00844 E_FATAL("Bad feature type argument\n"); 00845 /* Input size before windowing */ 00846 fcb->cepsize += fcb->stream_len[i]; 00847 if (fcb->window_size > 0) 00848 fcb->stream_len[i] *= (fcb->window_size * 2 + 1); 00849 /* Output size after windowing */ 00850 fcb->out_dim += fcb->stream_len[i]; 00851 i++; 00852 } 00853 if (i != fcb->n_stream) 00854 E_FATAL("Bad feature type argument\n"); 00855 if (fcb->cepsize != cepsize) 00856 E_FATAL("Bad feature type argument\n"); 00857 00858 /* Input is already the feature stream */ 00859 fcb->compute_feat = feat_copy; 00860 ckd_free(mtype); 00861 ckd_free(wd); 00862 } 00863 00864 if (cmn != CMN_NONE) 00865 fcb->cmn_struct = cmn_init(feat_cepsize(fcb)); 00866 fcb->cmn = cmn; 00867 fcb->varnorm = varnorm; 00868 if (agc != AGC_NONE) { 00869 fcb->agc_struct = agc_init(); 00870 /* 00871 * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things 00872 * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY 00873 * switches to EMAX 00874 */ 00875 /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */ 00876 agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0); 00877 } 00878 fcb->agc = agc; 00879 /* 00880 * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt() 00881 */ 00882 fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE, 00883 feat_cepsize(fcb), 00884 sizeof(mfcc_t)); 00885 /* This one is actually just an array of pointers to "flatten out" 00886 * wraparounds. */ 00887 fcb->tmpcepbuf = ckd_calloc(2 * feat_window_size(fcb) + 1, 00888 sizeof(*fcb->tmpcepbuf)); 00889 00890 return fcb; 00891 } 00892 00893 00894 void 00895 feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp) 00896 { 00897 int32 i, j, k; 00898 00899 for (i = 0; i < nfr; i++) { 00900 fprintf(fp, "%8d:\n", i); 00901 00902 for (j = 0; j < feat_dimension1(fcb); j++) { 00903 fprintf(fp, "\t%2d:", j); 00904 00905 for (k = 0; k < feat_dimension2(fcb, j); k++) 00906 fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k])); 00907 fprintf(fp, "\n"); 00908 } 00909 } 00910 00911 fflush(fp); 00912 } 00913 00914 static void 00915 feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt) 00916 { 00917 cmn_type_t cmn_type = fcb->cmn; 00918 00919 if (!(beginutt && endutt) 00920 && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */ 00921 cmn_type = CMN_PRIOR; 00922 00923 switch (cmn_type) { 00924 case CMN_CURRENT: 00925 cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr); 00926 break; 00927 case CMN_PRIOR: 00928 cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr); 00929 if (endutt) 00930 cmn_prior_update(fcb->cmn_struct); 00931 break; 00932 default: 00933 ; 00934 } 00935 cep_dump_dbg(fcb, mfc, nfr, "After CMN"); 00936 } 00937 00938 static void 00939 feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt) 00940 { 00941 agc_type_t agc_type = fcb->agc; 00942 00943 if (!(beginutt && endutt) 00944 && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. */ 00945 agc_type = AGC_EMAX; 00946 00947 switch (agc_type) { 00948 case AGC_MAX: 00949 agc_max(fcb->agc_struct, mfc, nfr); 00950 break; 00951 case AGC_EMAX: 00952 agc_emax(fcb->agc_struct, mfc, nfr); 00953 if (endutt) 00954 agc_emax_update(fcb->agc_struct); 00955 break; 00956 case AGC_NOISE: 00957 agc_noise(fcb->agc_struct, mfc, nfr); 00958 break; 00959 default: 00960 ; 00961 } 00962 cep_dump_dbg(fcb, mfc, nfr, "After AGC"); 00963 } 00964 00965 static void 00966 feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat) 00967 { 00968 int32 i; 00969 00970 cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)"); 00971 00972 /* Create feature vectors */ 00973 for (i = win; i < nfr - win; i++) { 00974 fcb->compute_feat(fcb, mfc + i, feat[i - win]); 00975 } 00976 00977 feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation"); 00978 00979 if (fcb->lda) { 00980 feat_lda_transform(fcb, feat, nfr - win * 2); 00981 feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA"); 00982 } 00983 00984 if (fcb->subvecs) { 00985 feat_subvec_project(fcb, feat, nfr - win * 2); 00986 feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection"); 00987 } 00988 } 00989 00990 01003 static int32 01004 feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win, 01005 int32 sf, int32 ef, 01006 mfcc_t ***out_mfc, 01007 int32 maxfr, 01008 int32 cepsize) 01009 { 01010 FILE *fp; 01011 int32 n_float32; 01012 float32 *float_feat; 01013 struct stat statbuf; 01014 int32 i, n, byterev; 01015 int32 start_pad, end_pad; 01016 mfcc_t **mfc; 01017 01018 /* Initialize the output pointer to NULL, so that any attempts to 01019 free() it if we fail before allocating it will not segfault! */ 01020 if (out_mfc) 01021 *out_mfc = NULL; 01022 E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef); 01023 if (ef >= 0 && ef <= sf) { 01024 E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf); 01025 return -1; 01026 } 01027 01028 /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */ 01029 if ((stat_retry(file, &statbuf) < 0) 01030 || ((fp = fopen(file, "rb")) == NULL)) { 01031 E_ERROR("Failed to open file '%s' for reading: %s\n", file, strerror(errno)); 01032 return -1; 01033 } 01034 01035 /* Read #floats in header */ 01036 if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) { 01037 E_ERROR("%s: fread(#floats) failed\n", file); 01038 fclose(fp); 01039 return -1; 01040 } 01041 01042 /* Check if n_float32 matches file size */ 01043 byterev = 0; 01044 if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */ 01045 n = n_float32; 01046 SWAP_INT32(&n); 01047 01048 if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */ 01049 E_ERROR 01050 ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n", 01051 file, n_float32, n_float32, statbuf.st_size, 01052 statbuf.st_size); 01053 fclose(fp); 01054 return -1; 01055 } 01056 01057 n_float32 = n; 01058 byterev = 1; 01059 } 01060 if (n_float32 <= 0) { 01061 E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32); 01062 fclose(fp); 01063 return -1; 01064 } 01065 01066 /* Convert n to #frames of input */ 01067 n = n_float32 / cepsize; 01068 if (n * cepsize != n_float32) { 01069 E_ERROR("Header size field: %d; not multiple of %d\n", n_float32, 01070 cepsize); 01071 fclose(fp); 01072 return -1; 01073 } 01074 01075 /* Check start and end frames */ 01076 if (sf > 0) { 01077 if (sf >= n) { 01078 E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file, 01079 sf, n); 01080 fclose(fp); 01081 return -1; 01082 } 01083 } 01084 if (ef < 0) 01085 ef = n-1; 01086 else if (ef >= n) { 01087 E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n", 01088 file, ef, n); 01089 ef = n-1; 01090 } 01091 01092 /* Add window to start and end frames */ 01093 sf -= win; 01094 ef += win; 01095 if (sf < 0) { 01096 start_pad = -sf; 01097 sf = 0; 01098 } 01099 else 01100 start_pad = 0; 01101 if (ef >= n) { 01102 end_pad = ef - n + 1; 01103 ef = n - 1; 01104 } 01105 else 01106 end_pad = 0; 01107 01108 /* Limit n if indicated by [sf..ef] */ 01109 if ((ef - sf + 1) < n) 01110 n = (ef - sf + 1); 01111 if (maxfr > 0 && n + start_pad + end_pad > maxfr) { 01112 E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n", 01113 file, maxfr, n + start_pad + end_pad); 01114 fclose(fp); 01115 return -1; 01116 } 01117 01118 /* If no output buffer was supplied, then skip the actual data reading. */ 01119 if (out_mfc != NULL) { 01120 /* Position at desired start frame and read actual MFC data */ 01121 mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t)); 01122 if (sf > 0) 01123 fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR); 01124 n_float32 = n * cepsize; 01125 #ifdef FIXED_POINT 01126 float_feat = ckd_calloc(n_float32, sizeof(float32)); 01127 #else 01128 float_feat = mfc[start_pad]; 01129 #endif 01130 if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) { 01131 E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize); 01132 ckd_free_2d(mfc); 01133 fclose(fp); 01134 return -1; 01135 } 01136 if (byterev) { 01137 for (i = 0; i < n_float32; i++) { 01138 SWAP_FLOAT32(&float_feat[i]); 01139 } 01140 } 01141 #ifdef FIXED_POINT 01142 for (i = 0; i < n_float32; ++i) { 01143 mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]); 01144 } 01145 ckd_free(float_feat); 01146 #endif 01147 01148 /* Normalize */ 01149 feat_cmn(fcb, mfc + start_pad, n, 1, 1); 01150 feat_agc(fcb, mfc + start_pad, n, 1, 1); 01151 01152 /* Replicate start and end frames if necessary. */ 01153 for (i = 0; i < start_pad; ++i) 01154 memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t)); 01155 for (i = 0; i < end_pad; ++i) 01156 memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1], 01157 cepsize * sizeof(mfcc_t)); 01158 01159 *out_mfc = mfc; 01160 } 01161 01162 fclose(fp); 01163 return n + start_pad + end_pad; 01164 } 01165 01166 01167 01168 int32 01169 feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext, 01170 int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr) 01171 { 01172 char *path; 01173 char *ps = "/"; 01174 int32 win, nfr; 01175 int32 file_length, cepext_length, path_length = 0; 01176 mfcc_t **mfc; 01177 01178 if (fcb->cepsize <= 0) { 01179 E_ERROR("Bad cepsize: %d\n", fcb->cepsize); 01180 return -1; 01181 } 01182 01183 if (cepext == NULL) 01184 cepext = ""; 01185 01186 /* 01187 * Create mfc filename, combining file, dir and extension if 01188 * necessary 01189 */ 01190 01191 /* 01192 * First we decide about the path. If dir is defined, then use 01193 * it. Otherwise assume the filename already contains the path. 01194 */ 01195 if (dir == NULL) { 01196 dir = ""; 01197 ps = ""; 01198 /* 01199 * This is not true but some 3rd party apps 01200 * may parse the output explicitly checking for this line 01201 */ 01202 E_INFO("At directory . (current directory)\n"); 01203 } 01204 else { 01205 E_INFO("At directory %s\n", dir); 01206 /* 01207 * Do not forget the path separator! 01208 */ 01209 path_length += strlen(dir) + 1; 01210 } 01211 01212 /* 01213 * Include cepext, if it's not already part of the filename. 01214 */ 01215 file_length = strlen(file); 01216 cepext_length = strlen(cepext); 01217 if ((file_length > cepext_length) 01218 && (strcmp(file + file_length - cepext_length, cepext) == 0)) { 01219 cepext = ""; 01220 cepext_length = 0; 01221 } 01222 01223 /* 01224 * Do not forget the '\0' 01225 */ 01226 path_length += file_length + cepext_length + 1; 01227 path = (char*) ckd_calloc(path_length, sizeof(char)); 01228 01229 #ifdef HAVE_SNPRINTF 01230 /* 01231 * Paranoia is our best friend... 01232 */ 01233 while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) { 01234 path_length = file_length; 01235 path = (char*) ckd_realloc(path, path_length * sizeof(char)); 01236 } 01237 #else 01238 sprintf(path, "%s%s%s%s", dir, ps, file, cepext); 01239 #endif 01240 01241 win = feat_window_size(fcb); 01242 /* Pad maxfr with win, so we read enough raw feature data to 01243 * calculate the requisite number of dynamic features. */ 01244 if (maxfr >= 0) 01245 maxfr += win * 2; 01246 01247 if (feat != NULL) { 01248 /* Read mfc file including window or padding if necessary. */ 01249 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize); 01250 ckd_free(path); 01251 if (nfr < 0) { 01252 ckd_free_2d((void **) mfc); 01253 return -1; 01254 } 01255 01256 /* Actually compute the features */ 01257 feat_compute_utt(fcb, mfc, nfr, win, feat); 01258 01259 ckd_free_2d((void **) mfc); 01260 } 01261 else { 01262 /* Just calculate the number of frames we would need. */ 01263 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize); 01264 ckd_free(path); 01265 if (nfr < 0) 01266 return nfr; 01267 } 01268 01269 01270 return (nfr - win * 2); 01271 } 01272 01273 static int32 01274 feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep, 01275 int32 nfr, mfcc_t *** ofeat) 01276 { 01277 mfcc_t **cepbuf; 01278 int32 i, win, cepsize; 01279 01280 win = feat_window_size(fcb); 01281 cepsize = feat_cepsize(fcb); 01282 01283 /* Copy and pad out the utterance (this requires that the 01284 * feature computation functions always access the buffer via 01285 * the frame pointers, which they do) */ 01286 cepbuf = ckd_calloc(nfr + win * 2, sizeof(mfcc_t *)); 01287 memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *)); 01288 01289 /* Do normalization before we interpolate on the boundary */ 01290 feat_cmn(fcb, cepbuf + win, nfr, 1, 1); 01291 feat_agc(fcb, cepbuf + win, nfr, 1, 1); 01292 01293 /* Now interpolate */ 01294 for (i = 0; i < win; ++i) { 01295 cepbuf[i] = fcb->cepbuf[i]; 01296 memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t)); 01297 cepbuf[nfr + win + i] = fcb->cepbuf[win + i]; 01298 memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t)); 01299 } 01300 /* Compute as usual. */ 01301 feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat); 01302 ckd_free(cepbuf); 01303 return nfr; 01304 } 01305 01306 int32 01307 feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep, 01308 int32 beginutt, int32 endutt, mfcc_t *** ofeat) 01309 { 01310 int32 win, cepsize, nbufcep; 01311 int32 i, j, nfeatvec; 01312 int32 zero = 0; 01313 01314 /* Avoid having to check this everywhere. */ 01315 if (inout_ncep == NULL) inout_ncep = &zero; 01316 01317 /* Special case for entire utterances. */ 01318 if (beginutt && endutt && *inout_ncep > 0) 01319 return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat); 01320 01321 win = feat_window_size(fcb); 01322 cepsize = feat_cepsize(fcb); 01323 01324 /* Empty the input buffer on start of utterance. */ 01325 if (beginutt) 01326 fcb->bufpos = fcb->curpos; 01327 01328 /* Calculate how much data is in the buffer already. */ 01329 nbufcep = fcb->bufpos - fcb->curpos; 01330 if (nbufcep < 0) 01331 nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos; 01332 /* Add any data that we have to replicate. */ 01333 if (beginutt && *inout_ncep > 0) 01334 nbufcep += win; 01335 if (endutt) 01336 nbufcep += win; 01337 01338 /* Only consume as much input as will fit in the buffer. */ 01339 if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) { 01340 /* We also can't overwrite the trailing window, hence the 01341 * reason why win is subtracted here. */ 01342 *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win; 01343 /* Cancel end of utterance processing. */ 01344 endutt = FALSE; 01345 } 01346 01347 /* FIXME: Don't modify the input! */ 01348 feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt); 01349 feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt); 01350 01351 /* Replicate first frame into the first win frames if we're at the 01352 * beginning of the utterance and there was some actual input to 01353 * deal with. (FIXME: Not entirely sure why that condition) */ 01354 if (beginutt && *inout_ncep > 0) { 01355 for (i = 0; i < win; i++) { 01356 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0], 01357 cepsize * sizeof(mfcc_t)); 01358 fcb->bufpos %= LIVEBUFBLOCKSIZE; 01359 } 01360 /* Move the current pointer past this data. */ 01361 fcb->curpos = fcb->bufpos; 01362 nbufcep -= win; 01363 } 01364 01365 /* Copy in frame data to the circular buffer. */ 01366 for (i = 0; i < *inout_ncep; ++i) { 01367 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i], 01368 cepsize * sizeof(mfcc_t)); 01369 fcb->bufpos %= LIVEBUFBLOCKSIZE; 01370 ++nbufcep; 01371 } 01372 01373 /* Replicate last frame into the last win frames if we're at the 01374 * end of the utterance (even if there was no input, so we can 01375 * flush the output). */ 01376 if (endutt) { 01377 int32 tpos; /* Index of last input frame. */ 01378 if (fcb->bufpos == 0) 01379 tpos = LIVEBUFBLOCKSIZE - 1; 01380 else 01381 tpos = fcb->bufpos - 1; 01382 for (i = 0; i < win; ++i) { 01383 memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos], 01384 cepsize * sizeof(mfcc_t)); 01385 fcb->bufpos %= LIVEBUFBLOCKSIZE; 01386 } 01387 } 01388 01389 /* We have to leave the trailing window of frames. */ 01390 nfeatvec = nbufcep - win; 01391 if (nfeatvec <= 0) 01392 return 0; /* Do nothing. */ 01393 01394 for (i = 0; i < nfeatvec; ++i) { 01395 /* Handle wraparound cases. */ 01396 if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) { 01397 /* Use tmpcepbuf for this case. Actually, we just need the pointers. */ 01398 for (j = -win; j <= win; ++j) { 01399 int32 tmppos = 01400 (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE; 01401 fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos]; 01402 } 01403 fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]); 01404 } 01405 else { 01406 fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]); 01407 } 01408 /* Move the read pointer forward. */ 01409 ++fcb->curpos; 01410 fcb->curpos %= LIVEBUFBLOCKSIZE; 01411 } 01412 01413 if (fcb->lda) 01414 feat_lda_transform(fcb, ofeat, nfeatvec); 01415 01416 if (fcb->subvecs) 01417 feat_subvec_project(fcb, ofeat, nfeatvec); 01418 01419 return nfeatvec; 01420 } 01421 01422 feat_t * 01423 feat_retain(feat_t *f) 01424 { 01425 ++f->refcount; 01426 return f; 01427 } 01428 01429 int 01430 feat_free(feat_t * f) 01431 { 01432 if (f == NULL) 01433 return 0; 01434 if (--f->refcount > 0) 01435 return f->refcount; 01436 01437 if (f->cepbuf) 01438 ckd_free_2d((void **) f->cepbuf); 01439 ckd_free(f->tmpcepbuf); 01440 01441 if (f->name) { 01442 ckd_free((void *) f->name); 01443 } 01444 if (f->lda) 01445 ckd_free_3d((void ***) f->lda); 01446 01447 ckd_free(f->stream_len); 01448 ckd_free(f->sv_len); 01449 ckd_free(f->sv_buf); 01450 subvecs_free(f->subvecs); 01451 01452 cmn_free(f->cmn_struct); 01453 agc_free(f->agc_struct); 01454 01455 ckd_free(f); 01456 return 0; 01457 } 01458 01459 01460 void 01461 feat_report(feat_t * f) 01462 { 01463 int i; 01464 E_INFO_NOFN("Initialization of feat_t, report:\n"); 01465 E_INFO_NOFN("Feature type = %s\n", f->name); 01466 E_INFO_NOFN("Cepstral size = %d\n", f->cepsize); 01467 E_INFO_NOFN("Number of streams = %d\n", f->n_stream); 01468 for (i = 0; i < f->n_stream; i++) { 01469 E_INFO_NOFN("Vector size of stream[%d]: %d\n", i, 01470 f->stream_len[i]); 01471 } 01472 E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv); 01473 for (i = 0; i < f->n_sv; i++) { 01474 int32 *sv; 01475 01476 E_INFO_NOFN("Components of subvector[%d]:", i); 01477 for (sv = f->subvecs[i]; sv && *sv != -1; ++sv) 01478 E_INFOCONT(" %d", *sv); 01479 E_INFOCONT("\n"); 01480 } 01481 E_INFO_NOFN("Whether CMN is used = %d\n", f->cmn); 01482 E_INFO_NOFN("Whether AGC is used = %d\n", f->agc); 01483 E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm); 01484 E_INFO_NOFN("\n"); 01485 }