SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 #include <stdio.h> 00038 #include <string.h> 00039 #include <math.h> 00040 #include <stdlib.h> 00041 #include <assert.h> 00042 #ifdef _WIN32_WCE 00043 #include <windows.h> 00044 #else 00045 #include <time.h> 00046 #endif 00047 00048 #ifdef HAVE_CONFIG_H 00049 #include <config.h> 00050 #endif 00051 00052 #include "sphinxbase/prim_type.h" 00053 #include "sphinxbase/byteorder.h" 00054 #include "sphinxbase/fixpoint.h" 00055 #include "sphinxbase/genrand.h" 00056 #include "sphinxbase/err.h" 00057 #include "sphinxbase/cmd_ln.h" 00058 #include "sphinxbase/ckd_alloc.h" 00059 00060 #include "fe_internal.h" 00061 #include "fe_warp.h" 00062 00063 static const arg_t fe_args[] = { 00064 waveform_to_cepstral_command_line_macro(), 00065 { NULL, 0, NULL, NULL } 00066 }; 00067 00068 int 00069 fe_parse_general_params(cmd_ln_t *config, fe_t * fe) 00070 { 00071 int j, frate; 00072 00073 fe->config = config; 00074 fe->sampling_rate = cmd_ln_float32_r(config, "-samprate"); 00075 frate = cmd_ln_int32_r(config, "-frate"); 00076 if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) { 00077 E_ERROR 00078 ("Frame rate %d can not be bigger than sample rate %.02f\n", 00079 frate, fe->sampling_rate); 00080 return -1; 00081 } 00082 00083 fe->frame_rate = (int16)frate; 00084 if (cmd_ln_boolean_r(config, "-dither")) { 00085 fe->dither = 1; 00086 fe->seed = cmd_ln_int32_r(config, "-seed"); 00087 } 00088 #ifdef WORDS_BIGENDIAN 00089 fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1; 00090 #else 00091 fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1; 00092 #endif 00093 fe->window_length = cmd_ln_float32_r(config, "-wlen"); 00094 fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha"); 00095 00096 fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep"); 00097 fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft"); 00098 00099 /* Check FFT size, compute FFT order (log_2(n)) */ 00100 for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) { 00101 if (((j % 2) != 0) || (fe->fft_size <= 0)) { 00102 E_ERROR("fft: number of points must be a power of 2 (is %d)\n", 00103 fe->fft_size); 00104 return -1; 00105 } 00106 } 00107 /* Verify that FFT size is greater or equal to window length. */ 00108 if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) { 00109 E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n", 00110 (int)(fe->window_length * fe->sampling_rate)); 00111 return -1; 00112 } 00113 00114 fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc"); 00115 00116 if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct")) 00117 fe->transform = DCT_II; 00118 else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy")) 00119 fe->transform = LEGACY_DCT; 00120 else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk")) 00121 fe->transform = DCT_HTK; 00122 else { 00123 E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n"); 00124 return -1; 00125 } 00126 00127 if (cmd_ln_boolean_r(config, "-logspec")) 00128 fe->log_spec = RAW_LOG_SPEC; 00129 if (cmd_ln_boolean_r(config, "-smoothspec")) 00130 fe->log_spec = SMOOTH_LOG_SPEC; 00131 00132 return 0; 00133 } 00134 00135 static int 00136 fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel) 00137 { 00138 mel->sampling_rate = fe->sampling_rate; 00139 mel->fft_size = fe->fft_size; 00140 mel->num_cepstra = fe->num_cepstra; 00141 mel->num_filters = cmd_ln_int32_r(config, "-nfilt"); 00142 00143 if (fe->log_spec) 00144 fe->feature_dimension = mel->num_filters; 00145 else 00146 fe->feature_dimension = fe->num_cepstra; 00147 00148 mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf"); 00149 mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf"); 00150 00151 mel->doublewide = cmd_ln_boolean_r(config, "-doublebw"); 00152 00153 mel->warp_type = cmd_ln_str_r(config, "-warp_type"); 00154 mel->warp_params = cmd_ln_str_r(config, "-warp_params"); 00155 mel->lifter_val = cmd_ln_int32_r(config, "-lifter"); 00156 00157 mel->unit_area = cmd_ln_boolean_r(config, "-unit_area"); 00158 mel->round_filters = cmd_ln_boolean_r(config, "-round_filters"); 00159 00160 if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) { 00161 E_ERROR("Failed to initialize the warping function.\n"); 00162 return -1; 00163 } 00164 fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate); 00165 return 0; 00166 } 00167 00168 void 00169 fe_print_current(fe_t const *fe) 00170 { 00171 E_INFO("Current FE Parameters:\n"); 00172 E_INFO("\tSampling Rate: %f\n", fe->sampling_rate); 00173 E_INFO("\tFrame Size: %d\n", fe->frame_size); 00174 E_INFO("\tFrame Shift: %d\n", fe->frame_shift); 00175 E_INFO("\tFFT Size: %d\n", fe->fft_size); 00176 E_INFO("\tLower Frequency: %g\n", 00177 fe->mel_fb->lower_filt_freq); 00178 E_INFO("\tUpper Frequency: %g\n", 00179 fe->mel_fb->upper_filt_freq); 00180 E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters); 00181 E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps); 00182 E_INFO("\tStart Utt Status: %d\n", fe->start_flag); 00183 E_INFO("Will %sremove DC offset at frame level\n", 00184 fe->remove_dc ? "" : "not "); 00185 if (fe->dither) { 00186 E_INFO("Will add dither to audio\n"); 00187 E_INFO("Dither seeded with %d\n", fe->seed); 00188 } 00189 else { 00190 E_INFO("Will not add dither to audio\n"); 00191 } 00192 if (fe->mel_fb->lifter_val) { 00193 E_INFO("Will apply sine-curve liftering, period %d\n", 00194 fe->mel_fb->lifter_val); 00195 } 00196 E_INFO("Will %snormalize filters to unit area\n", 00197 fe->mel_fb->unit_area ? "" : "not "); 00198 E_INFO("Will %sround filter frequencies to DFT points\n", 00199 fe->mel_fb->round_filters ? "" : "not "); 00200 E_INFO("Will %suse double bandwidth in mel filter\n", 00201 fe->mel_fb->doublewide ? "" : "not "); 00202 } 00203 00204 fe_t * 00205 fe_init_auto() 00206 { 00207 return fe_init_auto_r(cmd_ln_get()); 00208 } 00209 00210 fe_t * 00211 fe_init_auto_r(cmd_ln_t *config) 00212 { 00213 fe_t *fe; 00214 00215 fe = ckd_calloc(1, sizeof(*fe)); 00216 fe->refcount = 1; 00217 00218 /* transfer params to front end */ 00219 if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) { 00220 fe_free(fe); 00221 return NULL; 00222 } 00223 00224 /* compute remaining fe parameters */ 00225 /* We add 0.5 so approximate the float with the closest 00226 * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4 00227 */ 00228 fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5); 00229 fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5); 00230 fe->prior = 0; 00231 fe->frame_counter = 0; 00232 00233 assert (fe->frame_shift > 1); 00234 00235 if (fe->frame_size > (fe->fft_size)) { 00236 E_WARN 00237 ("Number of FFT points has to be a power of 2 higher than %d\n", 00238 (fe->frame_size)); 00239 fe_free(fe); 00240 return (NULL); 00241 } 00242 00243 if (fe->dither) 00244 fe_init_dither(fe->seed); 00245 00246 /* establish buffers for overflow samps and hamming window */ 00247 fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16)); 00248 fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t)); 00249 00250 /* create hamming window */ 00251 fe_create_hamming(fe->hamming_window, fe->frame_size); 00252 00253 /* init and fill appropriate filter structure */ 00254 fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb)); 00255 00256 /* transfer params to mel fb */ 00257 fe_parse_melfb_params(config, fe, fe->mel_fb); 00258 fe_build_melfilters(fe->mel_fb); 00259 fe_compute_melcosine(fe->mel_fb); 00260 00261 /* Create temporary FFT, spectrum and mel-spectrum buffers. */ 00262 /* FIXME: Gosh there are a lot of these. */ 00263 fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch)); 00264 fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame)); 00265 fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec)); 00266 fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec)); 00267 00268 /* create twiddle factors */ 00269 fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc)); 00270 fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss)); 00271 fe_create_twiddle(fe); 00272 00273 if (cmd_ln_boolean_r(config, "-verbose")) { 00274 fe_print_current(fe); 00275 } 00276 00277 /*** Z.A.B. ***/ 00278 /*** Initialize the overflow buffers ***/ 00279 fe_start_utt(fe); 00280 return fe; 00281 } 00282 00283 arg_t const * 00284 fe_get_args(void) 00285 { 00286 return fe_args; 00287 } 00288 00289 const cmd_ln_t * 00290 fe_get_config(fe_t *fe) 00291 { 00292 return fe->config; 00293 } 00294 00295 void 00296 fe_init_dither(int32 seed) 00297 { 00298 if (seed < 0) { 00299 E_INFO("You are using the internal mechanism to generate the seed.\n"); 00300 #ifdef _WIN32_WCE 00301 s3_rand_seed(GetTickCount()); 00302 #else 00303 s3_rand_seed((long) time(0)); 00304 #endif 00305 } 00306 else { 00307 E_INFO("You are using %d as the seed.\n", seed); 00308 s3_rand_seed(seed); 00309 } 00310 } 00311 00312 int32 00313 fe_start_utt(fe_t * fe) 00314 { 00315 fe->num_overflow_samps = 0; 00316 memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16)); 00317 fe->start_flag = 1; 00318 fe->prior = 0; 00319 return 0; 00320 } 00321 00322 int 00323 fe_get_output_size(fe_t *fe) 00324 { 00325 return (int)fe->feature_dimension; 00326 } 00327 00328 void 00329 fe_get_input_size(fe_t *fe, int *out_frame_shift, 00330 int *out_frame_size) 00331 { 00332 if (out_frame_shift) 00333 *out_frame_shift = fe->frame_shift; 00334 if (out_frame_size) 00335 *out_frame_size = fe->frame_size; 00336 } 00337 00338 int32 00339 fe_process_frame(fe_t * fe, int16 const *spch, int32 nsamps, mfcc_t * fr_cep) 00340 { 00341 fe_read_frame(fe, spch, nsamps); 00342 return fe_write_frame(fe, fr_cep); 00343 } 00344 00345 int 00346 fe_process_frames(fe_t *fe, 00347 int16 const **inout_spch, 00348 size_t *inout_nsamps, 00349 mfcc_t **buf_cep, 00350 int32 *inout_nframes) 00351 { 00352 int32 frame_count; 00353 int outidx, i, n, n_overflow, orig_n_overflow; 00354 int16 const *orig_spch; 00355 00356 /* In the special case where there is no output buffer, return the 00357 * maximum number of frames which would be generated. */ 00358 if (buf_cep == NULL) { 00359 if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) 00360 *inout_nframes = 0; 00361 else 00362 *inout_nframes = 1 00363 + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size) 00364 / fe->frame_shift); 00365 return *inout_nframes; 00366 } 00367 00368 /* Are there not enough samples to make at least 1 frame? */ 00369 if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) { 00370 if (*inout_nsamps > 0) { 00371 /* Append them to the overflow buffer. */ 00372 memcpy(fe->overflow_samps + fe->num_overflow_samps, 00373 *inout_spch, *inout_nsamps * (sizeof(int16))); 00374 fe->num_overflow_samps += *inout_nsamps; 00375 /* Update input-output pointers and counters. */ 00376 *inout_spch += *inout_nsamps; 00377 *inout_nsamps = 0; 00378 } 00379 /* We produced no frames of output, sorry! */ 00380 *inout_nframes = 0; 00381 return 0; 00382 } 00383 00384 /* Can't write a frame? Then do nothing! */ 00385 if (*inout_nframes < 1) { 00386 *inout_nframes = 0; 00387 return 0; 00388 } 00389 00390 /* Keep track of the original start of the buffer. */ 00391 orig_spch = *inout_spch; 00392 orig_n_overflow = fe->num_overflow_samps; 00393 /* How many frames will we be able to get? */ 00394 frame_count = 1 00395 + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size) 00396 / fe->frame_shift); 00397 /* Limit it to the number of output frames available. */ 00398 if (frame_count > *inout_nframes) 00399 frame_count = *inout_nframes; 00400 /* Index of output frame. */ 00401 outidx = 0; 00402 00403 /* Start processing, taking care of any incoming overflow. */ 00404 if (fe->num_overflow_samps) { 00405 int offset = fe->frame_size - fe->num_overflow_samps; 00406 00407 /* Append start of spch to overflow samples to make a full frame. */ 00408 memcpy(fe->overflow_samps + fe->num_overflow_samps, 00409 *inout_spch, offset * sizeof(**inout_spch)); 00410 fe_read_frame(fe, fe->overflow_samps, fe->frame_size); 00411 assert(outidx < frame_count); 00412 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0) 00413 return -1; 00414 outidx += n; 00415 /* Update input-output pointers and counters. */ 00416 *inout_spch += offset; 00417 *inout_nsamps -= offset; 00418 fe->num_overflow_samps -= fe->frame_shift; 00419 } 00420 else { 00421 fe_read_frame(fe, *inout_spch, fe->frame_size); 00422 assert(outidx < frame_count); 00423 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0) 00424 return -1; 00425 outidx += n; 00426 /* Update input-output pointers and counters. */ 00427 *inout_spch += fe->frame_size; 00428 *inout_nsamps -= fe->frame_size; 00429 } 00430 00431 /* Process all remaining frames. */ 00432 for (i = 1; i < frame_count; ++i) { 00433 assert(*inout_nsamps >= (size_t)fe->frame_shift); 00434 00435 fe_shift_frame(fe, *inout_spch, fe->frame_shift); 00436 assert(outidx < frame_count); 00437 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0) 00438 return -1; 00439 outidx += n; 00440 /* Update input-output pointers and counters. */ 00441 *inout_spch += fe->frame_shift; 00442 *inout_nsamps -= fe->frame_shift; 00443 /* Amount of data behind the original input which is still needed. */ 00444 if (fe->num_overflow_samps > 0) 00445 fe->num_overflow_samps -= fe->frame_shift; 00446 } 00447 00448 /* How many relevant overflow samples are there left? */ 00449 if (fe->num_overflow_samps <= 0) { 00450 /* Maximum number of overflow samples past *inout_spch to save. */ 00451 n_overflow = *inout_nsamps; 00452 if (n_overflow > fe->frame_shift) 00453 n_overflow = fe->frame_shift; 00454 fe->num_overflow_samps = fe->frame_size - fe->frame_shift; 00455 /* Make sure this isn't an illegal read! */ 00456 if (fe->num_overflow_samps > *inout_spch - orig_spch) 00457 fe->num_overflow_samps = *inout_spch - orig_spch; 00458 fe->num_overflow_samps += n_overflow; 00459 if (fe->num_overflow_samps > 0) { 00460 memcpy(fe->overflow_samps, 00461 *inout_spch - (fe->frame_size - fe->frame_shift), 00462 fe->num_overflow_samps * sizeof(**inout_spch)); 00463 /* Update the input pointer to cover this stuff. */ 00464 *inout_spch += n_overflow; 00465 *inout_nsamps -= n_overflow; 00466 } 00467 } 00468 else { 00469 /* There is still some relevant data left in the overflow buffer. */ 00470 /* Shift existing data to the beginning. */ 00471 memmove(fe->overflow_samps, 00472 fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps, 00473 fe->num_overflow_samps * sizeof(*fe->overflow_samps)); 00474 /* Copy in whatever we had in the original speech buffer. */ 00475 n_overflow = *inout_spch - orig_spch + *inout_nsamps; 00476 if (n_overflow > fe->frame_size - fe->num_overflow_samps) 00477 n_overflow = fe->frame_size - fe->num_overflow_samps; 00478 memcpy(fe->overflow_samps + fe->num_overflow_samps, 00479 orig_spch, n_overflow * sizeof(*orig_spch)); 00480 fe->num_overflow_samps += n_overflow; 00481 /* Advance the input pointers. */ 00482 if (n_overflow > *inout_spch - orig_spch) { 00483 n_overflow -= (*inout_spch - orig_spch); 00484 *inout_spch += n_overflow; 00485 *inout_nsamps -= n_overflow; 00486 } 00487 } 00488 00489 /* Finally update the frame counter with the number of frames we procesed. */ 00490 *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */ 00491 return 0; 00492 } 00493 00494 int 00495 fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps, 00496 mfcc_t *** cep_block, int32 * nframes) 00497 { 00498 mfcc_t **cep; 00499 int rv; 00500 00501 /* Figure out how many frames we will need. */ 00502 fe_process_frames(fe, NULL, &nsamps, NULL, nframes); 00503 /* Create the output buffer (it has to exist, even if there are no output frames). */ 00504 if (*nframes) 00505 cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep)); 00506 else 00507 cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep)); 00508 /* Now just call fe_process_frames() with the allocated buffer. */ 00509 rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes); 00510 *cep_block = cep; 00511 00512 return rv; 00513 } 00514 00515 00516 int32 00517 fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes) 00518 { 00519 /* Process any remaining data. */ 00520 if (fe->num_overflow_samps > 0) { 00521 fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps); 00522 *nframes = fe_write_frame(fe, cepvector); 00523 } 00524 else { 00525 *nframes = 0; 00526 } 00527 00528 /* reset overflow buffers... */ 00529 fe->num_overflow_samps = 0; 00530 fe->start_flag = 0; 00531 00532 return 0; 00533 } 00534 00535 fe_t * 00536 fe_retain(fe_t *fe) 00537 { 00538 ++fe->refcount; 00539 return fe; 00540 } 00541 00542 int 00543 fe_free(fe_t * fe) 00544 { 00545 if (fe == NULL) 00546 return 0; 00547 if (--fe->refcount > 0) 00548 return fe->refcount; 00549 00550 /* kill FE instance - free everything... */ 00551 if (fe->mel_fb) { 00552 if (fe->mel_fb->mel_cosine) 00553 fe_free_2d((void *) fe->mel_fb->mel_cosine); 00554 ckd_free(fe->mel_fb->lifter); 00555 ckd_free(fe->mel_fb->spec_start); 00556 ckd_free(fe->mel_fb->filt_start); 00557 ckd_free(fe->mel_fb->filt_width); 00558 ckd_free(fe->mel_fb->filt_coeffs); 00559 ckd_free(fe->mel_fb); 00560 } 00561 ckd_free(fe->spch); 00562 ckd_free(fe->frame); 00563 ckd_free(fe->ccc); 00564 ckd_free(fe->sss); 00565 ckd_free(fe->spec); 00566 ckd_free(fe->mfspec); 00567 ckd_free(fe->overflow_samps); 00568 ckd_free(fe->hamming_window); 00569 cmd_ln_free_r(fe->config); 00570 ckd_free(fe); 00571 00572 return 0; 00573 } 00574 00578 int32 00579 fe_mfcc_to_float(fe_t * fe, 00580 mfcc_t ** input, float32 ** output, int32 nframes) 00581 { 00582 int32 i; 00583 00584 #ifndef FIXED_POINT 00585 if ((void *) input == (void *) output) 00586 return nframes * fe->feature_dimension; 00587 #endif 00588 for (i = 0; i < nframes * fe->feature_dimension; ++i) 00589 output[0][i] = MFCC2FLOAT(input[0][i]); 00590 00591 return i; 00592 } 00593 00597 int32 00598 fe_float_to_mfcc(fe_t * fe, 00599 float32 ** input, mfcc_t ** output, int32 nframes) 00600 { 00601 int32 i; 00602 00603 #ifndef FIXED_POINT 00604 if ((void *) input == (void *) output) 00605 return nframes * fe->feature_dimension; 00606 #endif 00607 for (i = 0; i < nframes * fe->feature_dimension; ++i) 00608 output[0][i] = FLOAT2MFCC(input[0][i]); 00609 00610 return i; 00611 } 00612 00613 int32 00614 fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) 00615 { 00616 #ifdef FIXED_POINT 00617 fe_spec2cep(fe, fr_spec, fr_cep); 00618 #else /* ! FIXED_POINT */ 00619 powspec_t *powspec; 00620 int32 i; 00621 00622 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); 00623 for (i = 0; i < fe->mel_fb->num_filters; ++i) 00624 powspec[i] = (powspec_t) fr_spec[i]; 00625 fe_spec2cep(fe, powspec, fr_cep); 00626 ckd_free(powspec); 00627 #endif /* ! FIXED_POINT */ 00628 return 0; 00629 } 00630 00631 int32 00632 fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) 00633 { 00634 #ifdef FIXED_POINT 00635 fe_dct2(fe, fr_spec, fr_cep, 0); 00636 #else /* ! FIXED_POINT */ 00637 powspec_t *powspec; 00638 int32 i; 00639 00640 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); 00641 for (i = 0; i < fe->mel_fb->num_filters; ++i) 00642 powspec[i] = (powspec_t) fr_spec[i]; 00643 fe_dct2(fe, powspec, fr_cep, 0); 00644 ckd_free(powspec); 00645 #endif /* ! FIXED_POINT */ 00646 return 0; 00647 } 00648 00649 int32 00650 fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec) 00651 { 00652 #ifdef FIXED_POINT 00653 fe_dct3(fe, fr_cep, fr_spec); 00654 #else /* ! FIXED_POINT */ 00655 powspec_t *powspec; 00656 int32 i; 00657 00658 powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); 00659 fe_dct3(fe, fr_cep, powspec); 00660 for (i = 0; i < fe->mel_fb->num_filters; ++i) 00661 fr_spec[i] = (mfcc_t) powspec[i]; 00662 ckd_free(powspec); 00663 #endif /* ! FIXED_POINT */ 00664 return 0; 00665 }