SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 #include <stdio.h> 00039 #include <math.h> 00040 #include <string.h> 00041 #include <stdlib.h> 00042 #include <assert.h> 00043 00044 #ifdef HAVE_CONFIG_H 00045 #include <config.h> 00046 #endif 00047 00048 #ifdef _MSC_VER 00049 #pragma warning (disable: 4244) 00050 #endif 00051 00055 #ifndef M_PI 00056 #define M_PI 3.14159265358979323846 /* pi */ 00057 #endif // M_PI 00058 00059 #include "sphinxbase/prim_type.h" 00060 #include "sphinxbase/ckd_alloc.h" 00061 #include "sphinxbase/byteorder.h" 00062 #include "sphinxbase/fixpoint.h" 00063 #include "sphinxbase/fe.h" 00064 #include "sphinxbase/genrand.h" 00065 #include "sphinxbase/err.h" 00066 00067 #include "fe_internal.h" 00068 #include "fe_warp.h" 00069 00070 /* Use extra precision for cosines, Hamming window, pre-emphasis 00071 * coefficient, twiddle factors. */ 00072 #ifdef FIXED_POINT 00073 #define FLOAT2COS(x) FLOAT2FIX_ANY(x,30) 00074 #define COSMUL(x,y) FIXMUL_ANY(x,y,30) 00075 #else 00076 #define FLOAT2COS(x) (x) 00077 #define COSMUL(x,y) ((x)*(y)) 00078 #endif 00079 00080 #ifdef FIXED_POINT 00081 /* Internal log-addition table for natural log with radix point at 8 00082 * bits. Each entry is 256 * log(1 + e^{-n/256}). This is used in the 00083 * log-add computation: 00084 * 00085 * e^z = e^x + e^y 00086 * e^z = e^x(1 + e^{y-x}) = e^y(1 + e^{x-y}) 00087 * z = x + log(1 + e^{y-x}) = y + log(1 + e^{x-y}) 00088 * 00089 * So when y > x, z = y + logadd_table[-(x-y)] 00090 * when x > y, z = x + logadd_table[-(y-x)] 00091 */ 00092 static const unsigned char fe_logadd_table[] = { 00093 177, 177, 176, 176, 175, 175, 174, 174, 173, 173, 00094 172, 172, 172, 171, 171, 170, 170, 169, 169, 168, 00095 168, 167, 167, 166, 166, 165, 165, 164, 164, 163, 00096 163, 162, 162, 161, 161, 161, 160, 160, 159, 159, 00097 158, 158, 157, 157, 156, 156, 155, 155, 155, 154, 00098 154, 153, 153, 152, 152, 151, 151, 151, 150, 150, 00099 149, 149, 148, 148, 147, 147, 147, 146, 146, 145, 00100 145, 144, 144, 144, 143, 143, 142, 142, 141, 141, 00101 141, 140, 140, 139, 139, 138, 138, 138, 137, 137, 00102 136, 136, 136, 135, 135, 134, 134, 134, 133, 133, 00103 132, 132, 131, 131, 131, 130, 130, 129, 129, 129, 00104 128, 128, 128, 127, 127, 126, 126, 126, 125, 125, 00105 124, 124, 124, 123, 123, 123, 122, 122, 121, 121, 00106 121, 120, 120, 119, 119, 119, 118, 118, 118, 117, 00107 117, 117, 116, 116, 115, 115, 115, 114, 114, 114, 00108 113, 113, 113, 112, 112, 112, 111, 111, 110, 110, 00109 110, 109, 109, 109, 108, 108, 108, 107, 107, 107, 00110 106, 106, 106, 105, 105, 105, 104, 104, 104, 103, 00111 103, 103, 102, 102, 102, 101, 101, 101, 100, 100, 00112 100, 99, 99, 99, 98, 98, 98, 97, 97, 97, 00113 96, 96, 96, 96, 95, 95, 95, 94, 94, 94, 00114 93, 93, 93, 92, 92, 92, 92, 91, 91, 91, 00115 90, 90, 90, 89, 89, 89, 89, 88, 88, 88, 00116 87, 87, 87, 87, 86, 86, 86, 85, 85, 85, 00117 85, 84, 84, 84, 83, 83, 83, 83, 82, 82, 00118 82, 82, 81, 81, 81, 80, 80, 80, 80, 79, 00119 79, 79, 79, 78, 78, 78, 78, 77, 77, 77, 00120 77, 76, 76, 76, 75, 75, 75, 75, 74, 74, 00121 74, 74, 73, 73, 73, 73, 72, 72, 72, 72, 00122 71, 71, 71, 71, 71, 70, 70, 70, 70, 69, 00123 69, 69, 69, 68, 68, 68, 68, 67, 67, 67, 00124 67, 67, 66, 66, 66, 66, 65, 65, 65, 65, 00125 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 00126 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, 00127 60, 60, 60, 60, 59, 59, 59, 59, 59, 58, 00128 58, 58, 58, 58, 57, 57, 57, 57, 57, 56, 00129 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, 00130 54, 54, 54, 54, 53, 53, 53, 53, 53, 52, 00131 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 00132 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 00133 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 00134 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 00135 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 00136 44, 44, 43, 43, 43, 43, 43, 43, 43, 42, 00137 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 00138 41, 41, 40, 40, 40, 40, 40, 40, 40, 39, 00139 39, 39, 39, 39, 39, 39, 38, 38, 38, 38, 00140 38, 38, 38, 37, 37, 37, 37, 37, 37, 37, 00141 37, 36, 36, 36, 36, 36, 36, 36, 35, 35, 00142 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 00143 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 00144 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 00145 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 00146 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 00147 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 00148 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 00149 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, 00150 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 00151 25, 25, 25, 25, 25, 25, 25, 25, 25, 24, 00152 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 00153 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 00154 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 00155 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 00156 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 00157 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 00158 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 00159 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 00160 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 00161 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 00162 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 00163 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 00164 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 00165 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 00166 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 00167 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 00168 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 00169 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 00170 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 00171 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 00172 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 00173 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 00174 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 00175 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 00176 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 00177 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 00178 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 00179 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 00180 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 00181 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 00182 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 00183 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 00184 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 00185 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 00186 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 00187 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 00188 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 00189 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 00190 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 00191 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00192 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00193 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00194 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00195 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00196 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 00197 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00198 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00199 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00200 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00201 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00202 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 00203 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00204 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00205 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00206 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00207 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00208 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00209 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00210 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00211 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 00212 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00213 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00214 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00215 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00216 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00217 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00218 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00219 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00220 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00221 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00222 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00223 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00224 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 00225 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00226 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00227 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00228 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00229 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00230 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00231 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00232 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00233 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00234 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00235 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00236 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00237 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00238 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00239 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00240 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00241 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00242 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00243 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00244 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00245 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00246 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00247 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00248 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00249 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00250 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00251 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00252 1, 1, 1, 1, 1, 1, 1, 0 00253 }; 00254 static const int fe_logadd_table_size = sizeof(fe_logadd_table) / sizeof(fe_logadd_table[0]); 00255 00256 static fixed32 00257 fe_log_add(fixed32 x, fixed32 y) 00258 { 00259 fixed32 d, r; 00260 00261 if (x > y) { 00262 d = (x - y) >> (DEFAULT_RADIX - 8); 00263 r = x; 00264 } 00265 else { 00266 d = (y - x) >> (DEFAULT_RADIX - 8); 00267 r = y; 00268 } 00269 if (d > fe_logadd_table_size - 1) 00270 return r; 00271 else { 00272 r += ((fixed32)fe_logadd_table[d] << (DEFAULT_RADIX - 8)); 00273 /* 00274 printf("%d + %d = %d | %f + %f = %f | %f + %f = %f\n", 00275 x, y, r, FIX2FLOAT(x), FIX2FLOAT(y), FIX2FLOAT(r), 00276 exp(FIX2FLOAT(x)), exp(FIX2FLOAT(y)), exp(FIX2FLOAT(r))); 00277 */ 00278 return r; 00279 } 00280 } 00281 00282 static fixed32 00283 fe_log(float32 x) 00284 { 00285 if (x <= 0) { 00286 return MIN_FIXLOG; 00287 } 00288 else { 00289 return FLOAT2FIX(log(x)); 00290 } 00291 } 00292 #endif 00293 00294 static float32 00295 fe_mel(melfb_t *mel, float32 x) 00296 { 00297 float32 warped = fe_warp_unwarped_to_warped(mel, x); 00298 00299 return (float32) (2595.0 * log10(1.0 + warped / 700.0)); 00300 } 00301 00302 static float32 00303 fe_melinv(melfb_t *mel, float32 x) 00304 { 00305 float32 warped = (float32) (700.0 * (pow(10.0, x / 2595.0) - 1.0)); 00306 return fe_warp_warped_to_unwarped(mel, warped); 00307 } 00308 00309 int32 00310 fe_build_melfilters(melfb_t *mel_fb) 00311 { 00312 float32 melmin, melmax, melbw, fftfreq; 00313 int n_coeffs, i, j; 00314 00315 /* Filter coefficient matrix, in flattened form. */ 00316 mel_fb->spec_start = ckd_malloc(mel_fb->num_filters * sizeof(*mel_fb->spec_start)); 00317 mel_fb->filt_start = ckd_malloc(mel_fb->num_filters * sizeof(*mel_fb->filt_start)); 00318 mel_fb->filt_width = ckd_malloc(mel_fb->num_filters * sizeof(*mel_fb->filt_width)); 00319 00320 /* First calculate the widths of each filter. */ 00321 /* Minimum and maximum frequencies in mel scale. */ 00322 melmin = fe_mel(mel_fb, mel_fb->lower_filt_freq); 00323 melmax = fe_mel(mel_fb, mel_fb->upper_filt_freq); 00324 00325 /* Width of filters in mel scale */ 00326 melbw = (melmax - melmin) / (mel_fb->num_filters + 1); 00327 if (mel_fb->doublewide) { 00328 melmin -= melbw; 00329 melmax += melbw; 00330 if ((fe_melinv(mel_fb, melmin) < 0) || 00331 (fe_melinv(mel_fb, melmax) > mel_fb->sampling_rate / 2)) { 00332 E_WARN 00333 ("Out of Range: low filter edge = %f (%f)\n", 00334 fe_melinv(mel_fb, melmin), 0.0); 00335 E_WARN 00336 (" high filter edge = %f (%f)\n", 00337 fe_melinv(mel_fb, melmax), mel_fb->sampling_rate / 2); 00338 return FE_INVALID_PARAM_ERROR; 00339 } 00340 } 00341 00342 /* DFT point spacing */ 00343 fftfreq = mel_fb->sampling_rate / (float32) mel_fb->fft_size; 00344 00345 /* Count and place filter coefficients. */ 00346 n_coeffs = 0; 00347 for (i = 0; i < mel_fb->num_filters; ++i) { 00348 float32 freqs[3]; 00349 00350 /* Left, center, right frequencies in Hertz */ 00351 for (j = 0; j < 3; ++j) { 00352 if (mel_fb->doublewide) 00353 freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); 00354 else 00355 freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); 00356 /* Round them to DFT points if requested */ 00357 if (mel_fb->round_filters) 00358 freqs[j] = ((int)(freqs[j] / fftfreq + 0.5)) * fftfreq; 00359 } 00360 00361 /* spec_start is the start of this filter in the power spectrum. */ 00362 mel_fb->spec_start[i] = -1; 00363 /* There must be a better way... */ 00364 for (j = 0; j < mel_fb->fft_size/2+1; ++j) { 00365 float32 hz = j * fftfreq; 00366 if (hz < freqs[0]) 00367 continue; 00368 else if (hz > freqs[2] || j == mel_fb->fft_size/2) { 00369 /* filt_width is the width in DFT points of this filter. */ 00370 mel_fb->filt_width[i] = j - mel_fb->spec_start[i]; 00371 /* filt_start is the start of this filter in the filt_coeffs array. */ 00372 mel_fb->filt_start[i] = n_coeffs; 00373 n_coeffs += mel_fb->filt_width[i]; 00374 break; 00375 } 00376 if (mel_fb->spec_start[i] == -1) 00377 mel_fb->spec_start[i] = j; 00378 } 00379 } 00380 00381 /* Now go back and allocate the coefficient array. */ 00382 mel_fb->filt_coeffs = ckd_malloc(n_coeffs * sizeof(*mel_fb->filt_coeffs)); 00383 00384 /* And now generate the coefficients. */ 00385 n_coeffs = 0; 00386 for (i = 0; i < mel_fb->num_filters; ++i) { 00387 float32 freqs[3]; 00388 00389 /* Left, center, right frequencies in Hertz */ 00390 for (j = 0; j < 3; ++j) { 00391 if (mel_fb->doublewide) 00392 freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); 00393 else 00394 freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); 00395 /* Round them to DFT points if requested */ 00396 if (mel_fb->round_filters) 00397 freqs[j] = ((int)(freqs[j] / fftfreq + 0.5)) * fftfreq; 00398 } 00399 00400 for (j = 0; j < mel_fb->filt_width[i]; ++j) { 00401 float32 hz, loslope, hislope; 00402 00403 hz = (mel_fb->spec_start[i] + j) * fftfreq; 00404 if (hz < freqs[0] || hz > freqs[2]) { 00405 E_FATAL("Failed to create filterbank, frequency range does not match. " 00406 "Sample rate %f, FFT size %d, lowerf %f < freq %f > upperf %f.\n", mel_fb->sampling_rate, mel_fb->fft_size, freqs[2], hz, freqs[0]); 00407 } 00408 loslope = (hz - freqs[0]) / (freqs[1] - freqs[0]); 00409 hislope = (freqs[2] - hz) / (freqs[2] - freqs[1]); 00410 if (mel_fb->unit_area) { 00411 loslope *= 2 / (freqs[2] - freqs[0]); 00412 hislope *= 2 / (freqs[2] - freqs[0]); 00413 } 00414 if (loslope < hislope) { 00415 #ifdef FIXED_POINT 00416 mel_fb->filt_coeffs[n_coeffs] = fe_log(loslope); 00417 #else 00418 mel_fb->filt_coeffs[n_coeffs] = loslope; 00419 #endif 00420 } 00421 else { 00422 #ifdef FIXED_POINT 00423 mel_fb->filt_coeffs[n_coeffs] = fe_log(hislope); 00424 #else 00425 mel_fb->filt_coeffs[n_coeffs] = hislope; 00426 #endif 00427 } 00428 ++n_coeffs; 00429 } 00430 } 00431 00432 00433 return FE_SUCCESS; 00434 } 00435 00436 int32 00437 fe_compute_melcosine(melfb_t * mel_fb) 00438 { 00439 00440 float64 freqstep; 00441 int32 i, j; 00442 00443 mel_fb->mel_cosine = 00444 (mfcc_t **) ckd_calloc_2d(mel_fb->num_cepstra, 00445 mel_fb->num_filters, 00446 sizeof(mfcc_t)); 00447 00448 freqstep = M_PI / mel_fb->num_filters; 00449 /* NOTE: The first row vector is actually unnecessary but we leave 00450 * it in to avoid confusion. */ 00451 for (i = 0; i < mel_fb->num_cepstra; i++) { 00452 for (j = 0; j < mel_fb->num_filters; j++) { 00453 float64 cosine; 00454 00455 cosine = cos(freqstep * i * (j + 0.5)); 00456 mel_fb->mel_cosine[i][j] = FLOAT2COS(cosine); 00457 } 00458 } 00459 00460 /* Also precompute normalization constants for unitary DCT. */ 00461 mel_fb->sqrt_inv_n = FLOAT2COS(sqrt(1.0 / mel_fb->num_filters)); 00462 mel_fb->sqrt_inv_2n = FLOAT2COS(sqrt(2.0 / mel_fb->num_filters)); 00463 00464 /* And liftering weights */ 00465 if (mel_fb->lifter_val) { 00466 mel_fb->lifter = calloc(mel_fb->num_cepstra, sizeof(*mel_fb->lifter)); 00467 for (i = 0; i < mel_fb->num_cepstra; ++i) { 00468 mel_fb->lifter[i] = FLOAT2MFCC(1 + mel_fb->lifter_val / 2 00469 * sin(i * M_PI / mel_fb->lifter_val)); 00470 } 00471 } 00472 00473 return (0); 00474 } 00475 00476 static void 00477 fe_pre_emphasis(int16 const *in, frame_t * out, int32 len, 00478 float32 factor, int16 prior) 00479 { 00480 int i; 00481 00482 #if defined(FIXED16) 00483 int16 fxd_alpha = (int16)(factor * 0x8000); 00484 int32 tmp1, tmp2; 00485 00486 tmp1 = (int32)in[0] << 15; 00487 tmp2 = (int32)prior * fxd_alpha; 00488 out[0] = (int16)((tmp1 - tmp2) >> 15); 00489 for (i = 1; i < len; ++i) { 00490 tmp1 = (int32)in[i] << 15; 00491 tmp2 = (int32)in[i-1] * fxd_alpha; 00492 out[i] = (int16)((tmp1 - tmp2) >> 15); 00493 } 00494 #elif defined(FIXED_POINT) 00495 fixed32 fxd_alpha = FLOAT2FIX(factor); 00496 out[0] = ((fixed32)in[0] << DEFAULT_RADIX) - (prior * fxd_alpha); 00497 for (i = 1; i < len; ++i) 00498 out[i] = ((fixed32)in[i] << DEFAULT_RADIX) 00499 - (fixed32)in[i-1] * fxd_alpha; 00500 #else 00501 out[0] = (frame_t) in[0] - (frame_t) prior * factor; 00502 for (i = 1; i < len; i++) 00503 out[i] = (frame_t) in[i] - (frame_t) in[i-1] * factor; 00504 #endif 00505 } 00506 00507 static void 00508 fe_short_to_frame(int16 const *in, frame_t * out, int32 len) 00509 { 00510 int i; 00511 00512 #if defined(FIXED16) 00513 memcpy(out, in, len * sizeof(*out)); 00514 #elif defined(FIXED_POINT) 00515 for (i = 0; i < len; i++) 00516 out[i] = (int32) in[i] << DEFAULT_RADIX; 00517 #else /* FIXED_POINT */ 00518 for (i = 0; i < len; i++) 00519 out[i] = (frame_t) in[i]; 00520 #endif /* FIXED_POINT */ 00521 } 00522 00523 void 00524 fe_create_hamming(window_t * in, int32 in_len) 00525 { 00526 int i; 00527 00528 /* Symmetric, so we only create the first half of it. */ 00529 for (i = 0; i < in_len / 2; i++) { 00530 float64 hamm; 00531 hamm = (0.54 - 0.46 * cos(2 * M_PI * i / 00532 ((float64) in_len - 1.0))); 00533 #ifdef FIXED16 00534 in[i] = (int16)(hamm * 0x8000); 00535 #else 00536 in[i] = FLOAT2COS(hamm); 00537 #endif 00538 } 00539 } 00540 00541 static void 00542 fe_hamming_window(frame_t * in, window_t * window, int32 in_len, int32 remove_dc) 00543 { 00544 int i; 00545 00546 if (remove_dc) { 00547 #ifdef FIXED16 00548 int32 mean = 0; /* Use int32 to avoid possibility of overflow */ 00549 #else 00550 frame_t mean = 0; 00551 #endif 00552 00553 for (i = 0; i < in_len; i++) 00554 mean += in[i]; 00555 mean /= in_len; 00556 for (i = 0; i < in_len; i++) 00557 in[i] -= (frame_t)mean; 00558 } 00559 00560 #ifdef FIXED16 00561 for (i = 0; i < in_len/2; i++) { 00562 int32 tmp1, tmp2; 00563 00564 tmp1 = (int32)in[i] * window[i]; 00565 tmp2 = (int32)in[in_len-1-i] * window[i]; 00566 in[i] = (int16)(tmp1 >> 15); 00567 in[in_len-1-i] = (int16)(tmp2 >> 15); 00568 } 00569 #else 00570 for (i = 0; i < in_len/2; i++) { 00571 in[i] = COSMUL(in[i], window[i]); 00572 in[in_len-1-i] = COSMUL(in[in_len-1-i], window[i]); 00573 } 00574 #endif 00575 } 00576 00577 static int 00578 fe_spch_to_frame(fe_t *fe, int len) 00579 { 00580 /* Copy to the frame buffer. */ 00581 if (fe->pre_emphasis_alpha != 0.0) { 00582 fe_pre_emphasis(fe->spch, fe->frame, len, 00583 fe->pre_emphasis_alpha, fe->prior); 00584 if (len >= fe->frame_shift) 00585 fe->prior = fe->spch[fe->frame_shift - 1]; 00586 else 00587 fe->prior = fe->spch[len - 1]; 00588 } 00589 else 00590 fe_short_to_frame(fe->spch, fe->frame, len); 00591 00592 /* Zero pad up to FFT size. */ 00593 memset(fe->frame + len, 0, 00594 (fe->fft_size - len) * sizeof(*fe->frame)); 00595 00596 /* Window. */ 00597 fe_hamming_window(fe->frame, fe->hamming_window, fe->frame_size, 00598 fe->remove_dc); 00599 00600 return len; 00601 } 00602 00603 int 00604 fe_read_frame(fe_t *fe, int16 const *in, int32 len) 00605 { 00606 int i; 00607 00608 if (len > fe->frame_size) 00609 len = fe->frame_size; 00610 00611 /* Read it into the raw speech buffer. */ 00612 memcpy(fe->spch, in, len * sizeof(*in)); 00613 /* Swap and dither if necessary. */ 00614 if (fe->swap) 00615 for (i = 0; i < len; ++i) 00616 SWAP_INT16(&fe->spch[i]); 00617 if (fe->dither) 00618 for (i = 0; i < len; ++i) 00619 fe->spch[i] += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); 00620 00621 return fe_spch_to_frame(fe, len); 00622 } 00623 00624 int 00625 fe_shift_frame(fe_t *fe, int16 const *in, int32 len) 00626 { 00627 int offset, i; 00628 00629 if (len > fe->frame_shift) 00630 len = fe->frame_shift; 00631 offset = fe->frame_size - fe->frame_shift; 00632 00633 /* Shift data into the raw speech buffer. */ 00634 memmove(fe->spch, fe->spch + fe->frame_shift, 00635 offset * sizeof(*fe->spch)); 00636 memcpy(fe->spch + offset, in, len * sizeof(*fe->spch)); 00637 /* Swap and dither if necessary. */ 00638 if (fe->swap) 00639 for (i = 0; i < len; ++i) 00640 SWAP_INT16(&fe->spch[offset + i]); 00641 if (fe->dither) 00642 for (i = 0; i < len; ++i) 00643 fe->spch[offset + i] 00644 += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); 00645 00646 return fe_spch_to_frame(fe, offset + len); 00647 } 00648 00652 void 00653 fe_create_twiddle(fe_t *fe) 00654 { 00655 int i; 00656 00657 for (i = 0; i < fe->fft_size / 4; ++i) { 00658 float64 a = 2 * M_PI * i / fe->fft_size; 00659 #ifdef FIXED16 00660 fe->ccc[i] = (int16)(cos(a) * 0x8000); 00661 fe->sss[i] = (int16)(sin(a) * 0x8000); 00662 #elif defined(FIXED_POINT) 00663 fe->ccc[i] = FLOAT2COS(cos(a)); 00664 fe->sss[i] = FLOAT2COS(sin(a)); 00665 #else 00666 fe->ccc[i] = cos(a); 00667 fe->sss[i] = sin(a); 00668 #endif 00669 } 00670 } 00671 00672 /* Translated from the FORTRAN (obviously) from "Real-Valued Fast 00673 * Fourier Transform Algorithms" by Henrik V. Sorensen et al., IEEE 00674 * Transactions on Acoustics, Speech, and Signal Processing, vol. 35, 00675 * no.6. The 16-bit version does a version of "block floating 00676 * point" in order to avoid rounding errors. 00677 */ 00678 #if defined(FIXED16) 00679 static int 00680 fe_fft_real(fe_t *fe) 00681 { 00682 int i, j, k, m, n, lz; 00683 frame_t *x, xt, max; 00684 00685 x = fe->frame; 00686 m = fe->fft_order; 00687 n = fe->fft_size; 00688 00689 /* Bit-reverse the input. */ 00690 j = 0; 00691 for (i = 0; i < n - 1; ++i) { 00692 if (i < j) { 00693 xt = x[j]; 00694 x[j] = x[i]; 00695 x[i] = xt; 00696 } 00697 k = n / 2; 00698 while (k <= j) { 00699 j -= k; 00700 k /= 2; 00701 } 00702 j += k; 00703 } 00704 /* Determine how many bits of dynamic range are in the input. */ 00705 max = 0; 00706 for (i = 0; i < n; ++i) 00707 if (abs(x[i]) > max) 00708 max = abs(x[i]); 00709 /* The FFT has a gain of M bits, so we need to attenuate the input 00710 * by M bits minus the number of leading zeroes in the input's 00711 * range in order to avoid overflows. */ 00712 for (lz = 0; lz < m; ++lz) 00713 if (max & (1 << (15-lz))) 00714 break; 00715 00716 /* Basic butterflies (2-point FFT, real twiddle factors): 00717 * x[i] = x[i] + 1 * x[i+1] 00718 * x[i+1] = x[i] + -1 * x[i+1] 00719 */ 00720 /* The quantization error introduced by attenuating the input at 00721 * any given stage of the FFT has a cascading effect, so we hold 00722 * off on it until it's absolutely necessary. */ 00723 for (i = 0; i < n; i += 2) { 00724 int atten = (lz == 0); 00725 xt = x[i] >> atten; 00726 x[i] = xt + (x[i + 1] >> atten); 00727 x[i + 1] = xt - (x[i + 1] >> atten); 00728 } 00729 00730 /* The rest of the butterflies, in stages from 1..m */ 00731 for (k = 1; k < m; ++k) { 00732 int n1, n2, n4; 00733 /* Start attenuating once we hit the number of leading zeros. */ 00734 int atten = (k >= lz); 00735 00736 n4 = k - 1; 00737 n2 = k; 00738 n1 = k + 1; 00739 /* Stride over each (1 << (k+1)) points */ 00740 for (i = 0; i < n; i += (1 << n1)) { 00741 /* Basic butterfly with real twiddle factors: 00742 * x[i] = x[i] + 1 * x[i + (1<<k)] 00743 * x[i + (1<<k)] = x[i] + -1 * x[i + (1<<k)] 00744 */ 00745 xt = x[i] >> atten; 00746 x[i] = xt + (x[i + (1 << n2)] >> atten); 00747 x[i + (1 << n2)] = xt - (x[i + (1 << n2)] >> atten); 00748 00749 /* The other ones with real twiddle factors: 00750 * x[i + (1<<k) + (1<<(k-1))] 00751 * = 0 * x[i + (1<<k-1)] + -1 * x[i + (1<<k) + (1<<k-1)] 00752 * x[i + (1<<(k-1))] 00753 * = 1 * x[i + (1<<k-1)] + 0 * x[i + (1<<k) + (1<<k-1)] 00754 */ 00755 x[i + (1 << n2) + (1 << n4)] = -x[i + (1 << n2) + (1 << n4)] >> atten; 00756 x[i + (1 << n4)] = x[i + (1 << n4)] >> atten; 00757 00758 /* Butterflies with complex twiddle factors. 00759 * There are (1<<k-1) of them. 00760 */ 00761 for (j = 1; j < (1 << n4); ++j) { 00762 frame_t cc, ss, t1, t2; 00763 int i1, i2, i3, i4; 00764 00765 i1 = i + j; 00766 i2 = i + (1 << n2) - j; 00767 i3 = i + (1 << n2) + j; 00768 i4 = i + (1 << n2) + (1 << n2) - j; 00769 00770 /* 00771 * cc = real(W[j * n / (1<<(k+1))]) 00772 * ss = imag(W[j * n / (1<<(k+1))]) 00773 */ 00774 cc = fe->ccc[j << (m - n1)]; 00775 ss = fe->sss[j << (m - n1)]; 00776 00777 /* There are some symmetry properties which allow us 00778 * to get away with only four multiplications here. */ 00779 { 00780 int32 tmp1, tmp2; 00781 tmp1 = (int32)x[i3] * cc + (int32)x[i4] * ss; 00782 tmp2 = (int32)x[i3] * ss - (int32)x[i4] * cc; 00783 t1 = (int16)(tmp1 >> 15) >> atten; 00784 t2 = (int16)(tmp2 >> 15) >> atten; 00785 } 00786 00787 x[i4] = (x[i2] >> atten) - t2; 00788 x[i3] = (-x[i2] >> atten) - t2; 00789 x[i2] = (x[i1] >> atten) - t1; 00790 x[i1] = (x[i1] >> atten) + t1; 00791 } 00792 } 00793 } 00794 00795 /* Return the residual scaling factor. */ 00796 return lz; 00797 } 00798 #else /* !FIXED16 */ 00799 static int 00800 fe_fft_real(fe_t *fe) 00801 { 00802 int i, j, k, m, n; 00803 frame_t *x, xt; 00804 00805 x = fe->frame; 00806 m = fe->fft_order; 00807 n = fe->fft_size; 00808 00809 /* Bit-reverse the input. */ 00810 j = 0; 00811 for (i = 0; i < n - 1; ++i) { 00812 if (i < j) { 00813 xt = x[j]; 00814 x[j] = x[i]; 00815 x[i] = xt; 00816 } 00817 k = n / 2; 00818 while (k <= j) { 00819 j -= k; 00820 k /= 2; 00821 } 00822 j += k; 00823 } 00824 00825 /* Basic butterflies (2-point FFT, real twiddle factors): 00826 * x[i] = x[i] + 1 * x[i+1] 00827 * x[i+1] = x[i] + -1 * x[i+1] 00828 */ 00829 for (i = 0; i < n; i += 2) { 00830 xt = x[i]; 00831 x[i] = (xt + x[i + 1]); 00832 x[i + 1] = (xt - x[i + 1]); 00833 } 00834 00835 /* The rest of the butterflies, in stages from 1..m */ 00836 for (k = 1; k < m; ++k) { 00837 int n1, n2, n4; 00838 00839 n4 = k - 1; 00840 n2 = k; 00841 n1 = k + 1; 00842 /* Stride over each (1 << (k+1)) points */ 00843 for (i = 0; i < n; i += (1 << n1)) { 00844 /* Basic butterfly with real twiddle factors: 00845 * x[i] = x[i] + 1 * x[i + (1<<k)] 00846 * x[i + (1<<k)] = x[i] + -1 * x[i + (1<<k)] 00847 */ 00848 xt = x[i]; 00849 x[i] = (xt + x[i + (1 << n2)]); 00850 x[i + (1 << n2)] = (xt - x[i + (1 << n2)]); 00851 00852 /* The other ones with real twiddle factors: 00853 * x[i + (1<<k) + (1<<(k-1))] 00854 * = 0 * x[i + (1<<k-1)] + -1 * x[i + (1<<k) + (1<<k-1)] 00855 * x[i + (1<<(k-1))] 00856 * = 1 * x[i + (1<<k-1)] + 0 * x[i + (1<<k) + (1<<k-1)] 00857 */ 00858 x[i + (1 << n2) + (1 << n4)] = -x[i + (1 << n2) + (1 << n4)]; 00859 x[i + (1 << n4)] = x[i + (1 << n4)]; 00860 00861 /* Butterflies with complex twiddle factors. 00862 * There are (1<<k-1) of them. 00863 */ 00864 for (j = 1; j < (1 << n4); ++j) { 00865 frame_t cc, ss, t1, t2; 00866 int i1, i2, i3, i4; 00867 00868 i1 = i + j; 00869 i2 = i + (1 << n2) - j; 00870 i3 = i + (1 << n2) + j; 00871 i4 = i + (1 << n2) + (1 << n2) - j; 00872 00873 /* 00874 * cc = real(W[j * n / (1<<(k+1))]) 00875 * ss = imag(W[j * n / (1<<(k+1))]) 00876 */ 00877 cc = fe->ccc[j << (m - n1)]; 00878 ss = fe->sss[j << (m - n1)]; 00879 00880 /* There are some symmetry properties which allow us 00881 * to get away with only four multiplications here. */ 00882 t1 = COSMUL(x[i3], cc) + COSMUL(x[i4], ss); 00883 t2 = COSMUL(x[i3], ss) - COSMUL(x[i4], cc); 00884 00885 x[i4] = (x[i2] - t2); 00886 x[i3] = (-x[i2] - t2); 00887 x[i2] = (x[i1] - t1); 00888 x[i1] = (x[i1] + t1); 00889 } 00890 } 00891 } 00892 00893 /* This isn't used, but return it for completeness. */ 00894 return m; 00895 } 00896 #endif /* !FIXED16 */ 00897 00898 static void 00899 fe_spec_magnitude(fe_t *fe) 00900 { 00901 frame_t *fft; 00902 powspec_t *spec; 00903 int32 j, scale, fftsize; 00904 00905 /* Do FFT and get the scaling factor back (only actually used in 00906 * fixed-point). Note the scaling factor is expressed in bits. */ 00907 scale = fe_fft_real(fe); 00908 00909 /* Convenience pointers to make things less awkward below. */ 00910 fft = fe->frame; 00911 spec = fe->spec; 00912 fftsize = fe->fft_size; 00913 00914 /* We need to scale things up the rest of the way to N. */ 00915 scale = fe->fft_order - scale; 00916 00917 /* The first point (DC coefficient) has no imaginary part */ 00918 { 00919 #ifdef FIXED16 00920 spec[0] = fixlog(abs(fft[0]) << scale) * 2; 00921 #elif defined(FIXED_POINT) 00922 spec[0] = FIXLN(abs(fft[0]) << scale) * 2; 00923 #else 00924 spec[0] = fft[0] * fft[0]; 00925 #endif 00926 } 00927 00928 for (j = 1; j <= fftsize / 2; j++) { 00929 #ifdef FIXED16 00930 int32 rr = fixlog(abs(fft[j]) << scale) * 2; 00931 int32 ii = fixlog(abs(fft[fftsize - j]) << scale) * 2; 00932 spec[j] = fe_log_add(rr, ii); 00933 #elif defined(FIXED_POINT) 00934 int32 rr = FIXLN(abs(fft[j]) << scale) * 2; 00935 int32 ii = FIXLN(abs(fft[fftsize - j]) << scale) * 2; 00936 spec[j] = fe_log_add(rr, ii); 00937 #else 00938 spec[j] = fft[j] * fft[j] + fft[fftsize - j] * fft[fftsize - j]; 00939 #endif 00940 } 00941 } 00942 00943 static void 00944 fe_mel_spec(fe_t * fe) 00945 { 00946 int whichfilt; 00947 powspec_t *spec, *mfspec; 00948 00949 /* Convenience poitners. */ 00950 spec = fe->spec; 00951 mfspec = fe->mfspec; 00952 00953 for (whichfilt = 0; whichfilt < fe->mel_fb->num_filters; whichfilt++) { 00954 int spec_start, filt_start, i; 00955 00956 spec_start = fe->mel_fb->spec_start[whichfilt]; 00957 filt_start = fe->mel_fb->filt_start[whichfilt]; 00958 00959 #ifdef FIXED_POINT 00960 mfspec[whichfilt] = spec[spec_start] + fe->mel_fb->filt_coeffs[filt_start]; 00961 for (i = 1; i < fe->mel_fb->filt_width[whichfilt]; i++) { 00962 mfspec[whichfilt] = fe_log_add(mfspec[whichfilt], 00963 spec[spec_start + i] + 00964 fe->mel_fb->filt_coeffs[filt_start + i]); 00965 } 00966 #else /* !FIXED_POINT */ 00967 mfspec[whichfilt] = 0; 00968 for (i = 0; i < fe->mel_fb->filt_width[whichfilt]; i++) 00969 mfspec[whichfilt] += 00970 spec[spec_start + i] * fe->mel_fb->filt_coeffs[filt_start + i]; 00971 #endif /* !FIXED_POINT */ 00972 } 00973 } 00974 00975 static void 00976 fe_mel_cep(fe_t * fe, mfcc_t *mfcep) 00977 { 00978 int32 i; 00979 powspec_t *mfspec; 00980 00981 /* Convenience pointer. */ 00982 mfspec = fe->mfspec; 00983 00984 for (i = 0; i < fe->mel_fb->num_filters; ++i) { 00985 #ifndef FIXED_POINT /* It's already in log domain for fixed point */ 00986 if (mfspec[i] > 0) 00987 mfspec[i] = log(mfspec[i]); 00988 else /* This number should be smaller than anything 00989 * else, but not too small, so as to avoid 00990 * infinities in the inverse transform (this is 00991 * the frequency-domain equivalent of 00992 * dithering) */ 00993 mfspec[i] = -10.0; 00994 #endif /* !FIXED_POINT */ 00995 } 00996 00997 /* If we are doing LOG_SPEC, then do nothing. */ 00998 if (fe->log_spec == RAW_LOG_SPEC) { 00999 for (i = 0; i < fe->feature_dimension; i++) { 01000 mfcep[i] = (mfcc_t) mfspec[i]; 01001 } 01002 } 01003 /* For smoothed spectrum, do DCT-II followed by (its inverse) DCT-III */ 01004 else if (fe->log_spec == SMOOTH_LOG_SPEC) { 01005 /* FIXME: This is probably broken for fixed-point. */ 01006 fe_dct2(fe, mfspec, mfcep, 0); 01007 fe_dct3(fe, mfcep, mfspec); 01008 for (i = 0; i < fe->feature_dimension; i++) { 01009 mfcep[i] = (mfcc_t) mfspec[i]; 01010 } 01011 } 01012 else if (fe->transform == DCT_II) 01013 fe_dct2(fe, mfspec, mfcep, FALSE); 01014 else if (fe->transform == DCT_HTK) 01015 fe_dct2(fe, mfspec, mfcep, TRUE); 01016 else 01017 fe_spec2cep(fe, mfspec, mfcep); 01018 01019 return; 01020 } 01021 01022 void 01023 fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep) 01024 { 01025 int32 i, j, beta; 01026 01027 /* Compute C0 separately (its basis vector is 1) to avoid 01028 * costly multiplications. */ 01029 mfcep[0] = mflogspec[0] / 2; /* beta = 0.5 */ 01030 for (j = 1; j < fe->mel_fb->num_filters; j++) 01031 mfcep[0] += mflogspec[j]; /* beta = 1.0 */ 01032 mfcep[0] /= (frame_t) fe->mel_fb->num_filters; 01033 01034 for (i = 1; i < fe->num_cepstra; ++i) { 01035 mfcep[i] = 0; 01036 for (j = 0; j < fe->mel_fb->num_filters; j++) { 01037 if (j == 0) 01038 beta = 1; /* 0.5 */ 01039 else 01040 beta = 2; /* 1.0 */ 01041 mfcep[i] += COSMUL(mflogspec[j], 01042 fe->mel_fb->mel_cosine[i][j]) * beta; 01043 } 01044 /* Note that this actually normalizes by num_filters, like the 01045 * original Sphinx front-end, due to the doubled 'beta' factor 01046 * above. */ 01047 mfcep[i] /= (frame_t) fe->mel_fb->num_filters * 2; 01048 } 01049 } 01050 01051 void 01052 fe_dct2(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep, int htk) 01053 { 01054 int32 i, j; 01055 01056 /* Compute C0 separately (its basis vector is 1) to avoid 01057 * costly multiplications. */ 01058 mfcep[0] = mflogspec[0]; 01059 for (j = 1; j < fe->mel_fb->num_filters; j++) 01060 mfcep[0] += mflogspec[j]; 01061 if (htk) 01062 mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_2n); 01063 else /* sqrt(1/N) = sqrt(2/N) * 1/sqrt(2) */ 01064 mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_n); 01065 01066 for (i = 1; i < fe->num_cepstra; ++i) { 01067 mfcep[i] = 0; 01068 for (j = 0; j < fe->mel_fb->num_filters; j++) { 01069 mfcep[i] += COSMUL(mflogspec[j], 01070 fe->mel_fb->mel_cosine[i][j]); 01071 } 01072 mfcep[i] = COSMUL(mfcep[i], fe->mel_fb->sqrt_inv_2n); 01073 } 01074 } 01075 01076 void 01077 fe_lifter(fe_t *fe, mfcc_t *mfcep) 01078 { 01079 int32 i; 01080 01081 if (fe->mel_fb->lifter_val == 0) 01082 return; 01083 01084 for (i = 0; i < fe->num_cepstra; ++i) { 01085 mfcep[i] = MFCCMUL(mfcep[i], fe->mel_fb->lifter[i]); 01086 } 01087 } 01088 01089 void 01090 fe_dct3(fe_t * fe, const mfcc_t * mfcep, powspec_t * mflogspec) 01091 { 01092 int32 i, j; 01093 01094 for (i = 0; i < fe->mel_fb->num_filters; ++i) { 01095 mflogspec[i] = COSMUL(mfcep[0], SQRT_HALF); 01096 for (j = 1; j < fe->num_cepstra; j++) { 01097 mflogspec[i] += COSMUL(mfcep[j], 01098 fe->mel_fb->mel_cosine[j][i]); 01099 } 01100 mflogspec[i] = COSMUL(mflogspec[i], fe->mel_fb->sqrt_inv_2n); 01101 } 01102 } 01103 01104 int32 01105 fe_write_frame(fe_t * fe, mfcc_t * fea) 01106 { 01107 fe_spec_magnitude(fe); 01108 fe_mel_spec(fe); 01109 fe_mel_cep(fe, fea); 01110 fe_lifter(fe, fea); 01111 01112 return 1; 01113 } 01114 01115 void * 01116 fe_create_2d(int32 d1, int32 d2, int32 elem_size) 01117 { 01118 return (void *)ckd_calloc_2d(d1, d2, elem_size); 01119 } 01120 01121 void 01122 fe_free_2d(void *arr) 01123 { 01124 ckd_free_2d((void **)arr); 01125 }