SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * agc.c -- Various forms of automatic gain control (AGC) 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1996 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.5 2005/06/21 19:25:41 arthchan2003 00050 * 1, Fixed doxygen documentation. 2, Added $ keyword. 00051 * 00052 * Revision 1.3 2005/03/30 01:22:46 archan 00053 * Fixed mistakes in last updates. Add 00054 * 00055 * 00056 * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00057 * Created. 00058 */ 00059 00060 #include <string.h> 00061 #ifdef HAVE_CONFIG_H 00062 #include <config.h> 00063 #endif 00064 00065 #include "sphinxbase/err.h" 00066 #include "sphinxbase/ckd_alloc.h" 00067 #include "sphinxbase/agc.h" 00068 00069 /* NOTE! These must match the enum in agc.h */ 00070 const char *agc_type_str[] = { 00071 "none", 00072 "max", 00073 "emax", 00074 "noise" 00075 }; 00076 static const int n_agc_type_str = sizeof(agc_type_str)/sizeof(agc_type_str[0]); 00077 00078 agc_type_t 00079 agc_type_from_str(const char *str) 00080 { 00081 int i; 00082 00083 for (i = 0; i < n_agc_type_str; ++i) { 00084 if (0 == strcmp(str, agc_type_str[i])) 00085 return (agc_type_t)i; 00086 } 00087 E_FATAL("Unknown AGC type '%s'\n", str); 00088 return AGC_NONE; 00089 } 00090 00091 agc_t *agc_init(void) 00092 { 00093 agc_t *agc; 00094 agc = ckd_calloc(1, sizeof(*agc)); 00095 agc->noise_thresh = FLOAT2MFCC(2.0); 00096 00097 return agc; 00098 } 00099 00100 void agc_free(agc_t *agc) 00101 { 00102 ckd_free(agc); 00103 } 00104 00108 void 00109 agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame) 00110 { 00111 int32 i; 00112 00113 if (n_frame <= 0) 00114 return; 00115 agc->obs_max = mfc[0][0]; 00116 for (i = 1; i < n_frame; i++) { 00117 if (mfc[i][0] > agc->obs_max) { 00118 agc->obs_max = mfc[i][0]; 00119 agc->obs_frame = 1; 00120 } 00121 } 00122 00123 E_INFO("AGCMax: obs=max= %.2f\n", agc->obs_max); 00124 for (i = 0; i < n_frame; i++) 00125 mfc[i][0] -= agc->obs_max; 00126 } 00127 00128 void 00129 agc_emax_set(agc_t *agc, float32 m) 00130 { 00131 agc->max = FLOAT2MFCC(m); 00132 E_INFO("AGCEMax: max= %.2f\n", m); 00133 } 00134 00135 float32 00136 agc_emax_get(agc_t *agc) 00137 { 00138 return MFCC2FLOAT(agc->max); 00139 } 00140 00141 void 00142 agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame) 00143 { 00144 int i; 00145 00146 if (n_frame <= 0) 00147 return; 00148 for (i = 0; i < n_frame; ++i) { 00149 if (mfc[i][0] > agc->obs_max) { 00150 agc->obs_max = mfc[i][0]; 00151 agc->obs_frame = 1; 00152 } 00153 mfc[i][0] -= agc->max; 00154 } 00155 } 00156 00157 /* Update estimated max for next utterance */ 00158 void 00159 agc_emax_update(agc_t *agc) 00160 { 00161 if (agc->obs_frame) { /* Update only if some data observed */ 00162 agc->obs_max_sum += agc->obs_max; 00163 agc->obs_utt++; 00164 00165 /* Re-estimate max over past history; decay the history */ 00166 agc->max = agc->obs_max_sum / agc->obs_utt; 00167 if (agc->obs_utt == 8) { 00168 agc->obs_max_sum /= 2; 00169 agc->obs_utt = 4; 00170 } 00171 } 00172 E_INFO("AGCEMax: obs= %.2f, new= %.2f\n", agc->obs_max, agc->max); 00173 00174 /* Reset the accumulators for the next utterance. */ 00175 agc->obs_frame = 0; 00176 agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */ 00177 } 00178 00179 void 00180 agc_noise(agc_t *agc, 00181 mfcc_t **cep, 00182 int32 nfr) 00183 { 00184 mfcc_t min_energy; /* Minimum log-energy */ 00185 mfcc_t noise_level; /* Average noise_level */ 00186 int32 i; /* frame index */ 00187 int32 noise_frames; /* Number of noise frames */ 00188 00189 /* Determine minimum log-energy in utterance */ 00190 min_energy = cep[0][0]; 00191 for (i = 0; i < nfr; ++i) { 00192 if (cep[i][0] < min_energy) 00193 min_energy = cep[i][0]; 00194 } 00195 00196 /* Average all frames between min_energy and min_energy + agc->noise_thresh */ 00197 noise_frames = 0; 00198 noise_level = 0; 00199 min_energy += agc->noise_thresh; 00200 for (i = 0; i < nfr; ++i) { 00201 if (cep[i][0] < min_energy) { 00202 noise_level += cep[i][0]; 00203 noise_frames++; 00204 } 00205 } 00206 noise_level /= noise_frames; 00207 00208 E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level)); 00209 00210 /* Subtract noise_level from all log_energy values */ 00211 for (i = 0; i < nfr; ++i) 00212 cep[i][0] -= noise_level; 00213 } 00214 00215 void 00216 agc_set_threshold(agc_t *agc, float32 threshold) 00217 { 00218 agc->noise_thresh = FLOAT2MFCC(threshold); 00219 } 00220 00221 float32 00222 agc_get_threshold(agc_t *agc) 00223 { 00224 return FLOAT2MFCC(agc->noise_thresh); 00225 }