SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2001 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * cont_adseg.c -- Continuously listen and segment input speech into utterances. 00039 * 00040 * HISTORY 00041 * 00042 * 27-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00043 * Created. 00044 */ 00045 00046 #include <stdio.h> 00047 #include <stdlib.h> 00048 #include <string.h> 00049 #include <assert.h> 00050 #include <math.h> 00051 00052 #include <sphinxbase/prim_type.h> 00053 #include <sphinxbase/ad.h> 00054 #include <sphinxbase/cont_ad.h> 00055 #include <sphinxbase/err.h> 00056 00057 /* 00058 * Segment raw A/D input data into utterances whenever silence region of given 00059 * duration is encountered. 00060 * Utterances are written to files named 0001.raw, 0002.raw, 0003.raw, etc. 00061 */ 00062 int 00063 main(int32 argc, char **argv) 00064 { 00065 ad_rec_t *ad; 00066 cont_ad_t *cont; 00067 int32 k, uttno, ts, uttlen, sps, endsilsamples; 00068 float endsil; 00069 int16 buf[4096]; 00070 FILE *fp; 00071 char file[1024]; 00072 00073 if ((argc != 3) || 00074 (sscanf(argv[1], "%d", &sps) != 1) || 00075 (sscanf(argv[2], "%f", &endsil) != 1) || (endsil <= 0.0)) { 00076 E_FATAL("Usage: %s <sampling-rate> <utt-end-sil(sec)>\n", argv[0]); 00077 } 00078 00079 /* Convert desired min. inter-utterance silence duration to #samples */ 00080 endsilsamples = (int32) (endsil * sps); 00081 00082 /* Open raw A/D device */ 00083 if ((ad = ad_open_sps(sps)) == NULL) 00084 E_FATAL("ad_open_sps(%d) failed\n", sps); 00085 00086 /* Associate new continuous listening module with opened raw A/D device */ 00087 if ((cont = cont_ad_init(ad, ad_read)) == NULL) 00088 E_FATAL("cont_ad_init failed\n"); 00089 00090 /* Calibrate continuous listening for background noise/silence level */ 00091 printf("Calibrating ..."); 00092 fflush(stdout); 00093 ad_start_rec(ad); 00094 if (cont_ad_calib(cont) < 0) 00095 printf(" failed\n"); 00096 else 00097 printf(" done\n"); 00098 00099 /* Forever listen for utterances */ 00100 printf("You may speak now\n"); 00101 fflush(stdout); 00102 uttno = 0; 00103 for (;;) { 00104 /* Wait for beginning of next utterance; for non-silence data */ 00105 while ((k = cont_ad_read(cont, buf, 4096)) == 0); 00106 if (k < 0) 00107 E_FATAL("cont_ad_read failed\n"); 00108 00109 /* Non-silence data received; open and write to new logging file */ 00110 uttno++; 00111 sprintf(file, "%04d.raw", uttno); 00112 if ((fp = fopen(file, "wb")) == NULL) 00113 E_FATAL_SYSTEM("Failed to open '%s' for reading", file); 00114 fwrite(buf, sizeof(int16), k, fp); 00115 uttlen = k; 00116 printf("Utterance %04d, logging to %s\n", uttno, file); 00117 00118 /* Note current timestamp */ 00119 ts = cont->read_ts; 00120 00121 /* Read utterance data until a gap of at least 1 sec observed */ 00122 for (;;) { 00123 if ((k = cont_ad_read(cont, buf, 4096)) < 0) 00124 E_FATAL("cont_ad_read failed\n"); 00125 if (k == 0) { 00126 /* 00127 * No speech data available; check current timestamp. End of 00128 * utterance if no non-silence data been read for at least 1 sec. 00129 */ 00130 if ((cont->read_ts - ts) > endsilsamples) 00131 break; 00132 } 00133 else { 00134 /* Note timestamp at the end of most recently read speech data */ 00135 ts = cont->read_ts; 00136 uttlen += k; 00137 fwrite(buf, sizeof(int16), k, fp); 00138 } 00139 } 00140 fclose(fp); 00141 00142 printf("\tUtterance %04d = %d samples (%.1fsec)\n\n", 00143 uttno, uttlen, (double) uttlen / (double) sps); 00144 } 00145 00146 ad_stop_rec(ad); 00147 cont_ad_close(cont); 00148 ad_close(ad); 00149 return 0; 00150 }