SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * bio.c -- Sphinx-3 binary file I/O functions. 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1996 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.4 2005/06/21 20:40:46 arthchan2003 00050 * 1, Fixed doxygen documentation, 2, Add the $ keyword. 00051 * 00052 * Revision 1.3 2005/03/30 01:22:46 archan 00053 * Fixed mistakes in last updates. Add 00054 * 00055 * 00056 * 02-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00057 * Bugfix: Added byteswapping in bio_verify_chksum(). 00058 * 00059 * 18-Dec-1996 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00060 * Created. 00061 */ 00062 00063 #include <stdio.h> 00064 #include <string.h> 00065 #include <assert.h> 00066 00067 #ifdef _MSC_VER 00068 #pragma warning (disable: 4996) 00069 #endif 00070 00071 #include "sphinxbase/bio.h" 00072 #include "sphinxbase/err.h" 00073 #include "sphinxbase/ckd_alloc.h" 00074 00075 00076 #define BIO_HDRARG_MAX 32 00077 #define END_COMMENT "*end_comment*\n" 00078 00079 00080 static void 00081 bcomment_read(FILE * fp) 00082 { 00083 __BIGSTACKVARIABLE__ char iline[16384]; 00084 00085 while (fgets(iline, sizeof(iline), fp) != NULL) { 00086 if (strcmp(iline, END_COMMENT) == 0) 00087 return; 00088 } 00089 E_FATAL("Missing %s marker\n", END_COMMENT); 00090 } 00091 00092 00093 static int32 00094 swap_check(FILE * fp) 00095 { 00096 uint32 magic; 00097 00098 if (fread(&magic, sizeof(uint32), 1, fp) != 1) { 00099 E_ERROR("Cannot read BYTEORDER MAGIC NO.\n"); 00100 return -1; 00101 } 00102 00103 if (magic != BYTE_ORDER_MAGIC) { 00104 /* either need to swap or got bogus magic number */ 00105 SWAP_INT32(&magic); 00106 00107 if (magic == BYTE_ORDER_MAGIC) 00108 return 1; 00109 00110 SWAP_INT32(&magic); 00111 E_ERROR("Bad BYTEORDER MAGIC NO: %08x, expecting %08x\n", 00112 magic, BYTE_ORDER_MAGIC); 00113 return -1; 00114 } 00115 00116 return 0; 00117 } 00118 00119 00120 void 00121 bio_hdrarg_free(char **argname, char **argval) 00122 { 00123 int32 i; 00124 00125 if (argname == NULL) 00126 return; 00127 for (i = 0; argname[i]; i++) { 00128 ckd_free(argname[i]); 00129 ckd_free(argval[i]); 00130 } 00131 ckd_free(argname); 00132 ckd_free(argval); 00133 } 00134 00135 00136 int32 00137 bio_writehdr_version(FILE * fp, char *version) 00138 { 00139 uint32 b; 00140 00141 fprintf(fp, "s3\n"); 00142 fprintf(fp, "version %s\n", version); 00143 fprintf(fp, "endhdr\n"); 00144 fflush(fp); 00145 00146 b = (uint32) BYTE_ORDER_MAGIC; 00147 fwrite(&b, sizeof(uint32), 1, fp); 00148 fflush(fp); 00149 00150 return 0; 00151 } 00152 00153 00154 int32 00155 bio_writehdr(FILE *fp, ...) 00156 { 00157 char const *key; 00158 va_list args; 00159 uint32 b; 00160 00161 fprintf(fp, "s3\n"); 00162 va_start(args, fp); 00163 while ((key = va_arg(args, char const *)) != NULL) { 00164 char const *val = va_arg(args, char const *); 00165 if (val == NULL) { 00166 E_ERROR("Wrong number of arguments\n"); 00167 va_end(args); 00168 return -1; 00169 } 00170 fprintf(fp, "%s %s\n", key, val); 00171 } 00172 va_end(args); 00173 00174 fprintf(fp, "endhdr\n"); 00175 fflush(fp); 00176 00177 b = (uint32) BYTE_ORDER_MAGIC; 00178 if (fwrite(&b, sizeof(uint32), 1, fp) != 1) 00179 return -1; 00180 fflush(fp); 00181 00182 return 0; 00183 } 00184 00185 00186 int32 00187 bio_readhdr(FILE * fp, char ***argname, char ***argval, int32 * swap) 00188 { 00189 __BIGSTACKVARIABLE__ char line[16384], word[4096]; 00190 int32 i, l; 00191 int32 lineno; 00192 00193 *argname = (char **) ckd_calloc(BIO_HDRARG_MAX + 1, sizeof(char *)); 00194 *argval = (char **) ckd_calloc(BIO_HDRARG_MAX, sizeof(char *)); 00195 00196 lineno = 0; 00197 if (fgets(line, sizeof(line), fp) == NULL){ 00198 E_ERROR("Premature EOF, line %d\n", lineno); 00199 goto error_out; 00200 } 00201 lineno++; 00202 00203 if ((line[0] == 's') && (line[1] == '3') && (line[2] == '\n')) { 00204 /* New format (post Dec-1996, including checksums); read argument-value pairs */ 00205 for (i = 0;;) { 00206 if (fgets(line, sizeof(line), fp) == NULL) { 00207 E_ERROR("Premature EOF, line %d\n", lineno); 00208 goto error_out; 00209 } 00210 lineno++; 00211 00212 if (sscanf(line, "%s%n", word, &l) != 1) { 00213 E_ERROR("Header format error, line %d\n", lineno); 00214 goto error_out; 00215 } 00216 if (strcmp(word, "endhdr") == 0) 00217 break; 00218 if (word[0] == '#') /* Skip comments */ 00219 continue; 00220 00221 if (i >= BIO_HDRARG_MAX) { 00222 E_ERROR 00223 ("Max arg-value limit(%d) exceeded; increase BIO_HDRARG_MAX\n", 00224 BIO_HDRARG_MAX); 00225 goto error_out; 00226 } 00227 00228 (*argname)[i] = ckd_salloc(word); 00229 if (sscanf(line + l, "%s", word) != 1) { /* Multi-word values not allowed */ 00230 E_ERROR("Header format error, line %d\n", lineno); 00231 goto error_out; 00232 } 00233 (*argval)[i] = ckd_salloc(word); 00234 i++; 00235 } 00236 } 00237 else { 00238 /* Old format (without checksums); the first entry must be the version# */ 00239 if (sscanf(line, "%s", word) != 1) { 00240 E_ERROR("Header format error, line %d\n", lineno); 00241 goto error_out; 00242 } 00243 00244 (*argname)[0] = ckd_salloc("version"); 00245 (*argval)[0] = ckd_salloc(word); 00246 i = 1; 00247 00248 bcomment_read(fp); 00249 } 00250 (*argname)[i] = NULL; 00251 00252 if ((*swap = swap_check(fp)) < 0) { 00253 E_ERROR("swap_check failed\n"); 00254 goto error_out; 00255 } 00256 00257 return 0; 00258 error_out: 00259 bio_hdrarg_free(*argname, *argval); 00260 *argname = *argval = NULL; 00261 return -1; 00262 } 00263 00264 00265 static uint32 00266 chksum_accum(void *buf, int32 el_sz, int32 n_el, uint32 sum) 00267 { 00268 int32 i; 00269 uint8 *i8; 00270 uint16 *i16; 00271 uint32 *i32; 00272 00273 switch (el_sz) { 00274 case 1: 00275 i8 = (uint8 *) buf; 00276 for (i = 0; i < n_el; i++) 00277 sum = (sum << 5 | sum >> 27) + i8[i]; 00278 break; 00279 case 2: 00280 i16 = (uint16 *) buf; 00281 for (i = 0; i < n_el; i++) 00282 sum = (sum << 10 | sum >> 22) + i16[i]; 00283 break; 00284 case 4: 00285 i32 = (uint32 *) buf; 00286 for (i = 0; i < n_el; i++) 00287 sum = (sum << 20 | sum >> 12) + i32[i]; 00288 break; 00289 default: 00290 E_FATAL("Unsupported elemsize for checksum: %d\n", el_sz); 00291 break; 00292 } 00293 00294 return sum; 00295 } 00296 00297 00298 static void 00299 swap_buf(void *buf, int32 el_sz, int32 n_el) 00300 { 00301 int32 i; 00302 uint16 *buf16; 00303 uint32 *buf32; 00304 00305 switch (el_sz) { 00306 case 1: 00307 break; 00308 case 2: 00309 buf16 = (uint16 *) buf; 00310 for (i = 0; i < n_el; i++) 00311 SWAP_INT16(buf16 + i); 00312 break; 00313 case 4: 00314 buf32 = (uint32 *) buf; 00315 for (i = 0; i < n_el; i++) 00316 SWAP_INT32(buf32 + i); 00317 break; 00318 default: 00319 E_FATAL("Unsupported elemsize for byteswapping: %d\n", el_sz); 00320 break; 00321 } 00322 } 00323 00324 00325 int32 00326 bio_fread(void *buf, int32 el_sz, int32 n_el, FILE * fp, int32 swap, 00327 uint32 * chksum) 00328 { 00329 if (fread(buf, el_sz, n_el, fp) != (size_t) n_el) 00330 return -1; 00331 00332 if (swap) 00333 swap_buf(buf, el_sz, n_el); 00334 00335 if (chksum) 00336 *chksum = chksum_accum(buf, el_sz, n_el, *chksum); 00337 00338 return n_el; 00339 } 00340 00341 int32 00342 bio_fwrite(void *buf, int32 el_sz, int32 n_el, FILE *fp, 00343 int32 swap, uint32 *chksum) 00344 { 00345 if (chksum) 00346 *chksum = chksum_accum(buf, el_sz, n_el, *chksum); 00347 if (swap) { 00348 void *nbuf; 00349 int rv; 00350 00351 nbuf = ckd_calloc(n_el, el_sz); 00352 memcpy(nbuf, buf, n_el * el_sz); 00353 swap_buf(nbuf, el_sz, n_el); 00354 rv = fwrite(nbuf, el_sz, n_el, fp); 00355 ckd_free(nbuf); 00356 return rv; 00357 } 00358 else { 00359 return fwrite(buf, el_sz, n_el, fp); 00360 } 00361 } 00362 00363 int32 00364 bio_fread_1d(void **buf, size_t el_sz, uint32 * n_el, FILE * fp, 00365 int32 sw, uint32 * ck) 00366 { 00367 /* Read 1-d array size */ 00368 if (bio_fread(n_el, sizeof(int32), 1, fp, sw, ck) != 1) 00369 E_FATAL("fread(arraysize) failed\n"); 00370 if (*n_el <= 0) 00371 E_FATAL("Bad arraysize: %d\n", *n_el); 00372 00373 /* Allocate memory for array data */ 00374 *buf = (void *) ckd_calloc(*n_el, el_sz); 00375 00376 /* Read array data */ 00377 if (bio_fread(*buf, el_sz, *n_el, fp, sw, ck) != *n_el) 00378 E_FATAL("fread(arraydata) failed\n"); 00379 00380 return *n_el; 00381 } 00382 00383 int32 00384 bio_fread_2d(void ***arr, 00385 size_t e_sz, 00386 uint32 *d1, 00387 uint32 *d2, 00388 FILE *fp, 00389 uint32 swap, 00390 uint32 *chksum) 00391 { 00392 uint32 l_d1, l_d2; 00393 uint32 n; 00394 size_t ret; 00395 void *raw; 00396 00397 ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum); 00398 if (ret != 1) { 00399 if (ret == 0) { 00400 E_ERROR_SYSTEM("Unable to read complete data"); 00401 } 00402 else { 00403 E_ERROR_SYSTEM("OS error in bio_fread_2d"); 00404 } 00405 return -1; 00406 } 00407 ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum); 00408 if (ret != 1) { 00409 if (ret == 0) { 00410 E_ERROR_SYSTEM("Unable to read complete data"); 00411 } 00412 else { 00413 E_ERROR_SYSTEM("OS error in bio_fread_2d"); 00414 } 00415 return -1; 00416 } 00417 if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n) 00418 return -1; 00419 00420 assert(n == l_d1*l_d2); 00421 00422 *d1 = l_d1; 00423 *d2 = l_d2; 00424 *arr = ckd_alloc_2d_ptr(l_d1, l_d2, raw, e_sz); 00425 00426 return n; 00427 } 00428 00429 int32 00430 bio_fread_3d(void ****arr, 00431 size_t e_sz, 00432 uint32 *d1, 00433 uint32 *d2, 00434 uint32 *d3, 00435 FILE *fp, 00436 uint32 swap, 00437 uint32 *chksum) 00438 { 00439 uint32 l_d1; 00440 uint32 l_d2; 00441 uint32 l_d3; 00442 uint32 n; 00443 void *raw; 00444 size_t ret; 00445 00446 ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum); 00447 if (ret != 1) { 00448 if (ret == 0) { 00449 E_ERROR_SYSTEM("Unable to read complete data"); 00450 } 00451 else { 00452 E_ERROR_SYSTEM("OS error in bio_fread_3d"); 00453 } 00454 return -1; 00455 } 00456 ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum); 00457 if (ret != 1) { 00458 if (ret == 0) { 00459 E_ERROR_SYSTEM("Unable to read complete data"); 00460 } 00461 else { 00462 E_ERROR_SYSTEM("OS error in bio_fread_3d"); 00463 } 00464 return -1; 00465 } 00466 ret = bio_fread(&l_d3, sizeof(uint32), 1, fp, swap, chksum); 00467 if (ret != 1) { 00468 if (ret == 0) { 00469 E_ERROR_SYSTEM("Unable to read complete data"); 00470 } 00471 else { 00472 E_ERROR_SYSTEM("OS error in bio_fread_3d"); 00473 } 00474 return -1; 00475 } 00476 00477 if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n) { 00478 return -1; 00479 } 00480 00481 assert(n == l_d1 * l_d2 * l_d3); 00482 00483 *arr = ckd_alloc_3d_ptr(l_d1, l_d2, l_d3, raw, e_sz); 00484 *d1 = l_d1; 00485 *d2 = l_d2; 00486 *d3 = l_d3; 00487 00488 return n; 00489 } 00490 00491 void 00492 bio_verify_chksum(FILE * fp, int32 byteswap, uint32 chksum) 00493 { 00494 uint32 file_chksum; 00495 00496 if (fread(&file_chksum, sizeof(uint32), 1, fp) != 1) 00497 E_FATAL("fread(chksum) failed\n"); 00498 if (byteswap) 00499 SWAP_INT32(&file_chksum); 00500 if (file_chksum != chksum) 00501 E_FATAL 00502 ("Checksum error; file-checksum %08x, computed %08x\n", 00503 file_chksum, chksum); 00504 } 00505 00506 int16* 00507 bio_read_wavfile(char const *directory, 00508 char const *filename, 00509 char const *extension, 00510 int32 header, 00511 int32 endian, 00512 int32 *nsamps) 00513 { 00514 FILE *uttfp; 00515 char *inputfile; 00516 int32 n, l; 00517 int16 *data; 00518 00519 n = strlen(extension); 00520 l = strlen(filename); 00521 if ((n <= l) && (0 == strcmp(filename + l - n, extension))) 00522 extension = ""; 00523 inputfile = ckd_calloc(strlen(directory) + l + n + 2, 1); 00524 if (directory) { 00525 sprintf(inputfile, "%s/%s%s", directory, filename, extension); 00526 } else { 00527 sprintf(inputfile, "%s%s", filename, extension); 00528 } 00529 00530 if ((uttfp = fopen(inputfile, "rb")) == NULL) { 00531 E_FATAL_SYSTEM("Failed to open file '%s' for reading", inputfile); 00532 } 00533 fseek(uttfp, 0, SEEK_END); 00534 n = ftell(uttfp); 00535 fseek(uttfp, 0, SEEK_SET); 00536 if (header > 0) { 00537 if (fseek(uttfp, header, SEEK_SET) < 0) { 00538 E_ERROR_SYSTEM("Failed to move to an offset %d in a file '%s'", header, inputfile); 00539 fclose(uttfp); 00540 ckd_free(inputfile); 00541 return NULL; 00542 } 00543 n -= header; 00544 } 00545 n /= sizeof(int16); 00546 data = ckd_calloc(n, sizeof(*data)); 00547 if ((l = fread(data, sizeof(int16), n, uttfp)) < n) { 00548 E_ERROR_SYSTEM("Failed to read %d samples from %s: %d", n, inputfile, l); 00549 ckd_free(data); 00550 ckd_free(inputfile); 00551 fclose(uttfp); 00552 return NULL; 00553 } 00554 ckd_free(inputfile); 00555 fclose(uttfp); 00556 if (nsamps) *nsamps = n; 00557 00558 return data; 00559 }