SphinxBase 0.6

src/libsphinxbase/util/bio.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * bio.c -- Sphinx-3 binary file I/O functions.
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1996 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log$
00049  * Revision 1.4  2005/06/21  20:40:46  arthchan2003
00050  * 1, Fixed doxygen documentation, 2, Add the $ keyword.
00051  * 
00052  * Revision 1.3  2005/03/30 01:22:46  archan
00053  * Fixed mistakes in last updates. Add
00054  *
00055  * 
00056  * 02-Jul-1997  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00057  *              Bugfix: Added byteswapping in bio_verify_chksum().
00058  * 
00059  * 18-Dec-1996  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00060  *              Created.
00061  */
00062 
00063 #include <stdio.h>
00064 #include <string.h>
00065 #include <assert.h>
00066 
00067 #ifdef _MSC_VER
00068 #pragma warning (disable: 4996)
00069 #endif
00070 
00071 #include "sphinxbase/bio.h"
00072 #include "sphinxbase/err.h"
00073 #include "sphinxbase/ckd_alloc.h"
00074 
00075 
00076 #define BIO_HDRARG_MAX  32
00077 #define END_COMMENT     "*end_comment*\n"
00078 
00079 
00080 static void
00081 bcomment_read(FILE * fp)
00082 {
00083     __BIGSTACKVARIABLE__ char iline[16384];
00084 
00085     while (fgets(iline, sizeof(iline), fp) != NULL) {
00086         if (strcmp(iline, END_COMMENT) == 0)
00087             return;
00088     }
00089     E_FATAL("Missing %s marker\n", END_COMMENT);
00090 }
00091 
00092 
00093 static int32
00094 swap_check(FILE * fp)
00095 {
00096     uint32 magic;
00097 
00098     if (fread(&magic, sizeof(uint32), 1, fp) != 1) {
00099         E_ERROR("Cannot read BYTEORDER MAGIC NO.\n");
00100         return -1;
00101     }
00102 
00103     if (magic != BYTE_ORDER_MAGIC) {
00104         /* either need to swap or got bogus magic number */
00105         SWAP_INT32(&magic);
00106 
00107         if (magic == BYTE_ORDER_MAGIC)
00108             return 1;
00109 
00110         SWAP_INT32(&magic);
00111         E_ERROR("Bad BYTEORDER MAGIC NO: %08x, expecting %08x\n",
00112                 magic, BYTE_ORDER_MAGIC);
00113         return -1;
00114     }
00115 
00116     return 0;
00117 }
00118 
00119 
/**
 * Release the name and value arrays produced by bio_readhdr().
 *
 * Every string up to the NULL entry that terminates argname is freed,
 * together with the value stored at the same index in argval, and then the
 * two arrays themselves are freed.  Nothing is done when argname is NULL.
 *
 * @param argname  NULL-terminated array of argument names, or NULL.
 * @param argval   Array of values parallel to argname.
 */
void
bio_hdrarg_free(char **argname, char **argval)
{
    char **name;
    char **val;

    if (argname == NULL)
        return;

    for (name = argname, val = argval; *name != NULL; name++, val++) {
        ckd_free(*name);
        ckd_free(*val);
    }
    ckd_free(argname);
    ckd_free(argval);
}
00134 
00135 
00136 int32
00137 bio_writehdr_version(FILE * fp, char *version)
00138 {
00139     uint32 b;
00140 
00141     fprintf(fp, "s3\n");
00142     fprintf(fp, "version %s\n", version);
00143     fprintf(fp, "endhdr\n");
00144     fflush(fp);
00145 
00146     b = (uint32) BYTE_ORDER_MAGIC;
00147     fwrite(&b, sizeof(uint32), 1, fp);
00148     fflush(fp);
00149 
00150     return 0;
00151 }
00152 
00153 
00154 int32
00155 bio_writehdr(FILE *fp, ...)
00156 {
00157     char const *key;
00158     va_list args;
00159     uint32 b;
00160 
00161     fprintf(fp, "s3\n");
00162     va_start(args, fp);
00163     while ((key = va_arg(args, char const *)) != NULL) {
00164         char const *val = va_arg(args, char const *);
00165         if (val == NULL) {
00166             E_ERROR("Wrong number of arguments\n");
00167             va_end(args);
00168             return -1;
00169         }
00170         fprintf(fp, "%s %s\n", key, val);
00171     }
00172     va_end(args);
00173 
00174     fprintf(fp, "endhdr\n");
00175     fflush(fp);
00176 
00177     b = (uint32) BYTE_ORDER_MAGIC;
00178     if (fwrite(&b, sizeof(uint32), 1, fp) != 1)
00179         return -1;
00180     fflush(fp);
00181 
00182     return 0;
00183 }
00184 
00185 
00186 int32
00187 bio_readhdr(FILE * fp, char ***argname, char ***argval, int32 * swap)
00188 {
00189     __BIGSTACKVARIABLE__ char line[16384], word[4096];
00190     int32 i, l;
00191     int32 lineno;
00192 
00193     *argname = (char **) ckd_calloc(BIO_HDRARG_MAX + 1, sizeof(char *));
00194     *argval = (char **) ckd_calloc(BIO_HDRARG_MAX, sizeof(char *));
00195 
00196     lineno = 0;
00197     if (fgets(line, sizeof(line), fp) == NULL){
00198         E_ERROR("Premature EOF, line %d\n", lineno);
00199         goto error_out;
00200     }
00201     lineno++;
00202 
00203     if ((line[0] == 's') && (line[1] == '3') && (line[2] == '\n')) {
00204         /* New format (post Dec-1996, including checksums); read argument-value pairs */
00205         for (i = 0;;) {
00206             if (fgets(line, sizeof(line), fp) == NULL) {
00207                 E_ERROR("Premature EOF, line %d\n", lineno);
00208                 goto error_out;
00209             }
00210             lineno++;
00211 
00212             if (sscanf(line, "%s%n", word, &l) != 1) {
00213                 E_ERROR("Header format error, line %d\n", lineno);
00214                 goto error_out;
00215             }
00216             if (strcmp(word, "endhdr") == 0)
00217                 break;
00218             if (word[0] == '#') /* Skip comments */
00219                 continue;
00220 
00221             if (i >= BIO_HDRARG_MAX) {
00222                 E_ERROR
00223                     ("Max arg-value limit(%d) exceeded; increase BIO_HDRARG_MAX\n",
00224                      BIO_HDRARG_MAX);
00225                 goto error_out;
00226             }
00227 
00228             (*argname)[i] = ckd_salloc(word);
00229             if (sscanf(line + l, "%s", word) != 1) {      /* Multi-word values not allowed */
00230                 E_ERROR("Header format error, line %d\n", lineno);
00231                 goto error_out;
00232             }
00233             (*argval)[i] = ckd_salloc(word);
00234             i++;
00235         }
00236     }
00237     else {
00238         /* Old format (without checksums); the first entry must be the version# */
00239         if (sscanf(line, "%s", word) != 1) {
00240             E_ERROR("Header format error, line %d\n", lineno);
00241             goto error_out;
00242         }
00243 
00244         (*argname)[0] = ckd_salloc("version");
00245         (*argval)[0] = ckd_salloc(word);
00246         i = 1;
00247 
00248         bcomment_read(fp);
00249     }
00250     (*argname)[i] = NULL;
00251 
00252     if ((*swap = swap_check(fp)) < 0) {
00253         E_ERROR("swap_check failed\n");
00254         goto error_out;
00255     }
00256 
00257     return 0;
00258 error_out:
00259     bio_hdrarg_free(*argname, *argval);
00260     *argname = *argval = NULL;
00261     return -1;
00262 }
00263 
00264 
00265 static uint32
00266 chksum_accum(void *buf, int32 el_sz, int32 n_el, uint32 sum)
00267 {
00268     int32 i;
00269     uint8 *i8;
00270     uint16 *i16;
00271     uint32 *i32;
00272 
00273     switch (el_sz) {
00274     case 1:
00275         i8 = (uint8 *) buf;
00276         for (i = 0; i < n_el; i++)
00277             sum = (sum << 5 | sum >> 27) + i8[i];
00278         break;
00279     case 2:
00280         i16 = (uint16 *) buf;
00281         for (i = 0; i < n_el; i++)
00282             sum = (sum << 10 | sum >> 22) + i16[i];
00283         break;
00284     case 4:
00285         i32 = (uint32 *) buf;
00286         for (i = 0; i < n_el; i++)
00287             sum = (sum << 20 | sum >> 12) + i32[i];
00288         break;
00289     default:
00290         E_FATAL("Unsupported elemsize for checksum: %d\n", el_sz);
00291         break;
00292     }
00293 
00294     return sum;
00295 }
00296 
00297 
00298 static void
00299 swap_buf(void *buf, int32 el_sz, int32 n_el)
00300 {
00301     int32 i;
00302     uint16 *buf16;
00303     uint32 *buf32;
00304 
00305     switch (el_sz) {
00306     case 1:
00307         break;
00308     case 2:
00309         buf16 = (uint16 *) buf;
00310         for (i = 0; i < n_el; i++)
00311             SWAP_INT16(buf16 + i);
00312         break;
00313     case 4:
00314         buf32 = (uint32 *) buf;
00315         for (i = 0; i < n_el; i++)
00316             SWAP_INT32(buf32 + i);
00317         break;
00318     default:
00319         E_FATAL("Unsupported elemsize for byteswapping: %d\n", el_sz);
00320         break;
00321     }
00322 }
00323 
00324 
00325 int32
00326 bio_fread(void *buf, int32 el_sz, int32 n_el, FILE * fp, int32 swap,
00327           uint32 * chksum)
00328 {
00329     if (fread(buf, el_sz, n_el, fp) != (size_t) n_el)
00330         return -1;
00331 
00332     if (swap)
00333         swap_buf(buf, el_sz, n_el);
00334 
00335     if (chksum)
00336         *chksum = chksum_accum(buf, el_sz, n_el, *chksum);
00337 
00338     return n_el;
00339 }
00340 
00341 int32
00342 bio_fwrite(void *buf, int32 el_sz, int32 n_el, FILE *fp,
00343            int32 swap, uint32 *chksum)
00344 {
00345     if (chksum)
00346         *chksum = chksum_accum(buf, el_sz, n_el, *chksum);
00347     if (swap) {
00348         void *nbuf;
00349         int rv;
00350 
00351         nbuf = ckd_calloc(n_el, el_sz);
00352         memcpy(nbuf, buf, n_el * el_sz);
00353         swap_buf(nbuf, el_sz, n_el);
00354         rv = fwrite(nbuf, el_sz, n_el, fp);
00355         ckd_free(nbuf);
00356         return rv;
00357     }
00358     else {
00359         return fwrite(buf, el_sz, n_el, fp);
00360     }
00361 }
00362 
00363 int32
00364 bio_fread_1d(void **buf, size_t el_sz, uint32 * n_el, FILE * fp,
00365              int32 sw, uint32 * ck)
00366 {
00367     /* Read 1-d array size */
00368     if (bio_fread(n_el, sizeof(int32), 1, fp, sw, ck) != 1)
00369         E_FATAL("fread(arraysize) failed\n");
00370     if (*n_el <= 0)
00371         E_FATAL("Bad arraysize: %d\n", *n_el);
00372 
00373     /* Allocate memory for array data */
00374     *buf = (void *) ckd_calloc(*n_el, el_sz);
00375 
00376     /* Read array data */
00377     if (bio_fread(*buf, el_sz, *n_el, fp, sw, ck) != *n_el)
00378         E_FATAL("fread(arraydata) failed\n");
00379 
00380     return *n_el;
00381 }
00382 
00383 int32
00384 bio_fread_2d(void ***arr,
00385              size_t e_sz,
00386              uint32 *d1,
00387              uint32 *d2,
00388              FILE *fp,
00389              uint32 swap,
00390              uint32 *chksum)
00391 {
00392     uint32 l_d1, l_d2;
00393     uint32 n;
00394     size_t ret;
00395     void *raw;
00396     
00397     ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum);
00398     if (ret != 1) {
00399         if (ret == 0) {
00400             E_ERROR_SYSTEM("Unable to read complete data");
00401         }
00402         else {
00403             E_ERROR_SYSTEM("OS error in bio_fread_2d");
00404         }
00405         return -1;
00406     }
00407     ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum);
00408     if (ret != 1) {
00409         if (ret == 0) {
00410             E_ERROR_SYSTEM("Unable to read complete data");
00411         }
00412         else {
00413             E_ERROR_SYSTEM("OS error in bio_fread_2d");
00414         }
00415         return -1;
00416     }
00417     if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n)
00418         return -1;
00419 
00420     assert(n == l_d1*l_d2);
00421 
00422     *d1 = l_d1;
00423     *d2 = l_d2;
00424     *arr = ckd_alloc_2d_ptr(l_d1, l_d2, raw, e_sz);
00425 
00426     return n;
00427 }
00428 
00429 int32
00430 bio_fread_3d(void ****arr,
00431              size_t e_sz,
00432              uint32 *d1,
00433              uint32 *d2,
00434              uint32 *d3,
00435              FILE *fp,
00436              uint32 swap,
00437              uint32 *chksum)
00438 {
00439     uint32 l_d1;
00440     uint32 l_d2;
00441     uint32 l_d3;
00442     uint32 n;
00443     void *raw;
00444     size_t ret;
00445 
00446     ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum);
00447     if (ret != 1) {
00448         if (ret == 0) {
00449             E_ERROR_SYSTEM("Unable to read complete data");
00450         }
00451         else {
00452             E_ERROR_SYSTEM("OS error in bio_fread_3d");
00453         }
00454         return -1;
00455     }
00456     ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum);
00457     if (ret != 1) {
00458         if (ret == 0) {
00459             E_ERROR_SYSTEM("Unable to read complete data");
00460         }
00461         else {
00462             E_ERROR_SYSTEM("OS error in bio_fread_3d");
00463         }
00464         return -1;
00465     }
00466     ret = bio_fread(&l_d3, sizeof(uint32), 1, fp, swap, chksum);
00467     if (ret != 1) {
00468         if (ret == 0) {
00469             E_ERROR_SYSTEM("Unable to read complete data");
00470         }
00471         else {
00472             E_ERROR_SYSTEM("OS error in bio_fread_3d");
00473         }
00474         return -1;
00475     }
00476 
00477     if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != n) {
00478         return -1;
00479     }
00480 
00481     assert(n == l_d1 * l_d2 * l_d3);
00482 
00483     *arr = ckd_alloc_3d_ptr(l_d1, l_d2, l_d3, raw, e_sz);
00484     *d1 = l_d1;
00485     *d2 = l_d2;
00486     *d3 = l_d3;
00487     
00488     return n;
00489 }
00490 
00491 void
00492 bio_verify_chksum(FILE * fp, int32 byteswap, uint32 chksum)
00493 {
00494     uint32 file_chksum;
00495 
00496     if (fread(&file_chksum, sizeof(uint32), 1, fp) != 1)
00497         E_FATAL("fread(chksum) failed\n");
00498     if (byteswap)
00499         SWAP_INT32(&file_chksum);
00500     if (file_chksum != chksum)
00501         E_FATAL
00502             ("Checksum error; file-checksum %08x, computed %08x\n",
00503              file_chksum, chksum);
00504 }
00505 
00506 int16*
00507 bio_read_wavfile(char const *directory,
00508                  char const *filename,
00509                  char const *extension,
00510                  int32 header,
00511                  int32 endian,
00512                  int32 *nsamps)
00513 {
00514     FILE *uttfp;
00515     char *inputfile;
00516     int32 n, l;
00517     int16 *data;
00518 
00519     n = strlen(extension);
00520     l = strlen(filename);
00521     if ((n <= l) && (0 == strcmp(filename + l - n, extension)))
00522         extension = "";
00523     inputfile = ckd_calloc(strlen(directory) + l + n + 2, 1);
00524     if (directory) {
00525         sprintf(inputfile, "%s/%s%s", directory, filename, extension);
00526     } else {
00527         sprintf(inputfile, "%s%s", filename, extension);
00528     }
00529 
00530     if ((uttfp = fopen(inputfile, "rb")) == NULL) {
00531         E_FATAL_SYSTEM("Failed to open file '%s' for reading", inputfile);
00532     }
00533     fseek(uttfp, 0, SEEK_END);
00534     n = ftell(uttfp);
00535     fseek(uttfp, 0, SEEK_SET);
00536     if (header > 0) {
00537         if (fseek(uttfp, header, SEEK_SET) < 0) {
00538             E_ERROR_SYSTEM("Failed to move to an offset %d in a file '%s'", header, inputfile);
00539             fclose(uttfp);
00540             ckd_free(inputfile);
00541             return NULL;
00542         }
00543         n -= header;
00544     }
00545     n /= sizeof(int16);
00546     data = ckd_calloc(n, sizeof(*data));
00547     if ((l = fread(data, sizeof(int16), n, uttfp)) < n) {
00548         E_ERROR_SYSTEM("Failed to read %d samples from %s: %d", n, inputfile, l);
00549         ckd_free(data);
00550         ckd_free(inputfile);
00551         fclose(uttfp);
00552         return NULL;
00553     }
00554     ckd_free(inputfile);
00555     fclose(uttfp);
00556     if (nsamps) *nsamps = n;
00557 
00558     return data;
00559 }