SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 #include <config.h> 00039 00040 #include <stdio.h> 00041 #include <stdlib.h> 00042 #include <string.h> 00043 #ifdef HAVE_UNISTD_H 00044 #include <unistd.h> 00045 #endif 00046 #ifdef HAVE_SYS_STAT_H 00047 #include <sys/stat.h> 00048 #endif 00049 #ifdef HAVE_SYS_TYPES_H 00050 #include <sys/types.h> 00051 #endif 00052 #include <assert.h> 00053 00054 #include "sphinxbase/pio.h" 00055 #include "sphinxbase/filename.h" 00056 #include "sphinxbase/err.h" 00057 #include "sphinxbase/strfuncs.h" 00058 #include "sphinxbase/ckd_alloc.h" 00059 00060 #ifndef EXEEXT 00061 #define EXEEXT "" 00062 #endif 00063 00064 enum { 00065 COMP_NONE, 00066 COMP_COMPRESS, 00067 COMP_GZIP, 00068 COMP_BZIP2 00069 }; 00070 00071 static void 00072 guess_comptype(char const *file, int32 *ispipe, int32 *isgz) 00073 { 00074 int k; 00075 00076 k = strlen(file); 00077 *ispipe = 0; 00078 *isgz = COMP_NONE; 00079 if ((k > 2) 00080 && ((strcmp(file + k - 2, ".Z") == 0) 00081 || (strcmp(file + k - 2, ".z") == 0))) { 00082 *ispipe = 1; 00083 *isgz = COMP_COMPRESS; 00084 } 00085 else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0) 00086 || (strcmp(file + k - 3, ".GZ") == 0))) { 00087 *ispipe = 1; 00088 *isgz = COMP_GZIP; 00089 } 00090 else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0) 00091 || (strcmp(file + k - 4, ".BZ2") == 0))) { 00092 *ispipe = 1; 00093 *isgz = COMP_BZIP2; 00094 } 00095 } 00096 00097 FILE * 00098 fopen_comp(const char *file, const char *mode, int32 * ispipe) 00099 { 00100 FILE *fp; 00101 00102 #ifndef HAVE_POPEN 00103 *ispipe = 0; /* No popen() on WinCE */ 00104 #else /* HAVE_POPEN */ 00105 int32 isgz; 00106 guess_comptype(file, ispipe, &isgz); 00107 #endif /* HAVE_POPEN */ 00108 00109 if (*ispipe) { 00110 #ifndef HAVE_POPEN 00111 /* Shouldn't get here, anyway */ 00112 E_FATAL("No popen() on WinCE\n"); 00113 #else 00114 if (strcmp(mode, "r") == 0) { 00115 char *command; 00116 switch (isgz) { 00117 case COMP_GZIP: 00118 command = string_join("gunzip" EXEEXT, " -c ", file, NULL); 00119 break; 00120 case COMP_COMPRESS: 00121 command = string_join("zcat" EXEEXT, " ", file, NULL); 00122 break; 00123 case COMP_BZIP2: 00124 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL); 00125 break; 00126 default: 00127 command = NULL; /* Make compiler happy. */ 00128 E_FATAL("Unknown compression type %d\n", isgz); 00129 } 00130 if ((fp = popen(command, mode)) == NULL) { 00131 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode); 00132 ckd_free(command); 00133 return NULL; 00134 } 00135 ckd_free(command); 00136 } 00137 else if (strcmp(mode, "w") == 0) { 00138 char *command; 00139 switch (isgz) { 00140 case COMP_GZIP: 00141 command = string_join("gzip" EXEEXT, " > ", file, NULL); 00142 break; 00143 case COMP_COMPRESS: 00144 command = string_join("compress" EXEEXT, " -c > ", file, NULL); 00145 break; 00146 case COMP_BZIP2: 00147 command = string_join("bzip2" EXEEXT, " > ", file, NULL); 00148 break; 00149 default: 00150 command = NULL; /* Make compiler happy. */ 00151 E_FATAL("Unknown compression type %d\n", isgz); 00152 } 00153 if ((fp = popen(command, mode)) == NULL) { 00154 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode); 00155 ckd_free(command); 00156 return NULL; 00157 } 00158 ckd_free(command); 00159 } 00160 else { 00161 E_ERROR("Compressed file operation for mode %s is not supported", mode); 00162 return NULL; 00163 } 00164 #endif /* HAVE_POPEN */ 00165 } 00166 else { 00167 fp = fopen(file, mode); 00168 } 00169 00170 return (fp); 00171 } 00172 00173 00174 void 00175 fclose_comp(FILE * fp, int32 ispipe) 00176 { 00177 if (ispipe) { 00178 #ifdef HAVE_POPEN 00179 #if defined(_WIN32) && (!defined(__SYMBIAN32__)) 00180 _pclose(fp); 00181 #else 00182 pclose(fp); 00183 #endif 00184 #endif 00185 } 00186 else 00187 fclose(fp); 00188 } 00189 00190 00191 FILE * 00192 fopen_compchk(const char *file, int32 * ispipe) 00193 { 00194 #ifndef HAVE_POPEN 00195 *ispipe = 0; /* No popen() on WinCE */ 00196 /* And therefore the rest of this function is useless. */ 00197 return (fopen_comp(file, "r", ispipe)); 00198 #else /* HAVE_POPEN */ 00199 int32 isgz; 00200 FILE *fh; 00201 00202 /* First just try to fopen_comp() it */ 00203 if ((fh = fopen_comp(file, "r", ispipe)) != NULL) 00204 return fh; 00205 else { 00206 char *tmpfile; 00207 int k; 00208 00209 /* File doesn't exist; try other compressed/uncompressed form, as appropriate */ 00210 guess_comptype(file, ispipe, &isgz); 00211 k = strlen(file); 00212 tmpfile = ckd_calloc(k+5, 1); 00213 strcpy(tmpfile, file); 00214 switch (isgz) { 00215 case COMP_GZIP: 00216 tmpfile[k - 3] = '\0'; 00217 break; 00218 case COMP_BZIP2: 00219 tmpfile[k - 4] = '\0'; 00220 break; 00221 case COMP_COMPRESS: 00222 tmpfile[k - 2] = '\0'; 00223 break; 00224 case COMP_NONE: 00225 strcpy(tmpfile + k, ".gz"); 00226 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { 00227 E_WARN("Using %s instead of %s\n", tmpfile, file); 00228 ckd_free(tmpfile); 00229 return fh; 00230 } 00231 strcpy(tmpfile + k, ".bz2"); 00232 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { 00233 E_WARN("Using %s instead of %s\n", tmpfile, file); 00234 ckd_free(tmpfile); 00235 return fh; 00236 } 00237 strcpy(tmpfile + k, ".Z"); 00238 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { 00239 E_WARN("Using %s instead of %s\n", tmpfile, file); 00240 ckd_free(tmpfile); 00241 return fh; 00242 } 00243 ckd_free(tmpfile); 00244 return NULL; 00245 } 00246 E_WARN("Using %s instead of %s\n", tmpfile, file); 00247 fh = fopen_comp(tmpfile, "r", ispipe); 00248 ckd_free(tmpfile); 00249 return NULL; 00250 } 00251 #endif /* HAVE_POPEN */ 00252 } 00253 00254 lineiter_t * 00255 lineiter_start(FILE *fh) 00256 { 00257 lineiter_t *li; 00258 00259 li = ckd_calloc(1, sizeof(*li)); 00260 li->buf = ckd_malloc(128); 00261 li->buf[0] = '\0'; 00262 li->bsiz = 128; 00263 li->len = 0; 00264 li->fh = fh; 00265 00266 li = lineiter_next(li); 00267 00268 /* Strip the UTF-8 BOM */ 00269 00270 if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) { 00271 memmove(li->buf, li->buf + 3, strlen(li->buf + 1)); 00272 li->len -= 3; 00273 } 00274 00275 return li; 00276 } 00277 00278 lineiter_t * 00279 lineiter_start_clean(FILE *fh) 00280 { 00281 lineiter_t *li; 00282 00283 li = lineiter_start(fh); 00284 00285 if (li == NULL) 00286 return li; 00287 00288 li->clean = TRUE; 00289 00290 if (li->buf && li->buf[0] == '#') { 00291 li = lineiter_next(li); 00292 } else { 00293 string_trim(li->buf, STRING_BOTH); 00294 } 00295 00296 return li; 00297 } 00298 00299 00300 static lineiter_t * 00301 lineiter_next_plain(lineiter_t *li) 00302 { 00303 /* We are reading the next line */ 00304 li->lineno++; 00305 00306 /* Read a line and check for EOF. */ 00307 if (fgets(li->buf, li->bsiz, li->fh) == NULL) { 00308 lineiter_free(li); 00309 return NULL; 00310 } 00311 /* If we managed to read the whole thing, then we are done 00312 * (this will be by far the most common result). */ 00313 li->len = strlen(li->buf); 00314 if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') 00315 return li; 00316 00317 /* Otherwise we have to reallocate and keep going. */ 00318 while (1) { 00319 li->bsiz *= 2; 00320 li->buf = ckd_realloc(li->buf, li->bsiz); 00321 /* If we get an EOF, we are obviously done. */ 00322 if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) { 00323 li->len += strlen(li->buf + li->len); 00324 return li; 00325 } 00326 li->len += strlen(li->buf + li->len); 00327 /* If we managed to read the whole thing, then we are done. */ 00328 if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') 00329 return li; 00330 } 00331 00332 /* Shouldn't get here. */ 00333 return li; 00334 } 00335 00336 00337 lineiter_t * 00338 lineiter_next(lineiter_t *li) 00339 { 00340 if (!li->clean) 00341 return lineiter_next_plain(li); 00342 00343 for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) { 00344 if (li->buf && li->buf[0] != '#') { 00345 li->buf = string_trim(li->buf, STRING_BOTH); 00346 break; 00347 } 00348 } 00349 return li; 00350 } 00351 00352 int lineiter_lineno(lineiter_t *li) 00353 { 00354 return li->lineno; 00355 } 00356 00357 void 00358 lineiter_free(lineiter_t *li) 00359 { 00360 if (li == NULL) 00361 return; 00362 ckd_free(li->buf); 00363 ckd_free(li); 00364 } 00365 00366 char * 00367 fread_line(FILE *stream, size_t *out_len) 00368 { 00369 char *output, *outptr; 00370 char buf[128]; 00371 00372 output = outptr = NULL; 00373 while (fgets(buf, sizeof(buf), stream)) { 00374 size_t len = strlen(buf); 00375 /* Append this data to the buffer. */ 00376 if (output == NULL) { 00377 output = ckd_malloc(len + 1); 00378 outptr = output; 00379 } 00380 else { 00381 size_t cur = outptr - output; 00382 output = ckd_realloc(output, cur + len + 1); 00383 outptr = output + cur; 00384 } 00385 memcpy(outptr, buf, len + 1); 00386 outptr += len; 00387 /* Stop on a short read or end of line. */ 00388 if (len < sizeof(buf)-1 || buf[len-1] == '\n') 00389 break; 00390 } 00391 if (out_len) *out_len = outptr - output; 00392 return output; 00393 } 00394 00395 #define FREAD_RETRY_COUNT 60 00396 00397 int32 00398 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream) 00399 { 00400 char *data; 00401 uint32 n_items_read; 00402 uint32 n_items_rem; 00403 uint32 n_retry_rem; 00404 int32 loc; 00405 00406 n_retry_rem = FREAD_RETRY_COUNT; 00407 00408 data = pointer; 00409 loc = 0; 00410 n_items_rem = num_items; 00411 00412 do { 00413 n_items_read = fread(&data[loc], size, n_items_rem, stream); 00414 00415 n_items_rem -= n_items_read; 00416 00417 if (n_items_rem > 0) { 00418 /* an incomplete read occurred */ 00419 00420 if (n_retry_rem == 0) 00421 return -1; 00422 00423 if (n_retry_rem == FREAD_RETRY_COUNT) { 00424 E_ERROR_SYSTEM("fread() failed; retrying...\n"); 00425 } 00426 00427 --n_retry_rem; 00428 00429 loc += n_items_read * size; 00430 #ifdef HAVE_UNISTD_H 00431 sleep(1); 00432 #endif 00433 } 00434 } while (n_items_rem > 0); 00435 00436 return num_items; 00437 } 00438 00439 00440 /* Silvio Moioli: updated to use Unicode */ 00441 #ifdef _WIN32_WCE /* No stat() on WinCE */ 00442 int32 00443 stat_retry(const char *file, struct stat * statbuf) 00444 { 00445 WIN32_FIND_DATAW file_data; 00446 HANDLE *h; 00447 wchar_t *wfile; 00448 size_t len; 00449 00450 len = mbstowcs(NULL, file, 0) + 1; 00451 wfile = ckd_calloc(len, sizeof(*wfile)); 00452 mbstowcs(wfile, file, len); 00453 if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) { 00454 ckd_free(wfile); 00455 return -1; 00456 } 00457 ckd_free(wfile); 00458 memset(statbuf, 0, sizeof(statbuf)); 00459 statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime; 00460 statbuf->st_size = file_data.nFileSizeLow; 00461 FindClose(h); 00462 00463 return 0; 00464 } 00465 00466 00467 int32 00468 stat_mtime(const char *file) 00469 { 00470 struct stat statbuf; 00471 00472 if (stat_retry(file, &statbuf) != 0) 00473 return -1; 00474 00475 return ((int32) statbuf.st_mtime); 00476 } 00477 #else 00478 #define STAT_RETRY_COUNT 10 00479 int32 00480 stat_retry(const char *file, struct stat * statbuf) 00481 { 00482 int32 i; 00483 00484 00485 00486 for (i = 0; i < STAT_RETRY_COUNT; i++) { 00487 00488 #ifndef HAVE_SYS_STAT_H 00489 FILE *fp; 00490 00491 if ((fp=(FILE *)fopen(file, "r"))!= 0) 00492 { 00493 fseek( fp, 0, SEEK_END); 00494 statbuf->st_size = ftell( fp ); 00495 fclose(fp); 00496 return 0; 00497 } 00498 00499 #else /* HAVE_SYS_STAT_H */ 00500 if (stat(file, statbuf) == 0) 00501 return 0; 00502 #endif 00503 if (i == 0) { 00504 E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file); 00505 } 00506 #ifdef HAVE_UNISTD_H 00507 sleep(1); 00508 #endif 00509 } 00510 00511 return -1; 00512 } 00513 00514 int32 00515 stat_mtime(const char *file) 00516 { 00517 struct stat statbuf; 00518 00519 #ifdef HAVE_SYS_STAT_H 00520 if (stat(file, &statbuf) != 0) 00521 return -1; 00522 #else /* HAVE_SYS_STAT_H */ 00523 if (stat_retry(file, &statbuf) != 0) 00524 return -1; 00525 #endif /* HAVE_SYS_STAT_H */ 00526 00527 return ((int32) statbuf.st_mtime); 00528 } 00529 #endif /* !_WIN32_WCE */ 00530 00531 struct bit_encode_s { 00532 FILE *fh; 00533 unsigned char buf, bbits; 00534 int16 refcount; 00535 }; 00536 00537 bit_encode_t * 00538 bit_encode_attach(FILE *outfh) 00539 { 00540 bit_encode_t *be; 00541 00542 be = ckd_calloc(1, sizeof(*be)); 00543 be->refcount = 1; 00544 be->fh = outfh; 00545 return be; 00546 } 00547 00548 bit_encode_t * 00549 bit_encode_retain(bit_encode_t *be) 00550 { 00551 ++be->refcount; 00552 return be; 00553 } 00554 00555 int 00556 bit_encode_free(bit_encode_t *be) 00557 { 00558 if (be == NULL) 00559 return 0; 00560 if (--be->refcount > 0) 00561 return be->refcount; 00562 ckd_free(be); 00563 00564 return 0; 00565 } 00566 00567 int 00568 bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits) 00569 { 00570 int tbits; 00571 00572 tbits = nbits + be->bbits; 00573 if (tbits < 8) { 00574 /* Append to buffer. */ 00575 be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits)); 00576 } 00577 else { 00578 int i = 0; 00579 while (tbits >= 8) { 00580 /* Shift bits out of the buffer and splice with high-order bits */ 00581 fputc(be->buf | ((bits[i]) >> be->bbits), be->fh); 00582 /* Put low-order bits back into buffer */ 00583 be->buf = (bits[i] << (8 - be->bbits)) & 0xff; 00584 tbits -= 8; 00585 ++i; 00586 } 00587 } 00588 /* tbits contains remaining number of bits. */ 00589 be->bbits = tbits; 00590 00591 return nbits; 00592 } 00593 00594 int 00595 bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits) 00596 { 00597 unsigned char bits[4]; 00598 codeword <<= (32 - nbits); 00599 bits[0] = (codeword >> 24) & 0xff; 00600 bits[1] = (codeword >> 16) & 0xff; 00601 bits[2] = (codeword >> 8) & 0xff; 00602 bits[3] = codeword & 0xff; 00603 return bit_encode_write(be, bits, nbits); 00604 } 00605 00606 int 00607 bit_encode_flush(bit_encode_t *be) 00608 { 00609 if (be->bbits) { 00610 fputc(be->buf, be->fh); 00611 be->bbits = 0; 00612 } 00613 return 0; 00614 } 00615 00616 #if defined(HAVE_SYS_STAT_H) && !defined(__MINGW32__) /* Unix, Cygwin, doesn't work on MINGW */ 00617 int 00618 build_directory(const char *path) 00619 { 00620 int rv; 00621 00622 /* Utterly failed... */ 00623 if (strlen(path) == 0) 00624 return -1; 00625 /* Utterly succeeded... */ 00626 else if ((rv = mkdir(path, 0777)) == 0) 00627 return 0; 00628 /* Or, it already exists... */ 00629 else if (errno == EEXIST) 00630 return 0; 00631 else if (errno != ENOENT) { 00632 E_ERROR_SYSTEM("Failed to create %s"); 00633 return -1; 00634 } 00635 else { 00636 char *dirname = ckd_salloc(path); 00637 path2dirname(path, dirname); 00638 build_directory(dirname); 00639 ckd_free(dirname); 00640 return mkdir(path, 0777); 00641 } 00642 } 00643 #elif defined(_WIN32) 00644 /* FIXME: Implement this. */ 00645 int 00646 build_directory(const char *path) 00647 { 00648 E_ERROR("build_directory() unimplemented on your platform!\n"); 00649 return -1; 00650 } 00651 #else 00652 int 00653 build_directory(const char *path) 00654 { 00655 E_ERROR("build_directory() unimplemented on your platform!\n"); 00656 return -1; 00657 } 00658 #endif