SphinxBase 0.6

src/libsphinxbase/util/pio.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 #include <config.h>
00039 
00040 #include <stdio.h>
00041 #include <stdlib.h>
00042 #include <string.h>
00043 #ifdef HAVE_UNISTD_H
00044 #include <unistd.h>
00045 #endif
00046 #ifdef HAVE_SYS_STAT_H
00047 #include <sys/stat.h>
00048 #endif
00049 #ifdef HAVE_SYS_TYPES_H
00050 #include <sys/types.h>
00051 #endif
00052 #include <assert.h>
00053 
00054 #include "sphinxbase/pio.h"
00055 #include "sphinxbase/filename.h"
00056 #include "sphinxbase/err.h"
00057 #include "sphinxbase/strfuncs.h"
00058 #include "sphinxbase/ckd_alloc.h"
00059 
00060 #ifndef EXEEXT
00061 #define EXEEXT ""
00062 #endif
00063 
/* Compression schemes that guess_comptype() infers from a file name suffix. */
enum {
    COMP_NONE = 0,      /* no recognised compression suffix */
    COMP_COMPRESS = 1,  /* ".Z" or ".z" */
    COMP_GZIP = 2,      /* ".gz" or ".GZ" */
    COMP_BZIP2 = 3      /* ".bz2" or ".BZ2" */
};
00070 
00071 static void
00072 guess_comptype(char const *file, int32 *ispipe, int32 *isgz)
00073 {
00074     int k;
00075 
00076     k = strlen(file);
00077     *ispipe = 0;
00078     *isgz = COMP_NONE;
00079     if ((k > 2)
00080         && ((strcmp(file + k - 2, ".Z") == 0)
00081             || (strcmp(file + k - 2, ".z") == 0))) {
00082         *ispipe = 1;
00083         *isgz = COMP_COMPRESS;
00084     }
00085     else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0)
00086                         || (strcmp(file + k - 3, ".GZ") == 0))) {
00087         *ispipe = 1;
00088         *isgz = COMP_GZIP;
00089     }
00090     else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0)
00091                         || (strcmp(file + k - 4, ".BZ2") == 0))) {
00092         *ispipe = 1;
00093         *isgz = COMP_BZIP2;
00094     }
00095 }
00096 
00097 FILE *
00098 fopen_comp(const char *file, const char *mode, int32 * ispipe)
00099 {
00100     FILE *fp;
00101 
00102 #ifndef HAVE_POPEN
00103     *ispipe = 0; /* No popen() on WinCE */
00104 #else /* HAVE_POPEN */
00105     int32 isgz;
00106     guess_comptype(file, ispipe, &isgz);
00107 #endif /* HAVE_POPEN */
00108 
00109     if (*ispipe) {
00110 #ifndef HAVE_POPEN
00111         /* Shouldn't get here, anyway */
00112         E_FATAL("No popen() on WinCE\n");
00113 #else
00114         if (strcmp(mode, "r") == 0) {
00115             char *command;
00116             switch (isgz) {
00117             case COMP_GZIP:
00118                 command = string_join("gunzip" EXEEXT, " -c ", file, NULL);
00119                 break;
00120             case COMP_COMPRESS:
00121                 command = string_join("zcat" EXEEXT, " ", file, NULL);
00122                 break;
00123             case COMP_BZIP2:
00124                 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL);
00125                 break;
00126             default:
00127                 command = NULL; /* Make compiler happy. */
00128                 E_FATAL("Unknown  compression type %d\n", isgz);
00129             }
00130             if ((fp = popen(command, mode)) == NULL) {
00131                 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
00132                 ckd_free(command);
00133                 return NULL;
00134             }
00135             ckd_free(command);
00136         }
00137         else if (strcmp(mode, "w") == 0) {
00138             char *command;
00139             switch (isgz) {
00140             case COMP_GZIP:
00141                 command = string_join("gzip" EXEEXT, " > ", file, NULL);
00142                 break;
00143             case COMP_COMPRESS:
00144                 command = string_join("compress" EXEEXT, " -c > ", file, NULL);
00145                 break;
00146             case COMP_BZIP2:
00147                 command = string_join("bzip2" EXEEXT, " > ", file, NULL);
00148                 break;
00149             default:
00150                 command = NULL; /* Make compiler happy. */
00151                 E_FATAL("Unknown compression type %d\n", isgz);
00152             }
00153             if ((fp = popen(command, mode)) == NULL) {
00154                 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
00155                 ckd_free(command);
00156                 return NULL;
00157             }
00158             ckd_free(command);
00159         }
00160         else {
00161             E_ERROR("Compressed file operation for mode %s is not supported", mode);
00162             return NULL;
00163         }
00164 #endif /* HAVE_POPEN */
00165     }
00166     else {
00167         fp = fopen(file, mode);
00168     }
00169 
00170     return (fp);
00171 }
00172 
00173 
00174 void
00175 fclose_comp(FILE * fp, int32 ispipe)
00176 {
00177     if (ispipe) {
00178 #ifdef HAVE_POPEN
00179 #if defined(_WIN32) && (!defined(__SYMBIAN32__))
00180         _pclose(fp);
00181 #else
00182         pclose(fp);
00183 #endif
00184 #endif
00185     }
00186     else
00187         fclose(fp);
00188 }
00189 
00190 
00191 FILE *
00192 fopen_compchk(const char *file, int32 * ispipe)
00193 {
00194 #ifndef HAVE_POPEN
00195     *ispipe = 0; /* No popen() on WinCE */
00196     /* And therefore the rest of this function is useless. */
00197     return (fopen_comp(file, "r", ispipe));
00198 #else /* HAVE_POPEN */
00199     int32 isgz;
00200     FILE *fh;
00201 
00202     /* First just try to fopen_comp() it */
00203     if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
00204         return fh;
00205     else {
00206         char *tmpfile;
00207         int k;
00208 
00209         /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
00210         guess_comptype(file, ispipe, &isgz);
00211         k = strlen(file);
00212         tmpfile = ckd_calloc(k+5, 1);
00213         strcpy(tmpfile, file);
00214         switch (isgz) {
00215         case COMP_GZIP:
00216             tmpfile[k - 3] = '\0';
00217             break;
00218         case COMP_BZIP2:
00219             tmpfile[k - 4] = '\0';
00220             break;
00221         case COMP_COMPRESS:
00222             tmpfile[k - 2] = '\0';
00223             break;
00224         case COMP_NONE:
00225             strcpy(tmpfile + k, ".gz");
00226             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00227                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00228                 ckd_free(tmpfile);
00229                 return fh;
00230             }
00231             strcpy(tmpfile + k, ".bz2");
00232             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00233                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00234                 ckd_free(tmpfile);
00235                 return fh;
00236             }
00237             strcpy(tmpfile + k, ".Z");
00238             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00239                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00240                 ckd_free(tmpfile);
00241                 return fh;
00242             }
00243             ckd_free(tmpfile);
00244             return NULL;
00245         }
00246         E_WARN("Using %s instead of %s\n", tmpfile, file);
00247         fh = fopen_comp(tmpfile, "r", ispipe);
00248         ckd_free(tmpfile);
00249         return NULL;
00250     }
00251 #endif /* HAVE_POPEN */
00252 }
00253 
00254 lineiter_t *
00255 lineiter_start(FILE *fh)
00256 {
00257     lineiter_t *li;
00258 
00259     li = ckd_calloc(1, sizeof(*li));
00260     li->buf = ckd_malloc(128);
00261     li->buf[0] = '\0';
00262     li->bsiz = 128;
00263     li->len = 0;
00264     li->fh = fh;
00265 
00266     li = lineiter_next(li);
00267     
00268     /* Strip the UTF-8 BOM */
00269     
00270     if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) {
00271         memmove(li->buf, li->buf + 3, strlen(li->buf + 1));
00272         li->len -= 3;
00273     }
00274     
00275     return li;
00276 }
00277 
00278 lineiter_t *
00279 lineiter_start_clean(FILE *fh)
00280 {
00281     lineiter_t *li;
00282     
00283     li = lineiter_start(fh);
00284     
00285     if (li == NULL)
00286         return li;
00287     
00288     li->clean = TRUE;
00289     
00290     if (li->buf && li->buf[0] == '#') {
00291         li = lineiter_next(li);
00292     } else {
00293         string_trim(li->buf, STRING_BOTH);
00294     }
00295     
00296     return li;
00297 }
00298 
00299 
00300 static lineiter_t *
00301 lineiter_next_plain(lineiter_t *li)
00302 {
00303     /* We are reading the next line */
00304     li->lineno++;
00305     
00306     /* Read a line and check for EOF. */
00307     if (fgets(li->buf, li->bsiz, li->fh) == NULL) {
00308         lineiter_free(li);
00309         return NULL;
00310     }
00311     /* If we managed to read the whole thing, then we are done
00312      * (this will be by far the most common result). */
00313     li->len = strlen(li->buf);
00314     if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
00315         return li;
00316 
00317     /* Otherwise we have to reallocate and keep going. */
00318     while (1) {
00319         li->bsiz *= 2;
00320         li->buf = ckd_realloc(li->buf, li->bsiz);
00321         /* If we get an EOF, we are obviously done. */
00322         if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) {
00323             li->len += strlen(li->buf + li->len);
00324             return li;
00325         }
00326         li->len += strlen(li->buf + li->len);
00327         /* If we managed to read the whole thing, then we are done. */
00328         if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
00329             return li;
00330     }
00331 
00332     /* Shouldn't get here. */
00333     return li;
00334 }
00335 
00336 
00337 lineiter_t *
00338 lineiter_next(lineiter_t *li)
00339 {
00340     if (!li->clean)
00341         return lineiter_next_plain(li);
00342     
00343     for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) {
00344         if (li->buf && li->buf[0] != '#') {
00345             li->buf = string_trim(li->buf, STRING_BOTH);
00346             break;
00347         }
00348     }
00349     return li;
00350 }
00351 
00352 int lineiter_lineno(lineiter_t *li)
00353 {
00354     return li->lineno;
00355 }
00356 
00357 void
00358 lineiter_free(lineiter_t *li)
00359 {
00360     if (li == NULL)
00361         return;
00362     ckd_free(li->buf);
00363     ckd_free(li);
00364 }
00365 
/*
 * Read one line of arbitrary length from `stream`.
 *
 * stream  - stream to read from.
 * out_len - if non-NULL, receives the number of characters stored
 *           (not counting the terminating NUL); 0 when nothing was read.
 *
 * Returns a newly allocated NUL-terminated string holding the line
 * (including its newline, if one was read), or NULL if nothing could be
 * read.  The caller frees the result with ckd_free().
 */
char *
fread_line(FILE *stream, size_t *out_len)
{
    char *output = NULL;
    size_t total = 0;   /* characters accumulated so far */
    char buf[128];

    while (fgets(buf, sizeof(buf), stream)) {
        size_t len = strlen(buf);

        /* Append this data to the buffer. */
        if (output == NULL)
            output = ckd_malloc(len + 1);
        else
            output = ckd_realloc(output, total + len + 1);
        memcpy(output + total, buf, len + 1);
        total += len;

        /* Stop on a short read or end of line. */
        if (len < sizeof(buf) - 1 || buf[len - 1] == '\n')
            break;
    }

    /* The length is tracked as a count rather than derived from a
     * pointer difference, which is undefined when both pointers are NULL. */
    if (out_len)
        *out_len = total;
    return output;
}
00394 
00395 #define FREAD_RETRY_COUNT       60
00396 
00397 int32
00398 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream)
00399 {
00400     char *data;
00401     uint32 n_items_read;
00402     uint32 n_items_rem;
00403     uint32 n_retry_rem;
00404     int32 loc;
00405 
00406     n_retry_rem = FREAD_RETRY_COUNT;
00407 
00408     data = pointer;
00409     loc = 0;
00410     n_items_rem = num_items;
00411 
00412     do {
00413         n_items_read = fread(&data[loc], size, n_items_rem, stream);
00414 
00415         n_items_rem -= n_items_read;
00416 
00417         if (n_items_rem > 0) {
00418             /* an incomplete read occurred */
00419 
00420             if (n_retry_rem == 0)
00421                 return -1;
00422 
00423             if (n_retry_rem == FREAD_RETRY_COUNT) {
00424                 E_ERROR_SYSTEM("fread() failed; retrying...\n");
00425             }
00426 
00427             --n_retry_rem;
00428 
00429             loc += n_items_read * size;
00430 #ifdef HAVE_UNISTD_H
00431             sleep(1);
00432 #endif
00433         }
00434     } while (n_items_rem > 0);
00435 
00436     return num_items;
00437 }
00438 
00439 
00440 /* Silvio Moioli: updated to use Unicode */
00441 #ifdef _WIN32_WCE /* No stat() on WinCE */
00442 int32
00443 stat_retry(const char *file, struct stat * statbuf)
00444 {
00445     WIN32_FIND_DATAW file_data;
00446     HANDLE *h;
00447     wchar_t *wfile;
00448     size_t len;
00449 
00450     len = mbstowcs(NULL, file, 0) + 1;
00451     wfile = ckd_calloc(len, sizeof(*wfile));
00452     mbstowcs(wfile, file, len);
00453     if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) {
00454         ckd_free(wfile);
00455         return -1;
00456     }
00457     ckd_free(wfile);
00458     memset(statbuf, 0, sizeof(statbuf));
00459     statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime;
00460     statbuf->st_size = file_data.nFileSizeLow;
00461     FindClose(h);
00462 
00463     return 0;
00464 }
00465 
00466 
00467 int32
00468 stat_mtime(const char *file)
00469 {
00470     struct stat statbuf;
00471 
00472     if (stat_retry(file, &statbuf) != 0)
00473         return -1;
00474 
00475     return ((int32) statbuf.st_mtime);
00476 }
00477 #else
00478 #define STAT_RETRY_COUNT        10
00479 int32
00480 stat_retry(const char *file, struct stat * statbuf)
00481 {
00482     int32 i;
00483 
00484     
00485     
00486     for (i = 0; i < STAT_RETRY_COUNT; i++) {
00487 
00488 #ifndef HAVE_SYS_STAT_H
00489                 FILE *fp;
00490 
00491                 if ((fp=(FILE *)fopen(file, "r"))!= 0)
00492                 {
00493                     fseek( fp, 0, SEEK_END);
00494                     statbuf->st_size = ftell( fp );
00495                     fclose(fp);
00496                     return 0;
00497                 }
00498         
00499 #else /* HAVE_SYS_STAT_H */
00500         if (stat(file, statbuf) == 0)
00501             return 0;
00502 #endif
00503         if (i == 0) {
00504             E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file);
00505         }
00506 #ifdef HAVE_UNISTD_H
00507         sleep(1);
00508 #endif
00509     }
00510 
00511     return -1;
00512 }
00513 
00514 int32
00515 stat_mtime(const char *file)
00516 {
00517     struct stat statbuf;
00518 
00519 #ifdef HAVE_SYS_STAT_H
00520     if (stat(file, &statbuf) != 0)
00521         return -1;
00522 #else /* HAVE_SYS_STAT_H */
00523     if (stat_retry(file, &statbuf) != 0)
00524         return -1;
00525 #endif /* HAVE_SYS_STAT_H */
00526 
00527     return ((int32) statbuf.st_mtime);
00528 }
00529 #endif /* !_WIN32_WCE */
00530 
00531 struct bit_encode_s {
00532     FILE *fh;
00533     unsigned char buf, bbits;
00534     int16 refcount;
00535 };
00536 
00537 bit_encode_t *
00538 bit_encode_attach(FILE *outfh)
00539 {
00540     bit_encode_t *be;
00541 
00542     be = ckd_calloc(1, sizeof(*be));
00543     be->refcount = 1;
00544     be->fh = outfh;
00545     return be;
00546 }
00547 
00548 bit_encode_t *
00549 bit_encode_retain(bit_encode_t *be)
00550 {
00551     ++be->refcount;
00552     return be;
00553 }
00554 
00555 int
00556 bit_encode_free(bit_encode_t *be)
00557 {
00558     if (be == NULL)
00559         return 0;
00560     if (--be->refcount > 0)
00561         return be->refcount;
00562     ckd_free(be);
00563 
00564     return 0;
00565 }
00566 
00567 int
00568 bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
00569 {
00570     int tbits;
00571 
00572     tbits = nbits + be->bbits;
00573     if (tbits < 8)  {
00574         /* Append to buffer. */
00575         be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits));
00576     }
00577     else {
00578         int i = 0;
00579         while (tbits >= 8) {
00580             /* Shift bits out of the buffer and splice with high-order bits */
00581             fputc(be->buf | ((bits[i]) >> be->bbits), be->fh);
00582             /* Put low-order bits back into buffer */
00583             be->buf = (bits[i] << (8 - be->bbits)) & 0xff;
00584             tbits -= 8;
00585             ++i;
00586         }
00587     }
00588     /* tbits contains remaining number of  bits. */
00589     be->bbits = tbits;
00590 
00591     return nbits;
00592 }
00593 
00594 int
00595 bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
00596 {
00597     unsigned char bits[4];
00598     codeword <<= (32 - nbits);
00599     bits[0] = (codeword >> 24) & 0xff;
00600     bits[1] = (codeword >> 16) & 0xff;
00601     bits[2] = (codeword >> 8) & 0xff;
00602     bits[3] = codeword & 0xff;
00603     return bit_encode_write(be, bits, nbits);
00604 }
00605 
00606 int
00607 bit_encode_flush(bit_encode_t *be)
00608 {
00609     if (be->bbits) {
00610         fputc(be->buf, be->fh);
00611         be->bbits = 0;
00612     }
00613     return 0;
00614 }
00615 
00616 #if defined(HAVE_SYS_STAT_H) && !defined(__MINGW32__) /* Unix, Cygwin, doesn't work on MINGW */
/*
 * Create directory `path`, creating missing parent directories first.
 *
 * Returns 0 if the directory was created or already exists, -1 on
 * failure (including an empty path).
 */
int
build_directory(const char *path)
{
    char *dirname;
    int rv;

    /* Utterly failed... */
    if (strlen(path) == 0)
        return -1;
    /* Utterly succeeded... */
    if (mkdir(path, 0777) == 0)
        return 0;
    /* Or, it already exists... */
    if (errno == EEXIST)
        return 0;
    if (errno != ENOENT) {
        E_ERROR_SYSTEM("Failed to create %s", path);
        return -1;
    }

    /* A parent is missing: build it first, then retry. */
    dirname = ckd_salloc(path);
    path2dirname(path, dirname);
    if (strcmp(dirname, path) == 0) {
        /* No shorter parent to try; avoid recursing forever. */
        ckd_free(dirname);
        return -1;
    }
    rv = build_directory(dirname);
    ckd_free(dirname);
    if (rv != 0)
        return -1;

    /* With a trailing separator in path, the parent just created may
     * already be the requested directory, so EEXIST counts as success. */
    if (mkdir(path, 0777) == 0 || errno == EEXIST)
        return 0;
    return -1;
}
00643 #elif defined(_WIN32)
00644 /* FIXME: Implement this. */
/*
 * Placeholder for Windows builds: directory creation is not implemented.
 * Logs an error and always returns -1; `path` is not used.
 */
int
build_directory(const char *path)
{
    (void) path;

    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
00651 #else
/*
 * Placeholder for platforms with neither of the other implementations:
 * logs an error and always returns -1; `path` is not used.
 */
int
build_directory(const char *path)
{
    (void) path;

    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
00658 #endif