SphinxBase 0.6

src/libsphinxbase/lm/fsg_model.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 
00035 /* System headers. */
00036 #ifdef _WIN32_WCE
00037 /*MC in a debug build it's implicitly included by assert.h
00038      but you need this in a release build */
00039 #include <windows.h>
00040 #else
00041 #include <time.h>
00042 #endif                          /* _WIN32_WCE */
00043 #include <stdio.h>
00044 #include <string.h>
00045 #include <assert.h>
00046 
00047 /* SphinxBase headers. */
00048 #include "sphinxbase/err.h"
00049 #include "sphinxbase/pio.h"
00050 #include "sphinxbase/ckd_alloc.h"
00051 #include "sphinxbase/prim_type.h"
00052 #include "sphinxbase/strfuncs.h"
00053 #include "sphinxbase/hash_table.h"
00054 #include "sphinxbase/fsg_model.h"
00055 
00063 struct trans_list_s {
00064     hash_table_t *null_trans;   /* Null transitions keyed by state. */
00065     hash_table_t *trans;        /* Lists of non-null transitions keyed by state. */
00066 };
00067 
00071 struct fsg_arciter_s {
00072     hash_iter_t *itor, *null_itor;
00073     gnode_t *gn;
00074 };
00075 
00076 #define FSG_MODEL_BEGIN_DECL            "FSG_BEGIN"
00077 #define FSG_MODEL_END_DECL              "FSG_END"
00078 #define FSG_MODEL_N_DECL                        "N"
00079 #define FSG_MODEL_NUM_STATES_DECL       "NUM_STATES"
00080 #define FSG_MODEL_S_DECL                        "S"
00081 #define FSG_MODEL_START_STATE_DECL      "START_STATE"
00082 #define FSG_MODEL_F_DECL                        "F"
00083 #define FSG_MODEL_FINAL_STATE_DECL      "FINAL_STATE"
00084 #define FSG_MODEL_T_DECL                        "T"
00085 #define FSG_MODEL_TRANSITION_DECL       "TRANSITION"
00086 #define FSG_MODEL_COMMENT_CHAR          '#'
00087 
00088 
00089 static int32
00090 nextline_str2words(FILE * fp, int32 * lineno,
00091                    char **lineptr, char ***wordptr)
00092 {
00093     for (;;) {
00094         size_t len;
00095         int32 n;
00096 
00097         ckd_free(*lineptr);
00098         if ((*lineptr = fread_line(fp, &len)) == NULL)
00099             return -1;
00100 
00101         (*lineno)++;
00102 
00103         if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
00104             continue;           /* Skip comment lines */
00105 
00106         n = str2words(*lineptr, NULL, 0);
00107         if (n == 0)
00108             continue;           /* Skip blank lines */
00109 
00110         /* Abuse of realloc(), but this doesn't have to be fast. */
00111         if (*wordptr == NULL)
00112             *wordptr = ckd_calloc(n, sizeof(**wordptr));
00113         else
00114             *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr));
00115         return str2words(*lineptr, *wordptr, n);
00116     }
00117 }
00118 
00119 void
00120 fsg_model_trans_add(fsg_model_t * fsg,
00121                     int32 from, int32 to, int32 logp, int32 wid)
00122 {
00123     fsg_link_t *link;
00124     glist_t gl;
00125     gnode_t *gn;
00126 
00127     if (fsg->trans[from].trans == NULL)
00128         fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES);
00129 
00130     /* Check for duplicate link (i.e., link already exists with label=wid) */
00131     for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) {
00132         link = (fsg_link_t *) gnode_ptr(gn);
00133         if (link->wid == wid) {
00134             if (link->logs2prob < logp)
00135                 link->logs2prob = logp;
00136             return;
00137         }
00138     }
00139 
00140     /* Create transition object */
00141     link = listelem_malloc(fsg->link_alloc);
00142     link->from_state = from;
00143     link->to_state = to;
00144     link->logs2prob = logp;
00145     link->wid = wid;
00146 
00147     /* Add it to the list of transitions and update the hash table */
00148     gl = glist_add_ptr(gl, (void *) link);
00149     hash_table_replace_bkey(fsg->trans[from].trans,
00150                             (char const *) &link->to_state,
00151                             sizeof(link->to_state), gl);
00152 }
00153 
00154 int32
00155 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to,
00156                         int32 logp, int32 wid)
00157 {
00158     fsg_link_t *link, *link2;
00159 
00160     /* Check for transition probability */
00161     if (logp > 0) {
00162         E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n",
00163                 from, to);
00164     }
00165 
00166     /* Self-loop null transitions (with prob <= 1.0) are redundant */
00167     if (from == to)
00168         return -1;
00169 
00170     if (fsg->trans[from].null_trans == NULL)
00171         fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES);
00172 
00173     /* Check for a duplicate link; if found, keep the higher prob */
00174     link = fsg_model_null_trans(fsg, from, to);
00175     if (link) {
00176         if (link->logs2prob < logp) {
00177             link->logs2prob = logp;
00178             return 0;
00179         }
00180         else
00181             return -1;
00182     }
00183 
00184     /* Create null transition object */
00185     link = listelem_malloc(fsg->link_alloc);
00186     link->from_state = from;
00187     link->to_state = to;
00188     link->logs2prob = logp;
00189     link->wid = -1;
00190 
00191     link2 = (fsg_link_t *)
00192         hash_table_enter_bkey(fsg->trans[from].null_trans,
00193                               (char const *) &link->to_state,
00194                               sizeof(link->to_state), link);
00195     assert(link == link2);
00196 
00197     return 1;
00198 }
00199 
00200 int32
00201 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to,
00202                          int32 logp)
00203 {
00204     return fsg_model_tag_trans_add(fsg, from, to, logp, -1);
00205 }
00206 
00207 glist_t
00208 fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
00209 {
00210     gnode_t *gn1;
00211     int updated;
00212     fsg_link_t *tl1, *tl2;
00213     int32 k, n;
00214 
00215     E_INFO("Computing transitive closure for null transitions\n");
00216 
00217     if (nulls == NULL) {
00218         fsg_link_t *null;
00219         int i, j;
00220 
00221         for (i = 0; i < fsg->n_state; ++i) {
00222             for (j = 0; j < fsg->n_state; ++j) {
00223                 if ((null = fsg_model_null_trans(fsg, i, j)))
00224                     nulls = glist_add_ptr(nulls, null);
00225             }
00226         }
00227     }
00228 
00229     /*
00230      * Probably not the most efficient closure implementation, in general, but
00231      * probably reasonably efficient for a sparse null transition matrix.
00232      */
00233     n = 0;
00234     do {
00235         updated = FALSE;
00236 
00237         for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
00238             hash_iter_t *itor;
00239 
00240             tl1 = (fsg_link_t *) gnode_ptr(gn1);
00241             assert(tl1->wid < 0);
00242 
00243             if (fsg->trans[tl1->to_state].null_trans == NULL)
00244                 continue;
00245 
00246             for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans);
00247                  itor; itor = hash_table_iter_next(itor)) {
00248 
00249                 tl2 = (fsg_link_t *) hash_entry_val(itor->ent);
00250 
00251                 k = fsg_model_null_trans_add(fsg,
00252                                              tl1->from_state,
00253                                              tl2->to_state,
00254                                              tl1->logs2prob +
00255                                              tl2->logs2prob);
00256                 if (k >= 0) {
00257                     updated = TRUE;
00258                     if (k > 0) {
00259                         nulls = glist_add_ptr(nulls, (void *)
00260                                               fsg_model_null_trans
00261                                               (fsg, tl1->from_state,
00262                                                tl2->to_state));
00263                         n++;
00264                     }
00265                 }
00266             }
00267         }
00268     } while (updated);
00269     
00270     E_INFO("%d null transitions added\n", n);
00271 
00272     return nulls;
00273 }
00274 
00275 glist_t
00276 fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j)
00277 {
00278     void *val;
00279 
00280     if (fsg->trans[i].trans == NULL)
00281         return NULL;
00282     if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j,
00283                                sizeof(j), &val) < 0)
00284         return NULL;
00285     return (glist_t) val;
00286 }
00287 
00288 fsg_link_t *
00289 fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j)
00290 {
00291     void *val;
00292 
00293     if (fsg->trans[i].null_trans == NULL)
00294         return NULL;
00295     if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j,
00296                                sizeof(j), &val) < 0)
00297         return NULL;
00298     return (fsg_link_t *) val;
00299 }
00300 
00301 fsg_arciter_t *
00302 fsg_model_arcs(fsg_model_t * fsg, int32 i)
00303 {
00304     fsg_arciter_t *itor;
00305 
00306     if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
00307         return NULL;
00308     itor = ckd_calloc(1, sizeof(*itor));
00309     if (fsg->trans[i].null_trans)
00310         itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
00311     if (fsg->trans[i].trans)
00312         itor->itor = hash_table_iter(fsg->trans[i].trans);
00313     if (itor->itor != NULL)
00314         itor->gn = hash_entry_val(itor->itor->ent);
00315     return itor;
00316 }
00317 
00318 fsg_link_t *
00319 fsg_arciter_get(fsg_arciter_t * itor)
00320 {
00321     /* Iterate over non-null arcs first. */
00322     if (itor->gn)
00323         return (fsg_link_t *) gnode_ptr(itor->gn);
00324     else if (itor->null_itor)
00325         return (fsg_link_t *) hash_entry_val(itor->null_itor->ent);
00326     else
00327         return NULL;
00328 }
00329 
00330 fsg_arciter_t *
00331 fsg_arciter_next(fsg_arciter_t * itor)
00332 {
00333     /* Iterate over non-null arcs first. */
00334     if (itor->gn) {
00335         itor->gn = gnode_next(itor->gn);
00336         /* Move to the next destination arc. */
00337         if (itor->gn == NULL) {
00338             itor->itor = hash_table_iter_next(itor->itor);
00339             if (itor->itor != NULL)
00340                 itor->gn = hash_entry_val(itor->itor->ent);
00341             else if (itor->null_itor == NULL)
00342                 goto stop_iteration;
00343         }
00344     }
00345     else {
00346         if (itor->null_itor == NULL)
00347             goto stop_iteration;
00348         itor->null_itor = hash_table_iter_next(itor->null_itor);
00349         if (itor->null_itor == NULL)
00350             goto stop_iteration;
00351     }
00352     return itor;
00353   stop_iteration:
00354     fsg_arciter_free(itor);
00355     return NULL;
00356 
00357 }
00358 
00359 void
00360 fsg_arciter_free(fsg_arciter_t * itor)
00361 {
00362     if (itor == NULL)
00363         return;
00364     hash_table_iter_free(itor->null_itor);
00365     hash_table_iter_free(itor->itor);
00366     ckd_free(itor);
00367 }
00368 
00369 int
00370 fsg_model_word_id(fsg_model_t * fsg, char const *word)
00371 {
00372     int wid;
00373 
00374     /* Search for an existing word matching this. */
00375     for (wid = 0; wid < fsg->n_word; ++wid) {
00376         if (0 == strcmp(fsg->vocab[wid], word))
00377             break;
00378     }
00379     /* If not found, add this to the vocab. */
00380     if (wid == fsg->n_word)
00381         return -1;
00382     return wid;
00383 }
00384 
00385 int
00386 fsg_model_word_add(fsg_model_t * fsg, char const *word)
00387 {
00388     int wid, old_size;
00389 
00390     /* Search for an existing word matching this. */
00391     wid = fsg_model_word_id(fsg, word);
00392     /* If not found, add this to the vocab. */
00393     if (wid == -1) {
00394         wid = fsg->n_word;
00395         if (fsg->n_word == fsg->n_word_alloc) {
00396             old_size = fsg->n_word_alloc;
00397             fsg->n_word_alloc += 10;
00398             fsg->vocab = ckd_realloc(fsg->vocab,
00399                                      fsg->n_word_alloc *
00400                                      sizeof(*fsg->vocab));
00401             if (fsg->silwords)
00402                 fsg->silwords =
00403                     bitvec_realloc(fsg->silwords, old_size, fsg->n_word_alloc);
00404             if (fsg->altwords)
00405                 fsg->altwords =
00406                     bitvec_realloc(fsg->altwords, old_size, fsg->n_word_alloc);
00407         }
00408         ++fsg->n_word;
00409         fsg->vocab[wid] = ckd_salloc(word);
00410     }
00411     return wid;
00412 }
00413 
00414 int
00415 fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
00416                       int state, float32 silprob)
00417 {
00418     int32 logsilp;
00419     int n_trans, silwid, src;
00420 
00421     E_INFO("Adding silence transitions for %s to FSG\n", silword);
00422 
00423     silwid = fsg_model_word_add(fsg, silword);
00424     logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
00425     if (fsg->silwords == NULL)
00426         fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
00427     bitvec_set(fsg->silwords, silwid);
00428 
00429     n_trans = 0;
00430     if (state == -1) {
00431         for (src = 0; src < fsg->n_state; src++) {
00432             fsg_model_trans_add(fsg, src, src, logsilp, silwid);
00433             ++n_trans;
00434         }
00435     }
00436     else {
00437         fsg_model_trans_add(fsg, state, state, logsilp, silwid);
00438         ++n_trans;
00439     }
00440 
00441     E_INFO("Added %d silence word transitions\n", n_trans);
00442     return n_trans;
00443 }
00444 
00445 int
00446 fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
00447                   char const *altword)
00448 {
00449     int i, basewid, altwid;
00450     int ntrans;
00451 
00452     /* FIXME: This will get slow, eventually... */
00453     for (basewid = 0; basewid < fsg->n_word; ++basewid)
00454         if (0 == strcmp(fsg->vocab[basewid], baseword))
00455             break;
00456     if (basewid == fsg->n_word) {
00457         E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
00458         return -1;
00459     }
00460     altwid = fsg_model_word_add(fsg, altword);
00461     if (fsg->altwords == NULL)
00462         fsg->altwords = bitvec_alloc(fsg->n_word_alloc);
00463     bitvec_set(fsg->altwords, altwid);
00464 
00465     E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n",
00466                 baseword, altword));
00467 
00468     /* Look for all transitions involving baseword and duplicate them. */
00469     /* FIXME: This will also get slow, eventually... */
00470     ntrans = 0;
00471     for (i = 0; i < fsg->n_state; ++i) {
00472         hash_iter_t *itor;
00473         if (fsg->trans[i].trans == NULL)
00474             continue;
00475         for (itor = hash_table_iter(fsg->trans[i].trans); itor;
00476              itor = hash_table_iter_next(itor)) {
00477             glist_t trans;
00478             gnode_t *gn;
00479 
00480             trans = hash_entry_val(itor->ent);
00481             for (gn = trans; gn; gn = gnode_next(gn)) {
00482                 fsg_link_t *fl = gnode_ptr(gn);
00483                 if (fl->wid == basewid) {
00484                     fsg_link_t *link;
00485 
00486                     /* Create transition object */
00487                     link = listelem_malloc(fsg->link_alloc);
00488                     link->from_state = fl->from_state;
00489                     link->to_state = fl->to_state;
00490                     link->logs2prob = fl->logs2prob;    /* FIXME!!!??? */
00491                     link->wid = altwid;
00492 
00493                     trans = glist_add_ptr(trans, (void *) link);
00494                     ++ntrans;
00495                 }
00496             }
00497             hash_entry_val(itor->ent) = trans;
00498         }
00499     }
00500 
00501     E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans));
00502     return ntrans;
00503 }
00504 
00505 
00506 fsg_model_t *
00507 fsg_model_init(char const *name, logmath_t * lmath, float32 lw,
00508                int32 n_state)
00509 {
00510     fsg_model_t *fsg;
00511 
00512     /* Allocate basic stuff. */
00513     fsg = ckd_calloc(1, sizeof(*fsg));
00514     fsg->refcount = 1;
00515     fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t));
00516     fsg->lmath = lmath;
00517     fsg->name = name ? ckd_salloc(name) : NULL;
00518     fsg->n_state = n_state;
00519     fsg->lw = lw;
00520 
00521     fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans));
00522 
00523     return fsg;
00524 }
00525 
00526 fsg_model_t *
00527 fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
00528 {
00529     fsg_model_t *fsg;
00530     hash_table_t *vocab;
00531     hash_iter_t *itor;
00532     int32 lastwid;
00533     char **wordptr;
00534     char *lineptr;
00535     char *fsgname;
00536     int32 lineno;
00537     int32 n, i, j;
00538     int n_state, n_trans, n_null_trans;
00539     glist_t nulls;
00540     float32 p;
00541 
00542     lineno = 0;
00543     vocab = hash_table_new(32, FALSE);
00544     wordptr = NULL;
00545     lineptr = NULL;
00546     nulls = NULL;
00547     fsgname = NULL;
00548     fsg = NULL;
00549 
00550     /* Scan upto FSG_BEGIN header */
00551     for (;;) {
00552         n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00553         if (n < 0) {
00554             E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
00555             goto parse_error;
00556         }
00557 
00558         if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
00559             if (n > 2) {
00560                 E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
00561                         lineno);
00562                 goto parse_error;
00563             }
00564             break;
00565         }
00566     }
00567     /* Save FSG name, or it will get clobbered below :(.
00568      * If name is missing, try the default.
00569      */
00570     if (n == 2) {
00571         fsgname = ckd_salloc(wordptr[1]);
00572     }
00573     else {
00574         E_WARN("FSG name is missing\n");
00575         fsgname = ckd_salloc("unknown");
00576     }
00577 
00578     /* Read #states */
00579     n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00580     if ((n != 2)
00581         || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
00582             && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
00583         || (sscanf(wordptr[1], "%d", &n_state) != 1)
00584         || (n_state <= 0)) {
00585         E_ERROR
00586             ("Line[%d]: #states declaration line missing or malformed\n",
00587              lineno);
00588         goto parse_error;
00589     }
00590 
00591     /* Now create the FSG. */
00592     fsg = fsg_model_init(fsgname, lmath, lw, n_state);
00593     ckd_free(fsgname);
00594     fsgname = NULL;
00595 
00596     /* Read start state */
00597     n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00598     if ((n != 2)
00599         || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
00600             && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
00601         || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
00602         || (fsg->start_state < 0)
00603         || (fsg->start_state >= fsg->n_state)) {
00604         E_ERROR
00605             ("Line[%d]: start state declaration line missing or malformed\n",
00606              lineno);
00607         goto parse_error;
00608     }
00609 
00610     /* Read final state */
00611     n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00612     if ((n != 2)
00613         || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
00614             && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
00615         || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
00616         || (fsg->final_state < 0)
00617         || (fsg->final_state >= fsg->n_state)) {
00618         E_ERROR
00619             ("Line[%d]: final state declaration line missing or malformed\n",
00620              lineno);
00621         goto parse_error;
00622     }
00623 
00624     /* Read transitions */
00625     lastwid = 0;
00626     n_trans = n_null_trans = 0;
00627     for (;;) {
00628         int32 wid, tprob;
00629 
00630         n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
00631         if (n <= 0) {
00632             E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
00633                     lineno);
00634             goto parse_error;
00635         }
00636 
00637         if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
00638             break;
00639         }
00640 
00641         if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
00642             || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
00643 
00644 
00645             if (((n != 4) && (n != 5))
00646                 || (sscanf(wordptr[1], "%d", &i) != 1)
00647                 || (sscanf(wordptr[2], "%d", &j) != 1)
00648                 || (i < 0) || (i >= fsg->n_state)
00649                 || (j < 0) || (j >= fsg->n_state)) {
00650                 E_ERROR
00651                     ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
00652                      lineno);
00653                 goto parse_error;
00654             }
00655 
00656             p = atof_c(wordptr[3]);
00657             if ((p <= 0.0) || (p > 1.0)) {
00658                 E_ERROR
00659                     ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
00660                      lineno);
00661                 goto parse_error;
00662             }
00663         }
00664         else {
00665             E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
00666                     lineno);
00667             goto parse_error;
00668         }
00669 
00670         tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
00671         /* Add word to "dictionary". */
00672         if (n > 4) {
00673             if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
00674                 (void) hash_table_enter_int32(vocab,
00675                                               ckd_salloc(wordptr[4]),
00676                                               lastwid);
00677                 wid = lastwid;
00678                 ++lastwid;
00679             }
00680             fsg_model_trans_add(fsg, i, j, tprob, wid);
00681             ++n_trans;
00682         }
00683         else {
00684             if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
00685                 ++n_null_trans;
00686                 nulls =
00687                     glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
00688             }
00689         }
00690     }
00691 
00692     E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
00693            fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
00694 
00695 
00696     /* Now create a string table from the "dictionary" */
00697     fsg->n_word = hash_table_inuse(vocab);
00698     fsg->n_word_alloc = fsg->n_word + 10;       /* Pad it a bit. */
00699     fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
00700     for (itor = hash_table_iter(vocab); itor;
00701          itor = hash_table_iter_next(itor)) {
00702         char const *word = hash_entry_key(itor->ent);
00703         int32 wid = (int32) (long) hash_entry_val(itor->ent);
00704         fsg->vocab[wid] = (char *) word;
00705     }
00706     hash_table_free(vocab);
00707 
00708     /* Do transitive closure on null transitions */
00709     nulls = fsg_model_null_trans_closure(fsg, nulls);
00710     glist_free(nulls);
00711 
00712     ckd_free(lineptr);
00713     ckd_free(wordptr);
00714 
00715     return fsg;
00716 
00717   parse_error:
00718     for (itor = hash_table_iter(vocab); itor;
00719          itor = hash_table_iter_next(itor))
00720         ckd_free((char *) hash_entry_key(itor->ent));
00721     glist_free(nulls);
00722     hash_table_free(vocab);
00723     ckd_free(fsgname);
00724     ckd_free(lineptr);
00725     ckd_free(wordptr);
00726     fsg_model_free(fsg);
00727     return NULL;
00728 }
00729 
00730 
00731 fsg_model_t *
00732 fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw)
00733 {
00734     FILE *fp;
00735     fsg_model_t *fsg;
00736 
00737     if ((fp = fopen(file, "r")) == NULL) {
00738         E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
00739         return NULL;
00740     }
00741     fsg = fsg_model_read(fp, lmath, lw);
00742     fclose(fp);
00743     return fsg;
00744 }
00745 
00746 fsg_model_t *
00747 fsg_model_retain(fsg_model_t * fsg)
00748 {
00749     ++fsg->refcount;
00750     return fsg;
00751 }
00752 
00753 static void
00754 trans_list_free(fsg_model_t * fsg, int32 i)
00755 {
00756     hash_iter_t *itor;
00757 
00758     /* FIXME (maybe): FSG links will all get freed when we call
00759      * listelem_alloc_free() so don't bother freeing them explicitly
00760      * here. */
00761     if (fsg->trans[i].trans) {
00762         for (itor = hash_table_iter(fsg->trans[i].trans);
00763              itor; itor = hash_table_iter_next(itor)) {
00764             glist_t gl = (glist_t) hash_entry_val(itor->ent);
00765             glist_free(gl);
00766         }
00767     }
00768     hash_table_free(fsg->trans[i].trans);
00769     hash_table_free(fsg->trans[i].null_trans);
00770 }
00771 
00772 int
00773 fsg_model_free(fsg_model_t * fsg)
00774 {
00775     int i;
00776 
00777     if (fsg == NULL)
00778         return 0;
00779 
00780     if (--fsg->refcount > 0)
00781         return fsg->refcount;
00782 
00783     for (i = 0; i < fsg->n_word; ++i)
00784         ckd_free(fsg->vocab[i]);
00785     for (i = 0; i < fsg->n_state; ++i)
00786         trans_list_free(fsg, i);
00787     ckd_free(fsg->trans);
00788     ckd_free(fsg->vocab);
00789     listelem_alloc_free(fsg->link_alloc);
00790     bitvec_free(fsg->silwords);
00791     bitvec_free(fsg->altwords);
00792     ckd_free(fsg->name);
00793     ckd_free(fsg);
00794     return 0;
00795 }
00796 
00797 
00798 void
00799 fsg_model_write(fsg_model_t * fsg, FILE * fp)
00800 {
00801     int32 i;
00802 
00803     fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL,
00804             fsg->name ? fsg->name : "");
00805     fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
00806     fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
00807     fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);
00808 
00809     for (i = 0; i < fsg->n_state; i++) {
00810         fsg_arciter_t *itor;
00811 
00812         for (itor = fsg_model_arcs(fsg, i); itor;
00813              itor = fsg_arciter_next(itor)) {
00814             fsg_link_t *tl = fsg_arciter_get(itor);
00815 
00816             fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
00817                     tl->from_state, tl->to_state,
00818                     logmath_exp(fsg->lmath,
00819                                 (int32) (tl->logs2prob / fsg->lw)),
00820                     (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid));
00821         }
00822     }
00823 
00824     fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
00825 
00826     fflush(fp);
00827 }
00828 
00829 void
00830 fsg_model_writefile(fsg_model_t * fsg, char const *file)
00831 {
00832     FILE *fp;
00833 
00834     assert(fsg);
00835 
00836     E_INFO("Writing FSG file '%s'\n", file);
00837 
00838     if ((fp = fopen(file, "w")) == NULL) {
00839         E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
00840         return;
00841     }
00842 
00843     fsg_model_write(fsg, fp);
00844 
00845     fclose(fp);
00846 }
00847 
00848 static void
00849 fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp)
00850 {
00851     fsg_arciter_t *itor;
00852 
00853     for (itor = fsg_model_arcs(fsg, i); itor;
00854          itor = fsg_arciter_next(itor)) {
00855         fsg_link_t *tl = fsg_arciter_get(itor);
00856         fprintf(fp, "%d %d %s %f\n",
00857                 tl->from_state, tl->to_state,
00858                 (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid),
00859                 -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw));
00860     }
00861 }
00862 
00863 void
00864 fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp)
00865 {
00866     int i;
00867 
00868     /* Write transitions from initial state first. */
00869     fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
00870 
00871     /* Other states. */
00872     for (i = 0; i < fsg->n_state; i++) {
00873         if (i == fsg_model_start_state(fsg))
00874             continue;
00875         fsg_model_write_fsm_trans(fsg, i, fp);
00876     }
00877 
00878     /* Final state. */
00879     fprintf(fp, "%d 0\n", fsg_model_final_state(fsg));
00880 
00881     fflush(fp);
00882 }
00883 
00884 void
00885 fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file)
00886 {
00887     FILE *fp;
00888 
00889     assert(fsg);
00890 
00891     E_INFO("Writing FSM file '%s'\n", file);
00892 
00893     if ((fp = fopen(file, "w")) == NULL) {
00894         E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file);
00895         return;
00896     }
00897 
00898     fsg_model_write_fsm(fsg, fp);
00899 
00900     fclose(fp);
00901 }
00902 
00903 void
00904 fsg_model_write_symtab(fsg_model_t * fsg, FILE * file)
00905 {
00906     int i;
00907 
00908     fprintf(file, "<eps> 0\n");
00909     for (i = 0; i < fsg_model_n_word(fsg); ++i) {
00910         fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1);
00911     }
00912     fflush(file);
00913 }
00914 
00915 void
00916 fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file)
00917 {
00918     FILE *fp;
00919 
00920     assert(fsg);
00921 
00922     E_INFO("Writing FSM symbol table '%s'\n", file);
00923 
00924     if ((fp = fopen(file, "w")) == NULL) {
00925         E_ERROR("Failed to open symbol table '%s' for writing", file);
00926         return;
00927     }
00928 
00929     fsg_model_write_symtab(fsg, fp);
00930 
00931     fclose(fp);
00932 }