SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00020 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00022 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00023 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00025 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00026 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00027 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00028 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00029 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 * ==================================================================== 00032 * 00033 */ 00034 00035 /* System headers. */ 00036 #ifdef _WIN32_WCE 00037 /*MC in a debug build it's implicitly included by assert.h 00038 but you need this in a release build */ 00039 #include <windows.h> 00040 #else 00041 #include <time.h> 00042 #endif /* _WIN32_WCE */ 00043 #include <stdio.h> 00044 #include <string.h> 00045 #include <assert.h> 00046 00047 /* SphinxBase headers. */ 00048 #include "sphinxbase/err.h" 00049 #include "sphinxbase/pio.h" 00050 #include "sphinxbase/ckd_alloc.h" 00051 #include "sphinxbase/prim_type.h" 00052 #include "sphinxbase/strfuncs.h" 00053 #include "sphinxbase/hash_table.h" 00054 #include "sphinxbase/fsg_model.h" 00055 00063 struct trans_list_s { 00064 hash_table_t *null_trans; /* Null transitions keyed by state. */ 00065 hash_table_t *trans; /* Lists of non-null transitions keyed by state. */ 00066 }; 00067 00071 struct fsg_arciter_s { 00072 hash_iter_t *itor, *null_itor; 00073 gnode_t *gn; 00074 }; 00075 00076 #define FSG_MODEL_BEGIN_DECL "FSG_BEGIN" 00077 #define FSG_MODEL_END_DECL "FSG_END" 00078 #define FSG_MODEL_N_DECL "N" 00079 #define FSG_MODEL_NUM_STATES_DECL "NUM_STATES" 00080 #define FSG_MODEL_S_DECL "S" 00081 #define FSG_MODEL_START_STATE_DECL "START_STATE" 00082 #define FSG_MODEL_F_DECL "F" 00083 #define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE" 00084 #define FSG_MODEL_T_DECL "T" 00085 #define FSG_MODEL_TRANSITION_DECL "TRANSITION" 00086 #define FSG_MODEL_COMMENT_CHAR '#' 00087 00088 00089 static int32 00090 nextline_str2words(FILE * fp, int32 * lineno, 00091 char **lineptr, char ***wordptr) 00092 { 00093 for (;;) { 00094 size_t len; 00095 int32 n; 00096 00097 ckd_free(*lineptr); 00098 if ((*lineptr = fread_line(fp, &len)) == NULL) 00099 return -1; 00100 00101 (*lineno)++; 00102 00103 if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR) 00104 continue; /* Skip comment lines */ 00105 00106 n = str2words(*lineptr, NULL, 0); 00107 if (n == 0) 00108 continue; /* Skip blank lines */ 00109 00110 /* Abuse of realloc(), but this doesn't have to be fast. */ 00111 if (*wordptr == NULL) 00112 *wordptr = ckd_calloc(n, sizeof(**wordptr)); 00113 else 00114 *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr)); 00115 return str2words(*lineptr, *wordptr, n); 00116 } 00117 } 00118 00119 void 00120 fsg_model_trans_add(fsg_model_t * fsg, 00121 int32 from, int32 to, int32 logp, int32 wid) 00122 { 00123 fsg_link_t *link; 00124 glist_t gl; 00125 gnode_t *gn; 00126 00127 if (fsg->trans[from].trans == NULL) 00128 fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES); 00129 00130 /* Check for duplicate link (i.e., link already exists with label=wid) */ 00131 for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) { 00132 link = (fsg_link_t *) gnode_ptr(gn); 00133 if (link->wid == wid) { 00134 if (link->logs2prob < logp) 00135 link->logs2prob = logp; 00136 return; 00137 } 00138 } 00139 00140 /* Create transition object */ 00141 link = listelem_malloc(fsg->link_alloc); 00142 link->from_state = from; 00143 link->to_state = to; 00144 link->logs2prob = logp; 00145 link->wid = wid; 00146 00147 /* Add it to the list of transitions and update the hash table */ 00148 gl = glist_add_ptr(gl, (void *) link); 00149 hash_table_replace_bkey(fsg->trans[from].trans, 00150 (char const *) &link->to_state, 00151 sizeof(link->to_state), gl); 00152 } 00153 00154 int32 00155 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, 00156 int32 logp, int32 wid) 00157 { 00158 fsg_link_t *link, *link2; 00159 00160 /* Check for transition probability */ 00161 if (logp > 0) { 00162 E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n", 00163 from, to); 00164 } 00165 00166 /* Self-loop null transitions (with prob <= 1.0) are redundant */ 00167 if (from == to) 00168 return -1; 00169 00170 if (fsg->trans[from].null_trans == NULL) 00171 fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES); 00172 00173 /* Check for a duplicate link; if found, keep the higher prob */ 00174 link = fsg_model_null_trans(fsg, from, to); 00175 if (link) { 00176 if (link->logs2prob < logp) { 00177 link->logs2prob = logp; 00178 return 0; 00179 } 00180 else 00181 return -1; 00182 } 00183 00184 /* Create null transition object */ 00185 link = listelem_malloc(fsg->link_alloc); 00186 link->from_state = from; 00187 link->to_state = to; 00188 link->logs2prob = logp; 00189 link->wid = -1; 00190 00191 link2 = (fsg_link_t *) 00192 hash_table_enter_bkey(fsg->trans[from].null_trans, 00193 (char const *) &link->to_state, 00194 sizeof(link->to_state), link); 00195 assert(link == link2); 00196 00197 return 1; 00198 } 00199 00200 int32 00201 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, 00202 int32 logp) 00203 { 00204 return fsg_model_tag_trans_add(fsg, from, to, logp, -1); 00205 } 00206 00207 glist_t 00208 fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls) 00209 { 00210 gnode_t *gn1; 00211 int updated; 00212 fsg_link_t *tl1, *tl2; 00213 int32 k, n; 00214 00215 E_INFO("Computing transitive closure for null transitions\n"); 00216 00217 if (nulls == NULL) { 00218 fsg_link_t *null; 00219 int i, j; 00220 00221 for (i = 0; i < fsg->n_state; ++i) { 00222 for (j = 0; j < fsg->n_state; ++j) { 00223 if ((null = fsg_model_null_trans(fsg, i, j))) 00224 nulls = glist_add_ptr(nulls, null); 00225 } 00226 } 00227 } 00228 00229 /* 00230 * Probably not the most efficient closure implementation, in general, but 00231 * probably reasonably efficient for a sparse null transition matrix. 00232 */ 00233 n = 0; 00234 do { 00235 updated = FALSE; 00236 00237 for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) { 00238 hash_iter_t *itor; 00239 00240 tl1 = (fsg_link_t *) gnode_ptr(gn1); 00241 assert(tl1->wid < 0); 00242 00243 if (fsg->trans[tl1->to_state].null_trans == NULL) 00244 continue; 00245 00246 for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans); 00247 itor; itor = hash_table_iter_next(itor)) { 00248 00249 tl2 = (fsg_link_t *) hash_entry_val(itor->ent); 00250 00251 k = fsg_model_null_trans_add(fsg, 00252 tl1->from_state, 00253 tl2->to_state, 00254 tl1->logs2prob + 00255 tl2->logs2prob); 00256 if (k >= 0) { 00257 updated = TRUE; 00258 if (k > 0) { 00259 nulls = glist_add_ptr(nulls, (void *) 00260 fsg_model_null_trans 00261 (fsg, tl1->from_state, 00262 tl2->to_state)); 00263 n++; 00264 } 00265 } 00266 } 00267 } 00268 } while (updated); 00269 00270 E_INFO("%d null transitions added\n", n); 00271 00272 return nulls; 00273 } 00274 00275 glist_t 00276 fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j) 00277 { 00278 void *val; 00279 00280 if (fsg->trans[i].trans == NULL) 00281 return NULL; 00282 if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j, 00283 sizeof(j), &val) < 0) 00284 return NULL; 00285 return (glist_t) val; 00286 } 00287 00288 fsg_link_t * 00289 fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j) 00290 { 00291 void *val; 00292 00293 if (fsg->trans[i].null_trans == NULL) 00294 return NULL; 00295 if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j, 00296 sizeof(j), &val) < 0) 00297 return NULL; 00298 return (fsg_link_t *) val; 00299 } 00300 00301 fsg_arciter_t * 00302 fsg_model_arcs(fsg_model_t * fsg, int32 i) 00303 { 00304 fsg_arciter_t *itor; 00305 00306 if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL) 00307 return NULL; 00308 itor = ckd_calloc(1, sizeof(*itor)); 00309 if (fsg->trans[i].null_trans) 00310 itor->null_itor = hash_table_iter(fsg->trans[i].null_trans); 00311 if (fsg->trans[i].trans) 00312 itor->itor = hash_table_iter(fsg->trans[i].trans); 00313 if (itor->itor != NULL) 00314 itor->gn = hash_entry_val(itor->itor->ent); 00315 return itor; 00316 } 00317 00318 fsg_link_t * 00319 fsg_arciter_get(fsg_arciter_t * itor) 00320 { 00321 /* Iterate over non-null arcs first. */ 00322 if (itor->gn) 00323 return (fsg_link_t *) gnode_ptr(itor->gn); 00324 else if (itor->null_itor) 00325 return (fsg_link_t *) hash_entry_val(itor->null_itor->ent); 00326 else 00327 return NULL; 00328 } 00329 00330 fsg_arciter_t * 00331 fsg_arciter_next(fsg_arciter_t * itor) 00332 { 00333 /* Iterate over non-null arcs first. */ 00334 if (itor->gn) { 00335 itor->gn = gnode_next(itor->gn); 00336 /* Move to the next destination arc. */ 00337 if (itor->gn == NULL) { 00338 itor->itor = hash_table_iter_next(itor->itor); 00339 if (itor->itor != NULL) 00340 itor->gn = hash_entry_val(itor->itor->ent); 00341 else if (itor->null_itor == NULL) 00342 goto stop_iteration; 00343 } 00344 } 00345 else { 00346 if (itor->null_itor == NULL) 00347 goto stop_iteration; 00348 itor->null_itor = hash_table_iter_next(itor->null_itor); 00349 if (itor->null_itor == NULL) 00350 goto stop_iteration; 00351 } 00352 return itor; 00353 stop_iteration: 00354 fsg_arciter_free(itor); 00355 return NULL; 00356 00357 } 00358 00359 void 00360 fsg_arciter_free(fsg_arciter_t * itor) 00361 { 00362 if (itor == NULL) 00363 return; 00364 hash_table_iter_free(itor->null_itor); 00365 hash_table_iter_free(itor->itor); 00366 ckd_free(itor); 00367 } 00368 00369 int 00370 fsg_model_word_id(fsg_model_t * fsg, char const *word) 00371 { 00372 int wid; 00373 00374 /* Search for an existing word matching this. */ 00375 for (wid = 0; wid < fsg->n_word; ++wid) { 00376 if (0 == strcmp(fsg->vocab[wid], word)) 00377 break; 00378 } 00379 /* If not found, add this to the vocab. */ 00380 if (wid == fsg->n_word) 00381 return -1; 00382 return wid; 00383 } 00384 00385 int 00386 fsg_model_word_add(fsg_model_t * fsg, char const *word) 00387 { 00388 int wid, old_size; 00389 00390 /* Search for an existing word matching this. */ 00391 wid = fsg_model_word_id(fsg, word); 00392 /* If not found, add this to the vocab. */ 00393 if (wid == -1) { 00394 wid = fsg->n_word; 00395 if (fsg->n_word == fsg->n_word_alloc) { 00396 old_size = fsg->n_word_alloc; 00397 fsg->n_word_alloc += 10; 00398 fsg->vocab = ckd_realloc(fsg->vocab, 00399 fsg->n_word_alloc * 00400 sizeof(*fsg->vocab)); 00401 if (fsg->silwords) 00402 fsg->silwords = 00403 bitvec_realloc(fsg->silwords, old_size, fsg->n_word_alloc); 00404 if (fsg->altwords) 00405 fsg->altwords = 00406 bitvec_realloc(fsg->altwords, old_size, fsg->n_word_alloc); 00407 } 00408 ++fsg->n_word; 00409 fsg->vocab[wid] = ckd_salloc(word); 00410 } 00411 return wid; 00412 } 00413 00414 int 00415 fsg_model_add_silence(fsg_model_t * fsg, char const *silword, 00416 int state, float32 silprob) 00417 { 00418 int32 logsilp; 00419 int n_trans, silwid, src; 00420 00421 E_INFO("Adding silence transitions for %s to FSG\n", silword); 00422 00423 silwid = fsg_model_word_add(fsg, silword); 00424 logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw); 00425 if (fsg->silwords == NULL) 00426 fsg->silwords = bitvec_alloc(fsg->n_word_alloc); 00427 bitvec_set(fsg->silwords, silwid); 00428 00429 n_trans = 0; 00430 if (state == -1) { 00431 for (src = 0; src < fsg->n_state; src++) { 00432 fsg_model_trans_add(fsg, src, src, logsilp, silwid); 00433 ++n_trans; 00434 } 00435 } 00436 else { 00437 fsg_model_trans_add(fsg, state, state, logsilp, silwid); 00438 ++n_trans; 00439 } 00440 00441 E_INFO("Added %d silence word transitions\n", n_trans); 00442 return n_trans; 00443 } 00444 00445 int 00446 fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, 00447 char const *altword) 00448 { 00449 int i, basewid, altwid; 00450 int ntrans; 00451 00452 /* FIXME: This will get slow, eventually... */ 00453 for (basewid = 0; basewid < fsg->n_word; ++basewid) 00454 if (0 == strcmp(fsg->vocab[basewid], baseword)) 00455 break; 00456 if (basewid == fsg->n_word) { 00457 E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword); 00458 return -1; 00459 } 00460 altwid = fsg_model_word_add(fsg, altword); 00461 if (fsg->altwords == NULL) 00462 fsg->altwords = bitvec_alloc(fsg->n_word_alloc); 00463 bitvec_set(fsg->altwords, altwid); 00464 00465 E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n", 00466 baseword, altword)); 00467 00468 /* Look for all transitions involving baseword and duplicate them. */ 00469 /* FIXME: This will also get slow, eventually... */ 00470 ntrans = 0; 00471 for (i = 0; i < fsg->n_state; ++i) { 00472 hash_iter_t *itor; 00473 if (fsg->trans[i].trans == NULL) 00474 continue; 00475 for (itor = hash_table_iter(fsg->trans[i].trans); itor; 00476 itor = hash_table_iter_next(itor)) { 00477 glist_t trans; 00478 gnode_t *gn; 00479 00480 trans = hash_entry_val(itor->ent); 00481 for (gn = trans; gn; gn = gnode_next(gn)) { 00482 fsg_link_t *fl = gnode_ptr(gn); 00483 if (fl->wid == basewid) { 00484 fsg_link_t *link; 00485 00486 /* Create transition object */ 00487 link = listelem_malloc(fsg->link_alloc); 00488 link->from_state = fl->from_state; 00489 link->to_state = fl->to_state; 00490 link->logs2prob = fl->logs2prob; /* FIXME!!!??? */ 00491 link->wid = altwid; 00492 00493 trans = glist_add_ptr(trans, (void *) link); 00494 ++ntrans; 00495 } 00496 } 00497 hash_entry_val(itor->ent) = trans; 00498 } 00499 } 00500 00501 E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans)); 00502 return ntrans; 00503 } 00504 00505 00506 fsg_model_t * 00507 fsg_model_init(char const *name, logmath_t * lmath, float32 lw, 00508 int32 n_state) 00509 { 00510 fsg_model_t *fsg; 00511 00512 /* Allocate basic stuff. */ 00513 fsg = ckd_calloc(1, sizeof(*fsg)); 00514 fsg->refcount = 1; 00515 fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t)); 00516 fsg->lmath = lmath; 00517 fsg->name = name ? ckd_salloc(name) : NULL; 00518 fsg->n_state = n_state; 00519 fsg->lw = lw; 00520 00521 fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans)); 00522 00523 return fsg; 00524 } 00525 00526 fsg_model_t * 00527 fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw) 00528 { 00529 fsg_model_t *fsg; 00530 hash_table_t *vocab; 00531 hash_iter_t *itor; 00532 int32 lastwid; 00533 char **wordptr; 00534 char *lineptr; 00535 char *fsgname; 00536 int32 lineno; 00537 int32 n, i, j; 00538 int n_state, n_trans, n_null_trans; 00539 glist_t nulls; 00540 float32 p; 00541 00542 lineno = 0; 00543 vocab = hash_table_new(32, FALSE); 00544 wordptr = NULL; 00545 lineptr = NULL; 00546 nulls = NULL; 00547 fsgname = NULL; 00548 fsg = NULL; 00549 00550 /* Scan upto FSG_BEGIN header */ 00551 for (;;) { 00552 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00553 if (n < 0) { 00554 E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL); 00555 goto parse_error; 00556 } 00557 00558 if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) { 00559 if (n > 2) { 00560 E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", 00561 lineno); 00562 goto parse_error; 00563 } 00564 break; 00565 } 00566 } 00567 /* Save FSG name, or it will get clobbered below :(. 00568 * If name is missing, try the default. 00569 */ 00570 if (n == 2) { 00571 fsgname = ckd_salloc(wordptr[1]); 00572 } 00573 else { 00574 E_WARN("FSG name is missing\n"); 00575 fsgname = ckd_salloc("unknown"); 00576 } 00577 00578 /* Read #states */ 00579 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00580 if ((n != 2) 00581 || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0) 00582 && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0)) 00583 || (sscanf(wordptr[1], "%d", &n_state) != 1) 00584 || (n_state <= 0)) { 00585 E_ERROR 00586 ("Line[%d]: #states declaration line missing or malformed\n", 00587 lineno); 00588 goto parse_error; 00589 } 00590 00591 /* Now create the FSG. */ 00592 fsg = fsg_model_init(fsgname, lmath, lw, n_state); 00593 ckd_free(fsgname); 00594 fsgname = NULL; 00595 00596 /* Read start state */ 00597 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00598 if ((n != 2) 00599 || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0) 00600 && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0)) 00601 || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1) 00602 || (fsg->start_state < 0) 00603 || (fsg->start_state >= fsg->n_state)) { 00604 E_ERROR 00605 ("Line[%d]: start state declaration line missing or malformed\n", 00606 lineno); 00607 goto parse_error; 00608 } 00609 00610 /* Read final state */ 00611 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00612 if ((n != 2) 00613 || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0) 00614 && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0)) 00615 || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1) 00616 || (fsg->final_state < 0) 00617 || (fsg->final_state >= fsg->n_state)) { 00618 E_ERROR 00619 ("Line[%d]: final state declaration line missing or malformed\n", 00620 lineno); 00621 goto parse_error; 00622 } 00623 00624 /* Read transitions */ 00625 lastwid = 0; 00626 n_trans = n_null_trans = 0; 00627 for (;;) { 00628 int32 wid, tprob; 00629 00630 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); 00631 if (n <= 0) { 00632 E_ERROR("Line[%d]: transition or FSG_END statement expected\n", 00633 lineno); 00634 goto parse_error; 00635 } 00636 00637 if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) { 00638 break; 00639 } 00640 00641 if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0) 00642 || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) { 00643 00644 00645 if (((n != 4) && (n != 5)) 00646 || (sscanf(wordptr[1], "%d", &i) != 1) 00647 || (sscanf(wordptr[2], "%d", &j) != 1) 00648 || (i < 0) || (i >= fsg->n_state) 00649 || (j < 0) || (j >= fsg->n_state)) { 00650 E_ERROR 00651 ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n", 00652 lineno); 00653 goto parse_error; 00654 } 00655 00656 p = atof_c(wordptr[3]); 00657 if ((p <= 0.0) || (p > 1.0)) { 00658 E_ERROR 00659 ("Line[%d]: transition spec malformed; Expecting float as transition probability\n", 00660 lineno); 00661 goto parse_error; 00662 } 00663 } 00664 else { 00665 E_ERROR("Line[%d]: transition or FSG_END statement expected\n", 00666 lineno); 00667 goto parse_error; 00668 } 00669 00670 tprob = (int32) (logmath_log(lmath, p) * fsg->lw); 00671 /* Add word to "dictionary". */ 00672 if (n > 4) { 00673 if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) { 00674 (void) hash_table_enter_int32(vocab, 00675 ckd_salloc(wordptr[4]), 00676 lastwid); 00677 wid = lastwid; 00678 ++lastwid; 00679 } 00680 fsg_model_trans_add(fsg, i, j, tprob, wid); 00681 ++n_trans; 00682 } 00683 else { 00684 if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) { 00685 ++n_null_trans; 00686 nulls = 00687 glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j)); 00688 } 00689 } 00690 } 00691 00692 E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n", 00693 fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans); 00694 00695 00696 /* Now create a string table from the "dictionary" */ 00697 fsg->n_word = hash_table_inuse(vocab); 00698 fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */ 00699 fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab)); 00700 for (itor = hash_table_iter(vocab); itor; 00701 itor = hash_table_iter_next(itor)) { 00702 char const *word = hash_entry_key(itor->ent); 00703 int32 wid = (int32) (long) hash_entry_val(itor->ent); 00704 fsg->vocab[wid] = (char *) word; 00705 } 00706 hash_table_free(vocab); 00707 00708 /* Do transitive closure on null transitions */ 00709 nulls = fsg_model_null_trans_closure(fsg, nulls); 00710 glist_free(nulls); 00711 00712 ckd_free(lineptr); 00713 ckd_free(wordptr); 00714 00715 return fsg; 00716 00717 parse_error: 00718 for (itor = hash_table_iter(vocab); itor; 00719 itor = hash_table_iter_next(itor)) 00720 ckd_free((char *) hash_entry_key(itor->ent)); 00721 glist_free(nulls); 00722 hash_table_free(vocab); 00723 ckd_free(fsgname); 00724 ckd_free(lineptr); 00725 ckd_free(wordptr); 00726 fsg_model_free(fsg); 00727 return NULL; 00728 } 00729 00730 00731 fsg_model_t * 00732 fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw) 00733 { 00734 FILE *fp; 00735 fsg_model_t *fsg; 00736 00737 if ((fp = fopen(file, "r")) == NULL) { 00738 E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); 00739 return NULL; 00740 } 00741 fsg = fsg_model_read(fp, lmath, lw); 00742 fclose(fp); 00743 return fsg; 00744 } 00745 00746 fsg_model_t * 00747 fsg_model_retain(fsg_model_t * fsg) 00748 { 00749 ++fsg->refcount; 00750 return fsg; 00751 } 00752 00753 static void 00754 trans_list_free(fsg_model_t * fsg, int32 i) 00755 { 00756 hash_iter_t *itor; 00757 00758 /* FIXME (maybe): FSG links will all get freed when we call 00759 * listelem_alloc_free() so don't bother freeing them explicitly 00760 * here. */ 00761 if (fsg->trans[i].trans) { 00762 for (itor = hash_table_iter(fsg->trans[i].trans); 00763 itor; itor = hash_table_iter_next(itor)) { 00764 glist_t gl = (glist_t) hash_entry_val(itor->ent); 00765 glist_free(gl); 00766 } 00767 } 00768 hash_table_free(fsg->trans[i].trans); 00769 hash_table_free(fsg->trans[i].null_trans); 00770 } 00771 00772 int 00773 fsg_model_free(fsg_model_t * fsg) 00774 { 00775 int i; 00776 00777 if (fsg == NULL) 00778 return 0; 00779 00780 if (--fsg->refcount > 0) 00781 return fsg->refcount; 00782 00783 for (i = 0; i < fsg->n_word; ++i) 00784 ckd_free(fsg->vocab[i]); 00785 for (i = 0; i < fsg->n_state; ++i) 00786 trans_list_free(fsg, i); 00787 ckd_free(fsg->trans); 00788 ckd_free(fsg->vocab); 00789 listelem_alloc_free(fsg->link_alloc); 00790 bitvec_free(fsg->silwords); 00791 bitvec_free(fsg->altwords); 00792 ckd_free(fsg->name); 00793 ckd_free(fsg); 00794 return 0; 00795 } 00796 00797 00798 void 00799 fsg_model_write(fsg_model_t * fsg, FILE * fp) 00800 { 00801 int32 i; 00802 00803 fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL, 00804 fsg->name ? fsg->name : ""); 00805 fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state); 00806 fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state); 00807 fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state); 00808 00809 for (i = 0; i < fsg->n_state; i++) { 00810 fsg_arciter_t *itor; 00811 00812 for (itor = fsg_model_arcs(fsg, i); itor; 00813 itor = fsg_arciter_next(itor)) { 00814 fsg_link_t *tl = fsg_arciter_get(itor); 00815 00816 fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL, 00817 tl->from_state, tl->to_state, 00818 logmath_exp(fsg->lmath, 00819 (int32) (tl->logs2prob / fsg->lw)), 00820 (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid)); 00821 } 00822 } 00823 00824 fprintf(fp, "%s\n", FSG_MODEL_END_DECL); 00825 00826 fflush(fp); 00827 } 00828 00829 void 00830 fsg_model_writefile(fsg_model_t * fsg, char const *file) 00831 { 00832 FILE *fp; 00833 00834 assert(fsg); 00835 00836 E_INFO("Writing FSG file '%s'\n", file); 00837 00838 if ((fp = fopen(file, "w")) == NULL) { 00839 E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); 00840 return; 00841 } 00842 00843 fsg_model_write(fsg, fp); 00844 00845 fclose(fp); 00846 } 00847 00848 static void 00849 fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp) 00850 { 00851 fsg_arciter_t *itor; 00852 00853 for (itor = fsg_model_arcs(fsg, i); itor; 00854 itor = fsg_arciter_next(itor)) { 00855 fsg_link_t *tl = fsg_arciter_get(itor); 00856 fprintf(fp, "%d %d %s %f\n", 00857 tl->from_state, tl->to_state, 00858 (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid), 00859 -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw)); 00860 } 00861 } 00862 00863 void 00864 fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp) 00865 { 00866 int i; 00867 00868 /* Write transitions from initial state first. */ 00869 fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp); 00870 00871 /* Other states. */ 00872 for (i = 0; i < fsg->n_state; i++) { 00873 if (i == fsg_model_start_state(fsg)) 00874 continue; 00875 fsg_model_write_fsm_trans(fsg, i, fp); 00876 } 00877 00878 /* Final state. */ 00879 fprintf(fp, "%d 0\n", fsg_model_final_state(fsg)); 00880 00881 fflush(fp); 00882 } 00883 00884 void 00885 fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file) 00886 { 00887 FILE *fp; 00888 00889 assert(fsg); 00890 00891 E_INFO("Writing FSM file '%s'\n", file); 00892 00893 if ((fp = fopen(file, "w")) == NULL) { 00894 E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file); 00895 return; 00896 } 00897 00898 fsg_model_write_fsm(fsg, fp); 00899 00900 fclose(fp); 00901 } 00902 00903 void 00904 fsg_model_write_symtab(fsg_model_t * fsg, FILE * file) 00905 { 00906 int i; 00907 00908 fprintf(file, "<eps> 0\n"); 00909 for (i = 0; i < fsg_model_n_word(fsg); ++i) { 00910 fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1); 00911 } 00912 fflush(file); 00913 } 00914 00915 void 00916 fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file) 00917 { 00918 FILE *fp; 00919 00920 assert(fsg); 00921 00922 E_INFO("Writing FSM symbol table '%s'\n", file); 00923 00924 if ((fp = fopen(file, "w")) == NULL) { 00925 E_ERROR("Failed to open symbol table '%s' for writing", file); 00926 return; 00927 } 00928 00929 fsg_model_write_symtab(fsg, fp); 00930 00931 fclose(fp); 00932 }