SphinxBase 0.6

include/sphinxbase/ngram_model.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2007 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00043 #ifndef __NGRAM_MODEL_H__
00044 #define __NGRAM_MODEL_H__
00045 
00046 #include <stdarg.h>
00047 
00048 /* Win32/WinCE DLL gunk */
00049 #include <sphinxbase/sphinxbase_export.h>
00050 #include <sphinxbase/prim_type.h>
00051 #include <sphinxbase/cmd_ln.h>
00052 #include <sphinxbase/logmath.h>
00053 #include <sphinxbase/mmio.h>
00054 
00055 #ifdef __cplusplus
00056 extern "C" {
00057 #endif
00058 #if 0
00059 /* Fool Emacs. */
00060 }
00061 #endif
00062 
/**
 * Handle type for a language model object.  Only the struct tag
 * ngram_model_s is named here; its members are not declared in this header,
 * so callers can only hold and pass pointers to it.
 */
00066 typedef struct ngram_model_s ngram_model_t;
00067 
/**
 * Handle type for struct ngram_class_s, whose members are not declared in
 * this header.  NOTE(review): no prototype in this header takes or returns
 * this type; presumably it is used by the word-class implementation -- confirm.
 */
00071 typedef struct ngram_class_s ngram_class_t;
00072 
/**
 * File type selector: accepted by ngram_model_read() and
 * ngram_model_write(), and returned by ngram_file_name_to_type() and
 * ngram_str_to_type().
 *
 * Only NGRAM_INVALID has an explicit value; the rest follow sequentially
 * from it (0, 1, 2, 3).
 *
 * NOTE(review): the enumerator list ends with a trailing comma.  C99 accepts
 * this, but strict C89 and C++03 compilers diagnose it, and this header is
 * also meant to be included from C++ -- consider removing the comma.
 */
00076 typedef enum ngram_file_type_e {
00077     NGRAM_INVALID = -1, /* -1; presumably "not a recognized type" -- confirm */
00078     NGRAM_AUTO,  /* 0; presumably "detect the type automatically" -- confirm */
00079     NGRAM_ARPA,  /* 1; presumably the ARPA text format -- confirm */
00080     NGRAM_DMP,   /* 2; presumably the Sphinx DMP binary format -- confirm */
00081     NGRAM_DMP32, /* 3; presumably a 32-bit variant of DMP -- confirm */
00082 } ngram_file_type_t;
00083 
/**
 * Constant -1; by its name, the "no such word" value for int32 word IDs
 * (presumably what ngram_wid() and friends yield on failure -- confirm).
 *
 * NOTE(review): the replacement text is a bare -1.  The conventional
 * defensive form is a parenthesized (-1) so the expansion cannot combine
 * with neighbouring operators.
 */
00084 #define NGRAM_INVALID_WID -1 
/**
 * Declares the function that reads a model from a named file.
 *
 * @param config    Configuration object.  NOTE(review): which options are
 *                  consulted and whether NULL is accepted is not visible
 *                  in this header -- confirm.
 * @param file_name Name of the file to read.
 * @param file_type An ngram_file_type_t value (NGRAM_AUTO presumably asks
 *                  for the format to be detected -- confirm).
 * @param lmath     logmath_t object associated with the model; whether the
 *                  model takes ownership of it is not visible here -- confirm.
 * @return Pointer to an ngram_model_t; presumably NULL on failure -- confirm.
 */
00106 SPHINXBASE_EXPORT
00107 ngram_model_t *ngram_model_read(cmd_ln_t *config,
00108                                 const char *file_name,
00109                                 ngram_file_type_t file_type,
00110                                 logmath_t *lmath);
00111 
/**
 * Declares the function that writes a model to a named file.
 *
 * @param model     Model to write.
 * @param file_name Name of the output file.
 * @param format    An ngram_file_type_t value selecting the output format.
 * @return int status; presumably 0 on success and negative on error -- confirm.
 */
00117 SPHINXBASE_EXPORT
00118 int ngram_model_write(ngram_model_t *model, const char *file_name,
00119                       ngram_file_type_t format);
00120 
/**
 * Maps a file name to an ngram_file_type_t.
 * NOTE(review): presumably guesses from the file extension and yields
 * NGRAM_INVALID when nothing matches -- confirm.
 */
00126 SPHINXBASE_EXPORT
00127 ngram_file_type_t ngram_file_name_to_type(const char *file_name);
00128 
/**
 * Maps a textual type name to an ngram_file_type_t.
 * NOTE(review): the accepted spellings are not visible in this header.
 */
00134 SPHINXBASE_EXPORT
00135 ngram_file_type_t ngram_str_to_type(const char *str_name);
00136 
/**
 * Maps a type value to a constant string.  Note the parameter is declared
 * as plain int rather than ngram_file_type_t.
 * NOTE(review): the result for out-of-range values, and the lifetime of the
 * returned string, are not visible here -- presumably static storage.
 */
00143 SPHINXBASE_EXPORT
00144 char const *ngram_type_to_str(int type);
00145 
/**
 * Takes a model pointer and returns a model pointer.
 * NOTE(review): by name this adds a reference to a reference-counted model
 * and returns the same pointer -- confirm.
 */
00151 SPHINXBASE_EXPORT
00152 ngram_model_t *ngram_model_retain(ngram_model_t *model);
00153 
/**
 * Releases a model.
 * @return int; NOTE(review): presumably the remaining reference count, with
 *         the model destroyed when it reaches zero -- confirm.
 */
00159 SPHINXBASE_EXPORT
00160 int ngram_model_free(ngram_model_t *model);
00161 
/**
 * Declares a recoding operation on a model, parameterized by two strings.
 *
 * @param model Model to modify.
 * @param from  NOTE(review): presumably the name of the source character
 *              encoding of the word strings -- confirm.
 * @param to    NOTE(review): presumably the name of the target character
 *              encoding -- confirm.
 * @return int status; presumably 0 on success, negative on error -- confirm.
 */
00178 SPHINXBASE_EXPORT
00179 int ngram_model_recode(ngram_model_t *model, const char *from, const char *to);
00180 
/**
 * Letter-case selector with two values; neither has an explicit initializer,
 * so NGRAM_UPPER is 0 and NGRAM_LOWER is 1.
 * NOTE(review): presumably the values expected by the `kase` argument of
 * ngram_model_casefold(), although that parameter is declared as int.
 */
00184 typedef enum ngram_case_e {
00185     NGRAM_UPPER,
00186     NGRAM_LOWER
00187 } ngram_case_t;
00188 
/**
 * Declares a case-folding operation on a model.
 *
 * @param model Model to modify.
 * @param kase  Declared as int; NOTE(review): presumably NGRAM_UPPER or
 *              NGRAM_LOWER from ngram_case_t -- confirm.
 * @return int status; presumably 0 on success, negative on error -- confirm.
 */
00195 SPHINXBASE_EXPORT
00196 int ngram_model_casefold(ngram_model_t *model, int kase);
00197 
/**
 * Applies three float32 weighting parameters to a model.
 *
 * @param model Model to modify.
 * @param lw    NOTE(review): presumably the language weight -- confirm.
 * @param wip   NOTE(review): presumably the word insertion penalty -- confirm.
 * @param uw    NOTE(review): presumably the unigram weight -- confirm.
 * @return int status; success/failure convention not visible here.
 */
00209 SPHINXBASE_EXPORT
00210 int ngram_model_apply_weights(ngram_model_t *model,
00211                               float32 lw, float32 wip, float32 uw);
00212 
/**
 * Reads back weighting parameters from a model.
 *
 * @param model       Model to query.
 * @param out_log_wip Output pointer typed int32.  NOTE(review): the name
 *                    suggests a log-domain word insertion penalty; whether
 *                    NULL may be passed is not visible here.
 * @param out_log_uw  Output pointer typed int32.  NOTE(review): the name
 *                    suggests a log-domain unigram weight -- confirm.
 * @return float32; presumably the language weight -- confirm.
 */
00221 SPHINXBASE_EXPORT
00222 float32 ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip,
00223                                 int32 *out_log_uw);
00224 
/**
 * Variadic score lookup keyed by word strings; returns int32.
 * NOTE(review): the layout of the variadic tail is not visible here --
 * presumably further const char * history words ending with a NULL
 * terminator; confirm before calling.
 */
00257 SPHINXBASE_EXPORT
00258 int32 ngram_score(ngram_model_t *model, const char *word, ...);
00259 
/**
 * Score lookup taking three int32 word IDs (w3, w2, w1).
 * @param n_used Output pointer typed int32.  NOTE(review): presumably
 *               receives the N-gram order actually used -- confirm.
 */
00263 SPHINXBASE_EXPORT
00264 int32 ngram_tg_score(ngram_model_t *model,
00265                      int32 w3, int32 w2, int32 w1,
00266                      int32 *n_used);
00267 
/**
 * Score lookup taking two int32 word IDs (w2, w1).
 * @param n_used Output pointer typed int32; see the note on ngram_tg_score().
 */
00271 SPHINXBASE_EXPORT
00272 int32 ngram_bg_score(ngram_model_t *model,
00273                      int32 w2, int32 w1,
00274                      int32 *n_used);
00275 
/**
 * Score lookup for a word ID with an explicit history array.
 *
 * @param wid     Word ID being scored.
 * @param history Array of int32 word IDs.  NOTE(review): ordering
 *                (most-recent-first or oldest-first) is not visible here.
 * @param n_hist  Number of entries in `history`.
 * @param n_used  Output pointer typed int32; presumably the N-gram order
 *                actually used -- confirm.
 */
00279 SPHINXBASE_EXPORT
00280 int32 ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history,
00281                      int32 n_hist, int32 *n_used);
00282 
/**
 * Variadic probability lookup keyed by word strings; same parameter shape
 * as ngram_score().
 * NOTE(review): presumably returns the log-probability without the weights
 * that ngram_score() includes, and expects a NULL-terminated variadic
 * tail -- confirm.
 */
00293 SPHINXBASE_EXPORT
00294 int32 ngram_prob(ngram_model_t *model, const char *word, ...);
00295 
/**
 * Probability lookup by word ID and history array; same parameter shape as
 * ngram_ng_score().
 */
00302 SPHINXBASE_EXPORT
00303 int32 ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history,
00304                     int32 n_hist, int32 *n_used);
00305 
/**
 * Converts an int32 score into an int32 probability value for the model.
 * NOTE(review): presumably removes the language weight and insertion
 * penalty from `score`; the exact transform is not visible here.
 */
00317 SPHINXBASE_EXPORT
00318 int32 ngram_score_to_prob(ngram_model_t *model, int32 score);
00319 
/**
 * Maps a word string to an int32 word ID.
 * NOTE(review): the result for a word not in the model is not visible here
 * (candidates: NGRAM_INVALID_WID or the ngram_unknown_wid() value).
 */
00323 SPHINXBASE_EXPORT
00324 int32 ngram_wid(ngram_model_t *model, const char *word);
00325 
/**
 * Maps an int32 word ID to a constant string.
 * NOTE(review): presumably returns NULL for an ID out of range, and the
 * string is owned by the model -- confirm.
 */
00329 SPHINXBASE_EXPORT
00330 const char *ngram_word(ngram_model_t *model, int32 wid);
00331 
/**
 * Returns an int32 word ID.
 * NOTE(review): by name, the ID of the model's unknown-word token, or
 * NGRAM_INVALID_WID when the model has none -- confirm.
 */
00345 SPHINXBASE_EXPORT
00346 int32 ngram_unknown_wid(ngram_model_t *model);
00347 
/**
 * Returns an int32 associated with the model.
 * NOTE(review): by name, the log-domain value the model uses to represent
 * zero probability -- confirm.
 */
00351 SPHINXBASE_EXPORT
00352 int32 ngram_zero(ngram_model_t *model);
00353 
/**
 * Returns an int32 size for the model.
 * NOTE(review): presumably the N-gram order N rather than a byte or word
 * count -- confirm.
 */
00357 SPHINXBASE_EXPORT
00358 int32 ngram_model_get_size(ngram_model_t *model);
00359 
/**
 * Returns a pointer to read-only int32 data.
 * NOTE(review): presumably an array of per-order N-gram counts whose length
 * is ngram_model_get_size(), owned by the model -- confirm.
 */
00363 SPHINXBASE_EXPORT
00364 int32 const *ngram_model_get_counts(ngram_model_t *model);
00365 
/**
 * Handle type for the iterator returned by ngram_model_mgrams(),
 * ngram_iter() and ngram_ng_iter().  The members of struct ngram_iter_s
 * are not declared in this header.
 */
00369 typedef struct ngram_iter_s ngram_iter_t;
00370 
/**
 * Creates an iterator over a model, parameterized by an int `m`.
 * NOTE(review): presumably iterates over all M-grams of one order; whether
 * `m` is the order or the order minus one is not visible here -- confirm.
 */
00379 SPHINXBASE_EXPORT
00380 ngram_iter_t *ngram_model_mgrams(ngram_model_t *model, int m);
00381 
/**
 * Variadic iterator constructor keyed by word strings.
 * NOTE(review): the variadic tail presumably follows the same convention as
 * ngram_score() (history words, NULL-terminated) -- confirm.
 */
00385 SPHINXBASE_EXPORT
00386 ngram_iter_t *ngram_iter(ngram_model_t *model, const char *word, ...);
00387 
/**
 * Iterator constructor keyed by a word ID and a history array of `n_hist`
 * word IDs; same parameter shape as ngram_ng_score() minus `n_used`.
 */
00391 SPHINXBASE_EXPORT
00392 ngram_iter_t *ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist);
00393 
/**
 * Reads the entry an iterator currently refers to.
 *
 * @param itor      Iterator to read.
 * @param out_score Output pointer typed int32; presumably the entry's
 *                  score -- confirm.
 * @param out_bowt  Output pointer typed int32; NOTE(review): presumably the
 *                  backoff weight -- confirm.
 * @return Pointer to read-only int32 data; presumably the word IDs of the
 *         current N-gram -- confirm length and lifetime.
 */
00402 SPHINXBASE_EXPORT
00403 int32 const *ngram_iter_get(ngram_iter_t *itor,
00404                             int32 *out_score,
00405                             int32 *out_bowt);
00406 
/**
 * Derives a new iterator from an existing one.
 * NOTE(review): presumably iterates over the N-grams that extend the
 * current entry by one word -- confirm.
 */
00412 SPHINXBASE_EXPORT
00413 ngram_iter_t *ngram_iter_successors(ngram_iter_t *itor);
00414 
/**
 * Advances an iterator and returns an iterator pointer.
 * NOTE(review): presumably returns NULL (and releases the iterator) when
 * exhausted -- confirm before also calling ngram_iter_free().
 */
00418 SPHINXBASE_EXPORT
00419 ngram_iter_t *ngram_iter_next(ngram_iter_t *itor);
00420 
/**
 * Releases an iterator; returns nothing.
 */
00424 SPHINXBASE_EXPORT
00425 void ngram_iter_free(ngram_iter_t *itor);
00426 
/**
 * Adds a word string to a model with a float32 weight.
 *
 * @param model  Model to modify.
 * @param word   Word string to add.
 * @param weight NOTE(review): scale and meaning of the weight are not
 *               visible in this header -- confirm.
 * @return int32; presumably the word ID assigned to `word` -- confirm.
 */
00439 SPHINXBASE_EXPORT
00440 int32 ngram_model_add_word(ngram_model_t *model,
00441                            const char *word, float32 weight);
00442 
/**
 * Reads class definitions for a model from a named file.
 * NOTE(review): the file format and the int32 return convention are not
 * visible in this header.
 */
00456 SPHINXBASE_EXPORT
00457 int32 ngram_model_read_classdef(ngram_model_t *model,
00458                                 const char *file_name);
00459 
/**
 * Adds a named class with a list of member words to a model.
 *
 * @param model       Model to modify.
 * @param classname   Name of the class.
 * @param classweight float32 weight for the class as a whole.
 * @param words       Array of word strings (not const-qualified here).
 * @param weights     Read-only array of float32; presumably one weight per
 *                    entry of `words` -- confirm.
 * @param n_words     Number of entries in `words`.
 * @return int32; meaning not visible in this header.
 */
00468 SPHINXBASE_EXPORT
00469 int32 ngram_model_add_class(ngram_model_t *model,
00470                             const char *classname,
00471                             float32 classweight,
00472                             char **words,
00473                             const float32 *weights,
00474                             int32 n_words);
00475 
/**
 * Adds one word with a float32 weight to a class identified by name.
 * NOTE(review): behavior when `classname` does not exist, and the meaning
 * of the int32 result, are not visible here.
 */
00485 SPHINXBASE_EXPORT
00486 int32 ngram_model_add_class_word(ngram_model_t *model,
00487                                  const char *classname,
00488                                  const char *word,
00489                                  float32 weight);
00490 
/**
 * Builds a model from an array of existing models.  The result has the same
 * ngram_model_t type as its members; the other ngram_model_set_* functions
 * call such a pointer `set`.
 *
 * @param config   Configuration object; consulted options not visible here.
 * @param models   Array of model pointers.
 * @param names    Array of name strings; presumably parallel to `models`.
 * @param weights  Read-only float32 array; NOTE(review): presumably
 *                 interpolation weights, possibly optional -- confirm.
 * @param n_models Number of entries in `models`.
 * @return Pointer to the new set; presumably NULL on failure -- confirm.
 */
00515 SPHINXBASE_EXPORT
00516 ngram_model_t *ngram_model_set_init(cmd_ln_t *config,
00517                                     ngram_model_t **models,
00518                                     char **names,
00519                                     const float32 *weights,
00520                                     int32 n_models);
00521 
/**
 * Builds a model set from a control file.
 *
 * @param config    Configuration object; consulted options not visible here.
 * @param lmctlfile Name of the control file; its format is not visible in
 *                  this header.
 * @param lmath     logmath_t object used for the models in the set.
 * @return Pointer to the new set; presumably NULL on failure -- confirm.
 */
00552 SPHINXBASE_EXPORT
00553 ngram_model_t *ngram_model_set_read(cmd_ln_t *config,
00554                                     const char *lmctlfile,
00555                                     logmath_t *lmath);
00556 
/**
 * Returns an int32 count for a model set; presumably the number of member
 * models -- confirm.
 */
00560 SPHINXBASE_EXPORT
00561 int32 ngram_model_set_count(ngram_model_t *set);
00562 
/**
 * Handle type for the iterator returned by ngram_model_set_iter().  The
 * members of struct ngram_model_set_iter_s are not declared in this header.
 */
00566 typedef struct ngram_model_set_iter_s ngram_model_set_iter_t;
00567 
/**
 * Creates an iterator over a model set; presumably over its member
 * models -- confirm.
 */
00573 SPHINXBASE_EXPORT
00574 ngram_model_set_iter_t *ngram_model_set_iter(ngram_model_t *set);
00575 
/**
 * Advances a set iterator and returns an iterator pointer.
 * NOTE(review): presumably returns NULL (and releases the iterator) at the
 * end -- confirm.
 */
00581 SPHINXBASE_EXPORT
00582 ngram_model_set_iter_t *ngram_model_set_iter_next(ngram_model_set_iter_t *itor);
00583 
/**
 * Releases a set iterator; returns nothing.
 */
00587 SPHINXBASE_EXPORT
00588 void ngram_model_set_iter_free(ngram_model_set_iter_t *itor);
00589 
/**
 * Returns the model a set iterator currently refers to.
 *
 * @param itor   Iterator to read.
 * @param lmname Output pointer to a constant string; presumably receives
 *               the model's name, owned by the set -- confirm.
 */
00597 SPHINXBASE_EXPORT
00598 ngram_model_t *ngram_model_set_iter_model(ngram_model_set_iter_t *itor,
00599                                           char const **lmname);
00600 
/**
 * Selects a member of a model set by name and returns a model pointer.
 * NOTE(review): presumably makes that member the current model and returns
 * it, or NULL if the name is unknown -- confirm.
 */
00607 SPHINXBASE_EXPORT
00608 ngram_model_t *ngram_model_set_select(ngram_model_t *set,
00609                                       const char *name);
00610 
/**
 * Looks up a member of a model set by name and returns a model pointer.
 * NOTE(review): presumably does not change the current selection; the
 * result for an unknown or NULL name is not visible here.
 */
00617 SPHINXBASE_EXPORT
00618 ngram_model_t *ngram_model_set_lookup(ngram_model_t *set,
00619                                       const char *name);
00620 
/**
 * Returns a constant string for a model set; presumably the name of the
 * currently selected member -- confirm.
 */
00624 SPHINXBASE_EXPORT
00625 const char *ngram_model_set_current(ngram_model_t *set);
00626 
/**
 * Configures interpolation over a model set and returns a model pointer.
 *
 * @param set     Model set to modify.
 * @param names   Array of constant name strings.
 * @param weights Read-only float32 array; NOTE(review): presumably parallel
 *                to `names`, with the element count taken from the set
 *                since no count is passed -- confirm.
 */
00634 SPHINXBASE_EXPORT
00635 ngram_model_t *ngram_model_set_interp(ngram_model_t *set,
00636                                       const char **names,
00637                                       const float32 *weights);
00638 
/**
 * Adds a model to a set under a name.
 *
 * @param set          Model set to modify.
 * @param model        Model to add.
 * @param name         Name for the new member.
 * @param weight       float32 weight for the new member.
 * @param reuse_widmap int flag; NOTE(review): presumably controls whether
 *                     the existing word-ID mapping is kept -- confirm.
 * @return Model pointer; presumably `model` on success -- confirm.
 */
00651 SPHINXBASE_EXPORT
00652 ngram_model_t *ngram_model_set_add(ngram_model_t *set,
00653                                    ngram_model_t *model,
00654                                    const char *name,
00655                                    float32 weight,
00656                                    int reuse_widmap);
00657 
/**
 * Removes the member with the given name from a set.
 *
 * @param reuse_widmap int flag; see the note on ngram_model_set_add().
 * @return Model pointer; presumably the removed model, which the caller
 *         would then own -- confirm.
 */
00666 SPHINXBASE_EXPORT
00667 ngram_model_t *ngram_model_set_remove(ngram_model_t *set,
00668                                       const char *name,
00669                                       int reuse_widmap);
00670 
/**
 * Supplies a list of `n_words` constant word strings to a model set.
 * NOTE(review): presumably sets the set's word-ID mapping so that IDs match
 * indices into `words` -- confirm.
 */
00674 SPHINXBASE_EXPORT
00675 void ngram_model_set_map_words(ngram_model_t *set,
00676                                const char **words,
00677                                int32 n_words);
00678 
/**
 * Maps a set-level int32 word ID to another int32 word ID.
 * NOTE(review): presumably the ID in the currently selected member model;
 * the result with no current model is not visible here.
 */
00686 SPHINXBASE_EXPORT
00687 int32 ngram_model_set_current_wid(ngram_model_t *set,
00688                                   int32 set_wid);
00689 
/**
 * Tests a set-level word ID and returns an int32.
 * NOTE(review): presumably nonzero when the word is known to the current
 * model(s) -- confirm.
 */
00699 SPHINXBASE_EXPORT
00700 int32 ngram_model_set_known_wid(ngram_model_t *set, int32 set_wid);
00701 
/**
 * Flush operation on a model; returns nothing.
 * NOTE(review): what is flushed (presumably cached lookup state inside the
 * model) is not visible in this header -- confirm.
 */
00709 SPHINXBASE_EXPORT
00710 void ngram_model_flush(ngram_model_t *lm);
00711 
00712 #ifdef __cplusplus
00713 }
00714 #endif
00715 
00716 
00717 #endif /* __NGRAM_MODEL_H__ */