SphinxBase 0.6

include/sphinxbase/ad.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * ad.h -- generic live audio interface for recording and playback
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1996 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * 
00049  * $Log: ad.h,v $
00050  * Revision 1.8  2005/06/22 08:00:06  arthchan2003
00051  * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs.
00052  *
00053  * Revision 1.7  2004/12/14 00:39:49  arthchan2003
00054  * add <s3types.h> to the code, change some comments to doxygen style
00055  *
00056  * Revision 1.6  2004/12/06 11:17:55  arthchan2003
00057  * Update the copyright information of ad.h, *sigh* start to feel tired of updating documentation system.  Anyone who has time, please take up libs3audio. That is the last place which is undocumented
00058  *
00059  * Revision 1.5  2004/07/23 23:44:46  egouvea
00060  * Changed the cygwin code to use the same audio files as the MS Visual code, removed unused variables from fe_interface.c
00061  *
00062  * Revision 1.4  2004/02/29 23:48:31  egouvea
00063  * Updated configure.in to the recent automake/autoconf, fixed win32
00064  * references in audio files.
00065  *
00066  * Revision 1.3  2002/11/10 19:27:38  egouvea
00067  * Fixed references to sun's implementation of audio interface,
00068  * referring to the correct .h file, and replacing sun4 with sunos.
00069  *
00070  * Revision 1.2  2001/12/11 04:40:55  lenzo
00071  * License cleanup.
00072  *
00073  * Revision 1.1.1.1  2001/12/03 16:01:45  egouvea
00074  * Initial import of sphinx3
00075  *
00076  * Revision 1.1.1.1  2001/01/17 05:17:14  ricky
00077  * Initial Import of the s3.3 decoder, has working decodeaudiofile, s3.3_live
00078  *
00079  * 
00080  * 19-Jan-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00081  *              Added AD_ return codes.  Added ad_open_sps_bufsize(), and
00082  *              ad_rec_t.n_buf.
00083  * 
00084  * 17-Apr-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00085  *              Added ad_open_play_sps().
00086  * 
00087  * 07-Mar-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00088  *              Added ad_open_sps().
00089  * 
00090  * 10-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00091  *              Added ad_wbuf_t, ad_rec_t, and ad_play_t types, and augmented all
00092  *              recording functions with ad_rec_t, and playback functions with
00093  *              ad_play_t.
00094  * 
00095  * 06-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00096  *              Created.
00097  */
00098 
00103 #ifndef _AD_H_
00104 #define _AD_H_
00105 
00106 #include <sphinx_config.h>
00107 
00108 #if defined (__CYGWIN__)
00109 #include <w32api/windows.h>
00110 #include <w32api/mmsystem.h>
00111 #elif (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
00112 #include <windows.h>
00113 #include <mmsystem.h>
00114 #elif defined(AD_BACKEND_JACK)
00115 #include <jack/jack.h>
00116 #include <jack/ringbuffer.h>
00117 #ifdef HAVE_SAMPLERATE_H
00118 #include <samplerate.h>
00119 #endif
00120 #elif defined(AD_BACKEND_PULSEAUDIO)
00121 #include <pulse/pulseaudio.h>
00122 #include <pulse/simple.h>
00123 #elif defined(AD_BACKEND_ALSA)
00124 #include <alsa/asoundlib.h>
00125 #endif
00126 
00127 /* Win32/WinCE DLL gunk */
00128 #include <sphinxbase/sphinxbase_export.h>
00129 
00130 #include <sphinxbase/prim_type.h>
00131 
00132 #ifdef __cplusplus
00133 extern "C" {
00134 #endif
00135 #if 0
00136 /* Fool Emacs. */
00137 }
00138 #endif
00139 
00140 #define AD_SAMPLE_SIZE          (sizeof(int16)) /* Size in bytes of one audio sample (one int16) */
00141 #define DEFAULT_SAMPLES_PER_SEC 16000 /* Default sampling rate, samples/sec; NOTE(review): presumably what ad_open() uses - confirm */
00142 
00143 /* Return codes: AD_OK is zero, every other code is negative */
00144 #define AD_OK           0
00145 #define AD_EOF          -1 /* NOTE(review): same value as AD_ERR_GEN below */
00146 #define AD_ERR_GEN      -1 /* NOTE(review): equals AD_EOF, so callers cannot tell the two apart by value */
00147 #define AD_ERR_NOT_OPEN -2 /* NOTE(review): presumably "device not opened" - confirm against the implementation */
00148 #define AD_ERR_WAVE     -3 /* NOTE(review): presumably a Win32 wave API failure - confirm against the implementation */
00149 
00150 
00151 #if  (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE) /* Win32 builds only */
00152 typedef struct { /* One Win32 wave buffer: a wave header plus its data buffer, each kept as handle + pointer */
00153     HGLOBAL h_whdr; /* Global-memory handle for the wave header */
00154     LPWAVEHDR p_whdr; /* Pointer to the wave header; NOTE(review): presumably obtained from h_whdr - confirm */
00155     HGLOBAL h_buf; /* Global-memory handle for the data buffer */
00156     LPSTR p_buf; /* Pointer to the data buffer; NOTE(review): presumably obtained from h_buf - confirm */
00157 } ad_wbuf_t;
00158 #endif
00159 
00160 
00161 /* ------------ RECORDING -------------- */
00162 
00163 /*
00164  * NOTE: ad_rec_t and ad_play_t are READ-ONLY structures for the user.
00165  */
00166 
00167 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
00168 
00169 #define DEFAULT_DEVICE (char*)DEV_MAPPER /* Default device for the Win32 backend; NOTE(review): DEV_MAPPER is not defined in this header - confirm its origin */

00174 typedef struct ad_rec_s { /* Recording state for the Win32 wave-input backend; read-only for users */
00175     HWAVEIN h_wavein;   /* "HANDLE" to the audio input device */
00176     ad_wbuf_t *wi_buf;  /* Recording buffers provided to system */
00177     int32 n_buf;        /* #Recording buffers provided to system */
00178     int32 opened;       /* Flag; A/D opened for recording */
00179     int32 recording;    /* Flag; NOTE(review): presumably nonzero while recording is in progress - confirm */
00180     int32 curbuf;       /* Current buffer with data for application */
00181     int32 curoff;       /* Start of data for application in curbuf */
00182     int32 curlen;       /* #samples of data from curoff in curbuf */
00183     int32 lastbuf;      /* Last buffer containing data after recording stopped */
00184     int32 sps;          /* Samples/sec */
00185     int32 bps;          /* Bytes/sample */
00186 } ad_rec_t;
00187 
00188 #elif defined(AD_BACKEND_OSS)
00189 
00190 #define DEFAULT_DEVICE "/dev/dsp" /* Default device path for the OSS backend */

00195 typedef struct { /* Recording state for the OSS backend; read-only for users */
00196     int32 dspFD;        /* Audio device descriptor */
00197     int32 recording;    /* Flag; NOTE(review): presumably nonzero while recording is in progress - confirm */
00198     int32 sps;          /* Samples/sec */
00199     int32 bps;          /* Bytes/sample */
00200 } ad_rec_t;
00201 
00202 #elif defined(AD_BACKEND_PULSEAUDIO)
00203 
00204 #define DEFAULT_DEVICE NULL /* PulseAudio backend names no default device */
00205 
00206 typedef struct { /* Recording state for the PulseAudio backend; read-only for users */
00207     pa_simple* pa; /* PulseAudio simple-API connection object */
00208     int32 recording; /* Flag; NOTE(review): presumably nonzero while recording is in progress - confirm */
00209     int32 sps; /* Samples/sec */
00210     int32 bps; /* Bytes/sample */
00211 } ad_rec_t;
00212 
00213 #elif defined(AD_BACKEND_ALSA)
00214 
00215 #define DEFAULT_DEVICE "default" /* Default device name for the ALSA backend */
00216 typedef struct { /* Recording state for the ALSA backend; read-only for users */
00217     snd_pcm_t *dspH; /* ALSA PCM handle */
00218     int32 recording; /* Flag; NOTE(review): presumably nonzero while recording is in progress - confirm */
00219     int32 sps; /* Samples/sec */
00220     int32 bps; /* Bytes/sample */
00221 } ad_rec_t;
00222 
00223 #elif defined(AD_BACKEND_JACK)
00224 
00225 typedef struct { /* Recording state for the JACK backend; NOTE(review): this branch defines no DEFAULT_DEVICE */
00226     jack_client_t *client; /* JACK client handle */
00227     jack_port_t *input_port; /* JACK port; NOTE(review): presumably where captured audio arrives - confirm */
00228     jack_port_t *output_port; /* JACK port; NOTE(review): purpose not visible in this header - confirm */
00229     jack_ringbuffer_t* rbuffer; /* JACK ring buffer; NOTE(review): presumably carries samples from the JACK callback to ad_read - confirm */
00230     jack_default_audio_sample_t* sample_buffer;    /* Buffer of JACK-native samples; NOTE(review): exact role not visible here */
00231     int32 recording; /* Flag; NOTE(review): presumably nonzero while recording is in progress - confirm */
00232     int32 sps; /* Samples/sec */
00233     int32 bps; /* Bytes/sample */
00234 #ifdef HAVE_SAMPLERATE_H /* Resampling members exist only when libsamplerate's header is available */
00235     SRC_STATE *resample_state; /* libsamplerate converter state */
00236     jack_default_audio_sample_t *resample_buffer; /* Buffer of JACK-native samples used with the resampler */
00237 #endif
00238 } ad_rec_t;
00239 
00240 #elif defined(AD_BACKEND_S60)
00241 
00242 typedef struct ad_rec_s { /* Recording state for the S60 backend; NOTE(review): this branch defines no DEFAULT_DEVICE */
00243     void* recorder; /* Opaque pointer to the backend's recorder object */
00244     int32 recording; /* Flag; NOTE(review): presumably nonzero while recording is in progress - confirm */
00245     int32 sps; /* Samples/sec */
00246     int32 bps; /* Bytes/sample */
00247 } ad_rec_t;
00248 
00249 SPHINXBASE_EXPORT /* S60 declaration of ad_open_sps_bufsize; the same prototype is declared again later under a WIN32 guard */
00250 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec);
00251 
00252 #else
00253 
00254 #define DEFAULT_DEVICE NULL /* No default device when no audio backend is selected */
00255 typedef struct { /* Fallback ad_rec_t used when none of the backend branches above applies */
00256     int32 sps;          /* Samples/sec */
00257     int32 bps;          /* Bytes/sample */
00258 } ad_rec_t;     
00259 
00260 
00261 #endif
00262 
00263 
00273 SPHINXBASE_EXPORT /* Open a named audio device for recording; NOTE(review): failure value (presumably NULL) is not shown in this listing - confirm */
00274 ad_rec_t *ad_open_dev (
00275         const char *dev, /* In: device name; DEFAULT_DEVICE is the per-backend default */
00276         int32 samples_per_sec /* In: requested sampling rate, samples/sec */
00277         );
00278 
00282 SPHINXBASE_EXPORT /* Open for recording at a given sampling rate; NOTE(review): presumably uses DEFAULT_DEVICE - confirm */
00283 ad_rec_t *ad_open_sps (
00284                        int32 samples_per_sec /* In: requested sampling rate, samples/sec */
00285                        );
00286 
00287 
00291 SPHINXBASE_EXPORT /* Open for recording with no explicit settings; NOTE(review): presumably DEFAULT_DEVICE at DEFAULT_SAMPLES_PER_SEC - confirm */
00292 ad_rec_t *ad_open ( void );
00293 
00294 
00295 #if defined(WIN32) && !defined(GNUWINCE) /* NOTE(review): narrower than the Win32 ad_rec_t guard, which also accepts AD_BACKEND_WIN32 */
00296 /*
00297  * Like ad_open_sps but specifies buffering required within driver.  This function is
00298  * useful if the default (5000 msec worth) is too small and results in loss of data.
00299  */
00300 SPHINXBASE_EXPORT
00301 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec); /* bufsize_msec: driver buffering, in milliseconds of audio */
00302 #endif
00303 
00304 
00305 /* Start audio recording on the given ad_rec_t.  Return value: 0 if successful, <0 otherwise */
00306 SPHINXBASE_EXPORT
00307 int32 ad_start_rec (ad_rec_t *);
00308 
00309 
00310 /* Stop audio recording on the given ad_rec_t.  Return value: 0 if successful, <0 otherwise */
00311 SPHINXBASE_EXPORT
00312 int32 ad_stop_rec (ad_rec_t *);
00313 
00314 
00315 /* Close the recording device held by the given ad_rec_t.  Return value: 0 if successful, <0 otherwise */
00316 SPHINXBASE_EXPORT
00317 int32 ad_close (ad_rec_t *);
00318 
00319 
00320 /*
00321  * Read next block of audio samples while recording; read up to max samples into buf.
00322  * Return value: # samples actually read (could be 0 since non-blocking); -1 if not
00323  * recording and no more samples remaining to be read from most recent recording (-1 is the value of AD_EOF).
00324  */
00325 SPHINXBASE_EXPORT
00326 int32 ad_read (ad_rec_t *, int16 *buf, int32 max);
00327 
00328 
00329 /* ------ PLAYBACK; SIMILAR TO RECORDING ------- */
00330 
00331 #if defined(WIN32) && !defined(GNUWINCE)
00332 
00333 typedef struct { /* Win32 playback state; NOTE(review): enclosing guard tests only WIN32, unlike ad_wbuf_t which also accepts AD_BACKEND_WIN32 */
00334     HWAVEOUT h_waveout; /* "HANDLE" to the audio output device */
00335     ad_wbuf_t *wo_buf;  /* Playback buffers given to the system */
00336     int32 opened;       /* Flag; A/D opened for playback */
00337     int32 playing;      /* Flag; NOTE(review): presumably nonzero while playback is in progress - confirm */
00338     char *busy;         /* flags [N_WO_BUF] indicating whether given to system; N_WO_BUF is not defined in this header */
00339     int32 nxtbuf;       /* Next buffer [0..N_WO_BUF-1] to be used for playback data */
00340     int32 sps;          /* Samples/sec */
00341     int32 bps;          /* Bytes/sample */
00342 } ad_play_t;
00343 
00344 #else
00345 
00346 typedef struct { /* Placeholder playback state for every non-Win32 build */
00347     int32 sps;          /* Samples/sec */
00348     int32 bps;          /* Bytes/sample */
00349 } ad_play_t;    /* Dummy definition for systems without A/D stuff */
00350 
00351 #endif
00352 
00353 
00354 SPHINXBASE_EXPORT /* Playback counterpart of ad_open_sps: open for playback at samples_per_sec; NOTE(review): failure value not shown - confirm */
00355 ad_play_t *ad_open_play_sps (int32 samples_per_sec);
00356 
00357 SPHINXBASE_EXPORT /* Playback counterpart of ad_open; NOTE(review): presumably uses DEFAULT_SAMPLES_PER_SEC - confirm */
00358 ad_play_t *ad_open_play ( void );
00359 
00360 SPHINXBASE_EXPORT /* Playback counterpart of ad_start_rec; NOTE(review): presumably 0 on success, <0 otherwise - confirm */
00361 int32 ad_start_play (ad_play_t *);
00362 
00363 SPHINXBASE_EXPORT /* Playback counterpart of ad_stop_rec; NOTE(review): presumably 0 on success, <0 otherwise - confirm */
00364 int32 ad_stop_play (ad_play_t *);
00365 
00366 SPHINXBASE_EXPORT /* Playback counterpart of ad_close; NOTE(review): presumably 0 on success, <0 otherwise - confirm */
00367 int32 ad_close_play (ad_play_t *);
00368 
00369 
00378 SPHINXBASE_EXPORT /* Hand audio in buf to the playback device; NOTE(review): len is presumably a sample count and the return value is not documented in this listing - confirm */
00379 int32 ad_write (ad_play_t *, int16 *buf, int32 len);
00380 
00381 
00382 /* ------ MISCELLANEOUS ------- */
00383 
00387 SPHINXBASE_EXPORT /* Convert n_samp mu-law samples from inbuf into PCM samples in outbuf */
00388 void ad_mu2li (int16 *outbuf,           /* Out: PCM data placed here (allocated by user) */
00389                unsigned char *inbuf,    /* In: Input buffer with mulaw data */
00390                int32 n_samp);           /* In: #Samples in inbuf */
00391 
00392 #ifdef __cplusplus
00393 }
00394 #endif
00395 
00396 
00397 #endif