SphinxBase  5prealpha
fe_interface.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 #include <stdio.h>
38 #include <string.h>
39 #include <math.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 
43 #ifdef HAVE_CONFIG_H
44 #include <config.h>
45 #endif
46 
47 #include "sphinxbase/prim_type.h"
48 #include "sphinxbase/byteorder.h"
49 #include "sphinxbase/fixpoint.h"
50 #include "sphinxbase/genrand.h"
51 #include "sphinxbase/err.h"
52 #include "sphinxbase/cmd_ln.h"
53 #include "sphinxbase/ckd_alloc.h"
54 
55 #include "fe_internal.h"
56 #include "fe_warp.h"
57 
58 static const arg_t fe_args[] = {
59  waveform_to_cepstral_command_line_macro(),
60  { NULL, 0, NULL, NULL }
61 };
62 
63 int
64 fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
65 {
66  int j, frate;
67 
68  fe->config = config;
69  fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
70  frate = cmd_ln_int32_r(config, "-frate");
71  if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) {
72  E_ERROR
73  ("Frame rate %d can not be bigger than sample rate %.02f\n",
74  frate, fe->sampling_rate);
75  return -1;
76  }
77 
78  fe->frame_rate = (int16)frate;
79  if (cmd_ln_boolean_r(config, "-dither")) {
80  fe->dither = 1;
81  fe->seed = cmd_ln_int32_r(config, "-seed");
82  }
83 #ifdef WORDS_BIGENDIAN
84  fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
85 #else
86  fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
87 #endif
88  fe->window_length = cmd_ln_float32_r(config, "-wlen");
89  fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");
90 
91  fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
92  fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");
93 
94  /* Check FFT size, compute FFT order (log_2(n)) */
95  for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) {
96  if (((j % 2) != 0) || (fe->fft_size <= 0)) {
97  E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
98  fe->fft_size);
99  return -1;
100  }
101  }
102  /* Verify that FFT size is greater or equal to window length. */
103  if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) {
104  E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
105  (int)(fe->window_length * fe->sampling_rate));
106  return -1;
107  }
108 
109  fe->pre_speech = (int16)cmd_ln_int32_r(config, "-vad_prespeech");
110  fe->post_speech = (int16)cmd_ln_int32_r(config, "-vad_postspeech");
111  fe->start_speech = (int16)cmd_ln_int32_r(config, "-vad_startspeech");
112  fe->vad_threshold = cmd_ln_float32_r(config, "-vad_threshold");
113 
114  fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");
115  fe->remove_noise = cmd_ln_boolean_r(config, "-remove_noise");
116  fe->remove_silence = cmd_ln_boolean_r(config, "-remove_silence");
117 
118  if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct"))
119  fe->transform = DCT_II;
120  else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy"))
121  fe->transform = LEGACY_DCT;
122  else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk"))
123  fe->transform = DCT_HTK;
124  else {
125  E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
126  return -1;
127  }
128 
129  if (cmd_ln_boolean_r(config, "-logspec"))
130  fe->log_spec = RAW_LOG_SPEC;
131  if (cmd_ln_boolean_r(config, "-smoothspec"))
132  fe->log_spec = SMOOTH_LOG_SPEC;
133 
134  return 0;
135 }
136 
137 static int
138 fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
139 {
140  mel->sampling_rate = fe->sampling_rate;
141  mel->fft_size = fe->fft_size;
142  mel->num_cepstra = fe->num_cepstra;
143  mel->num_filters = cmd_ln_int32_r(config, "-nfilt");
144 
145  if (fe->log_spec)
146  fe->feature_dimension = mel->num_filters;
147  else
148  fe->feature_dimension = fe->num_cepstra;
149 
150  mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
151  mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");
152 
153  mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");
154 
155  mel->warp_type = cmd_ln_str_r(config, "-warp_type");
156  mel->warp_params = cmd_ln_str_r(config, "-warp_params");
157  mel->lifter_val = cmd_ln_int32_r(config, "-lifter");
158 
159  mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
160  mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");
161 
162  if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) {
163  E_ERROR("Failed to initialize the warping function.\n");
164  return -1;
165  }
166  fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
167  return 0;
168 }
169 
170 void
171 fe_print_current(fe_t const *fe)
172 {
173  E_INFO("Current FE Parameters:\n");
174  E_INFO("\tSampling Rate: %f\n", fe->sampling_rate);
175  E_INFO("\tFrame Size: %d\n", fe->frame_size);
176  E_INFO("\tFrame Shift: %d\n", fe->frame_shift);
177  E_INFO("\tFFT Size: %d\n", fe->fft_size);
178  E_INFO("\tLower Frequency: %g\n",
179  fe->mel_fb->lower_filt_freq);
180  E_INFO("\tUpper Frequency: %g\n",
181  fe->mel_fb->upper_filt_freq);
182  E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters);
183  E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps);
184  E_INFO("\tStart Utt Status: %d\n", fe->start_flag);
185  E_INFO("Will %sremove DC offset at frame level\n",
186  fe->remove_dc ? "" : "not ");
187  if (fe->dither) {
188  E_INFO("Will add dither to audio\n");
189  E_INFO("Dither seeded with %d\n", fe->seed);
190  }
191  else {
192  E_INFO("Will not add dither to audio\n");
193  }
194  if (fe->mel_fb->lifter_val) {
195  E_INFO("Will apply sine-curve liftering, period %d\n",
196  fe->mel_fb->lifter_val);
197  }
198  E_INFO("Will %snormalize filters to unit area\n",
199  fe->mel_fb->unit_area ? "" : "not ");
200  E_INFO("Will %sround filter frequencies to DFT points\n",
201  fe->mel_fb->round_filters ? "" : "not ");
202  E_INFO("Will %suse double bandwidth in mel filter\n",
203  fe->mel_fb->doublewide ? "" : "not ");
204 }
205 
206 fe_t *
207 fe_init_auto()
208 {
209  return fe_init_auto_r(cmd_ln_get());
210 }
211 
212 fe_t *
213 fe_init_auto_r(cmd_ln_t *config)
214 {
215  fe_t *fe;
216  int prespch_frame_len;
217 
218  fe = (fe_t*)ckd_calloc(1, sizeof(*fe));
219  fe->refcount = 1;
220 
221  /* transfer params to front end */
222  if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0) {
223  fe_free(fe);
224  return NULL;
225  }
226 
227  /* compute remaining fe parameters */
228  /* We add 0.5 so approximate the float with the closest
229  * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4
230  */
231  fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
232  fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
233  fe->prior = 0;
234 
235  fe_start_stream(fe);
236 
237  assert (fe->frame_shift > 1);
238 
239  if (fe->frame_size < fe->frame_shift) {
240  E_ERROR
241  ("Frame size %d (-wlen) must be greater than frame shift %d (-frate)\n",
242  fe->frame_size, fe->frame_shift);
243  fe_free(fe);
244  return NULL;
245  }
246 
247 
248  if (fe->frame_size > (fe->fft_size)) {
249  E_ERROR
250  ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n",
251  fe->frame_size, fe->fft_size);
252  fe_free(fe);
253  return NULL;
254  }
255 
256  if (fe->dither)
257  fe_init_dither(fe->seed);
258 
259  /* establish buffers for overflow samps and hamming window */
260  fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
261  fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t));
262 
263  /* create hamming window */
264  fe_create_hamming(fe->hamming_window, fe->frame_size);
265 
266  /* init and fill appropriate filter structure */
267  fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));
268 
269  /* transfer params to mel fb */
270  fe_parse_melfb_params(config, fe, fe->mel_fb);
271 
272  if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) {
273  E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n",
274  fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2);
275  fe_free(fe);
276  return NULL;
277  }
278 
279  fe_build_melfilters(fe->mel_fb);
280 
281  fe_compute_melcosine(fe->mel_fb);
282  if (fe->remove_noise || fe->remove_silence)
283  fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters);
284 
285  fe->vad_data = (vad_data_t*)ckd_calloc(1, sizeof(*fe->vad_data));
286  prespch_frame_len = fe->log_spec != RAW_LOG_SPEC ? fe->num_cepstra : fe->mel_fb->num_filters;
287  fe->vad_data->prespch_buf = fe_prespch_init(fe->pre_speech + 1, prespch_frame_len, fe->frame_shift);
288 
289  /* Create temporary FFT, spectrum and mel-spectrum buffers. */
290  /* FIXME: Gosh there are a lot of these. */
291  fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
292  fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
293  fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
294  fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));
295 
296  /* create twiddle factors */
297  fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
298  fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
299  fe_create_twiddle(fe);
300 
301  if (cmd_ln_boolean_r(config, "-verbose")) {
302  fe_print_current(fe);
303  }
304 
305  /*** Initialize the overflow buffers ***/
306  fe_start_utt(fe);
307  return fe;
308 }
309 
310 arg_t const *
311 fe_get_args(void)
312 {
313  return fe_args;
314 }
315 
316 const cmd_ln_t *
317 fe_get_config(fe_t *fe)
318 {
319  return fe->config;
320 }
321 
322 void
323 fe_init_dither(int32 seed)
324 {
325  E_INFO("Using %d as the seed.\n", seed);
326  s3_rand_seed(seed);
327 }
328 
329 static void
330 fe_reset_vad_data(vad_data_t * vad_data)
331 {
332  vad_data->in_speech = 0;
333  vad_data->pre_speech_frames = 0;
334  vad_data->post_speech_frames = 0;
335  fe_prespch_reset_cep(vad_data->prespch_buf);
336 }
337 
338 int32
339 fe_start_utt(fe_t * fe)
340 {
341  fe->num_overflow_samps = 0;
342  memset(fe->overflow_samps, 0, fe->frame_size * sizeof(int16));
343  fe->start_flag = 1;
344  fe->prior = 0;
345  fe_reset_vad_data(fe->vad_data);
346  return 0;
347 }
348 
349 void
350 fe_start_stream(fe_t *fe)
351 {
352  fe->sample_counter = 0;
353  fe_reset_noisestats(fe->noise_stats);
354 }
355 
356 int
357 fe_get_output_size(fe_t *fe)
358 {
359  return (int)fe->feature_dimension;
360 }
361 
362 void
363 fe_get_input_size(fe_t *fe, int *out_frame_shift,
364  int *out_frame_size)
365 {
366  if (out_frame_shift)
367  *out_frame_shift = fe->frame_shift;
368  if (out_frame_size)
369  *out_frame_size = fe->frame_size;
370 }
371 
372 uint8
373 fe_get_vad_state(fe_t *fe)
374 {
375  return fe->vad_data->in_speech;
376 }
377 
378 int
379 fe_process_frames(fe_t *fe,
380  int16 const **inout_spch,
381  size_t *inout_nsamps,
382  mfcc_t **buf_cep,
383  int32 *inout_nframes,
384  int32 *out_frameidx)
385 {
386  return fe_process_frames_ext(fe, inout_spch, inout_nsamps, buf_cep, inout_nframes, NULL, NULL, out_frameidx);
387 }
388 
389 
393 static int
394 fe_copy_from_prespch(fe_t *fe, int32 *inout_nframes, mfcc_t **buf_cep, int outidx)
395 {
396  while ((*inout_nframes) > 0 && fe_prespch_read_cep(fe->vad_data->prespch_buf, buf_cep[outidx]) > 0) {
397  outidx++;
398  (*inout_nframes)--;
399  }
400  return outidx;
401 }
402 
406 static int
407 fe_check_prespeech(fe_t *fe, int32 *inout_nframes, mfcc_t **buf_cep, int outidx, int32 *out_frameidx, size_t *inout_nsamps, int orig_nsamps)
408 {
409  if (fe->vad_data->in_speech) {
410  if (fe_prespch_ncep(fe->vad_data->prespch_buf) > 0) {
411 
412  /* Previous frame triggered vad into speech state. Last frame is in the end of
413  prespeech buffer, so overwrite it */
414  outidx = fe_copy_from_prespch(fe, inout_nframes, buf_cep, outidx);
415 
416  /* Sets the start frame for the returned data so that caller can update timings */
417  if (out_frameidx) {
418  *out_frameidx = (fe->sample_counter + orig_nsamps - *inout_nsamps) / fe->frame_shift - fe->pre_speech;
419  }
420  } else {
421  outidx++;
422  (*inout_nframes)--;
423  }
424  }
425  /* Amount of data behind the original input which is still needed. */
426  if (fe->num_overflow_samps > 0)
427  fe->num_overflow_samps -= fe->frame_shift;
428 
429  return outidx;
430 }
431 
/*
 * Convert as much of the input audio into feature frames as both the
 * input and the output buffer allow.
 *
 * fe                 front end holding the overflow buffer, VAD state and
 *                    running sample counter updated here.
 * inout_spch         in: address of the pointer to the first input
 *                    sample; out: pointer advanced past every sample that
 *                    was consumed.  Not dereferenced when buf_cep is NULL.
 * inout_nsamps       in: number of samples available; out: number of
 *                    samples left unconsumed.
 * buf_cep            array of output frame buffers.  When NULL nothing is
 *                    processed: *inout_nframes and the return value
 *                    receive the maximum number of frames that the given
 *                    number of samples could produce.
 * inout_nframes      in: number of frames buf_cep can hold; out: number
 *                    of frames actually written.
 * voiced_spch        only compared against NULL here; the result is
 *                    forwarded to fe_write_frame() as a flag.
 * voiced_spch_nsamps not referenced by this function.
 * out_frameidx       if non-NULL, set to 0 and then possibly updated by
 *                    fe_check_prespeech().
 *
 * Returns 0, except for the buf_cep == NULL query described above.
 */
432 int
433 fe_process_frames_ext(fe_t *fe,
434  int16 const **inout_spch,
435  size_t *inout_nsamps,
436  mfcc_t **buf_cep,
437  int32 *inout_nframes,
438  int16 *voiced_spch,
439  int32 *voiced_spch_nsamps,
440  int32 *out_frameidx)
441 {
442  int outidx, n_overflow, orig_n_overflow;
443  int16 const *orig_spch;
444  size_t orig_nsamps;
445 
446  /* The logic here is pretty complex, please be careful with modifications */
447 
448  /* FIXME: Dump PCM data if needed */
449 
450  /* In the special case where there is no output buffer, return the
451  * maximum number of frames which would be generated. */
452  if (buf_cep == NULL) {
453  if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size)
454  *inout_nframes = 0;
455  else
456  *inout_nframes = 1
457  + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
458  / fe->frame_shift);
  /* Frames still held in the prespeech buffer are counted as well while
   * the VAD is not in the speech state. */
459  if (!fe->vad_data->in_speech)
460  *inout_nframes += fe_prespch_ncep(fe->vad_data->prespch_buf);
461  return *inout_nframes;
462  }
463 
464  if (out_frameidx)
465  *out_frameidx = 0;
466 
467  /* Are there not enough samples to make at least 1 frame? */
468  if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) {
469  if (*inout_nsamps > 0) {
470  /* Append them to the overflow buffer. */
471  memcpy(fe->overflow_samps + fe->num_overflow_samps,
472  *inout_spch, *inout_nsamps * (sizeof(int16)));
473  fe->num_overflow_samps += *inout_nsamps;
474  /* Update input-output pointers and counters. */
475  *inout_spch += *inout_nsamps;
476  *inout_nsamps = 0;
477  }
478  /* We produced no frames of output, sorry! */
479  *inout_nframes = 0;
480  return 0;
481  }
482 
483  /* Can't write a frame? Then do nothing! */
484  if (*inout_nframes < 1) {
485  *inout_nframes = 0;
486  return 0;
487  }
488 
489  /* Index of output frame. */
490  outidx = 0;
491 
492  /* Try to read from prespeech buffer */
493  if (fe->vad_data->in_speech && fe_prespch_ncep(fe->vad_data->prespch_buf) > 0) {
494  outidx = fe_copy_from_prespch(fe, inout_nframes, buf_cep, outidx);
495  if ((*inout_nframes) < 1) {
496  /* mfcc buffer is filled from prespeech buffer */
  /* No input samples have been consumed on this path: *inout_spch and
   * *inout_nsamps are left as the caller passed them. */
497  *inout_nframes = outidx;
498  return 0;
499  }
500  }
501 
502  /* Keep track of the original start of the buffer. */
503  orig_spch = *inout_spch;
504  orig_nsamps = *inout_nsamps;
505  orig_n_overflow = fe->num_overflow_samps;
506 
507  /* Start processing, taking care of any incoming overflow. */
508  if (fe->num_overflow_samps > 0) {
  /* Number of new samples needed to complete the partially filled frame. */
509  int offset = fe->frame_size - fe->num_overflow_samps;
510  /* Append start of spch to overflow samples to make a full frame. */
511  memcpy(fe->overflow_samps + fe->num_overflow_samps,
512  *inout_spch, offset * sizeof(**inout_spch));
513  fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
514  /* Update input-output pointers and counters. */
515  *inout_spch += offset;
516  *inout_nsamps -= offset;
517  } else {
518  fe_read_frame(fe, *inout_spch, fe->frame_size);
519  /* Update input-output pointers and counters. */
520  *inout_spch += fe->frame_size;
521  *inout_nsamps -= fe->frame_size;
522  }
523 
  /* The frame is written to buf_cep[outidx]; fe_check_prespeech() then
   * decides whether outidx advances. */
524  fe_write_frame(fe, buf_cep[outidx], voiced_spch != NULL);
525  outidx = fe_check_prespeech(fe, inout_nframes, buf_cep, outidx, out_frameidx, inout_nsamps, orig_nsamps);
526 
527  /* Process all remaining frames. */
528  while (*inout_nframes > 0 && *inout_nsamps >= (size_t)fe->frame_shift) {
529  fe_shift_frame(fe, *inout_spch, fe->frame_shift);
530  fe_write_frame(fe, buf_cep[outidx], voiced_spch != NULL);
531 
532  outidx = fe_check_prespeech(fe, inout_nframes, buf_cep, outidx, out_frameidx, inout_nsamps, orig_nsamps);
533 
534  /* Update input-output pointers and counters. */
535  *inout_spch += fe->frame_shift;
536  *inout_nsamps -= fe->frame_shift;
537  }
538 
539  /* How many relevant overflow samples are there left? */
540  if (fe->num_overflow_samps <= 0) {
541  /* Maximum number of overflow samples past *inout_spch to save. */
542  n_overflow = *inout_nsamps;
543  if (n_overflow > fe->frame_shift)
544  n_overflow = fe->frame_shift;
545  fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
546  /* Make sure this isn't an illegal read! */
547  if (fe->num_overflow_samps > *inout_spch - orig_spch)
548  fe->num_overflow_samps = *inout_spch - orig_spch;
549  fe->num_overflow_samps += n_overflow;
550  if (fe->num_overflow_samps > 0) {
  /* NOTE(review): the copy always starts (frame_size - frame_shift)
   * samples before *inout_spch, even if the count was clamped just
   * above -- confirm that the clamp can never actually trigger. */
551  memcpy(fe->overflow_samps,
552  *inout_spch - (fe->frame_size - fe->frame_shift),
553  fe->num_overflow_samps * sizeof(**inout_spch));
554  /* Update the input pointer to cover this stuff. */
555  *inout_spch += n_overflow;
556  *inout_nsamps -= n_overflow;
557  }
558  } else {
559  /* There is still some relevant data left in the overflow buffer. */
560  /* Shift existing data to the beginning. */
561  memmove(fe->overflow_samps,
562  fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
563  fe->num_overflow_samps * sizeof(*fe->overflow_samps));
564  /* Copy in whatever we had in the original speech buffer. */
565  n_overflow = *inout_spch - orig_spch + *inout_nsamps;
566  if (n_overflow > fe->frame_size - fe->num_overflow_samps)
567  n_overflow = fe->frame_size - fe->num_overflow_samps;
568  memcpy(fe->overflow_samps + fe->num_overflow_samps,
569  orig_spch, n_overflow * sizeof(*orig_spch));
570  fe->num_overflow_samps += n_overflow;
571  /* Advance the input pointers. */
572  if (n_overflow > *inout_spch - orig_spch) {
573  n_overflow -= (*inout_spch - orig_spch);
574  *inout_spch += n_overflow;
575  *inout_nsamps -= n_overflow;
576  }
577  }
578 
579  /* Finally update the frame counter with the number of frames
580  * and global sample counter with number of samples we processed */
581  *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */
582  fe->sample_counter += orig_nsamps - *inout_nsamps;
583 
584  return 0;
585 }
586 
587 int
588 fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps,
589  mfcc_t *** cep_block, int32 * nframes)
590 {
591  mfcc_t **cep;
592  int rv;
593 
594  /* Figure out how many frames we will need. */
595  fe_process_frames(fe, NULL, &nsamps, NULL, nframes, NULL);
596  /* Create the output buffer (it has to exist, even if there are no output frames). */
597  if (*nframes)
598  cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep));
599  else
600  cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep));
601  /* Now just call fe_process_frames() with the allocated buffer. */
602  rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes, NULL);
603  *cep_block = cep;
604 
605  return rv;
606 }
607 
608 
609 int32
610 fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes)
611 {
612  /* Process any remaining data, not very accurate for the VAD */
613  *nframes = 0;
614  if (fe->num_overflow_samps > 0) {
615  fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
616  fe_write_frame(fe, cepvector, FALSE);
617  if (fe->vad_data->in_speech)
618  *nframes = 1;
619  }
620 
621  /* reset overflow buffers... */
622  fe->num_overflow_samps = 0;
623  fe->start_flag = 0;
624 
625  return 0;
626 }
627 
628 fe_t *
629 fe_retain(fe_t *fe)
630 {
631  ++fe->refcount;
632  return fe;
633 }
634 
635 int
636 fe_free(fe_t * fe)
637 {
638  if (fe == NULL)
639  return 0;
640  if (--fe->refcount > 0)
641  return fe->refcount;
642 
643  /* kill FE instance - free everything... */
644  if (fe->mel_fb) {
645  if (fe->mel_fb->mel_cosine)
646  fe_free_2d((void *) fe->mel_fb->mel_cosine);
647  ckd_free(fe->mel_fb->lifter);
648  ckd_free(fe->mel_fb->spec_start);
649  ckd_free(fe->mel_fb->filt_start);
650  ckd_free(fe->mel_fb->filt_width);
651  ckd_free(fe->mel_fb->filt_coeffs);
652  ckd_free(fe->mel_fb);
653  }
654  ckd_free(fe->spch);
655  ckd_free(fe->frame);
656  ckd_free(fe->ccc);
657  ckd_free(fe->sss);
658  ckd_free(fe->spec);
659  ckd_free(fe->mfspec);
660  ckd_free(fe->overflow_samps);
661  ckd_free(fe->hamming_window);
662 
663  if (fe->noise_stats)
664  fe_free_noisestats(fe->noise_stats);
665 
666  if (fe->vad_data) {
667  fe_prespch_free(fe->vad_data->prespch_buf);
668  ckd_free(fe->vad_data);
669  }
670 
671  cmd_ln_free_r(fe->config);
672  ckd_free(fe);
673 
674  return 0;
675 }
676 
680 int32
681 fe_mfcc_to_float(fe_t * fe,
682  mfcc_t ** input, float32 ** output, int32 nframes)
683 {
684  int32 i;
685 
686 #ifndef FIXED_POINT
687  if ((void *) input == (void *) output)
688  return nframes * fe->feature_dimension;
689 #endif
690  for (i = 0; i < nframes * fe->feature_dimension; ++i)
691  output[0][i] = MFCC2FLOAT(input[0][i]);
692 
693  return i;
694 }
695 
699 int32
700 fe_float_to_mfcc(fe_t * fe,
701  float32 ** input, mfcc_t ** output, int32 nframes)
702 {
703  int32 i;
704 
705 #ifndef FIXED_POINT
706  if ((void *) input == (void *) output)
707  return nframes * fe->feature_dimension;
708 #endif
709  for (i = 0; i < nframes * fe->feature_dimension; ++i)
710  output[0][i] = FLOAT2MFCC(input[0][i]);
711 
712  return i;
713 }
714 
715 int32
716 fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
717 {
718 #ifdef FIXED_POINT
719  fe_spec2cep(fe, fr_spec, fr_cep);
720 #else /* ! FIXED_POINT */
721  powspec_t *powspec;
722  int32 i;
723 
724  powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
725  for (i = 0; i < fe->mel_fb->num_filters; ++i)
726  powspec[i] = (powspec_t) fr_spec[i];
727  fe_spec2cep(fe, powspec, fr_cep);
728  ckd_free(powspec);
729 #endif /* ! FIXED_POINT */
730  return 0;
731 }
732 
733 int32
734 fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep)
735 {
736 #ifdef FIXED_POINT
737  fe_dct2(fe, fr_spec, fr_cep, 0);
738 #else /* ! FIXED_POINT */
739  powspec_t *powspec;
740  int32 i;
741 
742  powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
743  for (i = 0; i < fe->mel_fb->num_filters; ++i)
744  powspec[i] = (powspec_t) fr_spec[i];
745  fe_dct2(fe, powspec, fr_cep, 0);
746  ckd_free(powspec);
747 #endif /* ! FIXED_POINT */
748  return 0;
749 }
750 
751 int32
752 fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec)
753 {
754 #ifdef FIXED_POINT
755  fe_dct3(fe, fr_cep, fr_spec);
756 #else /* ! FIXED_POINT */
757  powspec_t *powspec;
758  int32 i;
759 
760  powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t));
761  fe_dct3(fe, fr_cep, powspec);
762  for (i = 0; i < fe->mel_fb->num_filters; ++i)
763  fr_spec[i] = (mfcc_t) powspec[i];
764  ckd_free(powspec);
765 #endif /* ! FIXED_POINT */
766  return 0;
767 }
Command-line and other configuration parsing and handling.
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_retain(cmd_ln_t *cmdln)
Retain ownership of a command-line argument set.
Definition: cmd_ln.c:1025
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
Definition: ckd_alloc.h:270
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
Base Struct to hold all structure for MFCC computation.
Definition: fe_internal.h:75
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT int cmd_ln_free_r(cmd_ln_t *cmdln)
Release a command-line argument set and all associated strings.
Definition: cmd_ln.c:1032
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT char const * cmd_ln_str_r(cmd_ln_t *cmdln, char const *name)
Retrieve a string from a command-line object.
Definition: cmd_ln.c:945
#define s3_rand_seed(s)
Macros to simplify calling of random generator function.
Definition: genrand.h:144
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_get(void)
Retrieve the global cmd_ln_t object used by non-re-entrant functions.
Definition: cmd_ln.c:490
Implementation of logging routines.
Argument definition structure.
High-performance portable random generator created by Takuji Nishimura and Makoto Matsumoto...
Opaque structure used to hold the results of command-line parsing.
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition: ckd_alloc.h:253
#define cmd_ln_boolean_r(c, n)
Retrieve a boolean value from a command-line object.
Definition: cmd_ln.h:334
Structure for the front-end computation.
Definition: fe_internal.h:117