SphinxBase  5prealpha
fsg_model.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 
35 #include <stdio.h>
36 #include <string.h>
37 #include <assert.h>
38 
39 /* SphinxBase headers. */
40 #include "sphinxbase/err.h"
41 #include "sphinxbase/pio.h"
42 #include "sphinxbase/ckd_alloc.h"
43 #include "sphinxbase/prim_type.h"
44 #include "sphinxbase/strfuncs.h"
45 #include "sphinxbase/hash_table.h"
46 #include "sphinxbase/fsg_model.h"
47 
55 struct trans_list_s {
56  hash_table_t *null_trans; /* Null transitions keyed by state. */
57  hash_table_t *trans; /* Lists of non-null transitions keyed by state. */
58 };
59 
63 struct fsg_arciter_s {
64  hash_iter_t *itor, *null_itor;
65  gnode_t *gn;
66 };
67 
68 #define FSG_MODEL_BEGIN_DECL "FSG_BEGIN"
69 #define FSG_MODEL_END_DECL "FSG_END"
70 #define FSG_MODEL_N_DECL "N"
71 #define FSG_MODEL_NUM_STATES_DECL "NUM_STATES"
72 #define FSG_MODEL_S_DECL "S"
73 #define FSG_MODEL_START_STATE_DECL "START_STATE"
74 #define FSG_MODEL_F_DECL "F"
75 #define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE"
76 #define FSG_MODEL_T_DECL "T"
77 #define FSG_MODEL_TRANSITION_DECL "TRANSITION"
78 #define FSG_MODEL_COMMENT_CHAR '#'
79 
80 
81 static int32
82 nextline_str2words(FILE * fp, int32 * lineno,
83  char **lineptr, char ***wordptr)
84 {
85  for (;;) {
86  size_t len;
87  int32 n;
88 
89  ckd_free(*lineptr);
90  if ((*lineptr = fread_line(fp, &len)) == NULL)
91  return -1;
92 
93  (*lineno)++;
94 
95  if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
96  continue; /* Skip comment lines */
97 
98  n = str2words(*lineptr, NULL, 0);
99  if (n == 0)
100  continue; /* Skip blank lines */
101 
102  /* Abuse of realloc(), but this doesn't have to be fast. */
103  if (*wordptr == NULL)
104  *wordptr = ckd_calloc(n, sizeof(**wordptr));
105  else
106  *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr));
107  return str2words(*lineptr, *wordptr, n);
108  }
109 }
110 
111 void
112 fsg_model_trans_add(fsg_model_t * fsg,
113  int32 from, int32 to, int32 logp, int32 wid)
114 {
115  fsg_link_t *link;
116  glist_t gl;
117  gnode_t *gn;
118 
119  if (fsg->trans[from].trans == NULL)
120  fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES);
121 
122  /* Check for duplicate link (i.e., link already exists with label=wid) */
123  for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) {
124  link = (fsg_link_t *) gnode_ptr(gn);
125  if (link->wid == wid) {
126  if (link->logs2prob < logp)
127  link->logs2prob = logp;
128  return;
129  }
130  }
131 
132  /* Create transition object */
133  link = listelem_malloc(fsg->link_alloc);
134  link->from_state = from;
135  link->to_state = to;
136  link->logs2prob = logp;
137  link->wid = wid;
138 
139  /* Add it to the list of transitions and update the hash table */
140  gl = glist_add_ptr(gl, (void *) link);
141  hash_table_replace_bkey(fsg->trans[from].trans,
142  (char const *) &link->to_state,
143  sizeof(link->to_state), gl);
144 }
145 
146 int32
147 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to,
148  int32 logp, int32 wid)
149 {
150  fsg_link_t *link, *link2;
151 
152  /* Check for transition probability */
153  if (logp > 0) {
154  E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n",
155  from, to);
156  }
157 
158  /* Self-loop null transitions (with prob <= 1.0) are redundant */
159  if (from == to)
160  return -1;
161 
162  if (fsg->trans[from].null_trans == NULL)
163  fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES);
164 
165  /* Check for a duplicate link; if found, keep the higher prob */
166  link = fsg_model_null_trans(fsg, from, to);
167  if (link) {
168  if (link->logs2prob < logp) {
169  link->logs2prob = logp;
170  return 0;
171  }
172  else
173  return -1;
174  }
175 
176  /* Create null transition object */
177  link = listelem_malloc(fsg->link_alloc);
178  link->from_state = from;
179  link->to_state = to;
180  link->logs2prob = logp;
181  link->wid = -1;
182 
183  link2 = (fsg_link_t *)
184  hash_table_enter_bkey(fsg->trans[from].null_trans,
185  (char const *) &link->to_state,
186  sizeof(link->to_state), link);
187  assert(link == link2);
188 
189  return 1;
190 }
191 
192 int32
193 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to,
194  int32 logp)
195 {
196  return fsg_model_tag_trans_add(fsg, from, to, logp, -1);
197 }
198 
199 glist_t
200 fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
201 {
202  gnode_t *gn1;
203  int updated;
204  fsg_link_t *tl1, *tl2;
205  int32 k, n;
206 
207  E_INFO("Computing transitive closure for null transitions\n");
208 
209  /* If our caller didn't give us a list of null-transitions,
210  make such a list. Just loop through all the FSG states,
211  and all the null-transitions in that state (which are kept in
212  their own hash table). */
213  if (nulls == NULL) {
214  int i;
215  for (i = 0; i < fsg->n_state; ++i) {
216  hash_iter_t *itor;
217  hash_table_t *null_trans = fsg->trans[i].null_trans;
218  if (null_trans == NULL)
219  continue;
220  for (itor = hash_table_iter(null_trans);
221  itor != NULL; itor = hash_table_iter_next(itor)) {
222  nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent));
223  }
224  }
225  }
226 
227  /*
228  * Probably not the most efficient closure implementation, in general, but
229  * probably reasonably efficient for a sparse null transition matrix.
230  */
231  n = 0;
232  do {
233  updated = FALSE;
234 
235  for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
236  hash_iter_t *itor;
237 
238  tl1 = (fsg_link_t *) gnode_ptr(gn1);
239  assert(tl1->wid < 0);
240 
241  if (fsg->trans[tl1->to_state].null_trans == NULL)
242  continue;
243 
244  for (itor =
245  hash_table_iter(fsg->trans[tl1->to_state].null_trans);
246  itor; itor = hash_table_iter_next(itor)) {
247 
248  tl2 = (fsg_link_t *) hash_entry_val(itor->ent);
249 
250  k = fsg_model_null_trans_add(fsg,
251  tl1->from_state,
252  tl2->to_state,
253  tl1->logs2prob +
254  tl2->logs2prob);
255  if (k >= 0) {
256  updated = TRUE;
257  if (k > 0) {
258  nulls = glist_add_ptr(nulls, (void *)
259  fsg_model_null_trans
260  (fsg, tl1->from_state,
261  tl2->to_state));
262  n++;
263  }
264  }
265  }
266  }
267  } while (updated);
268 
269  E_INFO("%d null transitions added\n", n);
270 
271  return nulls;
272 }
273 
274 glist_t
275 fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j)
276 {
277  void *val;
278 
279  if (fsg->trans[i].trans == NULL)
280  return NULL;
281  if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j,
282  sizeof(j), &val) < 0)
283  return NULL;
284  return (glist_t) val;
285 }
286 
287 fsg_link_t *
288 fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j)
289 {
290  void *val;
291 
292  if (fsg->trans[i].null_trans == NULL)
293  return NULL;
294  if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j,
295  sizeof(j), &val) < 0)
296  return NULL;
297  return (fsg_link_t *) val;
298 }
299 
301 fsg_model_arcs(fsg_model_t * fsg, int32 i)
302 {
303  fsg_arciter_t *itor;
304 
305  if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
306  return NULL;
307  itor = ckd_calloc(1, sizeof(*itor));
308  if (fsg->trans[i].null_trans)
309  itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
310  if (fsg->trans[i].trans)
311  itor->itor = hash_table_iter(fsg->trans[i].trans);
312  if (itor->itor != NULL)
313  itor->gn = hash_entry_val(itor->itor->ent);
314  return itor;
315 }
316 
317 fsg_link_t *
318 fsg_arciter_get(fsg_arciter_t * itor)
319 {
320  /* Iterate over non-null arcs first. */
321  if (itor->gn)
322  return (fsg_link_t *) gnode_ptr(itor->gn);
323  else if (itor->null_itor)
324  return (fsg_link_t *) hash_entry_val(itor->null_itor->ent);
325  else
326  return NULL;
327 }
328 
330 fsg_arciter_next(fsg_arciter_t * itor)
331 {
332  /* Iterate over non-null arcs first. */
333  if (itor->gn) {
334  itor->gn = gnode_next(itor->gn);
335  /* Move to the next destination arc. */
336  if (itor->gn == NULL) {
337  itor->itor = hash_table_iter_next(itor->itor);
338  if (itor->itor != NULL)
339  itor->gn = hash_entry_val(itor->itor->ent);
340  else if (itor->null_itor == NULL)
341  goto stop_iteration;
342  }
343  }
344  else {
345  if (itor->null_itor == NULL)
346  goto stop_iteration;
347  itor->null_itor = hash_table_iter_next(itor->null_itor);
348  if (itor->null_itor == NULL)
349  goto stop_iteration;
350  }
351  return itor;
352  stop_iteration:
353  fsg_arciter_free(itor);
354  return NULL;
355 
356 }
357 
358 void
359 fsg_arciter_free(fsg_arciter_t * itor)
360 {
361  if (itor == NULL)
362  return;
363  hash_table_iter_free(itor->null_itor);
364  hash_table_iter_free(itor->itor);
365  ckd_free(itor);
366 }
367 
368 int
369 fsg_model_word_id(fsg_model_t * fsg, char const *word)
370 {
371  int wid;
372 
373  /* Search for an existing word matching this. */
374  for (wid = 0; wid < fsg->n_word; ++wid) {
375  if (0 == strcmp(fsg->vocab[wid], word))
376  break;
377  }
378  /* If not found, add this to the vocab. */
379  if (wid == fsg->n_word)
380  return -1;
381  return wid;
382 }
383 
384 int
385 fsg_model_word_add(fsg_model_t * fsg, char const *word)
386 {
387  int wid, old_size;
388 
389  /* Search for an existing word matching this. */
390  wid = fsg_model_word_id(fsg, word);
391  /* If not found, add this to the vocab. */
392  if (wid == -1) {
393  wid = fsg->n_word;
394  if (fsg->n_word == fsg->n_word_alloc) {
395  old_size = fsg->n_word_alloc;
396  fsg->n_word_alloc += 10;
397  fsg->vocab = ckd_realloc(fsg->vocab,
398  fsg->n_word_alloc *
399  sizeof(*fsg->vocab));
400  if (fsg->silwords)
401  fsg->silwords =
402  bitvec_realloc(fsg->silwords, old_size,
403  fsg->n_word_alloc);
404  if (fsg->altwords)
405  fsg->altwords =
406  bitvec_realloc(fsg->altwords, old_size,
407  fsg->n_word_alloc);
408  }
409  ++fsg->n_word;
410  fsg->vocab[wid] = ckd_salloc(word);
411  }
412  return wid;
413 }
414 
415 int
416 fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
417  int state, float32 silprob)
418 {
419  int32 logsilp;
420  int n_trans, silwid, src;
421 
422  E_INFO("Adding silence transitions for %s to FSG\n", silword);
423 
424  silwid = fsg_model_word_add(fsg, silword);
425  logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
426  if (fsg->silwords == NULL)
427  fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
428  bitvec_set(fsg->silwords, silwid);
429 
430  n_trans = 0;
431  if (state == -1) {
432  for (src = 0; src < fsg->n_state; src++) {
433  fsg_model_trans_add(fsg, src, src, logsilp, silwid);
434  ++n_trans;
435  }
436  }
437  else {
438  fsg_model_trans_add(fsg, state, state, logsilp, silwid);
439  ++n_trans;
440  }
441 
442  E_INFO("Added %d silence word transitions\n", n_trans);
443  return n_trans;
444 }
445 
446 int
447 fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
448  char const *altword)
449 {
450  int i, basewid, altwid;
451  int ntrans;
452 
453  /* FIXME: This will get slow, eventually... */
454  for (basewid = 0; basewid < fsg->n_word; ++basewid)
455  if (0 == strcmp(fsg->vocab[basewid], baseword))
456  break;
457  if (basewid == fsg->n_word) {
458  E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
459  return -1;
460  }
461  altwid = fsg_model_word_add(fsg, altword);
462  if (fsg->altwords == NULL)
463  fsg->altwords = bitvec_alloc(fsg->n_word_alloc);
464  bitvec_set(fsg->altwords, altwid);
465  if (fsg_model_is_filler(fsg, basewid)) {
466  if (fsg->silwords == NULL)
467  fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
468  bitvec_set(fsg->silwords, altwid);
469  }
470 
471  E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n",
472  baseword, altword));
473 
474  /* Look for all transitions involving baseword and duplicate them. */
475  /* FIXME: This will also get slow, eventually... */
476  ntrans = 0;
477  for (i = 0; i < fsg->n_state; ++i) {
478  hash_iter_t *itor;
479  if (fsg->trans[i].trans == NULL)
480  continue;
481  for (itor = hash_table_iter(fsg->trans[i].trans); itor;
482  itor = hash_table_iter_next(itor)) {
483  glist_t trans;
484  gnode_t *gn;
485 
486  trans = hash_entry_val(itor->ent);
487  for (gn = trans; gn; gn = gnode_next(gn)) {
488  fsg_link_t *fl = gnode_ptr(gn);
489  if (fl->wid == basewid) {
490  fsg_link_t *link;
491 
492  /* Create transition object */
493  link = listelem_malloc(fsg->link_alloc);
494  link->from_state = fl->from_state;
495  link->to_state = fl->to_state;
496  link->logs2prob = fl->logs2prob; /* FIXME!!!??? */
497  link->wid = altwid;
498 
499  trans = glist_add_ptr(trans, (void *) link);
500  ++ntrans;
501  }
502  }
503  hash_entry_val(itor->ent) = trans;
504  }
505  }
506 
507  E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans));
508  return ntrans;
509 }
510 
511 
512 fsg_model_t *
513 fsg_model_init(char const *name, logmath_t * lmath, float32 lw,
514  int32 n_state)
515 {
516  fsg_model_t *fsg;
517 
518  /* Allocate basic stuff. */
519  fsg = ckd_calloc(1, sizeof(*fsg));
520  fsg->refcount = 1;
521  fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t));
522  fsg->lmath = lmath;
523  fsg->name = name ? ckd_salloc(name) : NULL;
524  fsg->n_state = n_state;
525  fsg->lw = lw;
526 
527  fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans));
528 
529  return fsg;
530 }
531 
532 fsg_model_t *
533 fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
534 {
535  fsg_model_t *fsg;
536  hash_table_t *vocab;
537  hash_iter_t *itor;
538  int32 lastwid;
539  char **wordptr;
540  char *lineptr;
541  char *fsgname;
542  int32 lineno;
543  int32 n, i, j;
544  int n_state, n_trans, n_null_trans;
545  glist_t nulls;
546  float32 p;
547 
548  lineno = 0;
549  vocab = hash_table_new(32, FALSE);
550  wordptr = NULL;
551  lineptr = NULL;
552  nulls = NULL;
553  fsgname = NULL;
554  fsg = NULL;
555 
556  /* Scan upto FSG_BEGIN header */
557  for (;;) {
558  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
559  if (n < 0) {
560  E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
561  goto parse_error;
562  }
563 
564  if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
565  if (n > 2) {
566  E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
567  lineno);
568  goto parse_error;
569  }
570  break;
571  }
572  }
573  /* Save FSG name, or it will get clobbered below :(.
574  * If name is missing, try the default.
575  */
576  if (n == 2) {
577  fsgname = ckd_salloc(wordptr[1]);
578  }
579  else {
580  E_WARN("FSG name is missing\n");
581  fsgname = ckd_salloc("unknown");
582  }
583 
584  /* Read #states */
585  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
586  if ((n != 2)
587  || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
588  && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
589  || (sscanf(wordptr[1], "%d", &n_state) != 1)
590  || (n_state <= 0)) {
591  E_ERROR
592  ("Line[%d]: #states declaration line missing or malformed\n",
593  lineno);
594  goto parse_error;
595  }
596 
597  /* Now create the FSG. */
598  fsg = fsg_model_init(fsgname, lmath, lw, n_state);
599  ckd_free(fsgname);
600  fsgname = NULL;
601 
602  /* Read start state */
603  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
604  if ((n != 2)
605  || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
606  && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
607  || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
608  || (fsg->start_state < 0)
609  || (fsg->start_state >= fsg->n_state)) {
610  E_ERROR
611  ("Line[%d]: start state declaration line missing or malformed\n",
612  lineno);
613  goto parse_error;
614  }
615 
616  /* Read final state */
617  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
618  if ((n != 2)
619  || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
620  && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
621  || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
622  || (fsg->final_state < 0)
623  || (fsg->final_state >= fsg->n_state)) {
624  E_ERROR
625  ("Line[%d]: final state declaration line missing or malformed\n",
626  lineno);
627  goto parse_error;
628  }
629 
630  /* Read transitions */
631  lastwid = 0;
632  n_trans = n_null_trans = 0;
633  for (;;) {
634  int32 wid, tprob;
635 
636  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
637  if (n <= 0) {
638  E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
639  lineno);
640  goto parse_error;
641  }
642 
643  if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
644  break;
645  }
646 
647  if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
648  || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
649 
650 
651  if (((n != 4) && (n != 5))
652  || (sscanf(wordptr[1], "%d", &i) != 1)
653  || (sscanf(wordptr[2], "%d", &j) != 1)
654  || (i < 0) || (i >= fsg->n_state)
655  || (j < 0) || (j >= fsg->n_state)) {
656  E_ERROR
657  ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
658  lineno);
659  goto parse_error;
660  }
661 
662  p = atof_c(wordptr[3]);
663  if ((p <= 0.0) || (p > 1.0)) {
664  E_ERROR
665  ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
666  lineno);
667  goto parse_error;
668  }
669  }
670  else {
671  E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
672  lineno);
673  goto parse_error;
674  }
675 
676  tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
677  /* Add word to "dictionary". */
678  if (n > 4) {
679  if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
680  (void) hash_table_enter_int32(vocab,
681  ckd_salloc(wordptr[4]),
682  lastwid);
683  wid = lastwid;
684  ++lastwid;
685  }
686  fsg_model_trans_add(fsg, i, j, tprob, wid);
687  ++n_trans;
688  }
689  else {
690  if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
691  ++n_null_trans;
692  nulls =
693  glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
694  }
695  }
696  }
697 
698  E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
699  fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
700 
701 
702  /* Now create a string table from the "dictionary" */
703  fsg->n_word = hash_table_inuse(vocab);
704  fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */
705  fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
706  for (itor = hash_table_iter(vocab); itor;
707  itor = hash_table_iter_next(itor)) {
708  char const *word = hash_entry_key(itor->ent);
709  int32 wid = (int32) (long) hash_entry_val(itor->ent);
710  fsg->vocab[wid] = (char *) word;
711  }
712  hash_table_free(vocab);
713 
714  /* Do transitive closure on null transitions */
715  nulls = fsg_model_null_trans_closure(fsg, nulls);
716  glist_free(nulls);
717 
718  ckd_free(lineptr);
719  ckd_free(wordptr);
720 
721  return fsg;
722 
723  parse_error:
724  for (itor = hash_table_iter(vocab); itor;
725  itor = hash_table_iter_next(itor))
726  ckd_free((char *) hash_entry_key(itor->ent));
727  glist_free(nulls);
728  hash_table_free(vocab);
729  ckd_free(fsgname);
730  ckd_free(lineptr);
731  ckd_free(wordptr);
732  fsg_model_free(fsg);
733  return NULL;
734 }
735 
736 
737 fsg_model_t *
738 fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw)
739 {
740  FILE *fp;
741  fsg_model_t *fsg;
742 
743  if ((fp = fopen(file, "r")) == NULL) {
744  E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
745  return NULL;
746  }
747  fsg = fsg_model_read(fp, lmath, lw);
748  fclose(fp);
749  return fsg;
750 }
751 
752 fsg_model_t *
753 fsg_model_retain(fsg_model_t * fsg)
754 {
755  ++fsg->refcount;
756  return fsg;
757 }
758 
759 static void
760 trans_list_free(fsg_model_t * fsg, int32 i)
761 {
762  hash_iter_t *itor;
763 
764  /* FIXME (maybe): FSG links will all get freed when we call
765  * listelem_alloc_free() so don't bother freeing them explicitly
766  * here. */
767  if (fsg->trans[i].trans) {
768  for (itor = hash_table_iter(fsg->trans[i].trans);
769  itor; itor = hash_table_iter_next(itor)) {
770  glist_t gl = (glist_t) hash_entry_val(itor->ent);
771  glist_free(gl);
772  }
773  }
774  hash_table_free(fsg->trans[i].trans);
775  hash_table_free(fsg->trans[i].null_trans);
776 }
777 
778 int
779 fsg_model_free(fsg_model_t * fsg)
780 {
781  int i;
782 
783  if (fsg == NULL)
784  return 0;
785 
786  if (--fsg->refcount > 0)
787  return fsg->refcount;
788 
789  for (i = 0; i < fsg->n_word; ++i)
790  ckd_free(fsg->vocab[i]);
791  for (i = 0; i < fsg->n_state; ++i)
792  trans_list_free(fsg, i);
793  ckd_free(fsg->trans);
794  ckd_free(fsg->vocab);
796  bitvec_free(fsg->silwords);
797  bitvec_free(fsg->altwords);
798  ckd_free(fsg->name);
799  ckd_free(fsg);
800  return 0;
801 }
802 
803 
804 void
805 fsg_model_write(fsg_model_t * fsg, FILE * fp)
806 {
807  int32 i;
808 
809  fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL,
810  fsg->name ? fsg->name : "");
811  fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
812  fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
813  fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);
814 
815  for (i = 0; i < fsg->n_state; i++) {
816  fsg_arciter_t *itor;
817 
818  for (itor = fsg_model_arcs(fsg, i); itor;
819  itor = fsg_arciter_next(itor)) {
820  fsg_link_t *tl = fsg_arciter_get(itor);
821 
822  fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
823  tl->from_state, tl->to_state,
824  logmath_exp(fsg->lmath,
825  (int32) (tl->logs2prob / fsg->lw)),
826  (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid));
827  }
828  }
829 
830  fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
831 
832  fflush(fp);
833 }
834 
835 void
836 fsg_model_writefile(fsg_model_t * fsg, char const *file)
837 {
838  FILE *fp;
839 
840  assert(fsg);
841 
842  E_INFO("Writing FSG file '%s'\n", file);
843 
844  if ((fp = fopen(file, "w")) == NULL) {
845  E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
846  return;
847  }
848 
849  fsg_model_write(fsg, fp);
850 
851  fclose(fp);
852 }
853 
854 static void
855 fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp)
856 {
857  fsg_arciter_t *itor;
858 
859  for (itor = fsg_model_arcs(fsg, i); itor;
860  itor = fsg_arciter_next(itor)) {
861  fsg_link_t *tl = fsg_arciter_get(itor);
862  fprintf(fp, "%d %d %s %f\n",
863  tl->from_state, tl->to_state,
864  (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid),
865  -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw));
866  }
867 }
868 
869 void
870 fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp)
871 {
872  int i;
873 
874  /* Write transitions from initial state first. */
875  fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
876 
877  /* Other states. */
878  for (i = 0; i < fsg->n_state; i++) {
879  if (i == fsg_model_start_state(fsg))
880  continue;
881  fsg_model_write_fsm_trans(fsg, i, fp);
882  }
883 
884  /* Final state. */
885  fprintf(fp, "%d 0\n", fsg_model_final_state(fsg));
886 
887  fflush(fp);
888 }
889 
890 void
891 fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file)
892 {
893  FILE *fp;
894 
895  assert(fsg);
896 
897  E_INFO("Writing FSM file '%s'\n", file);
898 
899  if ((fp = fopen(file, "w")) == NULL) {
900  E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file);
901  return;
902  }
903 
904  fsg_model_write_fsm(fsg, fp);
905 
906  fclose(fp);
907 }
908 
909 void
910 fsg_model_write_symtab(fsg_model_t * fsg, FILE * file)
911 {
912  int i;
913 
914  fprintf(file, "<eps> 0\n");
915  for (i = 0; i < fsg_model_n_word(fsg); ++i) {
916  fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1);
917  }
918  fflush(file);
919 }
920 
921 void
922 fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file)
923 {
924  FILE *fp;
925 
926  assert(fsg);
927 
928  E_INFO("Writing FSM symbol table '%s'\n", file);
929 
930  if ((fp = fopen(file, "w")) == NULL) {
931  E_ERROR("Failed to open symbol table '%s' for writing", file);
932  return;
933  }
934 
935  fsg_model_write_symtab(fsg, fp);
936 
937  fclose(fp);
938 }
#define E_ERROR_SYSTEM(...)
Print error text; Call perror("");.
Definition: err.h:99
SPHINXBASE_EXPORT int32 hash_table_lookup_int32(hash_table_t *h, const char *key, int32 *val)
Look up a 32-bit integer value in a hash table.
Definition: hash_table.c:329
int32 start_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:101
SPHINXBASE_EXPORT void * hash_table_enter_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_enter, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
Definition: hash_table.c:542
Miscellaneous useful string functions.
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
int refcount
Reference count.
Definition: fsg_model.h:92
hash_entry_t * ent
Current entry in that table.
Definition: hash_table.h:170
int32 final_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:102
int32 n_word_alloc
Number of words allocated in vocab.
Definition: fsg_model.h:95
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
#define hash_table_enter_int32(h, k, v)
Add a 32-bit integer value to a hash table.
Definition: hash_table.h:228
SPHINXBASE_EXPORT int32 hash_table_lookup_bkey(hash_table_t *h, const char *key, size_t len, void **val)
Like hash_lookup, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
Definition: hash_table.c:344
float32 lw
Language weight that&#39;s been applied to transition logprobs.
Definition: fsg_model.h:103
#define E_DEBUG(level, x)
Print debugging information to standard error stream.
Definition: err.h:144
listelem_alloc_t * link_alloc
Allocator for FSG links.
Definition: fsg_model.h:106
#define listelem_malloc(le)
Allocate a list element and return pointer to it.
Sphinx&#39;s memory allocation/deallocation routines.
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition: hash_table.c:689
int32 n_word
Number of unique words in this FSG.
Definition: fsg_model.h:94
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition: logmath.c:447
A node in a generic list.
Definition: glist.h:100
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition: hash_table.c:653
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
#define hash_entry_val(e)
Access macros.
Definition: hash_table.h:175
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition: hash_table.c:158
Adjacency list (opaque) for a state in an FSG.
Definition: fsg_model.c:55
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition: glist.c:74
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition: hash_table.c:695
SPHINXBASE_EXPORT float64 logmath_log_to_ln(logmath_t *lmath, int logb_p)
Convert integer log in base B to natural log (in floating point).
Definition: logmath.c:468
SPHINXBASE_EXPORT double atof_c(char const *str)
Locale independent version of atof().
Definition: strfuncs.c:55
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
Definition: pio.c:376
Implementation of arc iterator.
Definition: fsg_model.c:63
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed...
Definition: glist.c:133
#define gnode_ptr(g)
Head of a list of gnodes.
Definition: glist.h:109
Implementation of logging routines.
#define E_WARN(...)
Print warning message to error log.
Definition: err.h:109
int32 n_state
number of states in FSG
Definition: fsg_model.h:100
SPHINXBASE_EXPORT int32 str2words(char *line, char **wptr, int32 n_wptr)
Convert a line to an array of "words", based on whitespace separators.
Definition: strfuncs.c:123
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition: hash_table.c:663
SPHINXBASE_EXPORT listelem_alloc_t * listelem_alloc_init(size_t elemsize)
Initialize and return a list element allocator.
bitvec_t * altwords
Indicates which words are pronunciation alternates.
Definition: fsg_model.h:98
trans_list_t * trans
Transitions out of each state, if any.
Definition: fsg_model.h:105
SPHINXBASE_EXPORT bitvec_t * bitvec_realloc(bitvec_t *vec, size_t old_len, size_t new_len)
Resize a bit vector, clear the remaining bits.
Definition: bitvec.c:64
Hash table implementation.
#define bitvec_set(v, b)
Set the b-th bit of bit vector v.
Definition: bitvec.h:95
#define E_FATAL(...)
Exit with non-zero status after error message.
Definition: err.h:81
Word level FSG definition.
Definition: fsg_model.h:91
SPHINXBASE_EXPORT void * hash_table_replace_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_replace, but with an explicitly specified key length, instead of a NULL-terminated...
Definition: hash_table.c:555
bitvec_t * silwords
Indicates which words are silence/fillers.
Definition: fsg_model.h:97
SPHINXBASE_EXPORT float64 logmath_exp(logmath_t *lmath, int logb_p)
Convert integer log in base B to linear floating point.
Definition: logmath.c:456
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition: ckd_alloc.h:258
logmath_t * lmath
Pointer to log math computation object.
Definition: fsg_model.h:99
#define bitvec_alloc(n)
Allocate a bit vector, all bits are clear.
Definition: bitvec.h:75
file IO related operations.
#define bitvec_free(v)
Free a bit vector.
Definition: bitvec.h:87
char * name
A unique string identifier for this FSG.
Definition: fsg_model.h:93
SPHINXBASE_EXPORT void listelem_alloc_free(listelem_alloc_t *le)
Finalize and release all memory associated with a list element allocator.
char ** vocab
Vocabulary for this FSG.
Definition: fsg_model.h:96