#include <lm.h>
|
|
Updated on every lm_{tg,bg,ug}_score call to reflect the kind of n-gram accessed: 3 for 3-gram, 2 for 2-gram and 1 for 1-gram |
|
|
NULL iff disk-based |
|
|
Bigram 32 bits, NULL iff disk-based |
|
|
|
|
|
BG offsets into DMP file (used iff disk-based) |
|
|
Table of actual bigram probs |
|
|
Whether this file is in the WRONG byte order |
|
|
a mapping from dictionary word to LM word |
|
|
Only used in class-based LM, because class-based LM is addressed in the dictionary space. |
|
|
S3_FINISH_WORD id, if it exists |
|
|
|
|
|
hash table for word-string->word-id map |
|
|
An array of inter-class unigram probability |
|
|
Input encoding method |
|
|
Whether the current LM is 32 bits or not. Derived from version and n_ug |
|
|
Whether the LM is in memory. This is a per-LM property, so the code could potentially allow some models to be disk-based while others are not. |
|
|
LM class for this LM |
|
|
See big comment above |
|
|
|
|
|
Language weight currently in effect for this LM |
|
|
Temporary Variable: 2x the maximum size of the MAX_SORTED_ENTRIES |
|
|
To which n_ug can grow with dynamic addition of words |
|
|
membg[w1] = bigrams for lm wid w1 (used iff disk-based) |
|
|
membg 32bits membg[w1] = bigrams for lm wid w1 (used iff disk-based) |
|
|
#bigrams in entire LM |
|
|
#bg_score ops backed off to ug |
|
|
bg fill operations |
|
|
bg in memory |
|
|
#bg_score operations |
|
|
|
|
|
#LM classes |
|
|
if unigram, n_ng=1; if bigram, n_ng=2; and so on |
|
|
#trigrams in entire LM |
|
|
#tg_score ops backed off to bg |
|
|
Similar stats for trigrams |
|
|
tg in memory |
|
|
#tg_score operations |
|
|
|
|
|
# of trigram cache hit ops backed off to bg |
|
|
|
|
|
#unigrams in LM |
|
|
The name of the LM |
|
|
Output encoding method |
|
|
Temporary Variable: Sorted list |
|
|
Temporary Variable: Sorted list |
|
|
Temporary Variable: Sorted list |
|
|
S3_START_WORD id, if it exists |
|
|
NULL iff disk-based |
|
|
Trigram 32 bits NULL iff disk-based |
|
|
tg_segbase[i>>lm_t.log_bg_seg_sz] = index of 1st trigram for bigram segment (i>>lm_t.log_bg_seg_sz) |
|
|
Table of actual trigram backoff weights |
|
|
<w0,w1,w2> hashed to an entry into this array. Only the last trigram mapping to any given hash entry is kept in that entry. (The cache doesn't have to be super-efficient.) |
|
|
|
|
|
tginfo[w2] = fast trigram access info for bigrams (*,w2) |
|
|
tginfo 32bits tginfo[w2] = fast trigram access info for bigrams (*,w2) |
|
|
TG offsets into DMP file (used iff disk-based) |
|
|
Table of actual trigram probs |
|
|
Unigrams |
|
|
The version number of the LM; in particular, this is the version of the LM that was most recently read in. |
|
|
logs3(word insertion penalty) in effect for this LM |
|
|
The LM word list (in unigram order) |
1.3.9.1