Public Attributes | List of all members
lm_s Struct Reference

#include <lm.h>

Public Attributes

char * name
 
int32 n_ug
 
int32 n_bg
 
int32 n_tg
 
int32 max_ug
 
int32 n_ng
 
char ** wordstr
 
uint32 log_bg_seg_sz
 
uint32 bg_seg_sz
 
ug_t * ug
 
s3lmwid32_t * dict2lmwid
 
s3lmwid32_t startlwid
 
s3lmwid32_t finishlwid
 
bg_t * bg
 
tg_t * tg
 
membg_t * membg
 
tginfo_t ** tginfo
 
lm_tgcache_entry_t * tgcache
 
bg32_t * bg32
 
tg32_t * tg32
 
membg32_t * membg32
 
tginfo32_t ** tginfo32
 
lm_tgcache_entry32_t * tgcache32
 
lmlog_t * bgprob
 
lmlog_t * tgprob
 
lmlog_t * tgbowt
 
int32 * tg_segbase
 
int32 n_bgprob
 
int32 n_tgprob
 
int32 n_tgbowt
 
FILE * fp
 
int32 byteswap
 
int32 bgoff
 
int32 tgoff
 
float32 lw
 
int32 wip
 
int32 n_bg_fill
 
int32 n_bg_inmem
 
int32 n_bg_score
 
int32 n_bg_bo
 
int32 n_tg_fill
 
int32 n_tg_inmem
 
int32 n_tg_score
 
int32 n_tg_bo
 
int32 n_tgcache_hit
 
int32 access_type
 
int32 isLM_IN_MEMORY
 
int32 dict_size
 
hash_table_t * HT
 
lmclass_t ** lmclass
 
int32 n_lmclass
 
int32 * inclass_ugscore
 
int32 inputenc
 
int32 outputenc
 
int32 version
 
int32 is32bits
 
sorted_list_t sorted_prob2
 
sorted_list_t sorted_bowt2
 
sorted_list_t sorted_prob3
 
int32 max_sorted_entries
 
logmath_t * logmath
 

Member Data Documentation

◆ access_type

int32 lm_s::access_type

Updated on every lm_{tg,bg,ug}_score call to reflect the kind of n-gram accessed: 3 for 3-gram, 2 for 2-gram and 1 for 1-gram

◆ bg

bg_t* lm_s::bg

NULL iff disk-based

◆ bg32

bg32_t* lm_s::bg32

Bigram 32 bits, NULL iff disk-based

◆ bg_seg_sz

uint32 lm_s::bg_seg_sz

◆ bgoff

int32 lm_s::bgoff

BG offsets into DMP file (used iff disk-based)

◆ bgprob

lmlog_t* lm_s::bgprob

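Table of actual bigram probs. (The leading "tgcache 32 bits" in the original comment appears to describe the preceding member, tgcache32, rather than bgprob.)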

◆ byteswap

int32 lm_s::byteswap

Whether this file is in the WRONG byte order

◆ dict2lmwid

s3lmwid32_t* lm_s::dict2lmwid

a mapping from dictionary word to LM word

◆ dict_size

int32 lm_s::dict_size

Only used in class-based LM, because class-based LM is addressed in the dictionary space.

◆ finishlwid

s3lmwid32_t lm_s::finishlwid

S3_FINISH_WORD id, if it exists

◆ fp

FILE* lm_s::fp

◆ HT

hash_table_t* lm_s::HT

hash table for word-string->word-id map

◆ inclass_ugscore

int32* lm_s::inclass_ugscore

An array of inter-class unigram probability

◆ inputenc

int32 lm_s::inputenc

Input encoding method

◆ is32bits

int32 lm_s::is32bits

Whether the current LM is 32 bits or not. Derived from version and n_ug

◆ isLM_IN_MEMORY

int32 lm_s::isLM_IN_MEMORY

Whether the LM is in memory. This is a per-LM property, so the code could potentially allow some models to be disk-based while others are not.

◆ lmclass

lmclass_t** lm_s::lmclass

LM class for this LM

◆ log_bg_seg_sz

uint32 lm_s::log_bg_seg_sz

See big comment above

◆ logmath

logmath_t* lm_s::logmath

◆ lw

float32 lm_s::lw

Language weight currently in effect for this LM

◆ max_sorted_entries

int32 lm_s::max_sorted_entries

Temporary Variable: 2x the maximum size of the MAX_SORTED_ENTRIES

◆ max_ug

int32 lm_s::max_ug

To which n_ug can grow with dynamic addition of words

◆ membg

membg_t* lm_s::membg

membg[w1] = bigrams for lm wid w1 (used iff disk-based)

◆ membg32

membg32_t* lm_s::membg32

membg 32bits membg[w1] = bigrams for lm wid w1 (used iff disk-based)

◆ n_bg

int32 lm_s::n_bg

#bigrams in entire LM

◆ n_bg_bo

int32 lm_s::n_bg_bo

#bg_score ops backed off to ug

◆ n_bg_fill

int32 lm_s::n_bg_fill

#bg fill operations

◆ n_bg_inmem

int32 lm_s::n_bg_inmem

#bg in memory

◆ n_bg_score

int32 lm_s::n_bg_score

#bg_score operations

◆ n_bgprob

int32 lm_s::n_bgprob

◆ n_lmclass

int32 lm_s::n_lmclass

Number of LM classes

◆ n_ng

int32 lm_s::n_ng

If unigram, n_ng=1; if bigram, n_ng=2; and so on

◆ n_tg

int32 lm_s::n_tg

#trigrams in entire LM

◆ n_tg_bo

int32 lm_s::n_tg_bo

#tg_score ops backed off to bg

◆ n_tg_fill

int32 lm_s::n_tg_fill

Similar stats for trigrams

◆ n_tg_inmem

int32 lm_s::n_tg_inmem

#tg in memory

◆ n_tg_score

int32 lm_s::n_tg_score

#tg_score operations

◆ n_tgbowt

int32 lm_s::n_tgbowt

◆ n_tgcache_hit

int32 lm_s::n_tgcache_hit

# of trigram cache hit ops backed off to bg

◆ n_tgprob

int32 lm_s::n_tgprob

◆ n_ug

int32 lm_s::n_ug

#unigrams in LM

◆ name

char* lm_s::name

The name of the LM

◆ outputenc

int32 lm_s::outputenc

Output encoding method

◆ sorted_bowt2

sorted_list_t lm_s::sorted_bowt2

Temporary Variable: Sorted list

◆ sorted_prob2

sorted_list_t lm_s::sorted_prob2

Temporary Variable: Sorted list

◆ sorted_prob3

sorted_list_t lm_s::sorted_prob3

Temporary Variable: Sorted list

◆ startlwid

s3lmwid32_t lm_s::startlwid

S3_START_WORD id, if it exists

◆ tg

tg_t* lm_s::tg

NULL iff disk-based

◆ tg32

tg32_t* lm_s::tg32

Trigram 32 bits NULL iff disk-based

◆ tg_segbase

int32* lm_s::tg_segbase

tg_segbase[i>>lm_t.log_bg_seg_sz] = index of 1st trigram for bigram segment (i>>lm_t.log_bg_seg_sz)

◆ tgbowt

lmlog_t* lm_s::tgbowt

Table of actual trigram backoff weights

◆ tgcache

lm_tgcache_entry_t* lm_s::tgcache

<w0,w1,w2> hashed to an entry into this array. Only the last trigram mapping to any given hash entry is kept in that entry. (The cache doesn't have to be super-efficient.)

◆ tgcache32

lm_tgcache_entry32_t* lm_s::tgcache32

◆ tginfo

tginfo_t** lm_s::tginfo

tginfo[w2] = fast trigram access info for bigrams (*,w2)

◆ tginfo32

tginfo32_t** lm_s::tginfo32

tginfo 32bits tginfo[w2] = fast trigram access info for bigrams (*,w2)

◆ tgoff

int32 lm_s::tgoff

TG offsets into DMP file (used iff disk-based)

◆ tgprob

lmlog_t* lm_s::tgprob

Table of actual trigram probs

◆ ug

ug_t* lm_s::ug

Unigrams

◆ version

int32 lm_s::version

The version number of the LM; in particular, this is the version of the LM that was most recently read in.

◆ wip

int32 lm_s::wip

logs3(word insertion penalty) in effect for this LM

◆ wordstr

char** lm_s::wordstr

The LM word list (in unigram order)


The documentation for this struct was generated from the following file: