libak
0.4.0
|
Data Structures | |
struct | akProbPair |
A pair of log-probabilities. More... | |
struct | akFileList |
List of file names. More... | |
struct | akTar |
Tar files. More... | |
struct | akFea |
Feature vectors. More... | |
struct | akSeqs |
Character sequence manager. More... | |
struct | akBuffer |
Text buffer. More... | |
struct | akDict |
Dictionary of tokens. More... | |
struct | akCSeq |
Character sequence. More... | |
struct | akWGState |
Word graph state. More... | |
struct | akWGSegment |
Word graph segment. More... | |
struct | akWGEdge |
Word graph edge. More... | |
struct | akWGList |
List node. More... | |
struct | akWordGraph |
Word graph. More... | |
Defines | |
#define | ak_fea_get_real_data(FEA) ((float **) (FEA)->v) |
Gets a pointer to real data. | |
#define | ak_fea_load_fd(FEA, FD, ERR) (FEA)->_load ( (FEA), (FD), (ERR) ) |
Loads a feature vector sequence. | |
#define | ak_fea_print(FEA, TO) (FEA)->_print ( (FEA), (TO) ) |
Prints the content. | |
#define | ak_fea_write_fd(FEA, TO, BINARY, ERR) (FEA)->_write ( (FEA), (TO), (BINARY), (ERR) ) |
Writes the content. | |
#define | ak_seqs_get_num_cseqs(SEQS) ((SEQS)->cseqs->N) |
Gets the number of different character sequences. | |
#define | ak_seqs_get_cseq(SEQS, ID) ((const akCSeq *) (SEQS)->cseqs->data[(ID)]) |
Gets a character sequence. | |
#define | ak_dict_get_token(DICT, ID) ((const char *) (DICT)->data[(ID)]) |
Gets the corresponding token. | |
Typedefs | |
typedef double | akFloat |
Type float. | |
typedef akFloat | akProb |
An alias of the type akFloat used to represent log-probabilities. | |
typedef unsigned char | akBool |
Boolean type. | |
Enumerations | |
enum | akFeaType { AK_FEA_BINARY, AK_FEA_REAL } |
Type of features. More... | |
Functions | |
akProb | ak_atop (const char *str) |
From ASCII to Prob. | |
void | ak_prob_print (const akProb prob, FILE *to) |
Prints a probability. | |
void | ak_error (const char *format,...) |
Terminates the application. | |
void | ak_warning (const char *format,...) |
Warning message. | |
akFileList * | ak_filelist_new_from_file (const char *file_name) |
Creates a new file list from a text file. | |
void | ak_filelist_free (akFileList *filelist) |
Frees memory. | |
void | ak_tar_free (akTar *tar) |
Frees memory. | |
FILE * | ak_tar_get_file (akTar *tar, const char *name, char **err) |
Returns a file descriptor to desired file. | |
akTar * | ak_tar_new (const char *file_name, char **err) |
Creates a new akTar. | |
void | ak_fea_free (akFea *fea) |
Frees memory. | |
char ** | ak_fea_get_binary_data (akFea *fea) |
Gets a pointer to binary data. | |
int | ak_fea_load (akFea *fea, const char *file_name, char **err) |
Loads a feature vector sequence. | |
akFea * | ak_fea_new (const akFeaType type) |
Creates a new feature vectors manager. | |
void | ak_fea_resize (akFea *fea, const int dim, const int nvecs) |
Resizes memory. | |
int | ak_fea_write (const akFea *fea, const char *file_name, const akBool binary, char **err) |
Writes the content. | |
void | ak_seqs_adjust (akSeqs *seqs) |
Adjusts memory. | |
size_t | ak_seqs_append (akSeqs *seqs, const akCSeq *cseq) |
Appends a new character sequence. | |
void | ak_seqs_free (akSeqs *seqs) |
Frees memory. | |
akSeqs * | ak_seqs_load (const akFileList *filelist, const akDict *syms, char **err) |
Creates a new character sequence manager from files. | |
akSeqs * | ak_seqs_load_words (const akFileList *filelist, const akDict *syms, char **err) |
Creates a new character sequence manager from files containing words. | |
akSeqs * | ak_seqs_new (void) |
Creates a new character sequence manager. | |
akBuffer * | ak_buffer_new () |
Creates a new buffer. | |
void | ak_buffer_free (akBuffer *buffer) |
Frees memory. | |
int | ak_gline (FILE *f, akBuffer *buffer) |
Reads line. | |
int | ak_gtoken (FILE *f, akBuffer *buffer) |
Reads token. | |
int | ak_gtokenstr (char *str, char **begin, char **end) |
Reads token from string. | |
akDict * | ak_dict_new () |
Creates a new dictionary. | |
int | ak_dict_insert (akDict *dict, const void *token, size_t *id) |
Inserts a token. | |
size_t | ak_dict_find (const akDict *dict, const void *token) |
Searches a token. | |
void | ak_dict_free (akDict *dict) |
Frees memory. | |
akCSeq * | ak_cseq_copy (const akCSeq *cs) |
Creates a new character sequence from another one. | |
void | ak_cseq_free (akCSeq *cseq) |
Frees memory. | |
akCSeq * | ak_cseq_new (const size_t length) |
Creates a new empty character sequence. | |
akCSeq * | ak_cseq_new_from_word (const char *word, const akDict *syms, char **err) |
Creates a new character sequence from a word. | |
void | ak_cseq_print (const akCSeq *cs, FILE *to, const akDict *syms, const char sep) |
Prints a character sequence. | |
void | ak_cseq_print_wildcards (const akCSeq *cs, FILE *to, const akDict *syms, const char sep, const size_t first, const size_t last) |
Prints a character sequence with wildcards. | |
void | ak_wordgraph_free (akWordGraph *wg) |
Frees memory. | |
int | ak_wordgraph_load (akWordGraph **wg, FILE *from, const akDict *words, const akDict *syms, const char *end_sym, char **err) |
Loads a word graph. | |
void | ak_wordgraph_print (const akWordGraph *wg, FILE *to, const akDict *words, const akDict *syms, const char *end_sym) |
Prints the word graph. | |
void | ak_wordgraph_print_htk (const akWordGraph *wg, FILE *to, const akDict *words, const akDict *syms, const char *feaname, const char *start_sym, const char *end_sym) |
Prints the word graph using the HTK format. |
#define ak_dict_get_token | ( | DICT, | |
ID | |||
) | ((const char *) (DICT)->data[(ID)]) |
Gets the corresponding token.
Returns the mapped token to a given number. The number is supposed to be mapped to an existing token.
DICT | The dictionary. |
ID | The number of the requested token. |
#define ak_fea_get_real_data | ( | FEA | ) | ((float **) (FEA)->v) |
Gets a pointer to real data.
This macro gets a pointer to the feature vectors manager data. The data can be modified directly by accessing it as a C matrix of floats [N][D], where N is the feature vector index, and D is the selected dimension. This macro is only valid for AK_FEA_REAL feature vectors managers.
FEA | The feature vectors manager. |
#define ak_fea_load_fd | ( | FEA, | |
FD, | |||
ERR | |||
) | (FEA)->_load ( (FEA), (FD), (ERR) ) |
Loads a feature vector sequence.
This macro loads into the feature vector manager a feature vector sequence stored in the given file. The feature vector sequence must be of the same type as the feature vector manager. Binary feature vector sequences are expected to be PBM images, where each column is a binary feature vector.
FEA | The feature vectors manager. |
FD | The file where a feature vector sequence is stored. |
ERR | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
#define ak_fea_print | ( | FEA, | |
TO | |||
) | (FEA)->_print ( (FEA), (TO) ) |
Prints the content.
This macro prints the content in text representation. The feature vectors manager must be loaded; otherwise an unexpected error can happen.
FEA | The feature vectors manager. |
TO | File where content is written. |
#define ak_fea_write_fd | ( | FEA, | |
TO, | |||
BINARY, | |||
ERR | |||
) | (FEA)->_write ( (FEA), (TO), (BINARY), (ERR) ) |
Writes the content.
This macro writes the content into a file descriptor. The feature vectors manager must be loaded; otherwise an unexpected error can happen.
FEA | The feature vectors manager. |
TO | File where content is written. |
BINARY | Specifies whether the content must be written using the binary or the text representation. |
ERR | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
#define ak_seqs_get_cseq | ( | SEQS, | |
ID | |||
) | ((const akCSeq *) (SEQS)->cseqs->data[(ID)]) |
Gets a character sequence.
This macro returns, from the provided character sequence manager, a reference to the character sequence encoded as ID.
SEQS | The character sequence manager. |
ID | An integer encoding a character sequence. |
#define ak_seqs_get_num_cseqs | ( | SEQS | ) | ((SEQS)->cseqs->N) |
Gets the number of different character sequences.
This macro returns the number of different character sequences encoded in the character sequence manager. Character sequences are always encoded as consecutive numbers starting from 0.
SEQS | The character sequence manager. |
typedef double akFloat |
Type float.
enum akFeaType |
From ASCII to Prob.
Converts a string to akProb.
str | A pointer to string. Can not be NULL. |
void ak_buffer_free | ( | akBuffer * | buffer | ) |
Frees memory.
Frees the memory allocated for the buffer.
buffer | The buffer to be freed. |
akBuffer* ak_buffer_new | ( | ) |
Creates a new buffer.
akCSeq* ak_cseq_copy | ( | const akCSeq * | cs | ) |
Creates a new character sequence from another one.
This function creates a new character sequence with the same length and content of the provided character sequence.
cs | The character sequence to be copied. |
void ak_cseq_free | ( | akCSeq * | cseq | ) |
Frees memory.
Frees the memory allocated for the character sequence.
akCSeq* ak_cseq_new | ( | const size_t | length | ) |
Creates a new empty character sequence.
This function creates a new character sequence of the specified length, and sets all symbol identifiers to 0.
length | The length of the new character sequence. Must be greater than 0. |
akCSeq* ak_cseq_new_from_word | ( | const char * | word, |
const akDict * | syms, | ||
char ** | err | ||
) |
Creates a new character sequence from a word.
This function splits the provided word into UTF-8 characters and, using the provided symbol dictionary, creates a new character sequence containing the encoded word.
word | A string containing a UTF-8 word. |
syms | A dictionary used to convert UTF-8 characters to numbers. |
err | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
void ak_cseq_print | ( | const akCSeq * | cs, |
FILE * | to, | ||
const akDict * | syms, | ||
const char | sep | ||
) |
Prints a character sequence.
This function writes in the given file the character sequence. The character sequence is decoded using the given character dictionary. If sep is set to '\0' the sequence is printed without spaces between characters.
cs | The character sequence. |
to | File where the sequence is written. |
syms | Dictionary with the character symbols. |
sep | Character used as separator. '\0' means no separator. |
void ak_cseq_print_wildcards | ( | const akCSeq * | cs, |
FILE * | to, | ||
const akDict * | syms, | ||
const char | sep, | ||
const size_t | first, | ||
const size_t | last | ||
) |
Prints a character sequence with wildcards.
This function writes in the given file the character sequence, but replacing the first and last characters, which are assumed to be wildcards, with the given characters. The character sequence is decoded using the given character dictionary. If sep is set to '\0' the sequence is printed without spaces between characters.
cs | The character sequence. |
to | File where the sequence is written. |
syms | Dictionary with the character symbols. |
sep | Character used as separator. '\0' means no separator. |
first | First character. |
last | Last character. |
size_t ak_dict_find | ( | const akDict * | dict, |
const void * | token | ||
) |
Searches a token.
Returns the number mapped to the token, or dict->N if the token is not inserted in the dictionary.
dict | The dictionary. |
token | A C string (char *) containing the token. |
void ak_dict_free | ( | akDict * | dict | ) |
Frees memory.
Frees the memory allocated for the dictionary.
dict | The dictionary. |
int ak_dict_insert | ( | akDict * | dict, |
const void * | token, | ||
size_t * | id | ||
) |
Inserts a token.
Tries to insert a token into the dictionary. If the token already exists then it is not inserted. In any case the number mapped to the token is stored in id.
The first token inserted into the dictionary is mapped to 0, the second is mapped to 1, and so on.
dict | The dictionary. |
token | A C string (char *) containing the token. |
id | A pointer to a variable of type 'size_t'. The mapped number is stored in this variable. |
akDict* ak_dict_new | ( | ) |
Creates a new dictionary.
void ak_error | ( | const char * | format, |
... | |||
) |
Terminates the application.
Terminates the application with an error exit status, and shows an error message on the standard error.
format | Format of the error message. |
void ak_fea_free | ( | akFea * | fea | ) |
Frees memory.
Frees the memory allocated for the feature vectors manager.
fea | The feature vectors manager. |
char** ak_fea_get_binary_data | ( | akFea * | fea | ) |
Gets a pointer to binary data.
This function gets a pointer to the feature vectors manager data. The data can be modified directly by accessing it as a C matrix of chars [N][D], where N is the feature vector index, and D is the selected dimension. This function is only valid for AK_FEA_BINARY feature vectors managers. The only allowed values are 0 and 1; other values could produce undesirable behavior. Once the feature vector manager has been used, this function must be called again if we want to modify the values again.
fea | The feature vectors manager. |
int ak_fea_load | ( | akFea * | fea, |
const char * | file_name, | ||
char ** | err | ||
) |
Loads a feature vector sequence.
This function does the same as ak_fea_load_fd. The difference is that in this function the name of the file is provided instead of the file descriptor.
fea | The feature vectors manager. |
file_name | The name of the file where a feature vector sequence is stored. |
err | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
akFea* ak_fea_new | ( | const akFeaType | type | ) |
Creates a new feature vectors manager.
This function creates a new manager for feature vectors of the provided type.
type | The type of the feature vectors. |
void ak_fea_resize | ( | akFea * | fea, |
const int | dim, | ||
const int | nvecs | ||
) |
Resizes memory.
This function resizes the memory used by the manager to store the sample data. If the current capacity is enough this function does nothing.
fea | The feature vectors manager. |
dim | The required feature vector dimension. |
nvecs | The required number of feature vectors. |
int ak_fea_write | ( | const akFea * | fea, |
const char * | file_name, | ||
const akBool | binary, | ||
char ** | err | ||
) |
Writes the content.
This function writes the content into a file. The feature vectors manager must be loaded; otherwise an unexpected error can happen.
fea | The feature vectors manager. |
file_name | Name of the file where content must be written. |
binary | Specifies whether the content must be written using the binary or the text representation. |
err | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
void ak_filelist_free | ( | akFileList * | filelist | ) |
Frees memory.
Frees the memory allocated for the file list.
akFileList* ak_filelist_new_from_file | ( | const char * | file_name | ) |
Creates a new file list from a text file.
This function reads the content of a text file, where each line is the name of a file, and loads it into a new file list.
file_name | File name with the file list. |
Reads line.
This function reads the next line from file into the provided text buffer. Line separators are ignored.
f | Input file descriptor. |
buffer | Text buffer where the line is read. |
Reads token.
This function reads the next token from file into the provided text buffer. Blank separators are ignored.
f | Input file descriptor. |
buffer | Text buffer where the token is read. |
int ak_gtokenstr | ( | char * | str, |
char ** | begin, | ||
char ** | end | ||
) |
Reads token from string.
This function reads the next token from a given string.
str | Input string. |
begin | A pointer to string, where the address of the first character of the token is stored, or NULL if there is no token in str. |
end | A pointer to string, where, if it exists, the address of the next character to the token is stored. |
void ak_prob_print | ( | const akProb | prob, |
FILE * | to | ||
) |
Prints a probability.
Prints a probability in the specified file. The format depends on the current locale.
prob | Probability. |
to | File where the probability is printed. |
void ak_seqs_adjust | ( | akSeqs * | seqs | ) |
Adjusts memory.
This function adjusts the memory related to the field 'seqs'.
seqs | The character sequence manager. |
size_t ak_seqs_append | ( | akSeqs * | seqs, |
const akCSeq * | cseq | ||
) |
Appends a new character sequence.
This function appends a new character sequence to the character sequence manager, the character sequence may be repeated, and returns the identifier of the inserted character sequence.
seqs | The character sequence manager. |
cseq | The new character sequence. |
void ak_seqs_free | ( | akSeqs * | seqs | ) |
Frees memory.
Frees the memory allocated for the character sequence manager.
seqs | The character sequence manager. |
akSeqs* ak_seqs_load | ( | const akFileList * | filelist, |
const akDict * | syms, | ||
char ** | err | ||
) |
Creates a new character sequence manager from files.
This function creates a new sequence manager and loads all the character sequences from the provided akFileList. Each file is supposed to contain a character sequence encoded as a sequence of tokens separated by blank characters. Sequences are loaded respecting the original order in the akFileList. The provided character dictionary is used to encode the character tokens.
filelist | The list with the files containing the character sequences. |
syms | The dictionary with the character tokens. |
err | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
akSeqs* ak_seqs_load_words | ( | const akFileList * | filelist, |
const akDict * | syms, | ||
char ** | err | ||
) |
Creates a new character sequence manager from files containing words.
This function works as ak_seqs_load, but the character sequences are supposed to be words encoded using UTF-8.
filelist | The list with the files containing the character sequences. |
syms | The dictionary with the characters. |
err | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
akSeqs* ak_seqs_new | ( | void | ) |
Creates a new character sequence manager.
This function creates a new empty character sequence manager.
void ak_tar_free | ( | akTar * | tar | ) |
FILE* ak_tar_get_file | ( | akTar * | tar, |
const char * | name, | ||
char ** | err | ||
) |
Returns a file descriptor to desired file.
This function returns the file descriptor of the tar file, positioned at the beginning of the desired file. Therefore, the file descriptor must not be closed.
tar | The akTar structure. |
name | The name of the desired file. |
err | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
akTar* ak_tar_new | ( | const char * | file_name, |
char ** | err | ||
) |
void ak_warning | ( | const char * | format, |
... | |||
) |
Warning message.
Shows a warning message on the standard error.
format | Format of the warning message. |
void ak_wordgraph_free | ( | akWordGraph * | wg | ) |
Frees memory.
Frees the memory allocated for the word graph.
wg | The word graph. |
int ak_wordgraph_load | ( | akWordGraph ** | wg, |
FILE * | from, | ||
const akDict * | words, | ||
const akDict * | syms, | ||
const char * | end_sym, | ||
char ** | err | ||
) |
Loads a word graph.
Loads a word graph from a text description.
wg | Pointer to the memory address where the word graph will be loaded. If it points to NULL a new word graph will be created. |
from | File where the text description is stored. |
words | Dictionary containing the words. |
syms | Dictionary containing the symbols. Only required in case of word graphs with segments. |
end_sym | Token used to represent the special final word. NULL is equivalent to "</s>". |
err | Pointer to string variable. If not NULL an error message is allocated in the variable in case of error. |
void ak_wordgraph_print | ( | const akWordGraph * | wg, |
FILE * | to, | ||
const akDict * | words, | ||
const akDict * | syms, | ||
const char * | end_sym | ||
) |
Prints the word graph.
This function writes in the given file a text representation of the word graph. The provided word dictionary is supposed to contain all needed words; otherwise an unexpected error could happen.
wg | The word graph. |
to | File where the word graph is written. |
words | Dictionary containing the words. |
syms | Dictionary containing the symbols. Not required if the word graph doesn't contain segments. |
end_sym | Token used to print the special final word. NULL is equivalent to "</s>". |
void ak_wordgraph_print_htk | ( | const akWordGraph * | wg, |
FILE * | to, | ||
const akDict * | words, | ||
const akDict * | syms, | ||
const char * | feaname, | ||
const char * | start_sym, | ||
const char * | end_sym | ||
) |
Prints the word graph using the HTK format.
This function writes in the given file a text representation of the word graph using the HTK format. The provided word dictionary is supposed to contain all needed words; otherwise an unexpected error could happen.
wg | The word graph. |
to | File where the word graph is written. |
words | Dictionary containing the words. |
syms | Dictionary containing the symbols. Not required if the word graph doesn't contain segments. |
feaname | A string with the name of the feature related to the word graph. |
start_sym | Token used to print the special initial word. NULL is equivalent to "<s>". |
end_sym | Token used to print the special final word. NULL is equivalent to "</s>". |