libak  0.4.0
Data Structures | Defines | Typedefs | Enumerations | Functions
Basic types and utilities

Data Structures

struct  akProbPair
 A pair of log-probabilities. More...
struct  akFileList
 List of file names. More...
struct  akTar
 Tar files. More...
struct  akFea
 Feature vectors. More...
struct  akSeqs
 Character sequence manager. More...
struct  akBuffer
 Text buffer. More...
struct  akDict
 Dictionary of tokens. More...
struct  akCSeq
 Character sequence. More...
struct  akWGState
 Word graph state. More...
struct  akWGSegment
 Word graph segment. More...
struct  akWGEdge
 Word graph edge. More...
struct  akWGList
 List node. More...
struct  akWordGraph
 Word graph. More...

Defines

#define ak_fea_get_real_data(FEA)   ((float **) (FEA)->v)
 Gets a pointer to real data.
#define ak_fea_load_fd(FEA, FD, ERR)   (FEA)->_load ( (FEA), (FD), (ERR) )
 Loads a feature vector sequence.
#define ak_fea_print(FEA, TO)   (FEA)->_print ( (FEA), (TO) )
 Prints the content.
#define ak_fea_write_fd(FEA, TO, BINARY, ERR)   (FEA)->_write ( (FEA), (TO), (BINARY), (ERR) )
 Writes the content.
#define ak_seqs_get_num_cseqs(SEQS)   ((SEQS)->cseqs->N)
 Gets the number of different character sequences.
#define ak_seqs_get_cseq(SEQS, ID)   ((const akCSeq *) (SEQS)->cseqs->data[(ID)])
 Gets a character sequence.
#define ak_dict_get_token(DICT, ID)   ((const char *) (DICT)->data[(ID)])
 Gets the corresponding token.

Typedefs

typedef double akFloat
 Type float.
typedef akFloat akProb
 An alias of the type akFloat used to represent log-probabilities.
typedef unsigned char akBool
 Boolean type.

Enumerations

enum  akFeaType { AK_FEA_BINARY, AK_FEA_REAL }
 Type of features. More...

Functions

akProb ak_atop (const char *str)
 From ASCII to Prob.
void ak_prob_print (const akProb prob, FILE *to)
 Prints a probability.
void ak_error (const char *format,...)
 Terminates the application.
void ak_warning (const char *format,...)
 Warning message.
akFileList * ak_filelist_new_from_file (const char *file_name)
 Creates a new file list from a text file.
void ak_filelist_free (akFileList *filelist)
 Frees memory.
void ak_tar_free (akTar *tar)
 Frees memory.
FILE * ak_tar_get_file (akTar *tar, const char *name, char **err)
 Returns a file descriptor to desired file.
akTar * ak_tar_new (const char *file_name, char **err)
 Creates a new akTar.
void ak_fea_free (akFea *fea)
 Frees memory.
char ** ak_fea_get_binary_data (akFea *fea)
 Gets a pointer to binary data.
int ak_fea_load (akFea *fea, const char *file_name, char **err)
 Loads a feature vector sequence.
akFea * ak_fea_new (const akFeaType type)
 Creates a new feature vectors manager.
void ak_fea_resize (akFea *fea, const int dim, const int nvecs)
 Resizes memory.
int ak_fea_write (const akFea *fea, const char *file_name, const akBool binary, char **err)
 Writes the content.
void ak_seqs_adjust (akSeqs *seqs)
 Adjusts memory.
size_t ak_seqs_append (akSeqs *seqs, const akCSeq *cseq)
 Appends a new character sequence.
void ak_seqs_free (akSeqs *seqs)
 Frees memory.
akSeqs * ak_seqs_load (const akFileList *filelist, const akDict *syms, char **err)
 Creates a new character sequence manager from files.
akSeqs * ak_seqs_load_words (const akFileList *filelist, const akDict *syms, char **err)
 Creates a new character sequence manager from files containing words.
akSeqs * ak_seqs_new (void)
 Creates a new character sequence manager.
akBuffer * ak_buffer_new ()
 Creates a new buffer.
void ak_buffer_free (akBuffer *buffer)
 Frees memory.
int ak_gline (FILE *f, akBuffer *buffer)
 Reads line.
int ak_gtoken (FILE *f, akBuffer *buffer)
 Reads token.
int ak_gtokenstr (char *str, char **begin, char **end)
 Reads token from string.
akDict * ak_dict_new ()
 Creates a new dictionary.
int ak_dict_insert (akDict *dict, const void *token, size_t *id)
 Inserts a token.
size_t ak_dict_find (const akDict *dict, const void *token)
 Searches a token.
void ak_dict_free (akDict *dict)
 Frees memory.
akCSeq * ak_cseq_copy (const akCSeq *cs)
 Creates a new character sequence from another one.
void ak_cseq_free (akCSeq *cseq)
 Frees memory.
akCSeq * ak_cseq_new (const size_t length)
 Creates a new empty character sequence.
akCSeq * ak_cseq_new_from_word (const char *word, const akDict *syms, char **err)
 Creates a new character sequence from a word.
void ak_cseq_print (const akCSeq *cs, FILE *to, const akDict *syms, const char sep)
 Prints a character sequence.
void ak_cseq_print_wildcards (const akCSeq *cs, FILE *to, const akDict *syms, const char sep, const size_t first, const size_t last)
 Prints a character sequence with wildcards.
void ak_wordgraph_free (akWordGraph *wg)
 Frees memory.
int ak_wordgraph_load (akWordGraph **wg, FILE *from, const akDict *words, const akDict *syms, const char *end_sym, char **err)
 Loads a word graph.
void ak_wordgraph_print (const akWordGraph *wg, FILE *to, const akDict *words, const akDict *syms, const char *end_sym)
 Prints the word graph.
void ak_wordgraph_print_htk (const akWordGraph *wg, FILE *to, const akDict *words, const akDict *syms, const char *feaname, const char *start_sym, const char *end_sym)
 Prints the word graph using the HTK format.

Define Documentation

#define ak_dict_get_token (   DICT,
  ID 
)    ((const char *) (DICT)->data[(ID)])

Gets the corresponding token.

Returns the token mapped to a given number. The number is supposed to be mapped to an existing token.

Parameters:
DICT: The dictionary.
ID: The number of the requested token.
Returns:
The token.
#define ak_fea_get_real_data (   FEA)    ((float **) (FEA)->v)

Gets a pointer to real data.

This macro gets a pointer to the feature vectors manager data. The data can be directly modified by accessing it as a C matrix of floats [N][D], where N is the feature vector index, and D is the selected dimension. This macro is only valid for AK_FEA_REAL feature vectors managers.

Parameters:
FEA: The feature vectors manager.
Returns:
A pointer (float **) to the real data.
#define ak_fea_load_fd (   FEA,
  FD,
  ERR 
)    (FEA)->_load ( (FEA), (FD), (ERR) )

Loads a feature vector sequence.

This macro loads into the feature vector manager a feature vector sequence stored in the given file. The feature vector sequence must be of the same type as the feature vector manager. Binary feature vector sequences are expected to be in PBM image format, where each column is a binary feature vector.

Parameters:
FEA: The feature vectors manager.
FD: The file where a feature vector sequence is stored.
ERR: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 in other case.
#define ak_fea_print (   FEA,
  TO 
)    (FEA)->_print ( (FEA), (TO) )

Prints the content.

This macro prints the content in text representation. The feature vectors manager must be loaded; otherwise an unexpected error can happen.

Parameters:
FEA: The feature vectors manager.
TO: File where content is written.
#define ak_fea_write_fd (   FEA,
  TO,
  BINARY,
  ERR 
)    (FEA)->_write ( (FEA), (TO), (BINARY), (ERR) )

Writes the content.

This macro writes the content into a file descriptor. The feature vectors manager must be loaded; otherwise an unexpected error can happen.

Parameters:
FEA: The feature vectors manager.
TO: File where content is written.
BINARY: Specifies whether the content must be written using the binary or the text representation.
ERR: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 in other case.
#define ak_seqs_get_cseq (   SEQS,
  ID 
)    ((const akCSeq *) (SEQS)->cseqs->data[(ID)])

Gets a character sequence.

This macro returns, from the provided character sequence manager, a reference to the character sequence encoded as ID.

Parameters:
SEQS: The character sequence manager.
ID: An integer encoding a character sequence.
Returns:
A reference to the required character sequence.
#define ak_seqs_get_num_cseqs (   SEQS)    ((SEQS)->cseqs->N)

Gets the number of different character sequences.

This macro returns the number of different character sequences encoded in the character sequence manager. Character sequences are always encoded as consecutive numbers starting from 0.

Parameters:
SEQS: The character sequence manager.
Returns:
The number of different character sequences in the manager.

Typedef Documentation

typedef double akFloat

Type float.


Enumeration Type Documentation

enum akFeaType

Type of features.

Enumerator:
AK_FEA_BINARY 

Binary feature.

AK_FEA_REAL 

Real feature.


Function Documentation

akProb ak_atop ( const char *  str)

From ASCII to Prob.

Converts a string to akProb.

Parameters:
str: A pointer to a string. Cannot be NULL.
Returns:
The converted value.
void ak_buffer_free ( akBuffer *  buffer)

Frees memory.

Frees the memory allocated for the buffer.

Parameters:
buffer: The buffer to be freed.
akBuffer* ak_buffer_new ( )

Creates a new buffer.

Returns:
The new buffer.
akCSeq* ak_cseq_copy ( const akCSeq *  cs)

Creates a new character sequence from another one.

This function creates a new character sequence with the same length and content as the provided character sequence.

Parameters:
cs: The character sequence to be copied.
Returns:
The new character sequence.
void ak_cseq_free ( akCSeq *  cseq)

Frees memory.

Frees the memory allocated for the character sequence.

akCSeq* ak_cseq_new ( const size_t  length)

Creates a new empty character sequence.

This function creates a new character sequence of the specified length, and sets all symbol identifiers to 0.

Parameters:
length: The length of the new character sequence. Must be greater than 0.
Returns:
The new character sequence.
akCSeq* ak_cseq_new_from_word ( const char *  word,
const akDict *  syms,
char **  err 
)

Creates a new character sequence from a word.

This function splits the provided word into UTF-8 characters and, using the provided symbol dictionary, creates a new character sequence containing the encoded word.

Parameters:
word: A string containing a UTF-8 word.
syms: A dictionary used to convert UTF-8 characters to numbers.
err: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
The new character sequence, or NULL in case of error.
void ak_cseq_print ( const akCSeq *  cs,
FILE *  to,
const akDict *  syms,
const char  sep 
)

Prints a character sequence.

This function writes in the given file the character sequence. The character sequence is decoded using the given character dictionary. If sep is set to '\0' the sequence is printed without spaces between characters.

Parameters:
cs: The character sequence.
to: File where the sequence is written.
syms: Dictionary with the character symbols.
sep: Character used as separator. '\0' means no separator.
void ak_cseq_print_wildcards ( const akCSeq *  cs,
FILE *  to,
const akDict *  syms,
const char  sep,
const size_t  first,
const size_t  last 
)

Prints a character sequence with wildcards.

This function writes in the given file the character sequence, but replacing the first and last characters, which are assumed to be wildcards, with the given characters. The character sequence is decoded using the given character dictionary. If sep is set to '\0' the sequence is printed without spaces between characters.

Parameters:
cs: The character sequence.
to: File where the sequence is written.
syms: Dictionary with the character symbols.
sep: Character used as separator. '\0' means no separator.
first: First character.
last: Last character.
size_t ak_dict_find ( const akDict *  dict,
const void *  token 
)

Searches a token.

Returns the number mapped to the token, or dict->N if the token is not inserted in the dictionary.

Parameters:
dict: The dictionary.
token: A C string (char *) containing the token.
Returns:
The number mapped to the token, or dict->N if the token is not found.
void ak_dict_free ( akDict *  dict)

Frees memory.

Frees the memory allocated for the dictionary.

Parameters:
dict: The dictionary.
int ak_dict_insert ( akDict *  dict,
const void *  token,
size_t *  id 
)

Inserts a token.

Tries to insert a token into the dictionary. If the token already exists then it is not inserted. In any case the number mapped to the token is stored in id.

The first token inserted into the dictionary is mapped to 0, the second is mapped to 1, and so on.

Parameters:
dict: The dictionary.
token: A C string (char *) containing the token.
id: A pointer to a variable of type 'size_t'. The mapped number is stored in this variable.
Returns:
Returns 1 if the token has been inserted, 0 if the token is already in the dictionary.
akDict* ak_dict_new ( )

Creates a new dictionary.

Returns:
The new dictionary.
void ak_error ( const char *  format,
  ... 
)

Terminates the application.

Terminates the application with an error exit status, and prints an error message to standard error.

Parameters:
format: Format of the error message.
void ak_fea_free ( akFea *  fea)

Frees memory.

Frees the memory allocated for the feature vectors manager.

Parameters:
fea: The feature vectors manager.
char** ak_fea_get_binary_data ( akFea *  fea)

Gets a pointer to binary data.

This function gets a pointer to the feature vectors manager data. The data can be directly modified by accessing it as a C matrix of chars [N][D], where N is the feature vector index, and D is the selected dimension. This function is only valid for AK_FEA_BINARY feature vectors managers. The only allowed values are 0 and 1; other values could produce undesirable behavior. Once the feature vector manager has been used, this function must be called again if we want to modify the values again.

Parameters:
fea: The feature vectors manager.
Returns:
A pointer to the binary data.
int ak_fea_load ( akFea *  fea,
const char *  file_name,
char **  err 
)

Loads a feature vector sequence.

This function does the same as ak_fea_load_fd. The difference is that this function takes the name of the file instead of the file descriptor.

Parameters:
fea: The feature vectors manager.
file_name: The name of the file where a feature vector sequence is stored.
err: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 in other case.
akFea* ak_fea_new ( const akFeaType  type)

Creates a new feature vectors manager.

This function creates a new manager for feature vectors of the provided type.

Parameters:
type: The type of the feature vectors.
Returns:
The new feature vectors manager.
void ak_fea_resize ( akFea *  fea,
const int  dim,
const int  nvecs 
)

Resizes memory.

This function resizes the memory used by the manager to store the sample data. If the current capacity is enough this function does nothing.

Parameters:
fea: The feature vectors manager.
dim: The required feature vector dimension.
nvecs: The required number of feature vectors.
int ak_fea_write ( const akFea *  fea,
const char *  file_name,
const akBool  binary,
char **  err 
)

Writes the content.

This function writes the content into a file. The feature vectors manager must be loaded; otherwise an unexpected error can happen.

Parameters:
fea: The feature vectors manager.
file_name: Name of the file where content must be written.
binary: Specifies whether the content must be written using the binary or the text representation.
err: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 in other case.
void ak_filelist_free ( akFileList *  filelist)

Frees memory.

Frees the memory allocated for the file list.

akFileList* ak_filelist_new_from_file ( const char *  file_name)

Creates a new file list from a text file.

This function reads the content of a text file, where each line is the name of a file, and loads it into a new file list.

Parameters:
file_name: Name of the file containing the file list.
Returns:
The new file list, or NULL if the file cannot be opened. In that case errno is set by the C library.
int ak_gline ( FILE *  f,
akBuffer *  buffer 
)

Reads line.

This function reads the next line from file into the provided text buffer. Line separators are ignored.

Parameters:
f: Input file descriptor.
buffer: Text buffer where the line is read.
Returns:
0 if no line has been read.
int ak_gtoken ( FILE *  f,
akBuffer *  buffer 
)

Reads token.

This function reads the next token from file into the provided text buffer. Blank separators are ignored.

Parameters:
f: Input file descriptor.
buffer: Text buffer where the token is read.
Returns:
0 if no token has been read.
int ak_gtokenstr ( char *  str,
char **  begin,
char **  end 
)

Reads token from string.

This function reads the next token from a given string.

Parameters:
str: Input string.
begin: A pointer to string, where the address of the first character of the token is stored, or NULL if there is no token in str.
end: A pointer to string, where, if it exists, the address of the character following the token is stored.
Returns:
1 if the input string has been fully processed, 0 in other case.
void ak_prob_print ( const akProb  prob,
FILE *  to 
)

Prints a probability.

Prints a probability in the specified file. The format depends on the current locale.

Parameters:
prob: Probability.
to: File where the probability is printed.
void ak_seqs_adjust ( akSeqs *  seqs)

Adjusts memory.

This function adjusts the memory related to the field 'seqs'.

Parameters:
seqs: The character sequence manager.
size_t ak_seqs_append ( akSeqs *  seqs,
const akCSeq *  cseq 
)

Appends a new character sequence.

This function appends a new character sequence to the character sequence manager, the character sequence may be repeated, and returns the identifier of the inserted character sequence.

Parameters:
seqs: The character sequence manager.
cseq: The new character sequence.
Returns:
The identifier of the inserted character sequence.
void ak_seqs_free ( akSeqs *  seqs)

Frees memory.

Frees the memory allocated for the character sequence manager.

Parameters:
seqs: The character sequence manager.
akSeqs* ak_seqs_load ( const akFileList *  filelist,
const akDict *  syms,
char **  err 
)

Creates a new character sequence manager from files.

This function creates a new sequence manager and loads all the character sequences from the provided akFileList. Each file is supposed to contain a character sequence encoded as a sequence of tokens separated by blank characters. Sequences are loaded respecting the original order in the akFileList. The provided character dictionary is used to encode the character tokens.

Parameters:
filelist: The list with the files containing the character sequences.
syms: The dictionary with the character tokens.
err: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
The new character sequence manager, or NULL in case of error.
akSeqs* ak_seqs_load_words ( const akFileList *  filelist,
const akDict *  syms,
char **  err 
)

Creates a new character sequence manager from files containing words.

This function works as ak_seqs_load, but the character sequences are supposed to be words encoded using UTF-8.

Parameters:
filelist: The list with the files containing the character sequences.
syms: The dictionary with the characters.
err: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
The new character sequence manager, or NULL in case of error.
akSeqs* ak_seqs_new ( void  )

Creates a new character sequence manager.

This function creates a new empty character sequence manager.

Returns:
The new character sequence manager.
void ak_tar_free ( akTar *  tar)

Frees memory.

Frees the memory allocated for an akTar.

Parameters:
tar: The akTar structure.
FILE* ak_tar_get_file ( akTar *  tar,
const char *  name,
char **  err 
)

Returns a file descriptor to desired file.

This function returns the file descriptor of the tar file, positioned at the beginning of the desired file. Therefore, the file descriptor must not be closed.

Parameters:
tar: The akTar structure.
name: The name of the desired file.
err: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
The file descriptor to the desired file, or NULL in case of error. This descriptor must not be closed.
akTar* ak_tar_new ( const char *  file_name,
char **  err 
)

Creates a new akTar.

This function creates a new akTar for a given file.

Parameters:
file_name: The name of the file.
err: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
The new akTar.
void ak_warning ( const char *  format,
  ... 
)

Warning message.

Prints a warning message to standard error.

Parameters:
format: Format of the warning message.
void ak_wordgraph_free ( akWordGraph *  wg)

Frees memory.

Frees the memory allocated for the word graph.

Parameters:
wg: The word graph.
int ak_wordgraph_load ( akWordGraph **  wg,
FILE *  from,
const akDict *  words,
const akDict *  syms,
const char *  end_sym,
char **  err 
)

Loads a word graph.

Loads a word graph from a text description.

Parameters:
wg: Pointer to the memory address where the word graph will be loaded. If it points to NULL, a new word graph will be created.
from: File where the text description is stored.
words: Dictionary containing the words.
syms: Dictionary containing the symbols. Only required in case of word graphs with segments.
end_sym: Token used to represent the special final word. NULL is equivalent to "</s>".
err: Pointer to a string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 in other case.
void ak_wordgraph_print ( const akWordGraph *  wg,
FILE *  to,
const akDict *  words,
const akDict *  syms,
const char *  end_sym 
)

Prints the word graph.

This function writes in the given file a text representation of the word graph. The provided word dictionary is supposed to contain all needed words; otherwise an unexpected error could happen.

Parameters:
wg: The word graph.
to: File where the word graph is written.
words: Dictionary containing the words.
syms: Dictionary containing the symbols. Not required if the word graph doesn't contain segments.
end_sym: Token used to print the special final word. NULL is equivalent to "</s>".
void ak_wordgraph_print_htk ( const akWordGraph *  wg,
FILE *  to,
const akDict *  words,
const akDict *  syms,
const char *  feaname,
const char *  start_sym,
const char *  end_sym 
)

Prints the word graph using the HTK format.

This function writes in the given file a text representation of the word graph using the HTK format. The provided word dictionary is supposed to contain all needed words; otherwise an unexpected error could happen.

Parameters:
wg: The word graph.
to: File where the word graph is written.
words: Dictionary containing the words.
syms: Dictionary containing the symbols. Not required if the word graph doesn't contain segments.
feaname: A string with the name of the feature related to the word graph.
start_sym: Token used to print the special initial word. NULL is equivalent to "<s>".
end_sym: Token used to print the special final word. NULL is equivalent to "</s>".
 All Data Structures Variables