#include "../cl/globals.h"
#include "../cl/cl.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/storage.h"
#include "../cl/bitio.h"
#include "../cl/macros.h"
void bprintf(unsigned int i, int width, FILE *stream)
Prints a binary representation of an integer to a stream.
i | Integer to print | |
width | Number of bits in the integer | |
stream | Where to print to. |
Referenced by compute_code_lengths().
compute_code_lengths(): Compresses the token stream of a p-attribute.
Three files are created: the compressed token stream, the descriptor block, and a sync file.
attr | The attribute to compress. | |
hc | Location for the resulting Huffman code descriptor block. | |
fname | Base filename for the resulting files. |
References _Attribute::any, BFclose(), BFflush(), BFopen(), BFposition(), BFwriteWord(), bprintf(), CDA_OK, cderrno, cdperror(), cl_calloc(), cl_cpos2id, cl_malloc(), cl_max_cpos, cl_max_id, CompCorpus, CompCorpusFreqs, CompHuffCodes, CompHuffSeq, CompHuffSync, CompLexicon, CompLexiconIdx, component_full_name(), corpus_id, do_protocol, ensure_component(), get_id_frequency(), get_string_of_id(), _huffman_code_descriptor::lcount, _huffman_code_descriptor::length, _huffman_code_descriptor::max_codelen, MAX_LINE_LENGTH, MAXCODELEN, _huffman_code_descriptor::min_code, _huffman_code_descriptor::min_codelen, NwriteInt(), print_heap(), protocol, TCorpus::registry_dir, TCorpus::registry_name, sift(), _huffman_code_descriptor::size, _huffman_code_descriptor::symbols, _huffman_code_descriptor::symindex, SYNCHRONIZATION, and WriteHCD().
Referenced by main().
void decode_check_huff(Attribute *attr, char *fname)
Checks a huffcoded attribute for errors by decompressing it.
This function assumes that compute_code_lengths() has been called beforehand and has made sure that the _uncompressed_ token sequence is used by the CL access functions.
attr | The attribute to check. | |
fname | Base filename to use for the three compressed-attribute files. Can be NULL, in which case the filenames in the attribute are used. |
References _Attribute::any, BFclose(), BFflush(), BFopen(), BFposition(), BFread(), CDA_OK, cderrno, cl_cpos2id, cl_max_cpos, CompCorpus, CompHuffCodes, CompHuffSeq, CompHuffSync, component_full_name(), corpus_id, _huffman_code_descriptor::length, MAX_LINE_LENGTH, _huffman_code_descriptor::min_code, NreadInt(), ReadHCD(), _huffman_code_descriptor::symbols, _huffman_code_descriptor::symindex, and SYNCHRONIZATION.
Referenced by main().
void dump_heap(int *heap, int heap_size, int node, int indent)
Dumps the specified heap of memory to the program output stream.
heap | Location of the heap to dump. | |
heap_size | Number of nodes in the heap. | |
node | Node at which to begin dumping. | |
indent | How many tabs to indent the start of each line. |
References protocol.
Referenced by print_heap().
int main(int argc, char **argv)
Main function for cwb-huffcode.
argc | Number of command-line arguments. | |
argv | Command-line arguments. |
References _Attribute::any, ATT_POS, TCorpus::attributes, central_corpus_directory(), cl_delete_corpus, cl_new_attribute, cl_new_corpus, compute_code_lengths(), corpus_id, debug, decode_check_huff(), DEFAULT_ATT_NAME, do_protocol, progname, protocol, registry_directory, and usage().
void print_heap(int *heap, int heap_size, char *title)
Prints a description of the specified heap of memory to the program output stream.
heap | Location of the heap to print. | |
heap_size | Number of nodes in the heap. | |
title | Title of the heap to print. |
References dump_heap(), node, and protocol.
Referenced by compute_code_lengths().
int ReadHCD(char *filename, HCD *hc)
Reads a Huffman code descriptor block from file.
filename | Path to file where the descriptor block is saved. | |
hc | Pointer to location where the sequence's descriptor block will be loaded to. |
References cl_malloc(), _huffman_code_descriptor::lcount, _huffman_code_descriptor::length, _huffman_code_descriptor::max_codelen, MAXCODELEN, _huffman_code_descriptor::min_code, _huffman_code_descriptor::min_codelen, NreadInt(), NreadInts(), _huffman_code_descriptor::size, _huffman_code_descriptor::symbols, and _huffman_code_descriptor::symindex.
Referenced by decode_check_huff().
static int sift(int *heap, int heap_size, int node) [static]
Sifts the heap into order.
heap | Location of the heap to sift. | |
heap_size | Number of nodes in the heap. | |
node | Node at which to begin sifting. |
Referenced by compute_code_lengths().
void usage(char *msg, int error_code)
Prints a usage message and exits the program.
msg | A message about the error. | |
error_code | Value to be returned by the program when it exits. |
References drop_corpus(), and progname.
int WriteHCD(char *filename, HCD *hc)
Writes a Huffman code descriptor to file.
filename | Path to file where descriptor is to be saved. | |
hc | Pointer to the descriptor block to save. |
References _huffman_code_descriptor::lcount, _huffman_code_descriptor::length, _huffman_code_descriptor::max_codelen, MAXCODELEN, _huffman_code_descriptor::min_code, _huffman_code_descriptor::min_codelen, NwriteInt(), NwriteInts(), _huffman_code_descriptor::size, _huffman_code_descriptor::symbols, and _huffman_code_descriptor::symindex.
Referenced by compute_code_lengths().
char* corpus_id = NULL
int debug = 0
int do_protocol = 0
Level of progress-info (including compression protocol) message output: 0 = none.
Referenced by compute_code_lengths(), and main().
char* progname
FILE* protocol
File handle for this program's progress-info output: always stdout.
Referenced by compute_code_lengths(), dump_heap(), main(), and print_heap().