#include <stdarg.h>
#include <math.h>
#include <ctype.h>
#include <sys/types.h>
#include "globals.h"
#include "endian.h"
#include <regex.h>
#include "macros.h"
#include "attributes.h"
#include "special-chars.h"
#include "bitio.h"
#include "compression.h"
#include "regopt.h"
#include "cdaccess.h"
#define check_arg(arg, atyp, rval) \
  if (arg == NULL) { \
    cderrno = CDA_ENULLATT; return rval; \
  } \
  else if (arg->type != atyp) { \
    cderrno = CDA_EATTTYPE; return rval; \
  }
Checks an Attribute passed as a function argument for usability in that function.
(a) arg must not be NULL.
(b) arg type has to be the type specified in atyp.
If these conditions are not satisfied, cderrno is set to the corresponding error code (CDA_ENULLATT or CDA_EATTTYPE) and the calling function returns rval.
Referenced by call_dynamic_attribute(), cl_has_extended_alignment(), collect_matches(), collect_matching_ids(), cumulative_id_frequency(), get_alg_attribute(), get_attribute_size(), get_bounds_of_nth_struc(), get_id_at_position(), get_id_frequency(), get_id_from_sortidx(), get_id_info(), get_id_of_string(), get_id_range(), get_id_string_len(), get_nr_of_strucs(), get_num_of_struc(), get_positions(), get_sortidxpos_of_id(), get_string_at_position(), get_string_of_id(), get_struc_attribute(), inverted_file_is_compressed(), item_sequence_is_compressed(), nr_of_arguments(), OpenPositionStream(), structure_has_values(), and structure_value().
#define COMPRESS_DEBUG 0 |
If COMPRESS_DEBUG is set to a positive integer, get_id_at_position() will print debugging messages.
(2 prints more than 1!)
Referenced by get_id_at_position().
typedef struct _position_stream_rec_ PositionStreamRecord |
Underlying structure for the PositionStream object.
PositionStreams are used for accessing Attributes. Each one represents a stream of corpus positions, representing positions where a given item occurs.
int call_dynamic_attribute | ( | Attribute * | attribute, | |
DynCallResult * | dcr, | |||
DynCallResult * | args, | |||
int | nr_args | |||
) |
Calls a dynamic attribute.
This is the attribute access function for dynamic attributes.
attribute | The (dynamic) attribute in question. | |
dcr | Location for the result (*int or *char). | |
args | Location of the parameters (of *int or *char). | |
nr_args | Number of parameters. |
References Dynamic_Attribute::arglist, ATT_DYN, ATTAT_FLOAT, ATTAT_INT, ATTAT_NONE, ATTAT_PAREF, ATTAT_POS, ATTAT_STRING, ATTAT_VAR, Dynamic_Attribute::call, CDA_EARGS, CDA_OK, cderrno, _DCR::charres, check_arg, cl_strdup(), _Attribute::dyn, error(), _DCR::floatres, _DCR::intres, _DynArg::next, Dynamic_Attribute::res_type, _DCR::type, _DynArg::type, and _DCR::value.
Referenced by get_leaf_value().
void cdperror | ( | char * | message | ) |
Prints an error message, together with a string identifying the current error number.
References cderrno, and cdperror_string().
Referenced by compress_reversed_index(), compute_code_lengths(), decompress_check_reversed_index(), do_show(), print_info(), and show_position_values().
char* cdperror_string | ( | int | errno | ) |
Gets a string describing the error identified by an error number.
errno | Error number integer (a CDA_* constant as defined in cl.h) |
References CDA_EALIGN, CDA_EARGS, CDA_EATTTYPE, CDA_EBADREGEX, CDA_EBUFFER, CDA_EFSETINV, CDA_EIDORNG, CDA_EIDXORNG, CDA_EINTERNAL, CDA_ENODATA, CDA_ENOMEM, CDA_ENOSTRING, CDA_ENULLATT, CDA_ENYI, CDA_EOTHER, CDA_EPATTERN, CDA_EPOSORNG, CDA_EREMOTE, CDA_ESTRUC, and CDA_OK.
Referenced by cdperror(), ensure_corpus_size(), and OptimizeStringConstraint().
int cl_alg2cpos | ( | Attribute * | attribute, | |
int | alg, | |||
int * | source_region_start, | |||
int * | source_region_end, | |||
int * | target_region_start, | |||
int * | target_region_end | |||
) |
Gets the corpus positions of an alignment on the given align-attribute.
Note that four corpus positions are retrieved, into the addresses given as parameters.
attribute | The align-attribute to look on. | |
alg | The ID of the alignment whose positions are wanted. | |
source_region_start | Location to put source corpus start position. | |
source_region_end | Location to put source corpus end position. | |
target_region_start | Location to put target corpus start position. | |
target_region_end | Location to put target corpus end position. |
References CDA_EIDXORNG, CDA_ENODATA, CDA_OK, cderrno, cl_has_extended_alignment(), CompAlignData, CompXAlignData, TMblob::data, TComponent::data, ensure_component(), and TComponent::size.
Referenced by check_alignment_constraints(), compose_kwic_line(), do_cqi_cl_alg2cpos(), printAlignedStrings(), and show_position_values().
int cl_cpos2alg | ( | Attribute * | attribute, | |
int | cpos | |||
) |
Gets the id number of the alignment at the specified corpus position.
attribute | The align-attribute to look on. | |
cpos | The corpus position to look at. |
References CDA_EALIGN, CDA_ENODATA, CDA_EPOSORNG, CDA_OK, cderrno, cl_has_extended_alignment(), CompAlignData, CompXAlignData, TMblob::data, TComponent::data, ensure_component(), get_alignment(), get_extended_alignment(), and TComponent::size.
Referenced by check_alignment_constraints(), compose_kwic_line(), do_cqi_cl_cpos2alg(), printAlignedStrings(), and show_position_values().
int cl_cpos2boundary | ( | Attribute * | a, | |
int | cpos | |||
) |
Compares the location of a corpus position to the regions of an s-attribute.
This determines whether the specified corpus position is within a region (i.e. a structure, an instance of that s-attribute) on the given s-attribute; and/or on a boundary; or outside a region.
a | The s-attribute on which to search. | |
cpos | The corpus position to look for. |
References CDA_ESTRUC, cderrno, cl_cpos2struc2cpos, STRUC_INSIDE, STRUC_LBOUND, and STRUC_RBOUND.
int cl_cpos2struc | ( | Attribute * | a, | |
int | cpos | |||
) |
Gets the ID number of a structure (instance of an s-attribute) that is found at the given corpus position.
a | The s-attribute on which to search. | |
cpos | The corpus position to look for. |
References cderrno, and get_num_of_struc().
Referenced by compose_kwic_line(), do_cqi_cl_cpos2lbound(), do_cqi_cl_cpos2rbound(), do_cqi_cl_cpos2struc(), eval_constraint(), get_position_values(), main(), show_position_values(), and showSurroundingStructureValues().
int cl_has_extended_alignment | ( | Attribute * | attribute | ) |
Checks whether an attribute's XALIGN component exists, that is, whether or not it has extended alignment.
attribute | An align-attribute. |
References ATT_ALIGN, cderrno, check_arg, component_state(), ComponentLoaded, ComponentUnloaded, and CompXAlignData.
Referenced by cl_alg2cpos(), cl_cpos2alg(), cl_max_alg(), and show_statistics().
int cl_max_alg | ( | Attribute * | attribute | ) |
Gets the id number of alignments on this align-attribute.
This is equal to the maximum alignment on this attribute.
attribute | An align-attribute. |
References CDA_ENODATA, CDA_OK, cderrno, cl_has_extended_alignment(), CompAlignData, CompXAlignData, ensure_component(), and TComponent::size.
Referenced by do_cqi_cl_attribute_size(), and show_statistics().
int cl_max_struc | ( | Attribute * | a | ) |
Gets the maximum for this S-attribute (ie the size of the S-attribute).
The result of this function is equal to the number of instances of this s-attribute in the corpus.
a | The s-attribute to evaluate. |
References cderrno, and get_nr_of_strucs().
Referenced by add_key(), compose_kwic_line(), do_cqi_cl_attribute_size(), main(), matchfirstpattern(), and show_statistics().
int cl_strcmp | ( | char * | s1, | |
char * | s2 | |||
) |
CL internal string comparison (uses signed char on all platforms).
Referenced by cl_set_intersection(), cl_string_list_strcmp(), get_id_of_string(), and scompare().
int ClosePositionStream | ( | PositionStream * | ps | ) |
Deletes a PositionStream object.
References BSclose().
Referenced by compress_reversed_index(), and decompress_check_reversed_index().
int* collect_matches | ( | Attribute * | attribute, | |
int * | word_ids, | |||
int | number_of_words, | |||
int | sort, | |||
int * | size_of_table, | |||
int * | restrictor_list, | |||
int | restrictor_list_size | |||
) |
Gets a list of corpus positions matching a list of ids.
This function returns an (ordered) list of all corpus positions which match one of the ids given in the list of ids. The table is allocated with malloc, so free it when you don't need it any more.
The list itself is returned; its size is placed in size_of_table. This size is, of course, the same as the cumulative id frequency of the ids (because each corpus position matching one of the ids is added into the list).
BEWARE: when the id list is rather big or there are highly-frequent ids in the id list (for example, after a call to collect_matching_ids with the pattern ".*") this will give a copy of the corpus -- for which you probably don't have enough memory!!! It is therefore a good idea to call cumulative_id_frequency before and to introduce some kind of bias.
A note on the last two parameters, which are currently unused: restrictor_list is a list of integer pairs [a,b] which means that the returned value only contains positions which fall within at least one of these intervals. The list must be sorted by the start positions, and secondarily by b. restrictor_list_size is the number of integers in this list, NOT THE NUMBER OF PAIRS. WARNING: CURRENTLY UNIMPLEMENTED. {NB -- this description of restrictor_list_size DOESN'T MATCH the one for get_positions(), which this function calls...}
REMEMBER: this monster returns a list of corpus indices, not a list of ids.
attribute | The P-attribute we are looking in | |
word_ids | A list of item ids (i.e. id codes for items on this attribute). | |
number_of_words | The length of this list. | |
sort | boolean: return sorted list? | |
size_of_table | The size of the allocated table will be placed here. | |
restrictor_list | See function description. | |
restrictor_list_size | See function description. |
References ATT_POS, CDA_EIDORNG, CDA_ENODATA, CDA_OK, cderrno, check_arg, cl_malloc(), CompLexiconIdx, cumulative_id_frequency(), ensure_component(), get_positions(), intcompare(), and TComponent::size.
Referenced by calculate_initial_matchlist_1(), get_corpus_positions(), and get_matched_corpus_positions().
int* collect_matching_ids | ( | Attribute * | attribute, | |
char * | pattern, | |||
int | flags, | |||
int * | number_of_matches | |||
) |
Gets a list of the ids of those items on a given Attribute that match a particular regular-expression pattern.
The pattern is interpreted with the CL regex engine, q.v.
The function returns a pointer to a sequence of ints of size number_of_matches. The list is allocated with malloc(), so do a cl_free() when you don't need it any more.
attribute | The p-attribute to look on. | |
pattern | String containing the pattern against which to match each item on the attribute | |
flags | Flags for the regular expression system via cl_new_regex. | |
number_of_matches | This is set to the number of item ids found, i.e. the size of the returned buffer. |
References _CL_Regex::anchor_end, _CL_Regex::anchor_start, ATT_POS, _CL_Regex::buffer, CDA_EBADREGEX, CDA_ENODATA, CDA_OK, cderrno, check_arg, cl_calloc(), cl_debug, cl_delete_regex(), cl_free, cl_malloc(), cl_new_regex(), cl_regex_error, cl_regex_optimised(), cl_string_canonical(), CompLexicon, CompLexiconIdx, TMblob::data, TComponent::data, ensure_component(), _CL_Regex::flags, _CL_Regex::grain, _CL_Regex::grain_len, _CL_Regex::grains, IGNORE_CASE, IGNORE_DIAC, _CL_Regex::iso_string, _CL_Regex::jumptable, latin1, TComponent::size, and word.
Referenced by get_matched_corpus_positions(), and OptimizeStringConstraint().
int cumulative_id_frequency | ( | Attribute * | attribute, | |
int * | word_ids, | |||
int | number_of_words | |||
) |
Calculates the total frequency of all items on a list of item IDs.
This function returns the sum of the word frequencies of words, which is an array of word_ids with length number_of_words.
The result is therefore the number of corpus positions which match one of the words.
attribute | P-attribute on which these items are found. | |
word_ids | An array of item IDs. | |
number_of_words | Length of the word_ids array. |
References ATT_POS, CDA_ENODATA, CDA_OK, cderrno, check_arg, and get_id_frequency().
Referenced by collect_matches().
int get_alg_attribute | ( | Attribute * | attribute, | |
int | position, | |||
int * | source_corpus_start, | |||
int * | source_corpus_end, | |||
int * | aligned_corpus_start, | |||
int * | aligned_corpus_end | |||
) |
Gets the corpus positions of an alignment on the given align-attribute.
This is for old-style alignments only: it doesn't (can't) deal with extended alignments. Deprecated: use cl_alg2cpos instead (but note its parameters are not identical).
attribute | The align-attribute to look on. | |
position | The corpus position {??} of the alignment whose positions are wanted. | |
source_corpus_start | Location to put source corpus start position. | |
source_corpus_end | Location to put source corpus end position. | |
aligned_corpus_start | Location to put target corpus start position. | |
aligned_corpus_end | Location to put target corpus end position. |
References ATT_ALIGN, CDA_ENODATA, CDA_EPOSORNG, CDA_OK, cderrno, check_arg, CompAlignData, TMblob::data, TComponent::data, ensure_component(), get_alignment(), and TComponent::size.
int get_alignment | ( | int * | data, | |
int | size, | |||
int | position | |||
) |
Gets the id number of the alignment at the specified corpus position.
For use with non-extended alignments. Requires members of the ALIGN component as arguments.
Not an exported function!
{Query:am I correct that "position" here means a cpos?? -- AH} {If I'm not, other docblocks in cdaccess also have errors}
data | The data member of a CompAlignData component. | |
size | The size member of the same CompAlignData component. | |
position | The corpus position to look at. |
Referenced by cl_cpos2alg(), and get_alg_attribute().
int get_attribute_size | ( | Attribute * | attribute | ) |
Gets the maximum position on this P-attribute (ie the size of the attribute).
The result of this function is equal to the number of tokens in the attribute.
If the attribute's item sequence is compressed, this is read from the attribute's Huffman code descriptor block.
Otherwise, it is read from the size member of the Attribute's CompCorpus component.
References ATT_POS, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompCorpus, CompHuffCodes, corpus, ensure_component(), POS_Attribute::hc, item_sequence_is_compressed(), _huffman_code_descriptor::length, _Attribute::pos, and TComponent::size.
Referenced by compose_kwic_line(), get_matched_corpus_positions(), get_positions(), OpenPositionStream(), Setop(), and SystemCorpusSize().
int get_bounds_of_nth_struc | ( | Attribute * | attribute, | |
int | struc_num, | |||
int * | struc_start, | |||
int * | struc_end | |||
) |
Retrieves the start-and-end corpus positions of a specified structure of the given s-attribute type.
attribute | An s-attribute. | |
struc_num | The instance of that s-attribute to retrieve (i.e. the struc_num'th instance of this s-attribute in the corpus). | |
struc_start | Location to put the starting corpus position. | |
struc_end | Location to put the ending corpus position. |
References ATT_STRUC, CDA_EIDXORNG, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompStrucData, TMblob::data, TComponent::data, ensure_component(), and TComponent::size.
Referenced by calculate_ranges(), and feature_match().
int get_extended_alignment | ( | int * | data, | |
int | size, | |||
int | position | |||
) |
Gets the id number of the alignment at the specified corpus position.
For use with extended alignments. Requires members of the XALIGN component as arguments.
Not an exported function!
data | The data member of a CompXAlignData component. | |
size | The size member of the same CompXAlignData component. | |
position | The corpus position to look at. |
References CDA_EALIGN.
Referenced by cl_cpos2alg().
int get_id_at_position | ( | Attribute * | attribute, | |
int | position | |||
) |
Gets the integer ID of the item at the specified position on the given p-attribute.
attribute | The P-attribute to look on. | |
position | The corpus position to look at. |
References _Attribute::any, ATT_POS, BSclose(), BSopen(), BSread(), BSseek(), CDA_ENODATA, CDA_EPOSORNG, CDA_OK, cderrno, check_arg, CompCorpus, CompHuffCodes, CompHuffSeq, CompHuffSync, COMPRESS_DEBUG, corpus, TMblob::data, TComponent::data, ensure_component(), POS_Attribute::hc, item_sequence_is_compressed(), _huffman_code_descriptor::length, _huffman_code_descriptor::min_code, _Attribute::pos, _huffman_code_descriptor::symbols, _huffman_code_descriptor::symindex, SYNCHRONIZATION, POS_Attribute::this_block, and POS_Attribute::this_block_nr.
Referenced by eval_bool(), feature_match(), get_leaf_value(), get_position_values(), and get_string_at_position().
int get_id_frequency | ( | Attribute * | attribute, | |
int | id | |||
) |
Gets the frequency of an item on this attribute.
attribute | The P-attribute to look on | |
id | Identifier of an item on this attribute. |
References ATT_POS, CDA_EIDXORNG, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompCorpusFreqs, TMblob::data, TComponent::data, and ensure_component().
Referenced by call_predefined_function(), compute_code_lengths(), cumulative_id_frequency(), get_id_info(), get_positions(), and OpenPositionStream().
int get_id_from_sortidx | ( | Attribute * | attribute, | |
int | sort_index_position | |||
) |
Gets the ID code of the item at the specified position in the Attribute's sorted wordlist index.
attribute | The (positional) Attribute whose index is to be searched. | |
sort_index_position | The offset in the index where the ID code is to be found. |
References ATT_POS, CDA_EIDXORNG, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompLexiconSrt, TMblob::data, TComponent::data, and ensure_component().
Referenced by do_show().
char* get_id_info | ( | Attribute * | attribute, | |
int | index, | |||
int * | freq, | |||
int * | slen | |||
) |
Gets the string of the item with the specified ID on the given p-attribute.
As well as returning the string, other information about the item is inserted into locations specified by other parameters.
attribute | The P-attribute to look on. | |
index | The ID of the item to look at. | |
freq | Will be set to the frequency of the item. | |
slen | Will be set to the string-length of the item. |
References ATT_POS, CDA_OK, cderrno, check_arg, get_id_frequency(), get_id_string_len(), and get_string_of_id().
Referenced by print_info().
int get_id_of_string | ( | Attribute * | attribute, | |
char * | id_string | |||
) |
Gets the ID code that corresponds to the specified string on the given P-attribute.
attribute | The (positional) Attribute to look the string up on | |
id_string | The string of an item on this attribute |
References ATT_POS, CDA_ENODATA, CDA_ENOSTRING, CDA_EOTHER, CDA_OK, cderrno, check_arg, cl_strcmp(), CompLexicon, CompLexiconIdx, CompLexiconSrt, TMblob::data, TComponent::data, ensure_component(), and TComponent::size.
Referenced by call_predefined_function(), get_corpus_positions(), map_token_to_class_number(), member_of_class_s(), OptimizeStringConstraint(), read_mapping(), show_features(), and VerifyVariable().
int get_id_range | ( | Attribute * | attribute | ) |
Gets the maximum id on this P-attribute (ie the range of the attribute's ID codes).
The result of this function is equal to the number of types in this attribute.
References ATT_POS, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompLexiconIdx, ensure_component(), and TComponent::size.
Referenced by get_matched_corpus_positions(), get_positions(), OpenPositionStream(), and OptimizeStringConstraint().
int get_id_string_len | ( | Attribute * | attribute, | |
int | id | |||
) |
Calculates the length of the string that corresponds to the specified item on the given P-attribute.
attribute | The (positional) Attribute to look up the item on | |
id | Identifier of an item on this attribute. |
References ATT_POS, CDA_EIDORNG, CDA_ENODATA, CDA_EOTHER, CDA_OK, cderrno, check_arg, CompLexiconIdx, TMblob::data, TComponent::data, ensure_component(), get_string_of_id(), and TComponent::size.
Referenced by get_id_info().
int get_nr_of_strucs | ( | Attribute * | attribute, | |
int * | nr_strucs | |||
) |
Gets the number of instances of an s-attribute in the corpus.
Deprecated: use cl_max_struc instead.
attribute | The s-attribute to count. | |
nr_strucs | The number of instances is put here. |
References ATT_STRUC, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompStrucData, ensure_component(), and TComponent::size.
Referenced by calculate_ranges(), and cl_max_struc().
int get_num_of_struc | ( | Attribute * | attribute, | |
int | position, | |||
int * | struc_num | |||
) |
Gets the ID number of a structure (instance of an s-attribute) that is found at the given corpus position.
Deprecated function: use cl_cpos2struc instead.
attribute | The s-attribute on which to search. | |
position | The corpus position to look for. | |
struc_num | Location where the number of the structure that is found will be put. |
References ATT_STRUC, CDA_ENODATA, CDA_ESTRUC, CDA_OK, cderrno, check_arg, CompStrucData, TMblob::data, TComponent::data, ensure_component(), get_previous_mark(), and TComponent::size.
Referenced by calculate_ranges(), cl_cpos2struc(), and structure_value_at_position().
int* get_positions | ( | Attribute * | attribute, | |
int | id, | |||
int * | freq, | |||
int * | restrictor_list, | |||
int | restrictor_list_size | |||
) |
Gets all the corpus positions where the specified item is found on the given P-attribute.
The restrictor list is a set of ranges in which instances of the item MUST occur to be collected by this function. If no restrictor list is specified (i.e. restrictor_list is NULL), then ALL corpus positions where the item occurs are returned.
This restrictor list has the form of a list of ranges {start,end} of size restrictor_list_size, that is, the number of ints in this area is 2 * restrictor_list_size!!!
attribute | The P-attribute to look on. | |
id | The id of the item to look for. | |
freq | The frequency of the specified item is written here. This will be 0 in the case of errors. | |
restrictor_list | A list of pairs of integers specifying ranges {start,end} in the corpus | |
restrictor_list_size | The number of PAIRS of ints in the restrictor list. |
References ATT_POS, BSclose(), BSopen(), BSseek(), CDA_EIDORNG, CDA_ENODATA, CDA_OK, cderrno, check_arg, cl_free, cl_malloc(), cl_realloc(), CompCompRF, CompCompRFX, CompRevCorpus, CompRevCorpusIdx, compute_ba(), TMblob::data, TComponent::data, ensure_component(), get_attribute_size(), get_id_frequency(), get_id_range(), inverted_file_is_compressed(), range, and read_golomb_code_bs().
Referenced by calculate_initial_matchlist_1(), and collect_matches().
int* get_previous_mark | ( | int * | data, | |
int | size, | |||
int | position | |||
) |
Gets a pointer to the location where a structure is stored.
The structure (instance of an s-attribute) that is found is the one in which the specified corpus position occurs.
Non-exported function.
data | "data.data" member of an s-attribute | |
size | "size" member of the same s-attribute | |
position | The corpus position to look for. |
Referenced by get_num_of_struc(), and get_struc_attribute().
int get_sortidxpos_of_id | ( | Attribute * | attribute, | |
int | id | |||
) |
Gets the position in the Attribute's sorted wordlist index of the item with the specified ID code.
This function is NOT YET IMPLEMENTED.
attribute | The (positional) Attribute whose index is to be searched | |
id | Identifier of an item on this attribute. |
References ATT_POS, CDA_ENODATA, CDA_ENYI, CDA_OK, cderrno, check_arg, CompLexiconSrt, and ensure_component().
char* get_string_at_position | ( | Attribute * | attribute, | |
int | position | |||
) |
Gets the string of the item at the specified position on the given p-attribute.
attribute | The P-attribute to look on. | |
position | The corpus position to look at. |
References ATT_POS, CDA_OK, cderrno, check_arg, get_id_at_position(), and get_string_of_id().
Referenced by get_leaf_value(), get_position_values(), and print_next_region().
char* get_string_of_id | ( | Attribute * | attribute, | |
int | id | |||
) |
Gets the string that corresponds to the specified item on the given P-attribute.
attribute | The Attribute to look the item up on | |
id | Identifier of an item on this attribute. |
References ATT_POS, CDA_EIDORNG, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompLexicon, CompLexiconIdx, TMblob::data, TComponent::data, ensure_component(), and TComponent::size.
Referenced by call_predefined_function(), compute_code_lengths(), eval_bool(), get_id_info(), get_id_string_len(), get_string_at_position(), and print_mapping().
int get_struc_attribute | ( | Attribute * | attribute, | |
int | position, | |||
int * | struc_start, | |||
int * | struc_end | |||
) |
Gets the start and end positions of the instance of the given S-attribute found at the specified corpus position.
This function finds one particular instance of the S-attribute, and assigns its start and end points to the locations given as arguments.
attribute | The s-attribute to search. | |
position | The corpus position to search for. | |
struc_start | Location for the start position of the instance. | |
struc_end | Location for the end position of the instance. |
References ATT_STRUC, CDA_ENODATA, CDA_ESTRUC, CDA_OK, cderrno, check_arg, CompStrucData, TMblob::data, TComponent::data, ensure_component(), get_previous_mark(), and TComponent::size.
Referenced by calculate_ranges(), eval_bool(), get_leaf_value(), meet_mu(), show_position_values(), and simulate().
static int intcompare | ( | const void * | i, | |
const void * | j | |||
) | [static] |
internal function for use with qsort
Referenced by collect_matches().
int inverted_file_is_compressed | ( | Attribute * | attribute | ) |
Check whether the index (inverted file) of the given P-attribute is compressed.
See comments in body of function for what counts as "compressed".
References ATT_POS, cderrno, check_arg, CompCompRF, CompCompRFX, component_state(), ComponentLoaded, ComponentUnloaded, CompRevCorpus, and CompRevCorpusIdx.
Referenced by get_positions(), and OpenPositionStream().
int item_sequence_is_compressed | ( | Attribute * | attribute | ) |
Checks whether the item sequence of the given P-attribute is compressed.
See comments in body of function for what counts as "compressed".
References ATT_POS, cderrno, check_arg, CompCorpus, CompHuffCodes, CompHuffSeq, CompHuffSync, component_state(), ComponentLoaded, ComponentUnloaded, POS_Attribute::hc, and _Attribute::pos.
Referenced by get_attribute_size(), get_id_at_position(), and load_component().
int nr_of_arguments | ( | Attribute * | attribute | ) |
Count the number of arguments on an attribute's dynamic argument list.
attribute | pointer to the Attribute object to analyse; it must be a dynamic attribute. |
References Dynamic_Attribute::arglist, ATT_DYN, ATTAT_VAR, CDA_OK, cderrno, check_arg, _Attribute::dyn, _DynArg::next, and _DynArg::type.
PositionStream OpenPositionStream | ( | Attribute * | attribute, | |
int | id | |||
) |
Creates a new PositionStream object.
attribute | The P-attribute to open the position stream on | |
id | The id that the new PositionStream will have. This is the id of an item on the specified attribute. |
References ATT_POS, _position_stream_rec_::attribute, _position_stream_rec_::b, _position_stream_rec_::base, _position_stream_rec_::bs, BSopen(), BSseek(), CDA_EIDORNG, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompCompRF, CompCompRFX, CompRevCorpus, CompRevCorpusIdx, compute_ba(), TMblob::data, TComponent::data, ensure_component(), get_attribute_size(), get_id_frequency(), get_id_range(), _position_stream_rec_::id, _position_stream_rec_::id_freq, inverted_file_is_compressed(), _position_stream_rec_::is_compressed, _position_stream_rec_::last_pos, _position_stream_rec_::nr_items, and range.
Referenced by compress_reversed_index(), and decompress_check_reversed_index().
int ReadPositionStream | ( | PositionStream | ps, | |
int * | buffer, | |||
int | buffer_size | |||
) |
Reads corpus positions from a position stream to a buffer.
ps | The position stream to read. | |
buffer | Location to put the resulting item positions. | |
buffer_size | Maximum number of item positions to read. (Fewer will be read if fewer are available). |
References _position_stream_rec_::b, _position_stream_rec_::base, _position_stream_rec_::bs, _position_stream_rec_::id_freq, _position_stream_rec_::is_compressed, _position_stream_rec_::last_pos, _position_stream_rec_::nr_items, and read_golomb_code_bs().
Referenced by compress_reversed_index(), and decompress_check_reversed_index().
int s_v_comp | ( | const void * | v1, | |
const void * | v2 | |||
) |
A non-exported function used by structure_value.
Referenced by structure_value().
int structure_has_values | ( | Attribute * | attribute | ) |
Checks whether this s-attribute has attribute values.
References ATT_STRUC, CDA_OK, cderrno, check_arg, component_state(), ComponentLoaded, ComponentUnloaded, CompStrucAVS, CompStrucAVX, Struc_Attribute::has_attribute_values, and _Attribute::struc.
Referenced by ComputePrintStructures(), do_LabelReference(), structure_value(), and update_context_descriptor().
char* structure_value | ( | Attribute * | attribute, | |
int | struc_num | |||
) |
Gets the value that is associated with the specified instance of the given s-attribute.
attribute | An S-attribute. | |
struc_num | ID of the structure whose value is wanted (ie, function gets value of struc_num'th instance of this s-attribute) |
References ATT_STRUC, CDA_EIDXORNG, CDA_EINTERNAL, CDA_ENODATA, CDA_OK, cderrno, check_arg, CompStrucAVS, CompStrucAVX, TMblob::data, TComponent::data, ensure_component(), s_v_comp(), TComponent::size, and structure_has_values().
Referenced by structure_value_at_position().
char* structure_value_at_position | ( | Attribute * | struc, | |
int | position | |||
) |
Gets the value associated with the instance of the given s-attribute that occurs at the specified corpus position.
struc | The s-attribute to search through. | |
position | The corpus position being queried. |
References get_num_of_struc(), and structure_value().
Referenced by get_leaf_value(), and get_print_attribute_values().
int cderrno |
Error number for CL: is set after access to any of various corpus-data-access functions.
Referenced by call_dynamic_attribute(), call_predefined_function(), cdperror(), check_alignment_constraints(), cl_alg2cpos(), cl_cpos2alg(), cl_cpos2boundary(), cl_cpos2struc(), cl_has_extended_alignment(), cl_make_set(), cl_max_alg(), cl_max_struc(), cl_new_regex(), cl_set_intersection(), cl_set_size(), collect_matches(), collect_matching_ids(), compress_reversed_index(), compute_code_lengths(), cumulative_id_frequency(), decode_check_huff(), decompress_check_reversed_index(), do_cqi_cl_regex2id(), do_show(), ensure_corpus_size(), eval_bool(), get_alg_attribute(), get_attribute_size(), get_bounds_of_nth_struc(), get_corpus_positions(), get_id_at_position(), get_id_frequency(), get_id_from_sortidx(), get_id_info(), get_id_of_string(), get_id_range(), get_id_string_len(), get_leaf_value(), get_nr_of_strucs(), get_num_of_struc(), get_position_values(), get_positions(), get_sortidxpos_of_id(), get_string_at_position(), get_string_of_id(), get_struc_attribute(), inverted_file_is_compressed(), item_sequence_is_compressed(), map_token_to_class_number(), meet_mu(), member_of_class_s(), nr_of_arguments(), OpenPositionStream(), OptimizeStringConstraint(), print_info(), read_mapping(), send_cl_error(), Setop(), show_position_values(), structure_has_values(), and structure_value().