#include <ctype.h>
#include <sys/types.h>
#include "globals.h"
#include "endian.h"
#include "corpus.h"
#include "macros.h"
#include "fileutils.h"
#include "cdaccess.h"
#include "makecomps.h"
#include "list.h"
#include "attributes.h"
#define KEEP_SILENT |
If KEEP_SILENT is defined, ensure_component won't complain about non-accessible data.
char* aid_name | ( | int | i | ) |
Gets a string containing a description of the specified attribute type.
Non-exported function.
i | The attribute-type whose name is required. (Should be one of the values of the constants defined in cl.h.) |
References ATT_ALIGN, ATT_DYN, ATT_NONE, ATT_POS, and ATT_STRUC.
Referenced by create_component(), describe_attribute(), load_component(), and setup_attribute().
char* argid_name | ( | int | i | ) |
Gets a string containing a description of the specified dynamic attribute argument type.
Non-exported function.
i | The argument-type whose name is required. (Should be one of the values of the constants defined in cl.h.) |
References ATTAT_FLOAT, ATTAT_INT, ATTAT_NONE, ATTAT_PAREF, ATTAT_POS, ATTAT_STRING, and ATTAT_VAR.
Referenced by describe_attribute().
int attr_drop_attribute | ( | Attribute * | attribute | ) |
Deletes the specified Attribute object.
References _Attribute::any, Dynamic_Attribute::arglist, ATT_DYN, ATT_NONE, ATT_POS, TCorpus::attributes, Dynamic_Attribute::call, cl_free, comp_drop_component(), CompDirectory, CompLast, corpus, _Attribute::dyn, POS_Attribute::hc, _DynArg::next, _Attribute::pos, and _Attribute::type.
Referenced by drop_attribute(), and drop_corpus().
char* cid_name | ( | ComponentID | id | ) |
Gets a string containing the name of the attribute component with the specified ID-code.
References find_cid_id(), and component_field_spec::name.
Referenced by create_component(), declare_component(), describe_component(), do_attribute(), ensure_component(), load_component(), make_component(), and validate_revcorp().
char* cl_make_set | ( | char * | s, | |
int | split | |||
) |
Generates a set attribute value.
s | The input string. | |
split | Boolean; if True, s is split on whitespace. If False, the function expects input in '|'-delimited format. |
References CDA_EFSETINV, CDA_OK, cderrno, cl_delete_string_list(), cl_free, cl_malloc(), cl_new_string_list(), cl_strdup(), cl_string_list_append(), cl_string_list_get(), cl_string_list_qsort(), and cl_string_list_size().
Referenced by addline(), check_set(), and open_range().
int cl_set_intersection | ( | char * | result, | |
const char * | s1, | |||
const char * | s2 | |||
) |
Computes the intersection of two set attribute values.
Compute intersection of two set attribute values (in standard syntax, i.e. sorted and '|'-delimited); memory for the result string must be allocated by the caller.
References CDA_EBUFFER, CDA_EFSETINV, CDA_OK, cderrno, CL_DYN_STRING_SIZE, and cl_strcmp().
Referenced by call_predefined_function().
int cl_set_size | ( | char * | s | ) |
Counts the number of elements in a set attribute value.
This function counts the number of elements in a set attribute value (using '|'-delimited standard syntax).
References CDA_EFSETINV, CDA_OK, and cderrno.
Referenced by call_predefined_function().
ComponentState comp_component_state | ( | Component * | component | ) |
Works out and returns the state of the component.
References CompDirectory, ComponentDefined, ComponentLoaded, ComponentUndefined, ComponentUnloaded, TMblob::data, TComponent::data, file_length(), TComponent::id, and TComponent::path.
Referenced by component_state(), creat_sort_lexicon(), describe_component(), ensure_component(), and load_component().
int comp_drop_component | ( | Component * | comp | ) |
Delete a Component object.
The specified component object, and all memory associated with it, is freed.
References _Attribute::any, TComponent::attribute, cl_free, CompHuffCodes, CompLast, TComponent::corpus, TComponent::data, POS_Attribute::hc, TComponent::id, mfree(), TComponent::path, and _Attribute::pos.
Referenced by attr_drop_attribute(), and drop_component().
char* component_full_name | ( | Attribute * | attribute, | |
ComponentID | cid, | |||
char * | path | |||
) |
Initializes the path of an attribute Component.
This function starts with the path it is passed, and then evaluates variables in the form $UPPERCASE. The resulting path is assigned to the specified entry in the component array for the given Attribute.
Note that if it is called for a Component that does not yet exist, this function creates the component by calling declare_component().
attribute | The Attribute object to work with. | |
cid | The identifier of the Component to which the path is to be added. | |
path | The path to assign to the component. Can be NULL, in which case, the default path from Component_Field_Specs is used. |
References _Attribute::any, buf, cl_strdup(), component_full_name(), declare_component(), component_field_spec::default_path, find_cid_id(), find_cid_name(), component_field_spec::id, MAX_LINE_LENGTH, TComponent::path, and STREQ.
Referenced by component_full_name(), compress_reversed_index(), compute_code_lengths(), creat_freqs(), declare_component(), decode_check_huff(), decompress_check_reversed_index(), and main().
ComponentID component_id | ( | char * | name | ) |
Gets the identifier of the attribute component with the specified name.
References CompLast, find_cid_name(), and component_field_spec::id.
Referenced by main().
ComponentState component_state | ( | Attribute * | attribute, | |
ComponentID | cid | |||
) |
Gets the state of a specified component on the given attribute.
attribute | The attribute to look at. | |
cid | The component whose state to get. |
References _Attribute::any, comp_component_state(), CompLast, and ComponentUndefined.
Referenced by cl_has_extended_alignment(), component_ok(), create_component(), inverted_file_is_compressed(), item_sequence_is_compressed(), make_component(), and structure_has_values().
Component* create_component | ( | Attribute * | attribute, | |
ComponentID | cid | |||
) |
Creates the specified component for the given Attribute.
This function only works for the following components: CompRevCorpus, CompRevCorpusIdx, CompLexiconSrt, CompCorpusFreqs. Also, it only works if the state of the component is ComponentDefined.
"Create" here means create the CWB data files. This is accomplished by calling one of the "creat_*" functions, of which there is one for each of the four available component types. These are defined in makecomps.c.
Each of these functions reads in the data it needs, processes it, and then writes a new file.
attribute | The Attribute object to work with. | |
cid | The identifier of the Component to create. |
References aid_name(), _Attribute::any, cid_name(), cl_debug, CompAlignData, CompCompRF, CompCompRFX, CompCorpus, CompCorpusFreqs, CompDirectory, CompHuffCodes, CompHuffSeq, CompHuffSync, CompLast, CompLexicon, CompLexiconIdx, CompLexiconSrt, component_state(), ComponentDefined, CompRevCorpus, CompRevCorpusIdx, CompStrucAVS, CompStrucAVX, CompStrucData, CompXAlignData, creat_freqs(), creat_rev_corpus(), creat_rev_corpus_idx(), creat_sort_lexicon(), TMblob::data, TComponent::data, TComponent::path, and _Attribute::type.
Referenced by ensure_component(), and make_component().
Component* declare_component | ( | Attribute * | attribute, | |
ComponentID | cid, | |||
char * | path | |||
) |
Sets up a component for the given attribute.
If the component of the specified ComponentID does not already exist, a new Component object is created, set up, and assigned to the attribute's component array. Finally, the component path is initialised using the path argument.
attribute | The Attribute for which to create this component. | |
cid | The ID of the component to create. | |
path | Path to be passed to component_full_name. Can be NULL. |
References _Attribute::any, TComponent::attribute, cid_name(), component_full_name(), TComponent::corpus, TComponent::data, TComponent::id, init_mblob(), and TComponent::path.
Referenced by component_full_name(), and declare_default_components().
void declare_default_components | ( | Attribute * | attribute | ) |
Sets up a default set of components on the given attribute.
Note that in each case, a call is made to declare_component with the path as NULL.
References _Attribute::any, CompDirectory, CompLast, declare_component(), _Attribute::type, and component_field_spec::using_atts.
void describe_attribute | ( | Attribute * | attribute | ) |
Prints a description of the attribute (including its components) to STDOUT.
References aid_name(), _Attribute::any, argid_name(), Dynamic_Attribute::arglist, ATT_DYN, Dynamic_Attribute::call, CompDirectory, CompLast, describe_component(), _Attribute::dyn, _DynArg::next, Dynamic_Attribute::res_type, _DynArg::type, and _Attribute::type.
Referenced by describe_corpus().
void describe_component | ( | Component * | component | ) |
Prints a description of the component to STDOUT.
References _Attribute::any, TComponent::attribute, cid_name(), comp_component_state(), ComponentDefined, ComponentLoaded, ComponentUndefined, ComponentUnloaded, TComponent::id, and TComponent::path.
Referenced by describe_attribute().
int drop_attribute | ( | Corpus * | corpus, | |
char * | attribute_name, | |||
int | type, | |||
char * | data | |||
) |
Drops an attribute for the given corpus.
The attribute to be dropped is specified by its attribute name and its type (i.e. no pointer needed: compare attr_drop_attribute).
After calling this, the corpus no longer has the attribute -- it is the same as if it had never been defined.
References attr_drop_attribute(), and find_attribute().
int drop_component | ( | Attribute * | attribute, | |
ComponentID | cid | |||
) |
Drops the specified component for the given Attribute.
attribute | The Attribute object to work with. | |
cid | The identifier of the Component to drop. |
References _Attribute::any, and comp_drop_component().
Referenced by main().
Component* ensure_component | ( | Attribute * | attribute, | |
ComponentID | cid, | |||
int | try_creation | |||
) |
Ensures that a component is loaded and ready.
The state of the component specified should be ComponentLoaded once this function has run (assuming all is well). If the component is unloaded, the function will try to load it. If the component is defined, the function MAY try to create it. If the component is undefined, nothing will be done.
There are flags in attributes.c that control the behaviour of this function (e.g. if failure to ensure causes the program to abort).
attribute | The Attribute object to work with. | |
cid | The identifier of the Component to "ensure". | |
try_creation | Boolean. True = attempt to create a component that does not exist. False = don't. This behaviour only applies when ALLOW_COMPONENT_CREATION is defined; otherwise component creation will never be attempted. |
References _Attribute::any, cid_name(), comp_component_state(), ComponentDefined, ComponentLoaded, ComponentUndefined, ComponentUnloaded, create_component(), and load_component().
Referenced by cl_alg2cpos(), cl_cpos2alg(), cl_max_alg(), collect_matches(), collect_matching_ids(), compress_reversed_index(), compute_code_lengths(), creat_freqs(), creat_rev_corpus(), creat_rev_corpus_idx(), creat_sort_lexicon(), get_alg_attribute(), get_attribute_size(), get_bounds_of_nth_struc(), get_id_at_position(), get_id_frequency(), get_id_from_sortidx(), get_id_of_string(), get_id_range(), get_id_string_len(), get_nr_of_strucs(), get_num_of_struc(), get_positions(), get_sortidxpos_of_id(), get_string_of_id(), get_struc_attribute(), OpenPositionStream(), structure_value(), and validate_revcorp().
find_attribute(): Finds an attribute that matches the specified parameters, if one exists.
corpus | The corpus in which to search for the attribute. | |
attribute_name | The name of the attribute (i.e. the handle it has in the registry file) | |
type | Type of attribute to be searched for. | |
data | NOT USED. |
References _Attribute::any, TCorpus::attributes, STREQ, and _Attribute::type.
Referenced by compute_grouping(), ComputePrintStructures(), do_attribute_show(), do_Description(), do_IDReference(), do_LabelReference(), do_SimpleVariableReference(), do_StringConstraint(), do_StructuralContext(), drop_attribute(), evaluate_target(), findcorpus(), FunctionCall(), get_matched_corpus_positions(), main(), prepare_AlignmentConstraints(), printAlignedStrings(), read_mapping(), RecomputeAL(), red_factor(), Setop(), setup_attribute(), SortSubcorpus(), SystemCorpusSize(), update_context_descriptor(), verify_context_descriptor(), and VerifyList().
struct component_field_spec* find_cid_id | ( | ComponentID | id | ) | [read] |
Gets the specification for the identified component field.
This function returns a pointer to an element of the global, static Component_Field_Specs array.
id | The ComponentID for the component field to be looked up. |
References CompLast.
Referenced by cid_name(), component_full_name(), and MayHaveComponent().
struct component_field_spec* find_cid_name | ( | char * | name | ) | [read] |
Gets the specification for the named component field.
This function returns a pointer to an element of the global, static Component_Field_Specs array.
name | A string that identifies the component field to be looked up. |
References CompLast.
Referenced by component_full_name(), and component_id().
Component* find_component | ( | Attribute * | attribute, | |
ComponentID | cid | |||
) |
Gets a pointer to the specified component for the given Attribute.
References _Attribute::any.
Referenced by creat_freqs().
Get a pointer to the head entry in the specified corpus's list of attributes.
References TCorpus::attributes.
Referenced by send_cqi_corpus_attributes().
Component* load_component | ( | Attribute * | attribute, | |
ComponentID | cid | |||
) |
Loads the specified component for this attribute.
"Loading" means that the file specified by the component's "path" member is read into the "data" member.
If the component is CompHuffCodes, the data is also copied to the attribute's pos.hc member.
Note that the action of this function is dependent on the component's state. If the component's state is ComponentUnloaded, the component is loaded. If the component's state is ComponentDefined, the size is set to 0 and nothing else is done.
attribute | The Attribute object to work with. | |
cid | The identifier of the Component to load. |
References aid_name(), _Attribute::any, cid_name(), comp_component_state(), CompDirectory, CompHuffCodes, CompLast, ComponentDefined, ComponentLoaded, ComponentUnloaded, TMblob::data, TComponent::data, POS_Attribute::hc, item_sequence_is_compressed(), _huffman_code_descriptor::lcount, _huffman_code_descriptor::length, _huffman_code_descriptor::max_codelen, MAXCODELEN, _huffman_code_descriptor::min_code, _huffman_code_descriptor::min_codelen, MMAPPED, TMblob::nr_items, TComponent::path, _Attribute::pos, read_file_into_blob(), TComponent::size, _huffman_code_descriptor::size, _huffman_code_descriptor::symbols, _huffman_code_descriptor::symindex, and _Attribute::type.
Referenced by ensure_component().
DynArg* makearg | ( | char * | type_id | ) |
Creates a DynArg object.
The object created is a dynamic argument of the type specified by the argument type_id, with its "next" pointer set to NULL.
type_id | String specifying the type of argument required, choose from: STRING, POS, INT, VARARG, FLOAT |
References ATTAT_FLOAT, ATTAT_INT, ATTAT_POS, ATTAT_STRING, ATTAT_VAR, _DynArg::next, and _DynArg::type.
int MayHaveComponent | ( | int | attr_type, | |
ComponentID | cid | |||
) |
Checks whether a particular Attribute type can possess the specified component field.
References CompLast, find_cid_id(), component_field_spec::id, and component_field_spec::using_atts.
Attribute* next_corpus_attribute | ( | ) |
Get a pointer to the next attribute on the list currently being processed.
References _Attribute::any.
Referenced by send_cqi_corpus_attributes().
setup_attribute(): Sets up a corpus attribute.
NEVER CALL THIS!! ONLY USED WHILE PARSING A REGISTRY ENTRY!!!!
corpus | The corpus this attribute belongs to. | |
attribute_name | The name of the attribute (i.e. the handle it has in the registry file) | |
type | Type of attribute to be created. | |
data | Used for a call to find_attribute. It is unused there. |
References aid_name(), _Attribute::any, ATT_POS, ATT_STRUC, ATTAT_POS, TCorpus::attributes, CompDirectory, CompLast, DEFAULT_ATT_NAME, find_attribute(), Struc_Attribute::has_attribute_values, POS_Attribute::hc, TCorpus::id, _Attribute::pos, _Attribute::struc, POS_Attribute::this_block_nr, and _Attribute::type.
struct component_field_spec Component_Field_Specs[] [static] |
{ { CompDirectory, "DIR", ATT_ALL, "$APATH"}, { CompCorpus, "CORPUS", ATT_POS, "$DIR/$ANAME.corpus"}, { CompRevCorpus, "REVCORP", ATT_POS, "$CORPUS.rev"}, { CompRevCorpusIdx, "REVCIDX", ATT_POS, "$CORPUS.rdx"}, { CompCorpusFreqs, "FREQS", ATT_POS, "$CORPUS.cnt"}, { CompLexicon, "LEXICON", ATT_POS, "$DIR/$ANAME.lexicon"}, { CompLexiconIdx, "LEXIDX", ATT_POS, "$LEXICON.idx"}, { CompLexiconSrt, "LEXSRT", ATT_POS, "$LEXICON.srt"}, { CompAlignData, "ALIGN", ATT_ALIGN, "$DIR/$ANAME.alg"}, { CompXAlignData, "XALIGN", ATT_ALIGN, "$DIR/$ANAME.alx"}, { CompStrucData, "STRUC", ATT_STRUC, "$DIR/$ANAME.rng"}, { CompStrucAVS, "STRAVS", ATT_STRUC, "$DIR/$ANAME.avs"}, { CompStrucAVX, "STRAVX", ATT_STRUC, "$DIR/$ANAME.avx"}, { CompHuffSeq, "CIS", ATT_POS, "$DIR/$ANAME.huf"}, { CompHuffCodes, "CISCODE", ATT_POS, "$DIR/$ANAME.hcd"}, { CompHuffSync, "CISSYNC", ATT_POS, "$CIS.syn"}, { CompCompRF, "CRC", ATT_POS, "$DIR/$ANAME.crc"}, { CompCompRFX, "CRCIDX", ATT_POS, "$DIR/$ANAME.crx"}, { CompLast, "INVALID", 0, "INVALID"} }
Global object in the "attributes" module, giving specifications for each component in the array of components that each Attribute object contains.
Non-exported variable: accessed via the attribute-looping functions.