#include <stddef.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "../cl/fileutils.h"
#include "corpmanag.h"
#include "cqp.h"
#include "options.h"
#include "output.h"
#include "ranges.h"
#include "paths.h"
#define COLON ':'
Referenced by findcorpus(), is_qualified(), load_corpusnames(), and split_subcorpus_name().
#define SLASH '^'
Referenced by findcorpus().
#define subcorpload_debug False
Referenced by attach_subcorpus().
#define SUBCORPMAGIC 36193928
Magic number for subcorpus files (the original comment marks this description as uncertain with "??").
Referenced by attach_subcorpus(), check_stamp(), save_subcorpus(), and show_subcorpus_info().
Boolean access_corpus(CorpusList *cl)
References attach_subcorpus(), False, cl::loaded, cl::range, cl::saved, cl::size, SUB, SYSTEM, TEMP, True, and cl::type.
Referenced by catalog_corpus(), change_corpus(), CorpusLoad(), cqi_find_corpus(), cqi_lookup_attribute(), do_cqi_corpus_attributes(), do_cqi_corpus_full_name(), findcorpus(), prepare_AlignmentConstraints(), prepare_Query(), red_factor(), Setop(), SortSubcorpus(), and SortSubcorpusRandomize().
static Boolean accessible(char *dir, char *file) [static]
Tests whether a file is accessible.
A file is considered accessible iff user can read it and it is not a (sub)directory.
This test is used for registry entries.
dir: Directory in which the file is to be found.
file: The filename to check.
References cl_malloc(), False, and True.
Referenced by load_corpusnames().
CorpusList* assign_temp_to_sub(CorpusList *tmp, char *subname)
References cl::abs_fn, auto_save, cl_free, cl_strdup(), cl::corpus, dropcorpus(), False, findcorpus(), initialize_cl(), cl::keywords, cl::loaded, cl::mother_name, cl::mother_size, cl::name, cl::needs_update, cl::query_corpus, cl::query_text, cl::range, cl::registry, save_subcorpus(), cl::saved, cl::size, cl::sortidx, SUB, cl::targets, TEMP, True, cl::type, and UNDEF.
Referenced by CorpusChangeTMPtoSUB(), do_undump(), and in_UnnamedCorpusCommand().
static Boolean attach_subcorpus(CorpusList *cl, char *advertised_directory, char *advertised_filename) [static]
References cl::abs_fn, cl_free, cl_malloc(), cl_strdup(), cl::corpus, cqpmessage(), dropcorpus(), ensure_syscorpus(), False, file_length(), get_fulllocalpath(), initialize_cl(), cl::keywords, cl::loaded, cl::mother_name, cl::mother_size, cl::name, cl::needs_update, OpenFile(), cl::range, cl::registry, cl::saved, cl::size, cl::sortidx, SUB, subcorpload_debug, SUBCORPMAGIC, cl::targets, TEMP, True, cl::type, and Warning.
Referenced by access_corpus(), and ensure_corpus_size().
References access_corpus(), False, cl::name, search_corpus(), set_current_corpus(), and True.
static char* changecase_string(char *str, enum case_mode mode) [static]
References cl_strdup(), and LOWER.
Referenced by get_fulllocalpath(), and GetSystemCorpus().
static char* changecase_string_no_copy(char *str, enum case_mode mode) [static]
References LOWER.
Referenced by load_corpusnames().
void check_available_corpora(enum corpus_type ct)
References load_corpusnames(), LOCAL_CORP_PATH, set_current_corpus(), SUB, SYSTEM, TEMP, and UNDEF.
Referenced by CorpusLoadDescriptors(), execute_side_effects(), and initialize_cqp().
int check_stamp(char *directory, char *fname)
References OpenFile(), and SUBCORPMAGIC.
CorpusList* CorpusChangeTMPtoSUB(CorpusList *tmp, char *subname)
References assign_temp_to_sub().
Boolean CorpusDiscard(CorpusList *cl, Boolean remove_file_also, Boolean save_if_unsaved)
References dropcorpus(), and True.
Boolean CorpusDiscardTMPCorpora(void)
References drop_temp_corpora(), and True.
CorpusList* CorpusDuplicate(CorpusList *cl, char *new_name, Boolean force_overwrite)
References duplicate_corpus().
CorpusList* CorpusDuplicateIntoTMP(CorpusList *cl, char *new_name)
References make_temp_corpus().
void CorpusListFree(void)
References free_corpuslist().
void CorpusListInit(void)
References init_corpuslist().
Boolean CorpusLoad(CorpusList *cl)
References access_corpus().
void CorpusLoadDescriptors(CorpusType ct)
References check_available_corpora().
Boolean CorpusNameQualified(char *name)
References is_qualified().
Boolean CorpusNameValid(char *name)
References valid_subcorpus_id().
Boolean CorpusSave(CorpusList *cl, char *file_name)
References save_subcorpus().
Boolean CorpusSaveAll(void)
References save_unsaved_subcorpora(), and True.
Boolean CorpusSetCurrent(CorpusList *cl)
References set_current_corpus().
Boolean CorpusSetCurrentByname(char *name)
References set_current_corpus_name().
void CorpusShowNames(CorpusType ct)
References show_corpora_files().
Boolean CorpusTouch(CorpusList *cl)
References touch_corpus().
void drop_temp_corpora(void)
References dropcorpus(), initialize_cl(), cl::next, TEMP, True, and cl::type.
Referenced by CorpusDiscardTMPCorpora(), do_undump(), in_UnnamedCorpusCommand(), and load_corpusnames().
void dropcorpus(CorpusList *cl)
References current_corpus, initialize_cl(), cl::next, set_current_corpus(), and True.
Referenced by assign_temp_to_sub(), attach_subcorpus(), copy_intervals(), CorpusDiscard(), do_cqi_cqp_drop_subcorpus(), drop_temp_corpora(), ensure_corpus_size(), and main().
CorpusList* duplicate_corpus(CorpusList *cl, char *new_name, Boolean force_overwrite)
References cl::abs_fn, auto_save, cl_malloc(), cl_strdup(), cl::corpus, cqpmessage(), False, initialize_cl(), cl::keywords, cl::loaded, LoadedCorpus(), cl::mother_name, cl::mother_size, cl::name, cl::needs_update, NewCL(), cl::next, cl::query_corpus, cl::query_text, cl::range, cl::registry, save_subcorpus(), cl::saved, cl::size, cl::sortidx, SUB, SYSTEM, cl::targets, True, cl::type, and Warning.
Referenced by copy_intervals(), CorpusDuplicate(), findcorpus(), and in_CorpusCommand().
Boolean ensure_corpus_size(CorpusList *cl)
This is an internal function used to ensure that a system corpus from the corpus list is accessible and that its size has been computed.
In case of subcorpora, this function implements delayed loading. It is necessary because of a hack that prevents CQP from determining the sizes of all known corpora at start-up (which caused annoying delays if one or more corpora were not accessible) and from reading all subcorpora in the local corpus directory (which caused a number of delays and crashes with MP templates). ensure_corpus_size() is needed by findcorpus() and ensure_syscorpus() at the very least. It may be needed in other places to keep CQP from crashing.
cl: The corpus whose accessibility is to be checked.
References attach_subcorpus(), cderrno, cdperror_string(), cl::corpus, cqpmessage(), dropcorpus(), False, cl::loaded, cl::local_dir, cl::mother_name, cl::mother_size, cl::name, cl::range, SUB, SYSTEM, SystemCorpusSize(), True, cl::type, user_level, and Warning.
Referenced by ensure_syscorpus(), and findcorpus().
CorpusList* ensure_syscorpus(char *registry, char *name)
References ensure_corpus_size(), GetSystemCorpus(), LoadedCorpus(), cl::next, and SYSTEM.
Referenced by attach_subcorpus().
FieldType field_name_to_type(char *name)
References KeywordField, MatchEndField, MatchField, NoField, and TargetField.
Referenced by do_cqi_cqp_fdist_1(), do_cqi_cqp_fdist_2(), and labellookup().
char* field_type_to_name(FieldType ft)
References cqpmessage(), Error, KeywordField, MatchEndField, MatchField, NoField, and TargetField.
Referenced by do_AnchorPoint(), and prepare_do_subset().
CorpusList* findcorpus(char *s, CorpusType type, int try_recursive_search)
References access_corpus(), ATT_STRUC, ctxtsp::attrib, calculate_leftboundary(), calculate_rightboundary(), COLON, cl::corpus, cqpmessage(), ctxtsp::direction, duplicate_corpus(), ensure_corpus_size(), expansion, False, find_attribute(), left, leftright, LoadedCorpus(), cl::mother_name, cl::range, RangeSetop(), right, RUniq, cl::size, ctxtsp::size, SLASH, structure, SYSTEM, touch_corpus(), ctxtsp::type, and Warning.
Referenced by assign_temp_to_sub(), copy_intervals(), corpus_info(), cqi_find_corpus(), cqi_lookup_attribute(), do_cqi_corpus_attributes(), do_cqi_corpus_full_name(), do_undump(), make_temp_corpus(), prepare_AlignmentConstraints(), search_corpus(), set_current_corpus_name(), and valid_subcorpus_id().
CorpusList* FirstCorpusFromList()
Gets the CorpusList pointer for the first corpus on the currently-loaded list.
Function for iterating through the list of currently-loaded corpora.
Referenced by do_cqi_corpus_list_corpora(), do_cqi_cqp_list_subcorpora(), and main().
void free_corpuslist(void)
Frees the global list of currently-loaded corpora.
This function sets the corpus list to NULL and frees all members of the list.
References initialize_cl(), cl::next, set_current_corpus(), and True.
Referenced by CorpusListFree().
static char* get_fulllocalpath(CorpusList *cl, int qualify) [static]
References changecase_string(), cl_free, cl_strdup(), LOCAL_CORP_PATH, cl::mother_name, cl::name, and UPPER.
Referenced by attach_subcorpus().
CorpusList* GetSystemCorpus(char *name, char *registry)
References cl::abs_fn, changecase_string(), cl_strdup(), cl::corpus, False, cl::keywords, cl::loaded, LOWER, cl::mother_name, cl::mother_size, cl::name, cl::needs_update, New, NewCL(), cl::next, cl::range, cl::registry, TCorpus::registry_dir, cl::saved, setup_corpus(), cl::size, cl::sortidx, SYSTEM, cl::targets, True, and cl::type.
Referenced by ensure_syscorpus(), and load_corpusnames().
void init_corpuslist(void)
Initialises the global corpus list (sets it to NULL, no matter what its value was).
References set_current_corpus().
Referenced by CorpusListInit().
void initialize_cl(CorpusList *cl, int free_name)
Resets to empty a CorpusList object.
This is done, largely, by freeing all its members (and setting nonfreeable members to 0 or NULL)...
cl: The corpus list to initialise.
free_name: Boolean: the name, mother_name and mother_size members will be cleared iff free_name.
References cl::abs_fn, cl::cd, cl_free, cl::corpus, False, FreeContextDescriptor(), cl::keywords, cl::loaded, cl::mother_name, cl::mother_size, cl::name, cl::needs_update, cl::query_corpus, cl::query_text, cl::range, cl::registry, cl::saved, cl::size, cl::sortidx, cl::targets, cl::type, and UNDEF.
Referenced by assign_temp_to_sub(), attach_subcorpus(), drop_temp_corpora(), dropcorpus(), duplicate_corpus(), free_corpuslist(), and make_temp_corpus().
Boolean is_qualified(char *corpusname)
References COLON.
Referenced by CorpusNameQualified(), do_undump(), and in_CorpusCommand().
void load_corpusnames(enum corpus_type ct)
References accessible(), central_corpus_directory(), changecase_string_no_copy(), cl_strdup(), COLON, corpus, cqpmessage(), drop_temp_corpora(), False, get_path_component(), GetSystemCorpus(), cl::loaded, LoadedCorpus(), LOCAL_CORP_PATH, cl::local_dir, MAX_LINE_LENGTH, cl::mother_name, cl::name, cl::needs_update, NewCL(), cl::next, registry, cl::saved, silent, SUB, SYSTEM, TEMP, True, cl::type, UPPER, and Warning.
Referenced by check_available_corpora().
CorpusList* LoadedCorpus(char *name, char *qualifier, CorpusType type)
Finds a loaded corpus.
This function tries to find the corpus with name 'name' in the list of currently loaded corpora. In case of subcorpora, qualifier is the mother's name; in case of system corpora, qualifier is the registry. If qualifier is NULL, it is ignored and the first matching corpus is returned. If type is not UNDEF, only corpora of that type are returned. No side effects take place.
name: The corpus we are looking for.
qualifier: An extra "bit" of the corpus name (see function description).
type: Which type of corpus is wanted (may be UNDEF).
References current_corpus, cl::mother_name, cl::name, cl::next, cl::registry, STREQ, SUB, SYSTEM, TEMP, cl::type, and UNDEF.
Referenced by duplicate_corpus(), ensure_syscorpus(), findcorpus(), and load_corpusnames().
CorpusList* make_temp_corpus(CorpusList *cl, char *new_name)
References cl::abs_fn, cl_malloc(), cl_strdup(), cl::corpus, False, findcorpus(), initialize_cl(), cl::keywords, cl::loaded, cl::mother_name, cl::mother_size, cl::name, cl::needs_update, NewCL(), cl::next, cl::query_corpus, cl::query_text, cl::range, cl::registry, cl::saved, cl::size, cl::sortidx, cl::targets, TEMP, True, and cl::type.
Referenced by CorpusDuplicateIntoTMP(), do_setop(), do_undump(), in_UnnamedCorpusCommand(), prepare_do_subset(), and prepare_Query().
CorpusList* NewCL(void)
Creates a new CorpusList object.
References cl::abs_fn, cl::cd, cl::corpus, False, cl::keywords, cl::loaded, cl::local_dir, cl::mother_name, cl::mother_size, cl::name, cl::needs_update, New, cl::next, cl::query_corpus, cl::query_text, cl::range, cl::registry, cl::saved, cl::size, cl::sortidx, cl::targets, cl::type, and UNDEF.
Referenced by duplicate_corpus(), GetSystemCorpus(), load_corpusnames(), and make_temp_corpus().
CorpusList* NextCorpusFromList(CorpusList *cl)
Gets the CorpusList pointer for the next corpus on the currently-loaded list.
Function for iterating through the list of currently-loaded corpora.
cl: The current corpus on the list.
References cl::next.
Referenced by do_cqi_corpus_list_corpora(), do_cqi_cqp_list_subcorpora(), and main().
int NrFieldValues(CorpusList *cl, FieldType ft)
References KeywordField, cl::keywords, MatchField, NoField, cl::size, TargetField, and cl::targets.
Boolean save_subcorpus(CorpusList *cl, char *fname)
References cl::abs_fn, cqpmessage(), False, cl::keywords, cl::loaded, LOCAL_CORP_PATH, cl::mother_name, cl::name, cl::needs_update, OpenFile(), cl::range, cl::registry, cl::saved, cl::size, cl::sortidx, SUB, SUBCORPMAGIC, cl::targets, True, cl::type, and Warning.
Referenced by after_CorpusCommand(), assign_temp_to_sub(), copy_intervals(), CorpusSave(), delete_intervals(), do_save(), duplicate_corpus(), and save_unsaved_subcorpora().
void save_unsaved_subcorpora()
References cqpmessage(), False, LOCAL_CORP_PATH, cl::next, save_subcorpus(), cl::saved, SUB, cl::type, and Warning.
Referenced by CorpusSaveAll(), and cqp_parse_file().
CorpusList* search_corpus(char *name)
References findcorpus(), SUB, and SYSTEM.
Referenced by change_corpus().
int set_current_corpus(CorpusList *cp, int force)
Sets the current corpus (by pointer to the corpus).
Also, executes Xkwic side effects, if necessary.
cp: Pointer to the corpus to set as current. cp may be NULL, which is legal.
force: If true, the current corpus is set to the specified corpus, even if it is ALREADY set to that corpus.
References _context_description_block::attributes, CD, cl::corpus, current_corpus, DEFAULT_ATT_NAME, DestroyAttributeList(), FindInAL(), _attlist::list, _attrbuf::next, _attrbuf::status, _context_description_block::strucAttributes, and update_context_descriptor().
Referenced by after_CorpusCommand(), change_corpus(), check_available_corpora(), CorpusSetCurrent(), cqi_activate_corpus(), dropcorpus(), free_corpuslist(), init_corpuslist(), and set_current_corpus_name().
int set_current_corpus_name(char *name, int force)
Sets the current corpus (by name).
Also, executes Xkwic side effects, if necessary.
name: Name of the corpus to set as current.
force: If true, the current corpus is set to the specified corpus, even if it is ALREADY set to that corpus.
References findcorpus(), set_current_corpus(), and UNDEF.
Referenced by CorpusSetCurrentByname(), and initialize_cqp().
void show_corpora_files(enum corpus_type ct)
References show_corpora_files1(), SUB, SYSTEM, and UNDEF.
Referenced by CorpusShowNames().
void show_corpora_files1(enum corpus_type ct)
References cl_malloc(), end_indented_list(), cl::loaded, cl::mother_name, cl::name, cl::needs_update, cl::next, pretty_print, print_indented_list_br(), print_indented_list_item(), cl::saved, show_corpora_files_sort(), cl::size, start_indented_list(), SUB, SYSTEM, and cl::type.
Referenced by show_corpora_files().
static int show_corpora_files_sort(const void *p1, const void *p2) [static]
Internal function for sorting list of corpus names.
Referenced by show_corpora_files1().
char* split_subcorpus_name(char *corpusname, char *mother_name)
Splits a query result corpus-name into qualifier and local name.
This function splits the query result name corpusname into a qualifier (the name of the mother corpus) and a local name. It returns a pointer to the local name part, or NULL if corpusname is not syntactically valid. If mother_name is not NULL, it must point to a buffer of suitable length (MAX_LINE_LENGTH is sufficient) in which the qualifier will be stored; for an unqualified corpus name, the empty string is stored and the return value is equal to corpusname.
References COLON.
Referenced by do_undump(), and valid_subcorpus_name().
int SystemCorpusSize(Corpus *corpus)
References ATT_POS, DEFAULT_ATT_NAME, find_attribute(), and get_attribute_size().
Referenced by ensure_corpus_size().
Boolean touch_corpus(CorpusList *cp)
Touches a corpus, ie, marks it as changed.
cp: The corpus to touch. This must be of type SUB.
References False, cl::needs_update, cl::saved, SUB, True, and cl::type.
Referenced by CorpusTouch(), delete_intervals(), do_cut(), evaluate_target(), findcorpus(), RangeSetop(), set_target(), SortSubcorpus(), and SortSubcorpusRandomize().
Boolean valid_subcorpus_id(char *corpusname)
References False, findcorpus(), SYSTEM, and True.
Referenced by CorpusNameValid().
Boolean valid_subcorpus_name(char *corpusname)
References False, split_subcorpus_name(), and True.
Referenced by do_undump().
Global list of currently-loaded corpora.
Referenced by initialize_cqp().