corpmanag.c File Reference

#include <stddef.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "../cl/fileutils.h"
#include "corpmanag.h"
#include "cqp.h"
#include "options.h"
#include "output.h"
#include "ranges.h"
#include "paths.h"

Defines

Functions

Variables


Define Documentation

#define COLON   ':'
#define SLASH   '^'

Referenced by findcorpus().

#define subcorpload_debug   False

Referenced by attach_subcorpus().

#define SUBCORPMAGIC   36193928

Magic number for subcorpus files.

Referenced by attach_subcorpus(), check_stamp(), save_subcorpus(), and show_subcorpus_info().


Function Documentation

Boolean access_corpus ( CorpusList cl  ) 
static Boolean accessible ( char *  dir,
char *  file 
) [static]

Tests whether a file is accessible.

A file is considered accessible iff the user can read it and it is not a (sub)directory.

This test is used for registry entries.

Parameters:
dir Directory in which the file is to be found.
file The filename to check.
Returns:
Boolean: true iff file is accessible.

References cl_malloc(), False, and True.

Referenced by load_corpusnames().

CorpusList* assign_temp_to_sub ( CorpusList tmp,
char *  subname 
)
static Boolean attach_subcorpus ( CorpusList cl,
char *  advertised_directory,
char *  advertised_filename 
) [static]
Boolean change_corpus ( char *  name,
Boolean  silent 
)
static char* changecase_string ( char *  str,
enum case_mode  mode 
) [static]

References cl_strdup(), and LOWER.

Referenced by get_fulllocalpath(), and GetSystemCorpus().

static char* changecase_string_no_copy ( char *  str,
enum case_mode  mode 
) [static]

References LOWER.

Referenced by load_corpusnames().

void check_available_corpora ( enum corpus_type  ct  ) 
int check_stamp ( char *  directory,
char *  fname 
)

References OpenFile(), and SUBCORPMAGIC.

CorpusList* CorpusChangeTMPtoSUB ( CorpusList tmp,
char *  subname 
)

References assign_temp_to_sub().

Boolean CorpusDiscard ( CorpusList cl,
Boolean  remove_file_also,
Boolean  save_if_unsaved 
)

References dropcorpus(), and True.

Boolean CorpusDiscardTMPCorpora ( void   ) 

References drop_temp_corpora(), and True.

CorpusList* CorpusDuplicate ( CorpusList cl,
char *  new_name,
Boolean  force_overwrite 
)

References duplicate_corpus().

CorpusList* CorpusDuplicateIntoTMP ( CorpusList cl,
char *  new_name 
)

References make_temp_corpus().

void CorpusListFree ( void   ) 

References free_corpuslist().

void CorpusListInit ( void   ) 

References init_corpuslist().

Boolean CorpusLoad ( CorpusList cl  ) 

References access_corpus().

void CorpusLoadDescriptors ( CorpusType  ct  ) 
Boolean CorpusNameQualified ( char *  name  ) 

References is_qualified().

Boolean CorpusNameValid ( char *  name  ) 

References valid_subcorpus_id().

Boolean CorpusSave ( CorpusList cl,
char *  file_name 
)

References save_subcorpus().

Boolean CorpusSaveAll ( void   ) 

References save_unsaved_subcorpora(), and True.

Boolean CorpusSetCurrent ( CorpusList cl  ) 

References set_current_corpus().

Boolean CorpusSetCurrentByname ( char *  name  ) 
void CorpusShowNames ( CorpusType  ct  ) 

References show_corpora_files().

Boolean CorpusTouch ( CorpusList cl  ) 

References touch_corpus().

void drop_temp_corpora ( void   ) 
void dropcorpus ( CorpusList cl  ) 
CorpusList* duplicate_corpus ( CorpusList cl,
char *  new_name,
Boolean  force_overwrite 
)
Boolean ensure_corpus_size ( CorpusList cl  ) 

This is an internal function used to ensure that a system corpus from the corpus list is accessible and that its size has been computed.

In case of subcorpora, this function implements delayed loading. It is necessary because of a hack that prevents CQP from determining the sizes of all known corpora at start-up (which caused annoying delays if one or more corpora are not accessible) and from reading all subcorpora in the local corpus directory (which caused a number of delays and crashes with MP templates). ensure_corpus_size() is needed by findcorpus() and ensure_syscorpus() at the very least. It may be needed in other places to keep CQP from crashing.

Parameters:
cl The corpus whose accessibility is to be checked.
Returns:
Boolean: true if access is OK.

References attach_subcorpus(), cderrno, cdperror_string(), cl::corpus, cqpmessage(), dropcorpus(), False, cl::loaded, cl::local_dir, cl::mother_name, cl::mother_size, cl::name, cl::range, SUB, SYSTEM, SystemCorpusSize(), True, cl::type, user_level, and Warning.

Referenced by ensure_syscorpus(), and findcorpus().

CorpusList* ensure_syscorpus ( char *  registry,
char *  name 
)
FieldType field_name_to_type ( char *  name  ) 
char* field_type_to_name ( FieldType  ft  ) 
CorpusList* findcorpus ( char *  s,
CorpusType  type,
int  try_recursive_search 
)
CorpusList* FirstCorpusFromList (  ) 

Gets the CorpusList pointer for the first corpus on the currently-loaded list.

Function for iterating through the list of currently-loaded corpora.

Returns:
The requested CorpusList pointer.

Referenced by do_cqi_corpus_list_corpora(), do_cqi_cqp_list_subcorpora(), and main().

void free_corpuslist ( void   ) 

Frees the global list of currently-loaded corpora.

This function sets the corpus list to NULL and frees all members of the list.

References initialize_cl(), cl::next, set_current_corpus(), and True.

Referenced by CorpusListFree().

static char* get_fulllocalpath ( CorpusList cl,
int  qualify 
) [static]
CorpusList * GetSystemCorpus ( char *  name,
char *  registry 
)
void init_corpuslist ( void   ) 

Initialises the global corpus list (sets it to NULL, no matter what its value was).

References set_current_corpus().

Referenced by CorpusListInit().

void initialize_cl ( CorpusList cl,
int  free_name 
)

Resets to empty a CorpusList object.

This is done, largely, by freeing all its members (and setting nonfreeable members to 0 or NULL)...

Parameters:
cl The corpus list to initialise.
free_name Boolean: the name, mother_name and mother_size members will be cleared iff free_name.

References cl::abs_fn, cl::cd, cl_free, cl::corpus, False, FreeContextDescriptor(), cl::keywords, cl::loaded, cl::mother_name, cl::mother_size, cl::name, cl::needs_update, cl::query_corpus, cl::query_text, cl::range, cl::registry, cl::saved, cl::size, cl::sortidx, cl::targets, cl::type, and UNDEF.

Referenced by assign_temp_to_sub(), attach_subcorpus(), drop_temp_corpora(), dropcorpus(), duplicate_corpus(), free_corpuslist(), and make_temp_corpus().

Boolean is_qualified ( char *  corpusname  ) 

References COLON.

Referenced by CorpusNameQualified(), do_undump(), and in_CorpusCommand().

void load_corpusnames ( enum corpus_type  ct  ) 
CorpusList* LoadedCorpus ( char *  name,
char *  qualifier,
CorpusType  type 
)

Finds a loaded corpus.

This function tries to find the corpus with name 'name' in the list of currently loaded corpora. In case of subcorpora, qualifier is the mother's name. In case of system corpora, qualifier is the registry. If qualifier is NULL, it is ignored and the first matching corpus is returned. If type is not UNDEF, only corpora of that type are returned. No side effects take place.

Parameters:
name The corpus we are looking for.
qualifier An extra "bit" of the corpus name (see function description).
type Which type of corpus is wanted (may be UNDEF).
Returns:
Pointer to the CorpusList of the corpus that was found.

References current_corpus, cl::mother_name, cl::name, cl::next, cl::registry, STREQ, SUB, SYSTEM, TEMP, cl::type, and UNDEF.

Referenced by duplicate_corpus(), ensure_syscorpus(), findcorpus(), and load_corpusnames().

CorpusList* make_temp_corpus ( CorpusList cl,
char *  new_name 
)
CorpusList* NewCL ( void   ) 
CorpusList* NextCorpusFromList ( CorpusList cl  ) 

Gets the CorpusList pointer for the next corpus on the currently-loaded list.

Function for iterating through the list of currently-loaded corpora.

Parameters:
cl The current corpus on the list.
Returns:
The requested CorpusList pointer.

References cl::next.

Referenced by do_cqi_corpus_list_corpora(), do_cqi_cqp_list_subcorpora(), and main().

int NrFieldValues ( CorpusList cl,
FieldType  ft 
)
Boolean save_subcorpus ( CorpusList cl,
char *  fname 
)
void save_unsaved_subcorpora (  ) 
CorpusList* search_corpus ( char *  name  ) 

References findcorpus(), SUB, and SYSTEM.

Referenced by change_corpus().

int set_current_corpus ( CorpusList cp,
int  force 
)

Sets the current corpus (by pointer to the corpus).

Also, executes Xkwic side effects, if necessary.

Parameters:
cp Pointer to the corpus to set as current. cp may be NULL, which is legal.
force If true, the current corpus is set to the specified corpus, even if it is ALREADY set to that corpus.
Returns:
Always 1.

References _context_description_block::attributes, CD, cl::corpus, current_corpus, DEFAULT_ATT_NAME, DestroyAttributeList(), FindInAL(), _attlist::list, _attrbuf::next, _attrbuf::status, _context_description_block::strucAttributes, and update_context_descriptor().

Referenced by after_CorpusCommand(), change_corpus(), check_available_corpora(), CorpusSetCurrent(), cqi_activate_corpus(), dropcorpus(), free_corpuslist(), init_corpuslist(), and set_current_corpus_name().

int set_current_corpus_name ( char *  name,
int  force 
)

Sets the current corpus (by name).

Also, executes Xkwic side effects, if necessary.

Parameters:
name Name of the corpus to set as current.
force If true, the current corpus is set to the specified corpus, even if it is ALREADY set to that corpus.
Returns:
True if the corpus was found and set, otherwise false if the corpus could not be found.

References findcorpus(), set_current_corpus(), and UNDEF.

Referenced by CorpusSetCurrentByname(), and initialize_cqp().

void show_corpora_files ( enum corpus_type  ct  ) 

References show_corpora_files1(), SUB, SYSTEM, and UNDEF.

Referenced by CorpusShowNames().

void show_corpora_files1 ( enum corpus_type  ct  ) 
static int show_corpora_files_sort ( const void *  p1,
const void *  p2 
) [static]

Internal function for sorting list of corpus names.

Referenced by show_corpora_files1().

char* split_subcorpus_name ( char *  corpusname,
char *  mother_name 
)

Splits a query result corpus-name into qualifier and local name.

This function splits the query result name {corpusname} into a qualifier (the name of the mother corpus) and a local name. It returns a pointer to the local name part, or NULL if {corpusname} is not syntactically valid. If mother_name is not NULL, it must point to a buffer of suitable length (MAX_LINE_LENGTH is sufficient) where the qualifier will be stored; for an unqualified corpus name, the empty string is stored and the return value is equal to {corpusname}.

References COLON.

Referenced by do_undump(), and valid_subcorpus_name().

int SystemCorpusSize ( Corpus corpus  ) 
Boolean touch_corpus ( CorpusList cp  ) 

Touches a corpus, ie, marks it as changed.

Parameters:
cp The corpus to touch. This must be of type SUB.
Returns:
Boolean: true if the touch worked, otherwise false.

References False, cl::needs_update, cl::saved, SUB, True, and cl::type.

Referenced by CorpusTouch(), delete_intervals(), do_cut(), evaluate_target(), findcorpus(), RangeSetop(), set_target(), SortSubcorpus(), and SortSubcorpusRandomize().

Boolean valid_subcorpus_id ( char *  corpusname  ) 

References False, findcorpus(), SYSTEM, and True.

Referenced by CorpusNameValid().

Boolean valid_subcorpus_name ( char *  corpusname  ) 

References False, split_subcorpus_name(), and True.

Referenced by do_undump().


Variable Documentation

Global list of currently-loaded corpora.

Referenced by initialize_cqp().


Generated on Sun Feb 28 18:08:04 2010 for CWB by  doxygen 1.6.1