#include <ctype.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <pwd.h>
#include <grp.h>
#include "globals.h"
#include "macros.h"
#include "attributes.h"
#include "registry.tab.h"
#include "corpus.h"
typedef struct _charset_spec charset_spec |
Structure for the global list of charset names.
void add_corpus_property(Corpus *corpus, char *property, char *value)
Adds a property to the list of corpus properties.
If the property is already defined, ignore and warn. If the property is 'charset', corpus charset is set as well.
corpus | Corpus object to add property to. | |
property | Name of property to add. | |
value | Value of property to add. |
References TCorpus::charset, charset, cl_charset_from_name(), cl_corpus_property(), cl_malloc(), cregin_name, cregin_path, TCorpusProperty::next, TCorpus::properties, TCorpusProperty::property, and TCorpusProperty::value.
char* central_corpus_directory()
Gets a string containing the path of the default registry directory.
References regdir, REGISTRY_DEFAULT_PATH, and REGISTRY_ENVVAR.
Referenced by load_corpusnames(), main(), and setup_corpus().
int check_access_conditions(Corpus *corpus, int verbose)
Checks whether the corpus can be accessed.
If this corpus has access restriction in the form of a list of users, then this function checks if the current user is on that list.
Then ditto for the list of groups and current group; then ditto for the list of hosts and current host.
Note that this is currently disabled for users/groups. So, if either Corpus::userAccessList or Corpus::groupAccessList is changed from its initial (NULL) setting, this function will return false.
On the other hand, the function does work with hosts. If a list of allowed hosts is set, this function will return true if and only if the current host is on that list.
corpus | The corpus. | |
verbose | A boolean. Currently ignored. |
References TCorpus::groupAccessList, TCorpus::hostAccessList, TCorpus::id, memberIDList(), TCorpus::name, _idbuf::next, passwd, _idbuf::string, and TCorpus::userAccessList.
Referenced by setup_corpus().
CorpusCharset cl_charset_from_name(char *name)
Gets a CorpusCharset enumeration with the id code for the given string.
References _charset_spec::name, and unknown_charset.
Referenced by add_corpus_property(), and cwbci_parse_options().
char* cl_charset_name(CorpusCharset id)
Gets a string containing the name of the specified CorpusCharset character set object.
Note that returned string cannot be modified.
References _charset_spec::name.
Referenced by corpus_info().
char* cl_charset_name_canonical(char *name_to_check)
Checks whether a string represents a valid charset, and returns a pointer to the name in canonical form (i.e. lacking any non-standard case there may be in the input string).
name_to_check | String containing the character set name to be checked |
References _charset_spec::name.
Referenced by cwbci_parse_options(), and parse_options().
CorpusCharset cl_corpus_charset(Corpus *corpus)
Retrieves the special 'charset' property.
corpus | The corpus object from which to retrieve the charset |
References TCorpus::charset.
Referenced by add_key(), and print_xml_declaration().
char* cl_corpus_property(Corpus *corpus, char *property)
Gets the value of the specified corpus property.
corpus | Pointer to the Corpus object. | |
property | Name of the property to retrieve. |
References cl_first_corpus_property(), cl_next_corpus_property(), TCorpusProperty::property, and TCorpusProperty::value.
Referenced by add_corpus_property(), and corpus_info().
CorpusProperty cl_first_corpus_property(Corpus *corpus)
Gets the first entry in this corpus's list of properties.
(The corpus properties iterator / property datatype is public.)
corpus | Pointer to the Corpus object. |
References TCorpus::properties.
Referenced by cl_corpus_property(), and corpus_info().
CorpusProperty cl_next_corpus_property(CorpusProperty prop)
Gets the next corpus property on the list of properties.
(The corpus properties iterator / property datatype is public.)
prop | The current property. |
References TCorpusProperty::next.
Referenced by cl_corpus_property(), and corpus_info().
void creg_scan_string(const char *str)
Function created in output from parsing registry.y (function only used by demo version).
void cregerror(char *message)
Function created in output from parsing registry.y.
int cregparse()
Parse a corpus registry file.
The file to be parsed is specified by global variables cregin_path and cregin_name.
Function created in output from parsing registry.y
Referenced by setup_corpus().
void cregrestart(FILE *file)
Restarts the registry file parse.
Function created in output from parsing registry.y
Referenced by setup_corpus().
void describe_corpus(Corpus *corpus)
Prints a description of the corpus to STDOUT.
References _Attribute::any, TCorpus::attributes, describe_attribute(), TCorpus::id, TCorpus::info_file, TCorpus::name, TCorpus::path, TCorpus::registry_dir, and TCorpus::registry_name.
Referenced by main().
int drop_corpus(Corpus *corpus)
Deletes a Corpus object.
corpus | The Corpus to delete |
References TCorpus::admin, attr_drop_attribute(), TCorpus::attributes, cl_free, FreeIDList(), TCorpus::groupAccessList, TCorpus::hostAccessList, TCorpus::id, TCorpus::info_file, TCorpus::name, TCorpus::next, TCorpus::nr_of_loads, TCorpus::path, TCorpus::registry_dir, TCorpus::registry_name, and TCorpus::userAccessList.
Referenced by cleanup(), main(), setup_corpus(), and usage().
Corpus* find_corpus(char *registry_dir, char *registry_name)
Gets a pointer to the Corpus object with the specified CWB-name and registry location.
(Works by searching the loaded_corpora global linked list.)
registry_dir | The registry directory. | |
registry_name | The CWB name of the corpus. |
References cl_standard_registry, TCorpus::next, TCorpus::registry_dir, TCorpus::registry_name, and STREQ.
Referenced by setup_corpus().
FILE* find_corpus_registry(char *registry_dir, char *registry_name, char **real_registry_dir)
Gets a file handle for the registry file of the corpus with the specified CWB-name and registry location.
registry_dir | The registry directory. | |
registry_name | The CWB name of the corpus. | |
real_registry_dir | This will be set to a pointer to the "real" name of the directory derived from the registry_dir parameter. |
References cl_malloc(), and MAX_LINE_LENGTH.
Referenced by setup_corpus().
void FreeIDList(IDList *list)
Deletes an IDList object, and sets the argument pointer to NULL.
list | IDList to delete. |
References cl_free, _idbuf::next, and _idbuf::string.
Referenced by drop_corpus().
int memberIDList(char *s, IDList l)
Checks whether the specified string occurs in the given IDList.
s | The username, groupname, or hostname to look for. | |
l | The IDList to search. |
References _idbuf::next, and _idbuf::string.
Referenced by check_access_conditions().
Corpus* setup_corpus(char *registry_dir, char *registry_name)
Creates a Corpus object.
registry_dir | Path to the CWB registry directory from which the corpus is to be loaded. This may be NULL, in which case the default registry directory is used. | |
registry_name | The CWB-name of the indexed corpus to load (in the all-lowercase form) |
References central_corpus_directory(), check_access_conditions(), cl_free, cl_strdup(), cl_string_canonical(), corpus, cregin, cregin_name, cregin_path, cregparse(), cregrestart(), drop_corpus(), find_corpus(), find_corpus_registry(), TCorpus::id, IGNORE_CASE, TCorpus::next, TCorpus::nr_of_loads, TCorpus::registry_dir, and TCorpus::registry_name.
Referenced by GetSystemCorpus(), main(), and printAlignedStrings().
A list of charset names as strings linked to CorpusCharset enumerations.
Pointer to a corpus object that is used when loading from the registry.
(External variable, defined in the output from parsing registry.y)
FILE* cregin |
File pointer for loading corpus registry.
(External variable, defined in the output from parsing registry.y)
Referenced by setup_corpus().
char* cregin_name = "" |
The name of registry file currently being parsed (for registry parser error messages).
Referenced by add_corpus_property(), and setup_corpus().
char* cregin_path = "" |
Full path of the registry file currently being parsed (for registry parser error messages).
Referenced by add_corpus_property(), and setup_corpus().
char errmsg[MAX_LINE_LENGTH] |
Buffer for an error message.
{Used in registry parser???}
Corpus* loaded_corpora = NULL |
Head of a linked list of loaded corpus handles (for memory manager).
char* regdir = NULL [static] |
The default registry directory.
It is initialised when the function that reads it is first called.
Referenced by central_corpus_directory().