#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <sys/types.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "../cl/corpus.h"
#include <regex.h>
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "../cl/special-chars.h"
#include "cqp.h"
#include "ranges.h"
#include "options.h"
#include "tree.h"
#include "symtab.h"
#include "corpmanag.h"
#include "regex2dfa.h"
#include "eval.h"
#include "builtins.h"
#include "output.h"
#include "matchlist.h"
#define no_match -1
Referenced by check_alignment_constraints().
#define RED_THRESHOLD 0.01
Referenced by matchfirstpattern().
Boolean calculate_initial_matchlist(Constrainttree ctptr, Matchlist *matchlist, CorpusList *corpus)
References calculate_initial_matchlist_1(), Complement, _Matchlist::is_inverted, mark_offrange_cells(), Reduce, and Setop().
Referenced by cqp_run_tab_query(), eval_mu_tree(), and matchfirstpattern().
Boolean calculate_initial_matchlist_1(Constrainttree ctptr, Matchlist *matchlist, CorpusList *corpus)
References c_tree::attr, b_and, b_implies, b_not, b_or, bnode, c_tree::canon, CID, cmp_eq, cmp_ex, cmp_get, cmp_gt, cmp_let, cmp_lt, cmp_neq, cnode, collect_matches(), Complement, c_tree::constnode, cqpmessage(), c_tree::ctype, _Matchlist::end, Error, eval_bool(), eval_debug, EvaluationIsRunning, False, free_matchlist(), func, get_corpus_positions(), get_matched_corpus_positions(), get_positions(), id_list, c_tree::idlist, init_matchlist(), Intersection, _Matchlist::is_inverted, c_tree::items, c_tree::label, c_tree::leaf, c_tree::left, left, mark_offrange_cells(), _Matchlist::matches_whole_corpus, _label_entry::name, c_tree::negated, c_tree::node, NORMAL, c_tree::nr_items, c_tree::op_id, c_tree::pa_ref, pa_ref, c_tree::pat_type, cl::range, Reduce, REGEXP, c_tree::right, right, sa_ref, Setop(), cl::size, _Matchlist::start, string_leaf, _Matchlist::tabsize, True, c_tree::type, Union, and c_tree::val.
Referenced by calculate_initial_matchlist().
int check_alignment_constraints(Matchlist *ml)
References CDA_OK, cderrno, cl_alg2cpos(), cl_cpos2alg(), cl_malloc(), cqp, cqpmessage(), delete_reftab(), evalenv::dfa, eep, _Matchlist::end, Environment, EvaluationIsRunning, free_matchlist(), Info, init_matchlist(), install_signal_handler(), dfa::Max_States, new_reftab(), no_match, simulate(), _Matchlist::start, _Matchlist::tabsize, and which_app.
Referenced by simulate_dfa().
void cqp_run_mu_query(int keep_old_ranges, int cut_value)
References cl_malloc(), cqpmessage(), _Matchlist::end, Environment, Error, eval_mu_tree(), free_matchlist(), init_matchlist(), mark_offrange_cells(), Reduce, set_corpus_matchlists(), Setop(), _Matchlist::start, and _Matchlist::tabsize.
Referenced by do_MUQuery().
void cqp_run_query(int cut, int keep_old_ranges)
References eep, hard_cut, and simulate_dfa().
Referenced by do_StandardQuery().
void cqp_run_tab_query(int implode)
References calculate_initial_matchlist(), cl_malloc(), Environment, free_matchlist(), hard_boundary, init_matchlist(), mark_offrange_cells(), e_tree::next, e_tree::patindex, Reduce, repeat_inf, set_corpus_matchlists(), Setop(), e_tree::tab_el, tabular, and e_tree::type.
Referenced by do_TABQuery().
Boolean eval_bool(Constrainttree ctptr, RefTab rt, int corppos)
References ATTAT_FLOAT, ATTAT_INT, ATTAT_NONE, ATTAT_PAREF, ATTAT_POS, ATTAT_STRING, ATTAT_VAR, c_tree::attr, b_and, b_implies, b_not, b_or, bnode, CDA_OK, cderrno, _DCR::charres, cl_regex_match(), cmp_eq, cmp_ex, cmp_get, cmp_gt, cmp_let, cmp_lt, cmp_neq, cnode, c_tree::constnode, cqpmessage(), c_tree::delete, Error, eval_bool(), eval_debug, EvaluationIsRunning, False, float_leaf, _DCR::floatres, func, get_id_at_position(), get_label_referenced_position(), get_leaf_value(), get_string_of_id(), get_struc_attribute(), id_list, c_tree::idlist, int_leaf, intcompare(), _DCR::intres, c_tree::is_closing, c_tree::items, c_tree::label, c_tree::leaf, c_tree::left, _label_entry::name, c_tree::negated, c_tree::node, NORMAL, c_tree::nr_items, c_tree::op_id, pa_ref, _DCR::parefres, c_tree::pat_type, _label_entry::ref, REGEXP, c_tree::right, c_tree::rx, sa_ref, c_tree::sbound, sbound, set_reftab(), STREQ, string_leaf, c_tree::strucattr, True, _DCR::type, c_tree::type, c_tree::val, and _DCR::value.
Referenced by calculate_initial_matchlist_1(), eval_bool(), eval_constraint(), evaluate_subset(), evaluate_target(), and simulate().
[Note: the function prototype for this entry is missing from this extract. Judging by the page's alphabetical ordering and the cross-references, this entry appears to be eval_constraint().]
References _avs::anchor, Anchor, _avs::attr, cl_cpos2struc(), cl_regex_match(), cl_struc2cpos, cl_struc2str, _avs::con, _avs::constraint, corpus, dup_reftab(), eval_bool(), False, _avs::field, get_reftab(), _avs::is_closing, KeywordField, cl::keywords, _avs::label, _avs::matchall, MatchAll, MatchEndField, MatchField, _avs::negated, Pattern, cl::query_corpus, cl::range, _label_entry::ref, _avs::right_boundary, _avs::rx, set_reftab(), strict_regions, _avs::tag, Tag, TargetField, cl::targets, True, and _avs::type.
Referenced by simulate().
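[Note: the function prototype for this entry is missing from this extract. Judging by the page's alphabetical ordering and the cross-references, this entry appears to be eval_mu_tree().]
References calculate_initial_matchlist(), e_tree::cooc, cooc_meet, cooc_union, CurEnv, EvaluationIsRunning, free_matchlist(), init_matchlist(), e_tree::leaf, leaf, e_tree::left, e_tree::lw, meet_mu(), meet_union, e_tree::op_id, e_tree::patindex, e_tree::right, e_tree::rw, Setop(), e_tree::struc, e_tree::type, and Union.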
Referenced by cqp_run_mu_query().
int free_environment(int thisenv)
Frees an evaluation environment.
Parameters:
  thisenv: The eval environment to free.
References Anchor, ctxtsp::attrib, cl_delete_regex(), cl_free, delete_symbol_table(), ctxtsp::direction, eep, Environment, False, free_booltree(), free_dfa(), free_evaltree(), evalenv::gconstraint, evalenv::has_target_indicator, evalenv::labels, leftright, MatchAll, evalenv::MaxPatIndex, NoField, Pattern, evalenv::patternlist, evalenv::query_corpus, evalenv::search_context, ctxtsp::size, Tag, ctxtsp::type, cl::type, and word.
Referenced by free_environments().
void free_environments(void)
References eep, and free_environment().
Referenced by in_UnnamedCorpusCommand(), and prepare_input().
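[Note: the function prototype for this entry is missing from this extract. Judging by the page's alphabetical ordering and the cross-references, this entry appears to be get_corpus_positions().]
References CDA_OK, cderrno, collect_matches(), get_id_of_string(), initial_matchlist_debug, _Matchlist::matches_whole_corpus, silent, _Matchlist::start, and _Matchlist::tabsize.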
Referenced by calculate_initial_matchlist_1().
int get_label_referenced_position(LabelEntry label, RefTab rt, int corppos)
References eval_debug, get_reftab(), _label_entry::name, and _label_entry::ref.
Referenced by eval_bool(), and get_leaf_value().
Boolean get_leaf_value(Constrainttree ctptr, RefTab rt, int corppos, DynCallResult *dcr, int deliver_strings)
References c_tree::args, ATTAT_FLOAT, ATTAT_INT, ATTAT_NONE, ATTAT_PAREF, ATTAT_POS, ATTAT_STRING, c_tree::attr, call_dynamic_attribute(), call_predefined_function(), CDA_EPOSORNG, CDA_OK, cderrno, _DCR::charres, CID, cl_malloc(), cqpmessage(), c_tree::ctype, c_tree::delete, c_tree::dynattr, Error, eval_debug, EvaluationIsRunning, False, float_leaf, _DCR::floatres, c_tree::func, func, get_id_at_position(), get_label_referenced_position(), get_string_at_position(), get_struc_attribute(), int_leaf, _DCR::intres, c_tree::label, c_tree::leaf, _label_entry::name, _ActualParamList::next, c_tree::nr_args, c_tree::pa_ref, pa_ref, _ActualParamList::param, _DCR::parefres, c_tree::pat_type, c_tree::predef, _label_entry::ref, c_tree::sa_ref, sa_ref, set_reftab(), string_leaf, structure_value_at_position(), True, c_tree::type, _DCR::type, and _DCR::value.
Referenced by eval_bool().
int get_matched_corpus_positions(Attribute *attribute, char *regstr, int canonicalize, Matchlist *matchlist, int *restrictor_list, int restrictor_size)
Get corpus positions matching a regular expression on a given attribute.
get_matched_corpus_positions searches a corpus (which is to be loaded) for the regular expression 'regstr' on the attribute 'attribute', and returns the table of matching start indices together with the table size (tabsize); the results are placed in 'matchlist'.
Parameters:
  attribute: The attribute to search on. May be NULL, in which case DEFAULT_ATT_NAME is used.
  regstr: String containing the regular expression.
  canonicalize: Flags to be passed to the CL regex engine.
  matchlist: Location where the list of matches will be placed.
  restrictor_list: ??
  restrictor_size: ??
References ATT_POS, cl_free, cl_malloc(), collect_matches(), collect_matching_ids(), DEFAULT_ATT_NAME, eval_debug, find_attribute(), get_attribute_size(), get_id_range(), initial_matchlist_debug, _Matchlist::is_inverted, _Matchlist::matches_whole_corpus, range, silent, cl::size, _Matchlist::start, STREQ, and _Matchlist::tabsize.
Referenced by calculate_initial_matchlist_1(), and matchfirstpattern().
static int intcompare(const void *i, const void *j) [static]
Referenced by eval_bool().
int mark_offrange_cells(Matchlist *matchlist, CorpusList *corpus)
References _Matchlist::end, cl::mother_size, cl::range, cl::size, and _Matchlist::start.
Referenced by calculate_initial_matchlist(), calculate_initial_matchlist_1(), cqp_run_mu_query(), and cqp_run_tab_query().
Boolean matchfirstpattern(AVS pattern, Matchlist *matchlist, CorpusList *corpus)
References _avs::anchor, Anchor, _avs::attr, calculate_initial_matchlist(), cl_free, cl_malloc(), cl_max_struc(), cl_regex_match(), cl_struc2cpos, cl_struc2str, clear_all_bits(), _avs::con, _avs::constraint, cqpmessage(), create_bitfield(), destroy_bitfield(), _Matchlist::end, Error, EvaluationIsRunning, False, _avs::field, get_bit(), get_matched_corpus_positions(), _avs::is_closing, KeywordField, cl::keywords, MatchAll, MatchEndField, _Matchlist::matches_whole_corpus, MatchField, _avs::negated, Pattern, query_optimize, range, cl::range, red_factor(), RED_THRESHOLD, Reduce, _avs::rx, set_all_bits(), set_bit(), Setop(), silent, cl::size, _Matchlist::start, _Matchlist::tabsize, _avs::tag, Tag, TargetField, cl::targets, True, and _avs::type.
Referenced by simulate_dfa().
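[Note: the function prototype for this entry is missing from this extract. Judging by the page's alphabetical ordering and the cross-references, this entry appears to be meet_mu().]
References CDA_OK, cderrno, cl_free, cl_malloc(), cl_realloc(), _Matchlist::end, get_struc_attribute(), _Matchlist::matches_whole_corpus, MIN, _Matchlist::start, and _Matchlist::tabsize.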
Referenced by eval_mu_tree().
int next_environment(void)
References ctxtsp::attrib, CurEnv, ctxtsp::direction, eep, Environment, evalenv::evaltree, evalenv::gconstraint, evalenv::has_target_indicator, init_dfa(), evalenv::labels, leftright, evalenv::match_label, evalenv::matchend_label, MAXENVIRONMENT, evalenv::MaxPatIndex, evalenv::negated, new_symbol_table(), evalenv::query_corpus, evalenv::search_context, ctxtsp::size, evalenv::target_label, ctxtsp::type, and word.
Referenced by ActivateCorpus(), after_CorpusSetExpr(), prepare_AlignmentConstraints(), and prepare_Query().
int nr_positions(CorpusList *cp)
References cl::range, and cl::size.
Referenced by red_factor().
float red_factor(CorpusList *cp, int *nr_pos)
References access_corpus(), ATT_POS, cl::corpus, DEFAULT_ATT_NAME, find_attribute(), cl::mother_size, nr_positions(), and cl::size.
Referenced by matchfirstpattern().
void set_corpus_matchlists(CorpusList *cp, Matchlist *matchlist, int nr_lists, int keep_old_ranges)
References cl_free, cl_malloc(), _Matchlist::end, cl::keywords, cl::range, RangeSetop(), RReduce, cl::size, cl::sortidx, _Matchlist::start, _Matchlist::tabsize, _Matchlist::target_positions, and cl::targets.
Referenced by cqp_run_mu_query(), cqp_run_tab_query(), and simulate_dfa().
void show_environment(int thisenv)
References eep, Environment, print_booltree(), print_evaltree(), show_compdfa, show_complete_dfa(), show_evaltree, show_gconstraints, show_patlist, and show_patternlist().
Referenced by debug_output().
void simulate(Matchlist *matchlist, int *cut, int start_state, int start_offset, int *state_vector, int *target_vector, RefTab *reftab_vector, RefTab *reftab_target_vector, int start_transition)
References _avs::anchor, Anchor, _avs::attr, calculate_rightboundary(), CheckForInterrupts(), _avs::con, cqpmessage(), debug_simulation, dup_reftab(), _Matchlist::end, Error, eval_bool(), eval_constraint(), EvaluationIsRunning, free_matchlist(), get_reftab(), get_struc_attribute(), _avs::is_closing, _avs::is_target, LAB_DEFINED, LAB_RDAT, LAB_USED, _avs::label, longest_match, _avs::lookahead, _avs::matchall, MatchAll, matching_strategy, MIN, Pattern, print_label_values(), progress_bar, progress_bar_percentage(), query_corpus, _label_entry::ref, reset_reftab(), _avs::right_boundary, set_reftab(), cl::size, _Matchlist::start, strict_regions, symbol_table_iterator(), symbol_table_new_iterator(), symtab_debug, _Matchlist::tabsize, _avs::tag, Tag, _Matchlist::target_positions, and _avs::type.
Referenced by check_alignment_constraints(), and simulate_dfa().
void simulate_dfa(int envidx, int cut, int keep_old_ranges)
References check_alignment_constraints(), cl_malloc(), cqp, cqpmessage(), delete_reftab(), eep, _Matchlist::end, Environment, Error, EvaluationIsRunning, free_matchlist(), Info, init_matchlist(), initial_matchlist_debug, install_signal_handler(), matchfirstpattern(), new_reftab(), print_symbol_table(), progress_bar, progress_bar_clear_line(), progress_bar_message(), Reduce, reset_reftab(), set_corpus_matchlists(), Setop(), show_matchlist(), show_matchlist_firstelements(), simulate(), _Matchlist::start, _Matchlist::tabsize, _Matchlist::target_positions, True, Union, Warning, and which_app.
Referenced by cqp_run_query().