Frog
|
#include "frog/AlpinoParser.h"
#include <string>
#include <iostream>
#include <ticcutils/XMLtools.h>
#include "ticcutils/StringOps.h"
#include "ticcutils/PrettyPrint.h"
#include "ticcutils/SocketBasics.h"
#include "ticcutils/FileUtils.h"
#include "frog/Frog-util.h"
#include "frog/FrogData.h"
Macros | |
#define | LOG *TiCC::Log(errLog) |
#define | DBG *TiCC::Dbg(dbgLog) |
Functions | |
ostream & | operator<< (ostream &os, const dp_tree *node) |
void | print_nodes (int indent, const dp_tree *store) |
const dp_tree * | extract_hd (const dp_tree *node) |
dp_tree * | parse_node (xmlNode *node) |
dp_tree * | parse_nodes (xmlNode *node) |
dp_tree * | resolve_mwus (dp_tree *in, int &compensate, int &restart, frog_data &fd) |
dp_tree * | enumerate_top (dp_tree *in) |
dp_tree * | resolve_mwus (dp_tree *in, frog_data &fd) |
void | extract_dependencies (list< pair< const dp_tree *, const dp_tree * >> &result, const dp_tree *store, const dp_tree *top_root) |
vector< parsrel > | extract (list< pair< const dp_tree *, const dp_tree * >> &l) |
vector< parsrel > | extract_dp (xmlDoc *alp_doc, frog_data &fd) |
Variables | |
const string | alpino_tagset = "http://ilk.uvt.nl/folia/sets/alpino-parse-nl" |
const string | alpino_mwu_tagset = "http://ilk.uvt.nl/folia/sets/alpino-mwu-nl" |
#define DBG *TiCC::Dbg(dbgLog) |
#define LOG *TiCC::Log(errLog) |
Enumerate the TOP nodes in the dp_tree
in | the tree to enumerate |
This also changes the linking order of the children under the node in.
The children are NOT always sorted from low to high; this function sorts them that way and returns a pointer to the new top.
convert a list of head-dependent pairs into a list of parsrel records
l | a list of head-dependent pairs |
This function does a lot of trickery to handle special nodes.
void extract_dependencies | ( | list< pair< const dp_tree *, const dp_tree * >> & | result, |
const dp_tree * | store, | ||
const dp_tree * | top_root | ||
) |
recursively extract all head-dependent pairs from store
result | an aggregated list of all dependency pairs found |
store | The tree to search through |
top_root | The ROOT node of this sequence. For the topmost store it is 0 |
Extract a list of parsrel records from an Alpino XML file and resolve MWUs
alp_doc | an Alpino XML document |
fd | a frog_data structure to receive the MWU information |
ostream& operator<< | ( | ostream & | os, |
const dp_tree * | node | ||
) |
print out one dp_tree structure node
os | the output stream |
node | the node to print |
dp_tree* parse_node | ( | xmlNode * | node | ) |
dp_tree* parse_nodes | ( | xmlNode * | node | ) |
void print_nodes | ( | int | indent, |
const dp_tree * | store | ||
) |
Recursively pretty-print a dp_tree to stderr
indent | indentation level |
store | the top node to print |
extract MWU information into frog_data.
in | the dp_tree to search through |
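compensate | index correction: after MWU words are merged, the tree indices are off by the size of the MWU, and this value is used to compensate for that |
restart | used to check for MWU indices in the dp_tree that are not consecutive |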
fd | the frog_data structure to register the MWU in. |
This function searches the dp_tree for MWUs (rel='mwp'). It merges the words of each MWU found into a modified dp_tree AND registers the start and finish indexes of the MWU in fd.
The indices of the MWUs in the dp_tree are not always consecutive, so we have to check for that using restart.
Also, after modifying the tree, the indices are off by the size of the MWU, so we need to compensate for that too.
const string alpino_mwu_tagset = "http://ilk.uvt.nl/folia/sets/alpino-mwu-nl" |
const string alpino_tagset = "http://ilk.uvt.nl/folia/sets/alpino-parse-nl" |