Frog
Macros | Functions | Variables
AlpinoParser.cxx File Reference
#include "frog/AlpinoParser.h"
#include <string>
#include <iostream>
#include <ticcutils/XMLtools.h>
#include "ticcutils/StringOps.h"
#include "ticcutils/PrettyPrint.h"
#include "ticcutils/SocketBasics.h"
#include "ticcutils/FileUtils.h"
#include "frog/Frog-util.h"
#include "frog/FrogData.h"
Include dependency graph for AlpinoParser.cxx:

Macros

#define LOG   *TiCC::Log(errLog)
 
#define DBG   *TiCC::Dbg(dbgLog)
 

Functions

ostream & operator<< (ostream &os, const dp_tree *node)
 
void print_nodes (int indent, const dp_tree *store)
 
const dp_tree * extract_hd (const dp_tree *node)
 
dp_tree * parse_node (xmlNode *node)
 
dp_tree * parse_nodes (xmlNode *node)
 
dp_tree * resolve_mwus (dp_tree *in, int &compensate, int &restart, frog_data &fd)
 
dp_tree * enumerate_top (dp_tree *in)
 
dp_tree * resolve_mwus (dp_tree *in, frog_data &fd)
 
void extract_dependencies (list< pair< const dp_tree *, const dp_tree * >> &result, const dp_tree *store, const dp_tree *top_root)
 
vector< parsrel > extract (list< pair< const dp_tree *, const dp_tree * >> &l)
 
vector< parsrel > extract_dp (xmlDoc *alp_doc, frog_data &fd)
 

Variables

const string alpino_tagset = "http://ilk.uvt.nl/folia/sets/alpino-parse-nl"
 
const string alpino_mwu_tagset = "http://ilk.uvt.nl/folia/sets/alpino-mwu-nl"
 

Macro Definition Documentation

◆ DBG

#define DBG   *TiCC::Dbg(dbgLog)

◆ LOG

#define LOG   *TiCC::Log(errLog)

Function Documentation

◆ enumerate_top()

dp_tree* enumerate_top ( dp_tree *  in)

Enumerate the TOP nodes in the dp_tree

Parameters
in: the tree to enumerate
Returns
a pointer to the found top node

This also changes the linking order of the children under in.

The children are NOT always sorted from low to high. We sort them that way and return a pointer to the new top.

◆ extract()

vector<parsrel> extract ( list< pair< const dp_tree *, const dp_tree * >> &  l)

convert a list of head-dependent pairs into a list of parsrel records

Parameters
l: a list of head-dependent pairs
Returns
a list of parsrel records. This makes it possible to use the same code as for the built-in non-Alpino parser.

This function does a lot of trickery to handle special nodes.

◆ extract_dependencies()

void extract_dependencies ( list< pair< const dp_tree *, const dp_tree * >> &  result,
const dp_tree *  store,
const dp_tree *  top_root
)

recursively extract all head-dependent pairs from store

Parameters
result: an aggregated list of all dependency pairs found
store: The tree to search through
top_root: The ROOT node of this sequence. For the topmost store it is 0.

◆ extract_dp()

vector<parsrel> extract_dp ( xmlDoc *  alp_doc,
frog_data &  fd
)

extract a list of parsrel records from an Alpino XML file and resolve MWUs

Parameters
alp_doc: an Alpino XML document
fd: a frog_data structure to receive the MWU information
Returns
a list of parsrel records

◆ extract_hd()

const dp_tree* extract_hd ( const dp_tree *  node)

search a dp_tree for a head node

Parameters
node: the top node to start searching
Returns
the dp_tree structure holding a head

A head is defined as a node with a rel value of 'hd', 'crd' or 'cmp'.

◆ operator<<()

ostream& operator<< ( ostream &  os,
const dp_tree *  node
)

print out one dp_tree structure node

Parameters
os: the output stream
node: the node to print

◆ parse_node()

dp_tree* parse_node ( xmlNode *  node)

convert a single node in an Alpino XML tree into a much simpler dp_tree node

Parameters
node: The Alpino XML node to parse
Returns
a dp_tree structure with the essential information from Alpino

◆ parse_nodes()

dp_tree* parse_nodes ( xmlNode *  node)

recursively convert an Alpino XML tree into a much simpler dp_tree tree

Parameters
node: The Alpino XML tree to parse
Returns
A dp_tree node tree

an Alpino XML tree is quite complex. We try to simplify it and extract only what is needed.

◆ print_nodes()

void print_nodes ( int  indent,
const dp_tree *  store
)

recursively pretty print out a dp_tree tree to stderr

Parameters
indent: indentation level
store: the top node to print

◆ resolve_mwus() [1/2]

dp_tree* resolve_mwus ( dp_tree *  in,
frog_data &  fd
)

search for MWUs in the dp_tree and register them in the frog_data

Parameters
in: The dp_tree to analyze
fd: the frog_data to fill
Returns
pointer to the (modified) input.

◆ resolve_mwus() [2/2]

dp_tree* resolve_mwus ( dp_tree *  in,
int &  compensate,
int &  restart,
frog_data &  fd
)

extract MWU information into frog_data.

Parameters
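in: the dp_tree to search through
compensate: complicated; apparently the correction for indices that are off by the size of an MWU after the tree has been modified
restart: complicated; apparently used to check for MWU indices that are not consecutive
fd: the frog_data structure to register the MWU in.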
Returns
the modified dp_tree

This function searches the dp_tree for MWUs (rel='mwp'). It will merge the found MWU words in a modified dp_tree AND register the start and finish indexes of the MWU in fd.

The indices of the MWUs in dp_tree are not always consecutive, so we have to check that using restart.

Also, after modifying the tree, the indices are wrong by the size of the MWU, so we need to compensate for that too.

Variable Documentation

◆ alpino_mwu_tagset

const string alpino_mwu_tagset = "http://ilk.uvt.nl/folia/sets/alpino-mwu-nl"

◆ alpino_tagset

const string alpino_tagset = "http://ilk.uvt.nl/folia/sets/alpino-parse-nl"