#include <tagger_base.h>
◆ BaseTagger() [1/2]
BaseTagger::BaseTagger |
( |
TiCC::LogStream * |
errlog, |
|
|
TiCC::LogStream * |
dbglog, |
|
|
const std::string & |
label |
|
) |
| |
|
explicit |
◆ ~BaseTagger()
BaseTagger::~BaseTagger |
( |
| ) |
|
|
virtual |
◆ BaseTagger() [2/2]
◆ add_declaration()
virtual void BaseTagger::add_declaration |
( |
folia::Document & |
, |
|
|
folia::processor * |
|
|
) |
| const |
|
pure virtual |
◆ add_provenance()
void BaseTagger::add_provenance |
( |
folia::Document & |
doc, |
|
|
folia::processor * |
main |
|
) |
| const |
add provenance information for this tagger. (FoLiA output only)
- Parameters
-
doc | the FoLiA document to add to |
main | the processor to use (presumably the Frog processor) |
◆ call_server()
vector< TagResult > BaseTagger::call_server |
( |
const std::vector< tag_entry > & |
tv | ) |
const |
|
protected |
Connect to a MBT server, send and receive JSON and translate to a TagResult list
- Parameters
-
- Returns
- a vector of TagResult elements
We set up a connection to the configured server, send a query in JSON and, on successfully receiving back a JSON result, convert it back into a TagResult vector
- Note
- So this is a one-shot operation. No connection to the MBT server is kept open.
◆ Classify()
void BaseTagger::Classify |
( |
frog_data & |
sent | ) |
|
|
virtual |
Tag one sentence, given in frog_data format
- Parameters
-
When tagging succeeds, 'sent' will be extended with the tag results
Reimplemented in NERTagger, and IOBTagger.
◆ extract_words_tags()
void BaseTagger::extract_words_tags |
( |
const std::vector< folia::Word * > & |
swords, |
|
|
const std::string & |
tagset, |
|
|
std::vector< std::string > & |
words, |
|
|
std::vector< std::string > & |
ptags |
|
) |
| |
|
protected |
extract word and POS-tag information from a list of folia::Word
◆ fill_map()
bool BaseTagger::fill_map |
( |
const std::string & |
file, |
|
|
std::map< std::string, std::string > & |
mp |
|
) |
| |
fill a map of string-string values from a file
- Parameters
-
file | the filename |
mp | the map to fill |
- Returns
- true on success, false otherwise
the file should contain lines with TAB separated attribute/value pairs
lines starting with '#' are seen as comment
◆ getTagset()
std::string BaseTagger::getTagset |
( |
| ) |
const |
|
inline |
◆ init()
bool BaseTagger::init |
( |
const TiCC::Configuration & |
config | ) |
|
|
virtual |
initialize a tagger from 'config'
- Parameters
-
config | the TiCC::Configuration |
- Returns
- true on success, false otherwise
Reimplemented in NERTagger, CGNTagger, and IOBTagger.
◆ post_process()
virtual void BaseTagger::post_process |
( |
frog_data & |
| ) |
|
|
pure virtual |
◆ set_eos_mark()
string BaseTagger::set_eos_mark |
( |
const std::string & |
eos | ) |
|
set the EOS marker for the tagger
- Parameters
-
eos | the eos marker as a string |
- Returns
- the old value
◆ tag_entries()
vector< TagResult > BaseTagger::tag_entries |
( |
const std::vector< tag_entry > & |
to_do | ) |
|
tag a vector of tag_entry into a vector of TagResult elements
- Parameters
-
to_do | a vector of tag_entry elements representing 1 sentence |
◆ tagLine()
vector< TagResult > BaseTagger::tagLine |
( |
const std::string & |
line | ) |
|
tag a string into a vector of TagResult elements
- Parameters
-
line | a (UTF8 encoded) string, may be multilined and include Enrichments |
- Returns
- a vector of TagResult
Depending on the configuration, the input is sent to the local MBT tagger or the associated MBT server.
◆ version()
std::string BaseTagger::version |
( |
| ) |
const |
|
inline |
◆ _host
std::string BaseTagger::_host |
|
protected |
◆ _label
std::string BaseTagger::_label |
|
protected |
◆ _port
std::string BaseTagger::_port |
|
protected |
◆ _tag_result
std::vector<Tagger::TagResult> BaseTagger::_tag_result |
|
protected |
◆ _version
std::string BaseTagger::_version |
|
protected |
◆ _words
std::vector<std::string> BaseTagger::_words |
|
protected |
◆ base
std::string BaseTagger::base |
|
protected |
◆ dbg_log
TiCC::LogStream* BaseTagger::dbg_log |
|
protected |
◆ debug
◆ err_log
TiCC::LogStream* BaseTagger::err_log |
|
protected |
◆ filter
TiCC::UniFilter* BaseTagger::filter |
|
protected |
◆ tagger
MbtAPI* BaseTagger::tagger |
|
protected |
◆ tagset
std::string BaseTagger::tagset |
|
protected |
◆ textclass
std::string BaseTagger::textclass |
|
protected |
◆ token_tag_map
std::map<std::string,std::string> BaseTagger::token_tag_map |
|
protected |
The documentation for this class was generated from the following files: