Frog
Public Member Functions | Protected Member Functions | Protected Attributes | List of all members
BaseTagger Class Reference (abstract)

#include <tagger_base.h>

Inheritance diagram for BaseTagger:
Inheritance graph
[legend]
Collaboration diagram for BaseTagger:
Collaboration graph
[legend]

Public Member Functions

 BaseTagger (TiCC::LogStream *, TiCC::LogStream *, const std::string &)
 
virtual ~BaseTagger ()
 
virtual bool init (const TiCC::Configuration &)
 
virtual void post_process (frog_data &)=0
 
virtual void Classify (frog_data &)
 
virtual void add_declaration (folia::Document &, folia::processor *) const =0
 
void add_provenance (folia::Document &, folia::processor *) const
 
std::string getTagset () const
 
std::string set_eos_mark (const std::string &)
 
bool fill_map (const std::string &, std::map< std::string, std::string > &)
 
std::vector< Tagger::TagResult > tagLine (const std::string &)
 
std::vector< Tagger::TagResult > tag_entries (const std::vector< tag_entry > &)
 
std::string version () const
 

Protected Member Functions

void extract_words_tags (const std::vector< folia::Word * > &, const std::string &, std::vector< std::string > &, std::vector< std::string > &)
 
std::vector< Tagger::TagResult > call_server (const std::vector< tag_entry > &) const
 
 BaseTagger (const BaseTagger &)
 

Protected Attributes

int debug
 
std::string _label
 
std::string tagset
 
std::string _version
 
std::string textclass
 
TiCC::LogStream * err_log
 
TiCC::LogStream * dbg_log
 
std::string base
 
std::string _host
 
std::string _port
 
MbtAPI * tagger
 
TiCC::UniFilter * filter
 
std::vector< std::string > _words
 
std::vector< Tagger::TagResult > _tag_result
 
std::map< std::string, std::string > token_tag_map
 

Constructor & Destructor Documentation

◆ BaseTagger() [1/2]

BaseTagger::BaseTagger ( TiCC::LogStream *  errlog,
TiCC::LogStream *  dbglog,
const std::string &  label 
)
explicit

◆ ~BaseTagger()

BaseTagger::~BaseTagger ( )
virtual

◆ BaseTagger() [2/2]

BaseTagger::BaseTagger ( const BaseTagger & )
inline protected

Member Function Documentation

◆ add_declaration()

virtual void BaseTagger::add_declaration ( folia::Document &  ,
folia::processor *   
) const
pure virtual

Implemented in NERTagger, CGNTagger, and IOBTagger.

◆ add_provenance()

void BaseTagger::add_provenance ( folia::Document &  doc,
folia::processor *  main 
) const

add provenance information for this tagger. (FoLiA output only)

Parameters
doc: the FoLiA document to add to
main: the processor to use (presumably the Frog processor)

◆ call_server()

vector< TagResult > BaseTagger::call_server ( const std::vector< tag_entry > &  tv) const
protected

Connect to a MBT server, send and receive JSON and translate to a TagResult list

Parameters
tv: the tag_entry elements we would like to have serviced
Returns
a vector of TagResult elements

We set up a connection to the configured server, send a query in JSON, and on successfully receiving a JSON result we convert it back into a TagResult vector.

Note
So this is a one-shot operation. No connection to the MBT server is kept open.

◆ Classify()

void BaseTagger::Classify ( frog_data & sent )
virtual

Tag one sentence, given in frog_data format

Parameters
sent: the frog_data structure to analyze

When tagging succeeds, 'sent' will be extended with the tag results

Reimplemented in NERTagger and IOBTagger.

◆ extract_words_tags()

void BaseTagger::extract_words_tags ( const std::vector< folia::Word * > &  swords,
const std::string &  tagset,
std::vector< std::string > &  words,
std::vector< std::string > &  ptags 
)
protected

extract word and POS-tag information from a list of folia::Word

◆ fill_map()

bool BaseTagger::fill_map ( const std::string &  file,
std::map< std::string, std::string > &  mp 
)

fill a map of string-string values from a file

Parameters
file: the filename
mpthe map to fill
Returns
true on success, false otherwise

the file should contain lines with TAB separated attribute/value pairs

lines starting with '#' are seen as comment

◆ getTagset()

std::string BaseTagger::getTagset ( ) const
inline

◆ init()

bool BaseTagger::init ( const TiCC::Configuration &  config)
virtual

initialize a tagger from 'config'

Parameters
config: the TiCC::Configuration
Returns
true on success, false otherwise

Reimplemented in NERTagger, CGNTagger, and IOBTagger.

◆ post_process()

virtual void BaseTagger::post_process ( frog_data & )
pure virtual

Implemented in NERTagger, CGNTagger, and IOBTagger.

◆ set_eos_mark()

string BaseTagger::set_eos_mark ( const std::string &  eos)

set the EOS marker for the tagger

Parameters
eos: the EOS marker as a string
Returns
the old value

◆ tag_entries()

vector< TagResult > BaseTagger::tag_entries ( const std::vector< tag_entry > &  to_do)

tag a vector of tag_entry into a vector of TagResult elements

Parameters
to_do: a vector of tag_entry elements representing one sentence

◆ tagLine()

vector< TagResult > BaseTagger::tagLine ( const std::string &  line)

tag a string into a vector of TagResult elements

Parameters
line: a (UTF8 encoded) string, which may span multiple lines and include Enrichments
Returns
a vector of TagResult

Depending on the configuration, the input is sent to the local MBT tagger or the associated MBT server.

◆ version()

std::string BaseTagger::version ( ) const
inline

Member Data Documentation

◆ _host

std::string BaseTagger::_host
protected

◆ _label

std::string BaseTagger::_label
protected

◆ _port

std::string BaseTagger::_port
protected

◆ _tag_result

std::vector<Tagger::TagResult> BaseTagger::_tag_result
protected

◆ _version

std::string BaseTagger::_version
protected

◆ _words

std::vector<std::string> BaseTagger::_words
protected

◆ base

std::string BaseTagger::base
protected

◆ dbg_log

TiCC::LogStream* BaseTagger::dbg_log
protected

◆ debug

int BaseTagger::debug
protected

◆ err_log

TiCC::LogStream* BaseTagger::err_log
protected

◆ filter

TiCC::UniFilter* BaseTagger::filter
protected

◆ tagger

MbtAPI* BaseTagger::tagger
protected

◆ tagset

std::string BaseTagger::tagset
protected

◆ textclass

std::string BaseTagger::textclass
protected

◆ token_tag_map

std::map<std::string,std::string> BaseTagger::token_tag_map
protected

The documentation for this class was generated from the following files: