Frog
tagger_base.h
Go to the documentation of this file.
1 /* ex: set tabstop=8 expandtab: */
2 /*
3  Copyright (c) 2006 - 2020
4  CLST - Radboud University
5  ILK - Tilburg University
6 
7  This file is part of frog:
8 
9  A Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for
10  several languages
11 
12  frog is free software; you can redistribute it and/or modify
13  it under the terms of the GNU General Public License as published by
14  the Free Software Foundation; either version 3 of the License, or
15  (at your option) any later version.
16 
17  frog is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program. If not, see <http://www.gnu.org/licenses/>.
24 
25  For questions and suggestions, see:
26  https://github.com/LanguageMachines/frog/issues
27  or send mail to:
28  lamasoftware (at ) science.ru.nl
29 
30 */
31 
32 #ifndef TAGGER_BASE_H
33 #define TAGGER_BASE_H
34 
35 #include <vector>
36 #include "ticcutils/LogStream.h"
37 #include "ticcutils/Configuration.h"
38 #include "ticcutils/Unicode.h"
39 #include "mbt/MbtAPI.h"
40 #include "libfolia/folia.h"
41 #include "ucto/tokenize.h"
42 #include "frog/FrogData.h"
43 
/// One input item for the tagger: a word plus an accompanying string.
///
/// Sequences of these are consumed by BaseTagger::tag_entries() and
/// BaseTagger::call_server(), and produced by BaseTagger::extract_sentence().
class tag_entry {
public:
  /// The word text.
  std::string word;

  /// Extra information that travels with the word.
  // NOTE(review): the exact format and meaning of the enrichment is defined
  // by the code that fills it (not visible in this header) -- confirm there.
  std::string enrichment;
};
49 
50 class BaseTagger {
51  public:
52  explicit BaseTagger( TiCC::LogStream *,
53  TiCC::LogStream *,
54  const std::string& );
55  virtual ~BaseTagger();
56  virtual bool init( const TiCC::Configuration& );
57  virtual void post_process( frog_data& ) = 0;
58  virtual void Classify( frog_data& );
59  virtual void add_declaration( folia::Document&, folia::processor * ) const = 0;
60  void add_provenance( folia::Document&, folia::processor * ) const;
61  std::string getTagset() const { return tagset; };
62  std::string set_eos_mark( const std::string& );
63  bool fill_map( const std::string&, std::map<std::string,std::string>& );
64  std::vector<Tagger::TagResult> tagLine( const std::string& );
65  std::vector<Tagger::TagResult> tag_entries( const std::vector<tag_entry>& );
66  std::string version() const { return _version; };
67  private:
68  std::vector<tag_entry> extract_sentence( const frog_data& );
69  protected:
70  void extract_words_tags( const std::vector<folia::Word *>&,
71  const std::string&,
72  std::vector<std::string>&,
73  std::vector<std::string>& );
74  std::vector<Tagger::TagResult> call_server( const std::vector<tag_entry>& ) const;
75  int debug;
76  std::string _label;
77  std::string tagset;
78  std::string _version;
79  std::string textclass;
80  TiCC::LogStream *err_log;
81  TiCC::LogStream *dbg_log;
82  std::string base;
83  std::string _host;
84  std::string _port;
85  MbtAPI *tagger;
86  TiCC::UniFilter *filter;
87  std::vector<std::string> _words;
88  std::vector<Tagger::TagResult> _tag_result;
89  std::map<std::string,std::string> token_tag_map;
90  BaseTagger( const BaseTagger& ){} // inhibit copies
91 };
92 
93 #endif // TAGGER_BASE_H
BaseTagger::textclass
std::string textclass
Definition: tagger_base.h:79
BaseTagger::dbg_log
TiCC::LogStream * dbg_log
Definition: tagger_base.h:81
BaseTagger::base
std::string base
Definition: tagger_base.h:82
frog_data
A data structure that holds all frogged information of one sentence
Definition: FrogData.h:76
BaseTagger::add_provenance
void add_provenance(folia::Document &, folia::processor *) const
Definition: tagger_base.cxx:249
BaseTagger::token_tag_map
std::map< std::string, std::string > token_tag_map
Definition: tagger_base.h:89
BaseTagger::version
std::string version() const
Definition: tagger_base.h:66
BaseTagger::filter
TiCC::UniFilter * filter
Definition: tagger_base.h:86
BaseTagger::_version
std::string _version
Definition: tagger_base.h:78
BaseTagger
Definition: tagger_base.h:50
BaseTagger::post_process
virtual void post_process(frog_data &)=0
BaseTagger::getTagset
std::string getTagset() const
Definition: tagger_base.h:61
BaseTagger::_port
std::string _port
Definition: tagger_base.h:84
BaseTagger::~BaseTagger
virtual ~BaseTagger()
Definition: tagger_base.cxx:66
BaseTagger::tagLine
std::vector< Tagger::TagResult > tagLine(const std::string &)
Definition: tagger_base.cxx:407
BaseTagger::tagger
MbtAPI * tagger
Definition: tagger_base.h:85
tag_entry::word
std::string word
Definition: tagger_base.h:46
BaseTagger::set_eos_mark
std::string set_eos_mark(const std::string &)
Definition: tagger_base.cxx:477
BaseTagger::add_declaration
virtual void add_declaration(folia::Document &, folia::processor *) const =0
BaseTagger::tag_entries
std::vector< Tagger::TagResult > tag_entries(const std::vector< tag_entry > &)
Definition: tagger_base.cxx:442
BaseTagger::err_log
TiCC::LogStream * err_log
Definition: tagger_base.h:80
tag_entry::enrichment
std::string enrichment
Definition: tagger_base.h:47
BaseTagger::_words
std::vector< std::string > _words
Definition: tagger_base.h:87
BaseTagger::tagset
std::string tagset
Definition: tagger_base.h:77
BaseTagger::_host
std::string _host
Definition: tagger_base.h:83
BaseTagger::BaseTagger
BaseTagger(TiCC::LogStream *, TiCC::LogStream *, const std::string &)
Definition: tagger_base.cxx:50
BaseTagger::debug
int debug
Definition: tagger_base.h:75
BaseTagger::fill_map
bool fill_map(const std::string &, std::map< std::string, std::string > &)
Definition: tagger_base.cxx:75
BaseTagger::BaseTagger
BaseTagger(const BaseTagger &)
Definition: tagger_base.h:90
BaseTagger::init
virtual bool init(const TiCC::Configuration &)
Definition: tagger_base.cxx:108
FrogData.h
tag_entry
Definition: tagger_base.h:44
BaseTagger::_tag_result
std::vector< Tagger::TagResult > _tag_result
Definition: tagger_base.h:88
BaseTagger::Classify
virtual void Classify(frog_data &)
Definition: tagger_base.cxx:561
BaseTagger::extract_words_tags
void extract_words_tags(const std::vector< folia::Word * > &, const std::string &, std::vector< std::string > &, std::vector< std::string > &)
Definition: tagger_base.cxx:494
BaseTagger::_label
std::string _label
Definition: tagger_base.h:76
BaseTagger::call_server
std::vector< Tagger::TagResult > call_server(const std::vector< tag_entry > &) const
Definition: tagger_base.cxx:323