Frog
mblem_mod.h
Go to the documentation of this file.
1 /* ex: set tabstop=8 expandtab: */
2 /*
3  Copyright (c) 2006 - 2020
4  CLST - Radboud University
5  ILK - Tilburg University
6 
7  This file is part of frog:
8 
9  A Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for
10  several languages
11 
12  frog is free software; you can redistribute it and/or modify
13  it under the terms of the GNU General Public License as published by
14  the Free Software Foundation; either version 3 of the License, or
15  (at your option) any later version.
16 
17  frog is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program. If not, see <http://www.gnu.org/licenses/>.
24 
25  For questions and suggestions, see:
26  https://github.com/LanguageMachines/frog/issues
27  or send mail to:
28  lamasoftware (at ) science.ru.nl
29 
30 */
31 
32 #ifndef MBLEM_MOD_H
33 #define MBLEM_MOD_H
34 
35 #include "libfolia/folia.h"
36 #include "ticcutils/LogStream.h"
37 #include "ticcutils/Configuration.h"
38 #include "ticcutils/Unicode.h"
39 #include "timbl/TimblAPI.h"
40 #include "frog/FrogData.h"
41 
/// A single lemmatizer outcome: one lemma string paired with one tag string.
///
/// Plain value type. Both strings are copied in by the constructor and the
/// accessors hand back copies; the class declares no mutators.
class mblemData {
 public:
  /// Store a lemma together with its tag.
  /// @param l  lemma text, copied into the object
  /// @param t  tag text, copied into the object
  mblemData( const std::string& l, const std::string& t ):
    lemma( l ),
    tag( t ) {}

  /// @return a copy of the stored lemma
  std::string getLemma() const { return lemma; }

  /// @return a copy of the stored tag
  std::string getTag() const { return tag; }

 private:
  std::string lemma;  // lemma text as passed to the constructor
  std::string tag;    // tag text as passed to the constructor
};
53 
54 class Mblem {
55  public:
56  explicit Mblem( TiCC::LogStream *, TiCC::LogStream * =0 );
57  ~Mblem();
58  bool init( const TiCC::Configuration& );
59  void add_provenance( folia::Document&, folia::processor * ) const;
60  void Classify( frog_record& );
61  void Classify( const icu::UnicodeString& );
62  std::vector<std::pair<std::string,std::string> > getResult() const;
63  std::string getTagset() const { return tagset; };
64  std::string version() const { return _version; };
65  void filterTag( const std::string& );
66  void makeUnique();
67  void add_lemmas( const std::vector<folia::Word*>&,
68  const frog_data& ) const;
69  private:
70  std::string call_server( const std::string& );
71  void read_transtable( const std::string& );
72  void create_MBlem_defaults();
73  bool readsettings( const std::string& dir, const std::string& fname );
74  bool fill_ts_map( const std::string& );
75  bool fill_eq_set( const std::string& );
76  std::string make_instance( const icu::UnicodeString& in );
77  Timbl::TimblAPI *myLex;
78  std::string punctuation;
79  size_t history;
80  int debug;
81  bool keep_case;
82  std::map<std::string,std::string> classMap;
83  std::map<std::string, std::map<std::string, int>> token_strip_map;
84  std::set<std::string> one_one_tags;
85  std::vector<mblemData> mblemResult;
86  std::string _version;
87  std::string tagset;
88  std::string POS_tagset;
89  std::string textclass;
90  std::string _host;
91  std::string _port;
92  std::string _base;
93  TiCC::LogStream *errLog;
94  TiCC::LogStream *dbgLog;
95  TiCC::UniFilter *filter;
96 };
97 
98 #endif
Mblem::Classify
void Classify(frog_record &)
Definition: mblem_mod.cxx:383
Mblem::filterTag
void filterTag(const std::string &)
Definition: mblem_mod.cxx:293
mblemData::mblemData
mblemData(const std::string &l, const std::string &t)
Definition: mblem_mod.h:44
Mblem::~Mblem
~Mblem()
Definition: mblem_mod.cxx:253
mblemData::getLemma
std::string getLemma() const
Definition: mblem_mod.h:47
mblemData
Definition: mblem_mod.h:42
frog_data
a datastructure to hold all frogged information of one Sentence
Definition: FrogData.h:76
Mblem::Mblem
Mblem(TiCC::LogStream *, TiCC::LogStream *=0)
create a Timbl based lemmatizer
Definition: mblem_mod.cxx:55
Mblem::getResult
std::vector< std::pair< std::string, std::string > > getResult() const
Definition: mblem_mod.cxx:696
Mblem::version
std::string version() const
Definition: mblem_mod.h:64
Mblem::getTagset
std::string getTagset() const
Definition: mblem_mod.h:63
frog_record
a simple datastructure to hold all frogged information of one word
Definition: FrogData.h:47
Mblem::init
bool init(const TiCC::Configuration &)
Definition: mblem_mod.cxx:115
Mblem::add_lemmas
void add_lemmas(const std::vector< folia::Word * > &, const frog_data &) const
Definition: mblem_mod.cxx:706
FrogData.h
Mblem
Definition: mblem_mod.h:54
Mblem::add_provenance
void add_provenance(folia::Document &, folia::processor *) const
Definition: mblem_mod.cxx:361
Mblem::makeUnique
void makeUnique()
Definition: mblem_mod.cxx:324
mblemData::getTag
std::string getTag() const
Definition: mblem_mod.h:48