Frog
mwu_chunker_mod.h
Go to the documentation of this file.
1 /* ex: set tabstop=8 expandtab: */
2 /*
3  Copyright (c) 2006 - 2020
4  CLST - Radboud University
5  ILK - Tilburg University
6 
7  This file is part of frog:
8 
9  A Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for
10  several languages
11 
12  frog is free software; you can redistribute it and/or modify
13  it under the terms of the GNU General Public License as published by
14  the Free Software Foundation; either version 3 of the License, or
15  (at your option) any later version.
16 
17  frog is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program. If not, see <http://www.gnu.org/licenses/>.
24 
25  For questions and suggestions, see:
26  https://github.com/LanguageMachines/frog/issues
27  or send mail to:
28  lamasoftware (at ) science.ru.nl
29 
30 */
31 
32 #ifndef MWU_CHUNKER_H
33 #define MWU_CHUNKER_H
34 
35 #include <ostream>
36 #include <string>
37 #include "ticcutils/LogStream.h"
38 #include "ticcutils/Configuration.h"
39 #include "ticcutils/Unicode.h"
40 #include "libfolia/folia.h"
41 #include "frog/FrogData.h"
42 
/// Per-word record used by the MWU chunker.
///
/// Holds the word text, a boolean flag (`spec`) and the two public indices
/// `mwu_start` / `mwu_end`.
/// NOTE(review): the meaning of `spec` and of the indices is defined in
/// mwu_chunker_mod.cxx, which is not visible here — presumably the indices
/// delimit the multi-word unit this word belongs to; confirm there.
class mwuAna {
  friend std::ostream& operator<< (std::ostream&, const mwuAna& );
 public:
  /// Construct from a word string, a flag and an index.
  /// NOTE(review): presumably (word, spec, position) — confirm in the .cxx.
  mwuAna( const std::string&, bool, size_t );
  virtual ~mwuAna() {}

  /// Merge another record into this one (implemented in the .cxx).
  void merge( const mwuAna * );

  /// @return a copy of the stored word.
  std::string getWord() const {
    return word;
  }

  /// @return the stored `spec` flag.
  /// Const-qualified so it can be called through const references and
  /// pointers, like the ones operator<< and merge() receive.
  bool isSpec() const { return spec; }

  size_t mwu_start;
  size_t mwu_end;

 protected:
  /// Default construction is reserved for derived classes. All members are
  /// given a defined value so no indeterminate index or flag is ever read.
  mwuAna(): mwu_start(0), mwu_end(0), spec(false) {}
  std::string word;
  bool spec;
};
65 
/// Multimap from a string key to a list of strings; the type of Mwu::MWUs.
/// A real type alias instead of a macro: it obeys scoping rules and cannot
/// textually replace unrelated `mymap2` tokens in files that include this header.
typedef std::multimap<std::string, std::vector<std::string> > mymap2;
67 
68 class Mwu {
69  friend std::ostream& operator<< (std::ostream&, const Mwu& );
70  public:
71  explicit Mwu( TiCC::LogStream*, TiCC::LogStream* );
72  ~Mwu();
73  void reset();
74  bool init( const TiCC::Configuration& );
75  void add_provenance( folia::Document&, folia::processor * ) const;
76  void Classify( frog_data& );
77  void add( frog_record& );
78  void add_result( const frog_data&,
79  const std::vector<folia::Word*>& ) const;
81  std::string getTagset() const { return mwu_tagset; };
82  std::string version() const { return _version; };
83  private:
84  bool readsettings( const std::string&, const std::string&);
85  bool read_mwus( const std::string& );
86  void Classify();
87  int debug;
88  std::string mwuFileName;
89  std::vector<mwuAna*> mWords;
90  mymap2 MWUs;
91  TiCC::LogStream *errLog;
92  TiCC::LogStream *dbgLog;
93  std::string _version;
94  std::string textclass;
95  std::string mwu_tagset;
96  std::string glue_tag;
97  TiCC::UniFilter *filter;
98 };
99 
100 #endif
Mwu::operator<<
friend std::ostream & operator<<(std::ostream &, const Mwu &)
Definition: mwu_chunker_mod.cxx:217
mwuAna::spec
bool spec
Definition: mwu_chunker_mod.h:63
Mwu::Classify
void Classify(frog_data &)
Definition: mwu_chunker_mod.cxx:249
mwuAna::~mwuAna
virtual ~mwuAna()
Definition: mwu_chunker_mod.h:47
frog_data
a datastructure to hold all frogged information of one Sentence
Definition: FrogData.h:76
mwuAna::mwu_end
size_t mwu_end
Definition: mwu_chunker_mod.h:58
mwuAna::word
std::string word
Definition: mwu_chunker_mod.h:62
mwuAna::isSpec
bool isSpec()
Definition: mwu_chunker_mod.h:55
Mwu::init
bool init(const TiCC::Configuration &)
Definition: mwu_chunker_mod.cxx:149
mwuAna::getWord
std::string getWord() const
Definition: mwu_chunker_mod.h:51
mymap2
#define mymap2
Definition: mwu_chunker_mod.h:66
frog_record
a simple datastructure to hold all frogged information of one word
Definition: FrogData.h:47
Mwu::getTagset
std::string getTagset() const
return the value for mwu_tagset. (set via Configuration)
Definition: mwu_chunker_mod.h:81
Mwu::add_result
void add_result(const frog_data &, const std::vector< folia::Word * > &) const
Definition: mwu_chunker_mod.cxx:385
Mwu
Definition: mwu_chunker_mod.h:68
mwuAna
Definition: mwu_chunker_mod.h:43
Mwu::add_provenance
void add_provenance(folia::Document &, folia::processor *) const
Definition: mwu_chunker_mod.cxx:227
Mwu::add
void add(frog_record &)
Definition: mwu_chunker_mod.cxx:107
mwuAna::mwuAna
mwuAna()
Definition: mwu_chunker_mod.h:61
Mwu::~Mwu
~Mwu()
Definition: mwu_chunker_mod.cxx:91
Mwu::version
std::string version() const
Definition: mwu_chunker_mod.h:82
mwuAna::operator<<
friend std::ostream & operator<<(std::ostream &, const mwuAna &)
Mwu::reset
void reset()
Definition: mwu_chunker_mod.cxx:99
mwuAna::mwu_start
size_t mwu_start
Definition: mwu_chunker_mod.h:57
FrogData.h
mwuAna::merge
void merge(const mwuAna *)
Definition: mwu_chunker_mod.cxx:66
Mwu::Mwu
Mwu(TiCC::LogStream *, TiCC::LogStream *)
Definition: mwu_chunker_mod.cxx:78