Frog
mbma_brackets.h
Go to the documentation of this file.
1 /* ex: set tabstop=8 expandtab: */
2 /*
3  Copyright (c) 2006 - 2020
4  CLST - Radboud University
5  ILK - Tilburg University
6 
7  This file is part of frog:
8 
9  A Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for
10  several languages
11 
12  frog is free software; you can redistribute it and/or modify
13  it under the terms of the GNU General Public License as published by
14  the Free Software Foundation; either version 3 of the License, or
15  (at your option) any later version.
16 
17  frog is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program. If not, see <http://www.gnu.org/licenses/>.
24 
25  For questions and suggestions, see:
26  https://github.com/LanguageMachines/frog/issues
27  or send mail to:
28  lamasoftware (at ) science.ru.nl
29 
30 */
31 
32 #ifndef MBMA_BRACKETS_H
33 #define MBMA_BRACKETS_H
34 
35 #include <vector>
36 #include <list>
37 #include "unicode/unistr.h"
38 #include "ticcutils/LogStream.h"
39 #include "frog/clex.h"
40 
/// The state of the MBMA structure.
///
/// The enumerators are unscoped, so they are visible at global scope
/// (BaseBracket initialises its status with a bare FAILED).
enum Status {
  INFO,          ///< The structure contains additional Information.
  PARTICLE,      ///< The structure describes a Particle.
  PARTICIPLE,    ///< The structure describes a Participle.
  STEM,          ///< The structure describes the Stem.
  COMPLEX,       ///< The structure describes a Complex Rule.
  INFLECTION,    ///< The structure describes an Inflection Rule.
  DERIVATIONAL,  ///< The structure describes a Derivational rule.
  FAILED         ///< The structure describes a Failed state.
};
52 
/// Groups the compound-type codes so they must be spelled Compound::NN,
/// Compound::Type::NONE, and so on.
namespace Compound {
  /// The compound label of a bracket structure.
  ///
  /// NOTE(review): each code presumably concatenates one letter per
  /// component word class (N, A, B, P, V) -- confirm against frog/clex.h.
  enum Type : int {
    // NONE is what BaseBracket::compound() reports by default.
    NONE,
    // NB and PB compounds don't exist
    // (both are nevertheless kept as enumerators, which keeps the
    // two-letter table below a regular 5x5 grid)
    NN, NA, NB, NP, NV,
    AN, AA, AB, AP, AV,
    BN, BA, BB, BP, BV,
    PN, PA, PB, PP, PV,
    VN, VA, VB, VP, VV,
    // the three-component compounds
    NNN, NVN };
}
66 
// Forward declarations only: this header refers to these types purely
// through pointers and references, so their full definitions are not
// needed here.
namespace folia { class Document; class Morpheme; }

class RulePart;
73 
74 class BaseBracket {
75  public:
76  BaseBracket( CLEX::Type t, const std::vector<CLEX::Type>& R, int flag,
77  TiCC::LogStream& l ):
78  RightHand(R),
79  cls(t),
80  _status( FAILED ),
81  debugFlag(flag),
82  myLog(l)
83  {};
84  BaseBracket( CLEX::Type t, int flag, TiCC::LogStream& l ):
85  cls(t),
86  _status( FAILED ),
87  debugFlag(flag),
88  myLog(l)
89  {};
90  virtual ~BaseBracket() {};
91  virtual BaseBracket *clone() const = 0;
92  Status status() const { return _status; };
93  void set_status( const Status s ) { _status = s; };
94  virtual icu::UnicodeString morpheme() const { return "";};
95  virtual std::string inflection() const { return ""; };
96  virtual std::string original() const { return ""; };
97  virtual int infixpos() const { return -1; };
98  virtual bool isglue() const { return false; };
99  virtual icu::UnicodeString put( bool = true ) const;
100  virtual icu::UnicodeString pretty_put() const;
101  virtual BaseBracket *append( BaseBracket * ){ abort(); };
102  virtual bool isNested() { return false; };
103  virtual void resolveGlue(){ abort(); };
104  virtual void resolveLead(){ abort(); };
105  virtual void resolveTail(){ abort(); };
106  virtual void resolveMiddle(){ abort(); };
107  virtual void clearEmptyNodes() { abort(); };
108  virtual folia::Morpheme *createMorpheme( folia::Document * ) const = 0;
109  virtual folia::Morpheme *createMorpheme( folia::Document *,
110  std::string&, int& ) const = 0;
111  virtual Compound::Type compound() const { return Compound::Type::NONE; };
112  virtual Compound::Type getCompoundType() { return compound(); };
113  CLEX::Type tag() const { return cls; };
114  void setTag( CLEX::Type t ) { cls = t; };
115  std::vector<CLEX::Type> RightHand;
116  protected:
120  TiCC::LogStream& myLog;
121 };
122 
123 class BracketLeaf: public BaseBracket {
124 public:
125  BracketLeaf( const RulePart&, int, TiCC::LogStream& );
126  BracketLeaf( CLEX::Type, const icu::UnicodeString&, int, TiCC::LogStream& );
127  BracketLeaf *clone() const;
128  icu::UnicodeString put( bool = true ) const;
129  icu::UnicodeString pretty_put() const;
130  icu::UnicodeString morpheme() const {
132  return morph;
133  };
134  std::string inflection() const {
136  return inflect;
137  };
138  std::string original() const {
140  return orig;
141  };
142  int infixpos() const {
144  return ifpos;
145  };
146  bool isglue() const {
148  return glue;
149  };
150  folia::Morpheme *createMorpheme( folia::Document * ) const;
151  folia::Morpheme *createMorpheme( folia::Document *,
152  std::string&, int& ) const;
153 private:
154  int ifpos;
155  bool glue;
156  icu::UnicodeString morph;
157  std::string orig;
158  std::string inflect;
159 };
160 
161 class BracketNest: public BaseBracket {
162  public:
163  BracketNest( CLEX::Type, Compound::Type, int, TiCC::LogStream& );
165  BracketNest *clone() const;
166  ~BracketNest();
167  bool isNested() { return true; };
168  void clearEmptyNodes();
169  icu::UnicodeString put( bool = true ) const;
170  icu::UnicodeString pretty_put() const;
171  bool testMatch( std::list<BaseBracket*>& result,
172  const std::list<BaseBracket*>::iterator& rpos,
173  std::list<BaseBracket*>::iterator& bpos );
174  std::list<BaseBracket*>::iterator glue( std::list<BaseBracket*>&,
175  const std::list<BaseBracket*>::iterator& );
176  std::list<BaseBracket*>::iterator resolveAffix( std::list<BaseBracket*>&,
177  const std::list<BaseBracket*>::iterator& );
178  void resolveGlue();
179  void resolveNouns();
180  void resolveLead();
181  void resolveTail();
182  void resolveMiddle();
185  folia::Morpheme *createMorpheme( folia::Document * ) const;
186  folia::Morpheme *createMorpheme( folia::Document *,
187  std::string&, int& ) const;
188  std::list<BaseBracket *> parts;
189  Compound::Type compound() const { return _compound; };
190  private:
191  Compound::Type _compound;
192 };
193 
194 std::string toString( const Compound::Type& );
195 std::ostream& operator<<( std::ostream&, const Status& );
196 std::ostream& operator<<( std::ostream&, const Compound::Type& );
197 std::ostream& operator<<( std::ostream&, const BaseBracket& );
198 std::ostream& operator<<( std::ostream&, const BaseBracket * );
199 
200 #endif // MBMA_BRACKETS_H
BracketNest::glue
std::list< BaseBracket * >::iterator glue(std::list< BaseBracket * > &, const std::list< BaseBracket * >::iterator &)
Definition: mbma_brackets.cxx:1198
BracketLeaf::createMorpheme
folia::Morpheme * createMorpheme(folia::Document *) const
Definition: mbma_brackets.cxx:805
BracketNest::clearEmptyNodes
void clearEmptyNodes()
Definition: mbma_brackets.cxx:1374
BracketLeaf::isglue
bool isglue() const
Definition: mbma_brackets.h:146
BracketNest::getCompoundType
Compound::Type getCompoundType()
Definition: mbma_brackets.cxx:591
BaseBracket::resolveLead
virtual void resolveLead()
Definition: mbma_brackets.h:104
BracketNest::resolveNouns
void resolveNouns()
Definition: mbma_brackets.cxx:1154
Compound::PA
@ PA
Definition: mbma_brackets.h:62
Compound::VP
@ VP
Definition: mbma_brackets.h:63
BaseBracket::tag
CLEX::Type tag() const
Definition: mbma_brackets.h:113
BaseBracket::isNested
virtual bool isNested()
Definition: mbma_brackets.h:102
BracketNest::getFinalTag
CLEX::Type getFinalTag()
Definition: mbma_brackets.cxx:1409
BaseBracket::infixpos
virtual int infixpos() const
Definition: mbma_brackets.h:97
operator<<
std::ostream & operator<<(std::ostream &, const Status &)
Definition: mbma_brackets.cxx:236
Compound::NONE
@ NONE
Definition: mbma_brackets.h:58
BracketNest::clone
BracketNest * clone() const
Definition: mbma_brackets.cxx:338
Compound::BB
@ BB
Definition: mbma_brackets.h:61
Compound::AN
@ AN
Definition: mbma_brackets.h:60
Compound::NP
@ NP
Definition: mbma_brackets.h:59
Compound::AP
@ AP
Definition: mbma_brackets.h:60
BracketNest::resolveMiddle
void resolveMiddle()
Definition: mbma_brackets.cxx:1346
BaseBracket::original
virtual std::string original() const
Definition: mbma_brackets.h:96
BaseBracket::status
Status status() const
Definition: mbma_brackets.h:92
Compound::NN
@ NN
Definition: mbma_brackets.h:59
BracketNest::resolveAffix
std::list< BaseBracket * >::iterator resolveAffix(std::list< BaseBracket * > &, const std::list< BaseBracket * >::iterator &)
Definition: mbma_brackets.cxx:1103
BaseBracket::isglue
virtual bool isglue() const
Definition: mbma_brackets.h:98
BracketNest::parts
std::list< BaseBracket * > parts
Definition: mbma_brackets.h:188
Compound::PN
@ PN
Definition: mbma_brackets.h:62
BaseBracket::setTag
void setTag(CLEX::Type t)
Definition: mbma_brackets.h:114
Compound::NA
@ NA
Definition: mbma_brackets.h:59
BracketNest::put
icu::UnicodeString put(bool=true) const
Definition: mbma_brackets.cxx:437
BaseBracket::BaseBracket
BaseBracket(CLEX::Type t, int flag, TiCC::LogStream &l)
Definition: mbma_brackets.h:84
BaseBracket::append
virtual BaseBracket * append(BaseBracket *)
Definition: mbma_brackets.h:101
BaseBracket
Definition: mbma_brackets.h:74
Compound::BN
@ BN
Definition: mbma_brackets.h:61
clex.h
BracketNest::~BracketNest
~BracketNest()
Definition: mbma_brackets.cxx:353
CLEX::Type
Type
all possible CELEX tags and action properties
Definition: clex.h:41
Compound::VN
@ VN
Definition: mbma_brackets.h:63
BracketLeaf
Definition: mbma_brackets.h:123
BracketLeaf::inflection
std::string inflection() const
Definition: mbma_brackets.h:134
RulePart
Definition: mbma_rule.h:46
BracketNest::testMatch
bool testMatch(std::list< BaseBracket * > &result, const std::list< BaseBracket * >::iterator &rpos, std::list< BaseBracket * >::iterator &bpos)
Definition: mbma_brackets.cxx:495
Compound::PP
@ PP
Definition: mbma_brackets.h:62
Compound
Definition: mbma_brackets.h:55
STEM
@ STEM
The structure describes the Stem.
Definition: mbma_brackets.h:46
BracketNest::createMorpheme
folia::Morpheme * createMorpheme(folia::Document *) const
Definition: mbma_brackets.cxx:1007
Status
Status
The state of the MBMA structure.
Definition: mbma_brackets.h:42
Compound::VV
@ VV
Definition: mbma_brackets.h:63
Compound::PB
@ PB
Definition: mbma_brackets.h:62
Compound::AV
@ AV
Definition: mbma_brackets.h:60
BracketLeaf::original
std::string original() const
Definition: mbma_brackets.h:138
Compound::NB
@ NB
Definition: mbma_brackets.h:59
DERIVATIONAL
@ DERIVATIONAL
The structure describes a Derivational rule.
Definition: mbma_brackets.h:49
BaseBracket::debugFlag
int debugFlag
Definition: mbma_brackets.h:119
Compound::NNN
@ NNN
Definition: mbma_brackets.h:64
BracketNest::compound
Compound::Type compound() const
Definition: mbma_brackets.h:189
BracketNest::isNested
bool isNested()
Definition: mbma_brackets.h:167
BaseBracket::RightHand
std::vector< CLEX::Type > RightHand
Definition: mbma_brackets.h:114
BaseBracket::set_status
void set_status(const Status s)
Definition: mbma_brackets.h:93
BaseBracket::put
virtual icu::UnicodeString put(bool=true) const
Definition: mbma_brackets.cxx:359
PARTICLE
@ PARTICLE
The structure describes a Particle.
Definition: mbma_brackets.h:44
BaseBracket::resolveMiddle
virtual void resolveMiddle()
Definition: mbma_brackets.h:106
Compound::VB
@ VB
Definition: mbma_brackets.h:63
BaseBracket::~BaseBracket
virtual ~BaseBracket()
Definition: mbma_brackets.h:90
FAILED
@ FAILED
The structure describes a Failed state.
Definition: mbma_brackets.h:50
BaseBracket::cls
CLEX::Type cls
Definition: mbma_brackets.h:117
BracketLeaf::morpheme
icu::UnicodeString morpheme() const
Definition: mbma_brackets.h:130
INFO
@ INFO
The structure contains additional Information.
Definition: mbma_brackets.h:43
BracketLeaf::BracketLeaf
BracketLeaf(const RulePart &, int, TiCC::LogStream &)
Definition: mbma_brackets.cxx:242
BracketNest::pretty_put
icu::UnicodeString pretty_put() const
Definition: mbma_brackets.cxx:458
BracketNest::resolveTail
void resolveTail()
Definition: mbma_brackets.cxx:1313
BracketNest::resolveLead
void resolveLead()
Definition: mbma_brackets.cxx:1287
Compound::AA
@ AA
Definition: mbma_brackets.h:60
Compound::BV
@ BV
Definition: mbma_brackets.h:61
BaseBracket::clearEmptyNodes
virtual void clearEmptyNodes()
Definition: mbma_brackets.h:107
BaseBracket::getCompoundType
virtual Compound::Type getCompoundType()
Definition: mbma_brackets.h:112
BaseBracket::morpheme
virtual icu::UnicodeString morpheme() const
Definition: mbma_brackets.h:94
BracketNest
Definition: mbma_brackets.h:161
BracketNest::append
BaseBracket * append(BaseBracket *)
Definition: mbma_brackets.cxx:347
BracketLeaf::clone
BracketLeaf * clone() const
Definition: mbma_brackets.cxx:316
toString
std::string toString(const Compound::Type &)
Definition: mbma_brackets.cxx:52
folia
Definition: mbma_brackets.h:67
BaseBracket::inflection
virtual std::string inflection() const
Definition: mbma_brackets.h:95
Compound::NVN
@ NVN
Definition: mbma_brackets.h:64
BracketLeaf::infixpos
int infixpos() const
Definition: mbma_brackets.h:142
Compound::VA
@ VA
Definition: mbma_brackets.h:63
BaseBracket::compound
virtual Compound::Type compound() const
Definition: mbma_brackets.h:111
BracketLeaf::put
icu::UnicodeString put(bool=true) const
Definition: mbma_brackets.cxx:377
Compound::BA
@ BA
Definition: mbma_brackets.h:61
Compound::NV
@ NV
Definition: mbma_brackets.h:59
BaseBracket::resolveTail
virtual void resolveTail()
Definition: mbma_brackets.h:105
Compound::AB
@ AB
Definition: mbma_brackets.h:60
BaseBracket::clone
virtual BaseBracket * clone() const =0
BracketNest::resolveGlue
void resolveGlue()
Definition: mbma_brackets.cxx:1270
COMPLEX
@ COMPLEX
The structure describes a Complex Rule.
Definition: mbma_brackets.h:47
Compound::PV
@ PV
Definition: mbma_brackets.h:62
BaseBracket::myLog
TiCC::LogStream & myLog
Definition: mbma_brackets.h:120
Compound::Type
Type
Definition: mbma_brackets.h:56
PARTICIPLE
@ PARTICIPLE
The structure describes a Participle.
Definition: mbma_brackets.h:45
BaseBracket::_status
Status _status
Definition: mbma_brackets.h:118
BaseBracket::createMorpheme
virtual folia::Morpheme * createMorpheme(folia::Document *) const =0
BaseBracket::resolveGlue
virtual void resolveGlue()
Definition: mbma_brackets.h:103
BaseBracket::BaseBracket
BaseBracket(CLEX::Type t, const std::vector< CLEX::Type > &R, int flag, TiCC::LogStream &l)
Definition: mbma_brackets.h:76
BracketNest::BracketNest
BracketNest(CLEX::Type, Compound::Type, int, TiCC::LogStream &)
Definition: mbma_brackets.cxx:321
BaseBracket::pretty_put
virtual icu::UnicodeString pretty_put() const
Definition: mbma_brackets.cxx:369
BracketLeaf::pretty_put
icu::UnicodeString pretty_put() const
Definition: mbma_brackets.cxx:402
Compound::BP
@ BP
Definition: mbma_brackets.h:61
INFLECTION
@ INFLECTION
The structure describes an Inflection Rule.
Definition: mbma_brackets.h:48