Frog
FrogData.h
Go to the documentation of this file.
1 /* ex: set tabstop=8 expandtab: */
2 /*
3  Copyright (c) 2006 - 2020
4  CLST - Radboud University
5  ILK - Tilburg University
6 
7  This file is part of frog:
8 
9  A Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for
10  several languages
11 
12  frog is free software; you can redistribute it and/or modify
13  it under the terms of the GNU General Public License as published by
14  the Free Software Foundation; either version 3 of the License, or
15  (at your option) any later version.
16 
17  frog is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program. If not, see <http://www.gnu.org/licenses/>.
24 
25  For questions and suggestions, see:
26  https://github.com/LanguageMachines/frog/issues
27  or send mail to:
28  lamasoftware (at ) science.ru.nl
29 
30 */
31 
32 #ifndef FROGDATA_H
33 #define FROGDATA_H
34 
35 #include <string>
36 #include <vector>
37 #include <map>
38 #include <set>
39 #include "ticcutils/json.hpp"
40 
41 class BaseBracket;
42 namespace Tokenizer {
43  class Token;
44 }
45 
47 class frog_record {
48  public:
49  frog_record();
50  ~frog_record();
51  nlohmann::json to_json() const;
52  std::string word;
53  std::string clean_word;
54  std::string token_class;
55  std::string language;
56  bool no_space;
58  std::string tag;
59  double tag_confidence;
60  std::string iob_tag;
61  double iob_confidence;
62  std::string ner_tag;
63  double ner_confidence;
64  std::vector<std::string> lemmas;
65  std::vector<std::vector<std::string>> morphs;
66  std::vector<const BaseBracket*> deep_morphs;
67  std::string compound_string;
68  std::string morph_string;
69  std::string deep_morph_string;
71  std::string parse_role;
72  std::set<size_t> parts;
73 };
74 
76 class frog_data {
77  friend frog_data extract_fd( std::vector<Tokenizer::Token>& );
78  public:
79  size_t size() const { return units.size(); };
80  bool empty() const { return units.size() == 0; };
81  void resolve_mwus();
82  void append( const frog_record& );
83  std::string get_language() const;
84  std::string sentence( bool = false ) const;
85  std::vector<frog_record> units;
86  std::vector<frog_record> mw_units;
87  std::map<size_t,size_t> mwus;
88 };
89 
90 std::ostream& operator<<( std::ostream& os, const frog_record& fr);
91 std::ostream& operator<<( std::ostream& os, const frog_data& fd);
92 
93 #endif
frog_data::extract_fd
friend frog_data extract_fd(std::vector< Tokenizer::Token > &)
frog_data
a data structure to hold all frogged information of one sentence
Definition: FrogData.h:76
frog_record::lemmas
std::vector< std::string > lemmas
a list of possible lemmas
Definition: FrogData.h:64
frog_record::compound_string
std::string compound_string
string representation of first compound
Definition: FrogData.h:67
frog_record::iob_confidence
double iob_confidence
the confidence of the IOB tag
Definition: FrogData.h:61
frog_data::sentence
std::string sentence(bool=false) const
Definition: FrogData.cxx:232
frog_record::tag
std::string tag
the assigned POS tag
Definition: FrogData.h:58
frog_record::word
std::string word
the word in UTF8
Definition: FrogData.h:52
frog_record::no_space
bool no_space
was there a space after the word?
Definition: FrogData.h:56
frog_record::language
std::string language
the detected language of the word
Definition: FrogData.h:55
frog_record::frog_record
frog_record()
default constructor
Definition: FrogData.cxx:44
frog_record::tag_confidence
double tag_confidence
the confidence of the POS tag
Definition: FrogData.h:59
BaseBracket
Definition: mbma_brackets.h:74
frog_record::parse_role
std::string parse_role
role of the dependency
Definition: FrogData.h:71
frog_record::clean_word
std::string clean_word
lowercased word (MBMA only)
Definition: FrogData.h:53
frog_data::append
void append(const frog_record &)
Definition: FrogData.cxx:292
frog_record
a simple data structure to hold all frogged information of one word
Definition: FrogData.h:47
frog_record::morph_string
std::string morph_string
string representation of first morph
Definition: FrogData.h:68
frog_record::to_json
nlohmann::json to_json() const
Definition: FrogData.cxx:63
frog_data::empty
bool empty() const
Definition: FrogData.h:80
frog_record::ner_tag
std::string ner_tag
the assigned NER tag
Definition: FrogData.h:62
frog_record::parse_index
int parse_index
label of the dependency
Definition: FrogData.h:70
Tokenizer
Definition: FrogData.h:42
frog_record::parts
std::set< size_t > parts
set of indices an MWU is made of (MWU only)
Definition: FrogData.h:72
frog_record::token_class
std::string token_class
the assigned token class of the word
Definition: FrogData.h:54
frog_data::units
std::vector< frog_record > units
the records that make up the sentence
Definition: FrogData.h:85
frog_record::~frog_record
~frog_record()
default destructor
Definition: FrogData.cxx:57
frog_data::mwus
std::map< size_t, size_t > mwus
Definition: FrogData.h:87
frog_data::resolve_mwus
void resolve_mwus()
Definition: FrogData.cxx:250
frog_data::get_language
std::string get_language() const
Definition: FrogData.cxx:300
frog_record::new_paragraph
bool new_paragraph
did the tokenizer detect a paragraph here?
Definition: FrogData.h:57
operator<<
std::ostream & operator<<(std::ostream &os, const frog_record &fr)
Definition: FrogData.cxx:123
frog_record::iob_tag
std::string iob_tag
the assigned IOB tag
Definition: FrogData.h:60
frog_data::size
size_t size() const
Definition: FrogData.h:79
frog_data::mw_units
std::vector< frog_record > mw_units
the MWU records that make up the sentence
Definition: FrogData.h:86
frog_record::ner_confidence
double ner_confidence
the confidence of the NER tag
Definition: FrogData.h:63
frog_record::morphs
std::vector< std::vector< std::string > > morphs
the morph analysis
Definition: FrogData.h:65
frog_record::deep_morphs
std::vector< const BaseBracket * > deep_morphs
pointers to the deep morphemes
Definition: FrogData.h:66
frog_record::deep_morph_string
std::string deep_morph_string
string representation of first deep_morph
Definition: FrogData.h:69