Frog
FrogAPI.h
Go to the documentation of this file.
1 /* ex: set tabstop=8 expandtab: */
2 /*
3  Copyright (c) 2006 - 2020
4  CLST - Radboud University
5  ILK - Tilburg University
6 
7  This file is part of frog
8 
9  frog is free software; you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation; either version 3 of the License, or
12  (at your option) any later version.
13 
14  frog is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program. If not, see <http://www.gnu.org/licenses/>.
21 
22  For questions and suggestions, see:
23  https://github.com/LanguageMachines/frog/issues
24  or send mail to:
25  lamasoftware (at ) science.ru.nl
26 */
27 
28 
29 #ifndef FROGAPI_H
30 #define FROGAPI_H
31 
32 #include <vector>
33 #include <string>
34 #include <iostream>
35 
36 #include "timbl/TimblAPI.h"
37 
38 #include "ticcutils/Configuration.h"
39 #include "ticcutils/LogStream.h"
40 #include "ticcutils/FdStream.h"
41 #include "ticcutils/ServerBase.h"
42 
43 #include "libfolia/folia.h"
44 #include "ucto/tokenize.h"
45 
46 #include "frog/Frog-util.h"
47 #include "frog/FrogData.h"
48 
49 class UctoTokenizer;
50 class Mbma;
51 class Mblem;
52 class Mwu;
53 class ParserBase;
54 class CGNTagger;
55 class IOBTagger;
56 class NERTagger;
57 
// FrogOptions: holds the runtime settings for Frog.
// The per-member notes below are taken from the generated member index
// that accompanies this listing.
// NOTE(review): that index also lists doSentencePerLine, doQuoteDetection,
// doAlpinoServer, numThreads and correct_words as members of this class,
// but their declarations are absent from this listing (the listing line
// numbers jump at those places) - confirm against the real header.
59 class FrogOptions {
60  public:
61  bool doTok;        // should we run the tokenizer?
62  bool doLemma;      // should we run the lemmatizer?
63  bool doMorph;      // should we run the morphological analyzer?
64  bool doDeepMorph;  // do we want a deep morphological analysis?
65  bool doMwu;        // should we resolve Multi Word Units?
66  bool doIOB;        // should we run the IOB chunker?
67  bool doNER;        // should we run the Named Entity recognizer?
68  bool doParse;      // should we run the Dependency Parser?
69  bool doTagger;     // NOTE(review): the index repeats the doParse text here; presumably this enables the tagger - confirm
71 
75 
78  bool doRetry;      // set retry mode (flagged as a MISNOMER in the index)
79 
82  bool noStdOut;     // index: "do we want output to stdout?" - polarity is not visible here, confirm
83  bool doXMLin;      // do we have FoLiA input?
84  bool doXMLout;     // do we want FoLiA output?
85  bool doJSONin;     // do we have JSON input?
86 
89  bool doJSONout;    // do we want JSON output?
90  bool doServer;     // do we want to run as a server?
91 
93  bool doKanon;      // do we want FoLiA to be output in a canonical way?
94 
97  bool test_API;     // do we want to run some tests?
98 
101  bool hide_timers; // index: "should we output timing information?" - polarity is not visible here, confirm
102 
105  bool interactive; // are we running from the command line?
107 
110  bool doAlpino;    // should we directly run Alpino?
111 
115  int debugFlag;    // value for the generic debug level
116 
119  int JSON_pp;      // for JSON output, use this value to format
120 
124  std::string encoding;  // which input-encoding do we expect
125 
128  std::string uttmark;           // the string which separates Utterances
129  std::string listenport;        // the port to run the Frog Server on
130  std::string docid;             // the FoLiA document ID on output
131  std::string inputclass;        // the textclass to use on FoLiA input
132  std::string outputclass;       // the textclass to use on FoLiA output
133  std::string default_language;  // what is our default language
134  std::set<std::string> languages;  // all languages to take into account
135  /*< This set of languages will be handed over to the ucto tokenizer
136  */
137  std::string textredundancy;    // determines how much text is added in the FoLiA
138 
151 
154  unsigned int maxParserTokens;  // limit the number of words to Parse
155  /*< The Parser may 'explode' on VERY long sentences. So we limit it to a
156 maximum of 500 words PER SENTENCE. Which is already a lot!
157  */
158  std::string command;           // stores the original command that invoked Frog
159 
160  FrogOptions();
161  private:
     // Copy constructor is declared in the private section, so code outside
     // the class cannot copy a FrogOptions (presumably deliberate - confirm).
162  FrogOptions( const FrogOptions & );
163 };
164 
167 class FrogAPI {
168  public:
170  const TiCC::Configuration&,
171  TiCC::LogStream *,
172  TiCC::LogStream * );
173  ~FrogAPI();
174  static std::string defaultConfigDir( const std::string& ="" );
175  static std::string defaultConfigFile( const std::string& ="" );
176  folia::Document *FrogFile( const std::string&, std::ostream& );
177  void FrogServer( Sockets::ClientSocket &conn );
178  void FrogInteractive();
179  frog_data frog_sentence( std::vector<Tokenizer::Token>&,
180  const size_t );
181  std::string Frogtostring( const std::string& );
182  std::string Frogtostringfromfile( const std::string& );
183  void run_api_tests( const std::string&, std::ostream& );
184 
185  private:
186  folia::Document *run_folia_engine( const std::string&,
187  std::ostream& );
188  folia::Document *run_text_engine( const std::string&,
189  std::ostream& );
190  folia::FoliaElement* start_document( const std::string&,
191  folia::Document *& ) const;
192  folia::FoliaElement *append_to_folia( folia::FoliaElement *,
193  const frog_data&,
194  unsigned int& ) const;
195  void add_ner_result( folia::Sentence *,
196  const frog_data&,
197  const std::vector<folia::Word*>& ) const;
198  void add_iob_result( folia::Sentence *,
199  const frog_data&,
200  const std::vector<folia::Word*>& ) const;
201  void add_mwu_result( folia::Sentence *,
202  const frog_data&,
203  const std::vector<folia::Word*>& ) const;
204  void add_parse_result( folia::Sentence *,
205  const frog_data&,
206  const std::vector<folia::Word*>& ) const;
207  folia::processor *add_provenance( folia::Document& ) const;
208  void test_version( const std::string&, double );
209  // functions
210  void FrogStdin( bool prompt );
211  void output_tabbed( std::ostream&,
212  const frog_record& ) const;
213  void output_JSON( std::ostream& os,
214  const frog_data& fd,
215  int = 0 ) const;
216  void show_results( std::ostream&,
217  const frog_data& ) const;
218  void handle_one_paragraph( std::ostream&,
219  folia::Paragraph*,
220  int& );
221  void handle_one_text_parent( std::ostream&,
222  folia::FoliaElement *e,
223  int& );
224  void handle_one_sentence( std::ostream&,
225  folia::Sentence *,
226  const size_t );
227  void append_to_sentence( folia::Sentence *, const frog_data& ) const;
228  void append_to_words( const std::vector<folia::Word*>&,
229  const frog_data& ) const;
230  void handle_word_vector( std::ostream&,
231  const std::vector<folia::Word*>&,
232  const size_t );
233  // data
234  const TiCC::Configuration& configuration;
235  FrogOptions& options;
236  TiCC::LogStream *theErrLog;
237  TiCC::LogStream *theDbgLog;
238  TimerBlock timers;
239  Mbma *myMbma;
240  Mblem *myMblem;
241  Mwu *myMwu;
242  ParserBase *myParser;
243  CGNTagger *myCGNTagger;
244  IOBTagger *myIOBTagger;
245  NERTagger *myNERTagger;
246  UctoTokenizer *tokenizer;
247 };
248 
// Free helper declarations (defined elsewhere). Both take a folia::Word
// pointer and return a vector of strings; make_flat defaults to false.
// NOTE(review): the exact format of the returned strings is not visible
// in this header.
249 std::vector<std::string> get_full_morph_analysis( folia::Word *word,
250  bool make_flat=false );
251 std::vector<std::string> get_compound_analysis( folia::Word *word );
252 
253 #endif
CGNTagger
Definition: cgn_tagger_mod.h:38
FrogAPI::Frogtostringfromfile
std::string Frogtostringfromfile(const std::string &)
frog_data
a datastructure to hold all frogged information of one Sentence
Definition: FrogData.h:76
FrogOptions::encoding
std::string encoding
which input-encoding do we expect
Definition: FrogAPI.h:124
FrogOptions::doMwu
bool doMwu
should we resolve Multi Word Units?
Definition: FrogAPI.h:65
ParserBase
Definition: Parser.h:51
FrogOptions::correct_words
bool correct_words
should we allow the tokenizer to correct words?
Definition: FrogAPI.h:150
get_full_morph_analysis
std::vector< std::string > get_full_morph_analysis(folia::Word *word, bool make_flat=false)
UctoTokenizer
Definition: ucto_tokenizer_mod.h:36
FrogOptions::textredundancy
std::string textredundancy
determines how much text is added in the FoLiA
Definition: FrogAPI.h:137
FrogOptions::doQuoteDetection
bool doQuoteDetection
enable quote detection (NOT USED)
Definition: FrogAPI.h:74
FrogOptions::test_API
bool test_API
do we want to run some tests?
Definition: FrogAPI.h:97
Mbma
Definition: mbma_mod.h:53
FrogOptions::docid
std::string docid
the FoLiA document ID on output.
Definition: FrogAPI.h:130
Frog-util.h
FrogOptions::doAlpino
bool doAlpino
should we directly run Alpino?
Definition: FrogAPI.h:110
FrogAPI::FrogServer
void FrogServer(Sockets::ClientSocket &conn)
FrogOptions::doSentencePerLine
bool doSentencePerLine
do we want a sentence per line?
Definition: FrogAPI.h:70
FrogOptions::doXMLin
bool doXMLin
do we have FoLiA input?
Definition: FrogAPI.h:83
FrogOptions::hide_timers
bool hide_timers
should we output timing information?
Definition: FrogAPI.h:101
FrogOptions::languages
std::set< std::string > languages
all languages to take into account
Definition: FrogAPI.h:134
FrogOptions
this class holds the runtime settings for Frog
Definition: FrogAPI.h:59
frog_record
a simple datastructure to hold all frogged information of one word
Definition: FrogData.h:47
FrogOptions::doJSONin
bool doJSONin
do we have JSON input?
Definition: FrogAPI.h:85
FrogOptions::doLemma
bool doLemma
should we run the lemmatizer?
Definition: FrogAPI.h:62
FrogAPI
Definition: FrogAPI.h:167
FrogOptions::FrogOptions
FrogOptions()
Definition: FrogAPI.cxx:128
FrogAPI::FrogFile
folia::Document * FrogFile(const std::string &, std::ostream &)
FrogOptions::doTagger
bool doTagger
should we run the tagger?
Definition: FrogAPI.h:69
FrogOptions::doKanon
bool doKanon
do we want FoLiA to be output in a canonical way?
Definition: FrogAPI.h:93
FrogAPI::FrogInteractive
void FrogInteractive()
get_compound_analysis
std::vector< std::string > get_compound_analysis(folia::Word *word)
FrogOptions::interactive
bool interactive
are we running from the command line?
Definition: FrogAPI.h:105
FrogOptions::doRetry
bool doRetry
set retry mode (MISNOMER)
Definition: FrogAPI.h:78
FrogOptions::doServer
bool doServer
do we want to run as a server?
Definition: FrogAPI.h:90
FrogAPI::Frogtostring
std::string Frogtostring(const std::string &)
FrogOptions::uttmark
std::string uttmark
the string which separates Utterances
Definition: FrogAPI.h:128
FrogOptions::doJSONout
bool doJSONout
do we want JSON output?
Definition: FrogAPI.h:89
Mwu
Definition: mwu_chunker_mod.h:68
FrogOptions::JSON_pp
int JSON_pp
for JSON output, use this value to format.
Definition: FrogAPI.h:119
FrogOptions::listenport
std::string listenport
determines the port to run the Frog Server on
Definition: FrogAPI.h:129
FrogAPI::defaultConfigFile
static std::string defaultConfigFile(const std::string &="")
Definition: FrogAPI.cxx:117
FrogOptions::noStdOut
bool noStdOut
do we want output to stdout?
Definition: FrogAPI.h:82
FrogOptions::command
std::string command
stores the original command that invoked Frog
Definition: FrogAPI.h:158
FrogOptions::doAlpinoServer
bool doAlpinoServer
should we try to connect to an Alpino server?
Definition: FrogAPI.h:106
NERTagger
Definition: ner_tagger_mod.h:44
FrogOptions::doTok
bool doTok
should we run the tokenizer?
Definition: FrogAPI.h:61
FrogAPI::~FrogAPI
~FrogAPI()
FrogAPI::defaultConfigDir
static std::string defaultConfigDir(const std::string &="")
Definition: FrogAPI.cxx:101
FrogOptions::numThreads
int numThreads
limit for the number of threads
Definition: FrogAPI.h:114
FrogOptions::doXMLout
bool doXMLout
do we want FoLiA output?
Definition: FrogAPI.h:84
FrogOptions::doMorph
bool doMorph
should we run the morphological analyzer?
Definition: FrogAPI.h:63
FrogOptions::doNER
bool doNER
should we run the Named Entity recognizer?
Definition: FrogAPI.h:67
FrogAPI::frog_sentence
frog_data frog_sentence(std::vector< Tokenizer::Token > &, const size_t)
FrogOptions::doIOB
bool doIOB
should we run the IOB Chunker?
Definition: FrogAPI.h:66
FrogOptions::debugFlag
int debugFlag
value for the generic debug level
Definition: FrogAPI.h:115
FrogData.h
Mblem
Definition: mblem_mod.h:54
FrogOptions::maxParserTokens
unsigned int maxParserTokens
limit the number of words to Parse
Definition: FrogAPI.h:154
FrogAPI::FrogAPI
FrogAPI(FrogOptions &, const TiCC::Configuration &, TiCC::LogStream *, TiCC::LogStream *)
FrogOptions::inputclass
std::string inputclass
the textclass to use on FoLiA input
Definition: FrogAPI.h:131
FrogOptions::doDeepMorph
bool doDeepMorph
do we want a deep morphological analysis?
Definition: FrogAPI.h:64
FrogAPI::run_api_tests
void run_api_tests(const std::string &, std::ostream &)
FrogOptions::doParse
bool doParse
should we run the Dependency Parser?
Definition: FrogAPI.h:68
TimerBlock
Definition: Frog-util.h:51
IOBTagger
Definition: iob_tagger_mod.h:37
FrogOptions::outputclass
std::string outputclass
the textclass to use on FoLiA output
Definition: FrogAPI.h:132
FrogOptions::default_language
std::string default_language
what is our default language
Definition: FrogAPI.h:133