Import Upstream version 3.8.2
Tino Didriksen
1 year, 9 months ago
137 | 137 | unigram_tagger.cc \ |
138 | 138 | xml_reader.cc |
139 | 139 | |
140 | library_includedir = $(includedir)/$(PACKAGE_NAME)-$(VERSION_API)/$(PACKAGE_NAME) | |
140 | if WINDOWS | |
141 | cc_sources += win32/regex.c | |
142 | endif | |
143 | ||
144 | library_includedir = $(includedir)/$(PACKAGE_NAME) | |
141 | 145 | library_include_HEADERS = $(h_sources) |
142 | 146 | |
143 | 147 | GENERATEDSCRIPTS = apertium-gen-deformat apertium-gen-reformat \ |
149 | 153 | apertium-validate-acx \ |
150 | 154 | apertium-utils-fixlatex |
151 | 155 | |
152 | lib_LTLIBRARIES = libapertium3.la | |
153 | libapertium3_la_SOURCES = $(h_sources) $(cc_sources) | |
154 | libapertium3_la_LDFLAGS = -version-info $(SOVERSION) -release $(VERSION_API) | |
156 | lib_LTLIBRARIES = libapertium.la | |
157 | libapertium_la_SOURCES = $(h_sources) $(cc_sources) | |
158 | libapertium_la_LDFLAGS = -version-info $(VERSION_ABI) | |
155 | 159 | |
156 | 160 | bin_PROGRAMS = apertium-cleanstream \ |
157 | 161 | apertium-deshtml \ |
206 | 210 | instdir = apertium |
207 | 211 | |
208 | 212 | apertiumdir = $(prefix)/share/apertium |
209 | apertiuminclude = $(prefix)/include/apertium-$(VERSION_API) | |
213 | apertiuminclude = $(prefix)/include | |
210 | 214 | apertiumlib = $(prefix)/lib |
211 | 215 | apertiumsysconf = $(prefix)/etc/apertium |
212 | 216 | |
215 | 219 | tagger.dtd interchunk.dtd format.dtd transfer.dtd postchunk.dtd modes.dtd \ |
216 | 220 | tagger.rnc interchunk.rnc format.rnc transfer.rnc postchunk.rnc modes.rnc |
217 | 221 | |
222 | LDADD = -lapertium $(lib_LTLIBRARIES) | |
223 | ||
218 | 224 | apertium_cleanstream_SOURCES = apertium_cleanstream.cc |
219 | 225 | apertium_pretransfer_SOURCES = apertium_pretransfer.cc |
220 | apertium_pretransfer_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
221 | 226 | apertium_posttransfer_SOURCES = apertium_posttransfer.cc |
222 | apertium_posttransfer_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
223 | 227 | apertium_multiple_translations_SOURCES = apertium-multiple-translations.cc |
224 | apertium_multiple_translations_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
225 | 228 | apertium_destxt_SOURCES = apertium_destxt.cc |
226 | 229 | apertium_retxt_SOURCES = apertium_retxt.cc |
227 | 230 | apertium_deshtml_SOURCES = apertium_deshtml.cc |
247 | 250 | apertium_desmediawiki_SOURCES = apertium_desmediawiki.cc |
248 | 251 | apertium_remediawiki_SOURCES = apertium_remediawiki.cc |
249 | 252 | apertium_prelatex_SOURCES = apertium_prelatex.cc |
250 | apertium_prelatex_LDADD= -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
251 | 253 | apertium_postlatex_SOURCES = apertium_postlatex.cc |
252 | apertium_postlatex_LDADD= -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
253 | 254 | apertium_postlatex_raw_SOURCES = apertium_postlatex_raw.cc |
254 | apertium_postlatex_raw_LDADD= -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
255 | 255 | |
256 | 256 | apertium_tagger_SOURCES = apertium_tagger.cc |
257 | apertium_tagger_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
258 | 257 | |
259 | 258 | apertium_perceptron_trace_SOURCES = apertium_perceptron_trace.cc |
260 | apertium_perceptron_trace_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
261 | 259 | |
262 | 260 | apertium_tmxbuild_SOURCES = apertium_tmxbuild.cc |
263 | apertium_tmxbuild_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
264 | 261 | |
265 | 262 | apertium_preprocess_transfer_SOURCES = transferpp.cc |
266 | apertium_preprocess_transfer_LDADD = \ | |
267 | -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
268 | 263 | |
269 | 264 | apertium_filter_ambiguity_SOURCES = apertium_filter_ambiguity.cc |
270 | apertium_filter_ambiguity_LDADD = \ | |
271 | -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
272 | 265 | |
273 | 266 | apertium_transfer_SOURCES = apertium_transfer.cc |
274 | apertium_transfer_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
275 | 267 | |
276 | 268 | apertium_interchunk_SOURCES = apertium_interchunk.cc |
277 | apertium_interchunk_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
278 | 269 | |
279 | 270 | apertium_postchunk_SOURCES = apertium_postchunk.cc |
280 | apertium_postchunk_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
281 | 271 | |
282 | 272 | apertium_tagger_apply_new_rules_SOURCES = apertium_tagger_apply_new_rules.cc |
283 | apertium_tagger_apply_new_rules_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
284 | 273 | |
285 | 274 | apertium_tagger_readwords_SOURCES = apertium_tagger_readwords.cc |
286 | apertium_tagger_readwords_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
287 | 275 | |
288 | 276 | apertium_wblank_attach_SOURCES = wblank-attach.cc |
289 | 277 | apertium_wblank_detach_SOURCES = wblank-detach.cc |
290 | 278 | apertium_wblank_mode_SOURCES = wblank-mode.cc |
291 | 279 | |
292 | 280 | apertium_adapt_docx_SOURCES = adapt_docx.cc |
293 | apertium_adapt_docx_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) $(ICU_LIBS) | |
294 | 281 | |
295 | 282 | apertium_gen_modes_SOURCES = gen_modes.cc |
296 | apertium_gen_modes_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES) | |
297 | 283 | |
298 | 284 | if WINDOWS |
299 | 285 | AM_CPPFLAGS = -I$(top_srcdir)/apertium/win32 -I$(top_srcdir) $(APERTIUM_CFLAGS) $(ICU_CFLAGS) |
4 | 4 | #include <apertium/perceptron_tagger.h> |
5 | 5 | #include <apertium/shell_utils.h> |
6 | 6 | #include <apertium/stream_tagger.h> |
7 | #include <lttoolbox/lt_locale.h> | |
7 | 8 | |
8 | 9 | namespace Apertium { |
9 | 10 | |
103 | 104 | |
104 | 105 | |
105 | 106 | int main(int argc, char* argv[]) { |
107 | LtLocale::tryToSetLocale(); | |
106 | 108 | return Apertium::perceptron_trace(argc, argv); |
107 | 109 | } |
13 | 13 | // along with this program; if not, see <https://www.gnu.org/licenses/>. |
14 | 14 | |
15 | 15 | #include <apertium/tagger.h> |
16 | #include <lttoolbox/lt_locale.h> | |
16 | 17 | |
17 | 18 | #include "getopt_long.h" |
18 | 19 | #include <cerrno> |
29 | 30 | #include <unistd.h> |
30 | 31 | |
31 | 32 | int main(int argc, char **argv) { |
33 | LtLocale::tryToSetLocale(); | |
32 | 34 | try { |
33 | 35 | Apertium::apertium_tagger(argc, argv); |
34 | 36 | } catch (const Apertium::Exception::apertium_tagger::err_Exception &err_Exception_) { |
26 | 26 | #include <apertium/tagger_data_hmm.h> |
27 | 27 | #include <apertium/tsx_reader.h> |
28 | 28 | #include <lttoolbox/string_utils.h> |
29 | #include <lttoolbox/lt_locale.h> | |
29 | 30 | |
30 | 31 | using namespace Apertium; |
31 | 32 | |
55 | 56 | } |
56 | 57 | |
57 | 58 | int main(int argc, char* argv[]) { |
59 | LtLocale::tryToSetLocale(); | |
58 | 60 | string filein=""; |
59 | 61 | string fileout=""; |
60 | 62 | string filetsx=""; |
83 | 83 | |
84 | 84 | |
85 | 85 | int main(int argc, char* argv[]) { |
86 | LtLocale::tryToSetLocale(); | |
86 | 87 | string tsxfile=""; |
87 | 88 | string probfile=""; |
88 | 89 | int corpus_length=-1; |
35 | 35 | void endProgram(char *name) |
36 | 36 | { |
37 | 37 | cout << basename(name) << ": " << endl; |
38 | cout << "USAGE: " << basename(name) << " [-fvh] modes.xml [install_path]" << endl; | |
38 | cout << "USAGE: " << basename(name) << " [-flvh] modes.xml [install_path]" << endl; | |
39 | 39 | cout << " -f, --full: expect absolute installation path" << endl; |
40 | cout << " -l, --local: output to current directory rather than directory of modes.xml" << endl; | |
40 | 41 | cout << " -v, --verbose: print more detailed messages" << endl; |
41 | 42 | cout << " -h, --help: display this help" << endl; |
42 | 43 | exit(EXIT_FAILURE); |
135 | 136 | cmd.debug_suffix.push_back("-tagger"); |
136 | 137 | } else if (starts_with(c, "apertium-pretransfer")) { |
137 | 138 | cmd.debug_suffix.push_back("-pretransfer"); |
139 | } else if (starts_with(c, "apertium-posttransfer")) { | |
140 | cmd.debug_suffix.push_back("-posttransfer"); | |
138 | 141 | } else if (starts_with(c, "lrx-proc")) { |
139 | 142 | cmd.debug_suffix.push_back("-lex"); |
140 | 143 | cmd.debug_suffix.push_back("-lextor"); |
308 | 311 | static struct option long_options[] = |
309 | 312 | { |
310 | 313 | {"full", 0, 0, 'f'}, |
314 | {"local", 0, 0, 'l'}, | |
311 | 315 | {"verbose", 0, 0, 'v'}, |
312 | 316 | {"help", 0, 0, 'h'} |
313 | 317 | }; |
314 | 318 | #endif |
315 | 319 | |
316 | 320 | bool full = false; |
321 | bool local = false; | |
317 | 322 | bool verbose = false; |
318 | 323 | |
319 | 324 | while(true) { |
320 | 325 | #if HAVE_GETOPT_LONG |
321 | 326 | int option_index; |
322 | int c = getopt_long(argc, argv, "fvh", long_options, &option_index); | |
327 | int c = getopt_long(argc, argv, "flvh", long_options, &option_index); | |
323 | 328 | #else |
324 | int c = getopt(argc, argv, "fvh"); | |
329 | int c = getopt(argc, argv, "flvh"); | |
325 | 330 | #endif |
326 | 331 | |
327 | 332 | if (c == -1) { |
331 | 336 | switch(c) { |
332 | 337 | case 'f': |
333 | 338 | full = true; |
339 | break; | |
340 | ||
341 | case 'l': | |
342 | local = true; | |
334 | 343 | break; |
335 | 344 | |
336 | 345 | case 'v': |
362 | 371 | } |
363 | 372 | } |
364 | 373 | |
374 | if (local) { | |
375 | dev_dir = "."; | |
376 | } | |
377 | ||
365 | 378 | fs::create_directories(dev_dir / "modes"); |
366 | 379 | |
367 | 380 | xmlDoc* doc = xmlReadFile(xml_path.string().c_str(), nullptr, 0); |
17 | 17 | mychar = input.get(); |
18 | 18 | if(input.eof()) |
19 | 19 | { |
20 | cerr << "ERROR: Unexpected EOF" << endl; | |
20 | std::cerr << "ERROR: Unexpected EOF" << std::endl; | |
21 | 21 | exit(EXIT_FAILURE); |
22 | 22 | } |
23 | 23 | |
54 | 54 | { |
55 | 55 | if(input.eof()) |
56 | 56 | { |
57 | cerr << "ERROR: Unexpected EOF" << endl; | |
57 | std::cerr << "ERROR: Unexpected EOF" << std::endl; | |
58 | 58 | exit(EXIT_FAILURE); |
59 | 59 | } |
60 | 60 | u_fputc(mychar, output); |
84 | 84 | { |
85 | 85 | if(input.eof()) |
86 | 86 | { |
87 | cerr << "ERROR: Unexpected EOF" << endl; | |
87 | std::cerr << "ERROR: Unexpected EOF" << std::endl; | |
88 | 88 | exit(EXIT_FAILURE); |
89 | 89 | } |
90 | 90 | |
179 | 179 | } |
180 | 180 | else |
181 | 181 | { |
182 | cerr << "ERROR: Wordbound blank isn't immediately followed by the Lexical Unit." << endl; | |
182 | std::cerr << "ERROR: Wordbound blank isn't immediately followed by the Lexical Unit." << std::endl; | |
183 | 183 | exit(EXIT_FAILURE); |
184 | 184 | } |
185 | 185 | } |
15 | 15 | #include "stream.h" |
16 | 16 | |
17 | 17 | #include "exception.h" |
18 | ||
19 | #include <utility> | |
18 | 20 | |
19 | 21 | namespace Apertium { |
20 | 22 | Stream::Stream(TaggerFlags &Flags_) |
17 | 17 | #include "apertium_config.h" |
18 | 18 | |
19 | 19 | #include "align.h" |
20 | #include <lttoolbox/exception.h> | |
20 | 21 | #include "exception.h" |
21 | 22 | #include "linebreak.h" |
22 | 23 | #include "unigram_tagger.h" |
162 | 163 | switch (*TheFunctionType) { |
163 | 164 | case Tagger: |
164 | 165 | if (!TheFunctionTypeType) { |
165 | HMM HiddenMarkovModelTagger_(TheFlags); | |
166 | g_FILE_Tagger(HiddenMarkovModelTagger_); | |
166 | try { | |
167 | PerceptronTagger percep(TheFlags); | |
168 | g_StreamTagger(percep); | |
169 | } catch (DeserialisationException) { | |
170 | HMM HiddenMarkovModelTagger_(TheFlags); | |
171 | g_FILE_Tagger(HiddenMarkovModelTagger_); | |
172 | } | |
167 | 173 | break; |
168 | 174 | } |
169 | 175 | switch (*TheFunctionTypeType) { |
314 | 314 | |
315 | 315 | in_lu = false; |
316 | 316 | |
317 | if(last_lword == 1) { | |
317 | // if word == nullptr then we're outputting a word | |
318 | // inside an npar=0 macro and there's not much we can do | |
319 | if(last_lword == 1 && word != nullptr) { | |
318 | 320 | out_wblank = word[0]->getWblank(); |
319 | 321 | } |
320 | 322 |
120 | 120 | void |
121 | 121 | TRXReader::parse() |
122 | 122 | { |
123 | procDefCats(); | |
124 | step(); | |
125 | while(name == "#text"_u || name == "#comment"_u) | |
126 | { | |
127 | step(); | |
123 | stepToNextTag(); | |
124 | if (name == "transfer"_u || name == "interchunk"_u || name == "postchunk"_u) { | |
125 | stepToNextTag(); | |
126 | } else { | |
127 | unexpectedTag(); | |
128 | } | |
129 | ||
130 | if (name == "section-def-cats"_u) { | |
131 | procDefCats(); | |
132 | stepToNextTag(); | |
133 | } else { | |
134 | unexpectedTag(); | |
128 | 135 | } |
129 | 136 | |
130 | 137 | if(name == "section-def-attrs"_u) |
131 | 138 | { |
132 | 139 | procDefAttrs(); |
133 | step(); | |
134 | while(name == "#text"_u || name == "#comment"_u) | |
135 | { | |
136 | step(); | |
137 | } | |
140 | stepToNextTag(); | |
138 | 141 | } |
139 | 142 | |
140 | 143 | if(name == "section-def-vars"_u) |
141 | 144 | { |
142 | 145 | procDefVars(); |
143 | step(); | |
144 | while(name == "#text"_u || name == "#comment"_u) | |
145 | { | |
146 | step(); | |
147 | } | |
146 | stepToNextTag(); | |
148 | 147 | } |
149 | 148 | |
150 | 149 | if(name == "section-def-lists"_u) |
151 | 150 | { |
152 | 151 | procDefLists(); |
153 | step(); | |
154 | while(name == "#text"_u || name == "#comment"_u) | |
155 | { | |
156 | step(); | |
157 | } | |
152 | stepToNextTag(); | |
158 | 153 | } |
159 | 154 | |
160 | 155 | if(name == "section-def-macros"_u) |
161 | 156 | { |
162 | 157 | procDefMacros(); |
163 | step(); | |
164 | while(name == "#text"_u || name == "#comment"_u) | |
165 | { | |
166 | step(); | |
167 | } | |
158 | stepToNextTag(); | |
168 | 159 | } |
169 | 160 | |
170 | 161 | if(name == "section-rules"_u) |
171 | 162 | { |
172 | 163 | procRules(); |
173 | step(); | |
174 | while(name == "#text"_u || name == "#comment"_u) | |
175 | { | |
176 | step(); | |
177 | } | |
164 | stepToNextTag(); | |
165 | } | |
166 | } | |
167 | ||
168 | void | |
169 | TRXReader::checkClip() | |
170 | { | |
171 | UString part = attrib("part"_u); | |
172 | auto& attrs = td.getAttrItems(); | |
173 | if (part.empty()) { | |
174 | parseError("<clip> missing attribute part"_u); | |
175 | } else if (attrs.find(part) == attrs.end()) { | |
176 | parseError("Undefined attr-item "_u + part); | |
178 | 177 | } |
179 | 178 | } |
180 | 179 | |
204 | 203 | } |
205 | 204 | else |
206 | 205 | { |
207 | for(set<int>::iterator it = alive_states.begin(), limit = alive_states.end(); | |
208 | it != limit; it++) | |
209 | { | |
210 | if(td.seen_rules.find(*it) == td.seen_rules.end()) | |
206 | for (auto& it : alive_states) { | |
207 | if(td.seen_rules.find(it) == td.seen_rules.end()) | |
211 | 208 | { |
212 | 209 | const int symbol = td.countToFinalSymbol(count); |
213 | const int fin = td.getTransducer().insertSingleTransduction(symbol, *it); | |
210 | const int fin = td.getTransducer().insertSingleTransduction(symbol, it); | |
214 | 211 | td.getTransducer().setFinal(fin); |
215 | td.seen_rules[*it] = count; | |
212 | td.seen_rules[it] = count; | |
216 | 213 | } |
217 | 214 | else |
218 | 215 | { |
219 | cerr << "Warning (" << xmlTextReaderGetParserLineNumber(reader); | |
220 | cerr << "): " | |
221 | << "Paths to rule " << count << " blocked by rule " << td.seen_rules[*it] | |
222 | << "." << endl; | |
223 | ||
216 | warnAtLoc(); | |
217 | cerr << "Paths to rule " << count | |
218 | << " blocked by rule " << td.seen_rules[it] | |
219 | << "." << endl; | |
224 | 220 | } |
225 | 221 | } |
226 | 222 | } |
245 | 241 | |
246 | 242 | for(; range.first != range.second; range.first++) |
247 | 243 | { |
248 | for(set<int>::iterator it = alive_states.begin(), limit = alive_states.end(); | |
249 | it != limit; it++) | |
250 | { | |
244 | for (auto& it : alive_states) { | |
251 | 245 | // mark of begin of word |
252 | int tmp = td.getTransducer().insertSingleTransduction('^', *it); | |
253 | if(*it != td.getTransducer().getInitial()) | |
246 | int tmp = td.getTransducer().insertSingleTransduction('^', it); | |
247 | if(it != td.getTransducer().getInitial()) | |
254 | 248 | { |
255 | 249 | // insert optional blank between two words |
256 | int alt = td.getTransducer().insertSingleTransduction(' ', *it); | |
250 | int alt = td.getTransducer().insertSingleTransduction(' ', it); | |
257 | 251 | td.getTransducer().linkStates(alt, tmp, '^'); |
258 | 252 | } |
259 | 253 | |
275 | 269 | } |
276 | 270 | else if(name == "let"_u) |
277 | 271 | { |
278 | int count = 0; | |
279 | 272 | int lineno = xmlTextReaderGetParserLineNumber(reader); |
280 | 273 | while(name != "let"_u || type != XML_READER_TYPE_END_ELEMENT) |
281 | 274 | { |
282 | step(); | |
275 | stepToNextTag(); | |
283 | 276 | if(type == XML_ELEMENT_NODE) |
284 | 277 | { |
285 | count++; | |
286 | ||
287 | if(name == "clip"_u && attrib("side"_u) == "sl"_u) | |
288 | { | |
289 | cerr << "Warning (" << lineno; | |
290 | cerr << "): assignment to 'sl' side has no effect." << endl; | |
278 | if(name == "clip"_u) { | |
279 | checkClip(); | |
280 | if (attrib("side"_u) == "sl"_u) { | |
281 | cerr << "Warning (" << lineno; | |
282 | cerr << "): assignment to 'sl' side has no effect." << endl; | |
283 | } | |
291 | 284 | } |
292 | } | |
293 | ||
294 | if(count != 0) | |
295 | { | |
296 | 285 | break; |
297 | 286 | } |
298 | 287 | } |
299 | 288 | |
289 | } | |
290 | else if(name == "clip"_u) { | |
291 | checkClip(); | |
300 | 292 | } |
301 | 293 | } |
302 | 294 | } |
326 | 318 | while(type != XML_READER_TYPE_END_ELEMENT || |
327 | 319 | name != "section-def-attrs"_u) |
328 | 320 | { |
329 | step(); | |
321 | stepToNextTag(); | |
330 | 322 | if(name == "attr-item"_u) |
331 | 323 | { |
332 | 324 | if(type != XML_READER_TYPE_END_ELEMENT) |
350 | 342 | attrname.clear(); |
351 | 343 | } |
352 | 344 | } |
353 | else if(name == "#text"_u) | |
345 | else if(name == "section-def-attrs"_u) | |
354 | 346 | { |
355 | 347 | // do nothing |
356 | 348 | } |
357 | else if(name == "#comment"_u) | |
358 | { | |
359 | // do nothing | |
360 | } | |
361 | else if(name == "section-def-attrs"_u) | |
362 | { | |
363 | // do nothing | |
364 | } | |
365 | 349 | else |
366 | 350 | { |
367 | 351 | unexpectedTag(); |
372 | 356 | void |
373 | 357 | TRXReader::procDefCats() |
374 | 358 | { |
375 | while(type == XML_READER_TYPE_END_ELEMENT || !(name == "transfer"_u || name == "interchunk"_u || name == "postchunk"_u)) | |
376 | { | |
377 | step(); | |
378 | if(name != "#text"_u && name != "transfer"_u && name != "interchunk"_u && | |
379 | name != "postchunk"_u && name != "section-def-cats"_u && name != "#comment"_u) | |
380 | { | |
381 | unexpectedTag(); | |
382 | } | |
383 | } | |
384 | ||
385 | 359 | UString catname; |
386 | 360 | |
387 | 361 | while(type != XML_READER_TYPE_END_ELEMENT || |
388 | 362 | name != "section-def-cats"_u) |
389 | 363 | { |
390 | step(); | |
364 | stepToNextTag(); | |
391 | 365 | if(name == "cat-item"_u) |
392 | 366 | { |
393 | 367 | if(type != XML_READER_TYPE_END_ELEMENT) |
412 | 386 | { |
413 | 387 | catname.clear(); |
414 | 388 | } |
415 | } | |
416 | else if(name == "#text"_u) | |
417 | { | |
418 | // do nothing | |
419 | } | |
420 | else if(name == "#comment"_u) | |
421 | { | |
422 | // do nothing | |
423 | 389 | } |
424 | 390 | else if(name == "section-def-cats"_u) |
425 | 391 | { |
438 | 404 | while(type != XML_READER_TYPE_END_ELEMENT || |
439 | 405 | name != "section-def-vars"_u) |
440 | 406 | { |
441 | step(); | |
407 | stepToNextTag(); | |
442 | 408 | if(name == "def-var"_u) |
443 | 409 | { |
444 | 410 | if(type != XML_READER_TYPE_END_ELEMENT) |
445 | 411 | { |
446 | 412 | createVar(attrib("n"_u), attrib("v"_u)); |
447 | 413 | } |
448 | } | |
449 | else if(name == "#text"_u) | |
450 | { | |
451 | // do nothing | |
452 | } | |
453 | else if(name == "#comment"_u) | |
454 | { | |
455 | // do nothing | |
456 | 414 | } |
457 | 415 | else if(name == "section-def-vars"_u) |
458 | 416 | { |
473 | 431 | while(type != XML_READER_TYPE_END_ELEMENT || |
474 | 432 | name != "section-def-lists"_u) |
475 | 433 | { |
476 | step(); | |
434 | stepToNextTag(); | |
477 | 435 | if(name == "list-item"_u) |
478 | 436 | { |
479 | 437 | if(type != XML_READER_TYPE_END_ELEMENT) |
491 | 449 | { |
492 | 450 | listname.clear(); |
493 | 451 | } |
494 | } | |
495 | else if(name == "#text"_u) | |
496 | { | |
497 | // do nothing | |
498 | } | |
499 | else if(name == "#comment"_u) | |
500 | { | |
501 | // do nothing | |
502 | 452 | } |
503 | 453 | else if(name == "section-def-lists"_u) |
504 | 454 | { |
518 | 468 | while(type != XML_READER_TYPE_END_ELEMENT || |
519 | 469 | name != "section-def-macros"_u) |
520 | 470 | { |
521 | step(); | |
471 | stepToNextTag(); | |
522 | 472 | if(name == "def-macro"_u) |
523 | 473 | { |
524 | 474 | if(type != XML_READER_TYPE_END_ELEMENT) |
48 | 48 | void procDefMacros(); |
49 | 49 | void procRules(); |
50 | 50 | |
51 | void checkClip(); | |
52 | ||
51 | 53 | void insertCatItem(UString const &name, UString const &lemma, |
52 | 54 | UString const &tags); |
53 | 55 | void createVar(UString const &name, UString const &initial_value); |
39 | 39 | #define fread_unlocked fread |
40 | 40 | #endif |
41 | 41 | |
42 | #ifdef _WIN32 | |
43 | #include <utf8_fwrap.h> | |
44 | 42 | #endif |
45 | ||
46 | #endif |
29 | 29 | #include <sys/types.h> |
30 | 30 | |
31 | 31 | #ifdef HAVE_CONFIG_H |
32 | #include "config.h" | |
32 | #include "apertium_config.h" | |
33 | 33 | #endif |
34 | 34 | |
35 | 35 | /* The `emacs' switch turns on certain matching commands |
4 | 4 | |
5 | 5 | #define fileno _fileno |
6 | 6 | |
7 | #if defined(_WIN32) && defined(isatty) | |
8 | #undef isatty | |
7 | #if defined(_WIN32) | |
8 | #include <io.h> | |
9 | 9 | #define isatty _isatty |
10 | 10 | #endif |
11 | 11 |
93 | 93 | } |
94 | 94 | |
95 | 95 | void |
96 | XMLReader::warnAtLoc() | |
97 | { | |
98 | cerr << "Warning at line " << xmlTextReaderGetParserLineNumber(reader) | |
99 | << ", column " << xmlTextReaderGetParserColumnNumber(reader) << ": "; | |
100 | } | |
101 | ||
102 | void | |
96 | 103 | XMLReader::unexpectedTag() |
97 | 104 | { |
98 | 105 | parseError("unexpected '<"_u + name + ">' tag"_u); |
36 | 36 | string attrib_str(const UString& name); |
37 | 37 | void parseError(UString const &message); |
38 | 38 | void parseError(const string& message); |
39 | void warnAtLoc(); | |
39 | 40 | void unexpectedTag(); |
40 | 41 | void stepToTag(); |
41 | 42 | void stepPastSelfClosingTag(UString const &tag); |
71 | 71 | ]) |
72 | 72 | |
73 | 73 | |
74 | # AP_SHARED() | |
75 | # | |
76 | # Check that APERTIUM_SHARED exists, and if so sets AP_SHSRC. | |
77 | # | |
78 | # Also sets up --with-shared if the user wants to use the source | |
79 | # code checkout instead of installed files. | |
80 | AC_DEFUN([AP_SHARED], | |
81 | [ | |
82 | AC_ARG_VAR([AP_SHSRC], [Path to apertium-shared sources]) | |
83 | AC_ARG_WITH([shared], | |
84 | [dnl | |
85 | AS_HELP_STRING([--with-shared],dnl | |
86 | [Uninstalled source directory for apertium-shared, defines AP_SHSRC for Makefile, otherwise these are set to paths of installed files.]) | |
87 | ], | |
88 | [ | |
89 | # I actually don't know what to do if the user decides to build | |
90 | # in a directory other than src/, but it looks like AP_CHECK_LING | |
91 | # doesn't handle that case either. | |
92 | AP_SHSRC="$withval/src" | |
93 | echo "Using apertium-shared from $withval" | |
94 | ], | |
95 | [ | |
96 | if test x"$1" = x; then | |
97 | PKG_CHECK_MODULES([APERTIUM_SHARED], [apertium-shared]) | |
98 | else | |
99 | PKG_CHECK_MODULES([APERTIUM_SHARED], [apertium-shared >= $1]) | |
100 | fi | |
101 | AP_SHSRC=`pkg-config --variable=srcdir apertium-shared` | |
102 | ]) | |
103 | if test -z "$AP_SHSRC" || ! test -d "$AP_SHSRC"; then | |
104 | AC_MSG_ERROR([Could not find sources dir for apertium-shared (AP_SHSRC="$AP_SHSRC")]) | |
105 | fi | |
106 | ]) | |
107 | ||
74 | 108 | # AP_MKINCLUDE() |
75 | 109 | # |
76 | 110 | # Creates the file ap_include.am and sets the variable ap_include to |
113 | 147 | |
114 | 148 | apertium_modesdir=\$(prefix)/share/apertium/modes/ |
115 | 149 | install-modes: modes.xml |
116 | apertium-gen-modes -f \@S|@< \$(prefix)/share/apertium/\$(BASENAME) | |
150 | apertium-gen-modes -f -l \@S|@< \$(prefix)/share/apertium/\$(BASENAME) | |
117 | 151 | \$(MKDIR_P) \$(DESTDIR)\$(apertium_modesdir) |
118 | 152 | modes=\`xmllint --xpath '//mode@<:@@install="yes"@:>@/@name' \@S|@< | sed 's/ *name="\(@<:@^"@:>@*\)"/\1.mode /g'\`; \\ |
119 | 153 | if test -n "\$\$modes"; then \\ |
124 | 158 | uninstall-modes: modes.xml |
125 | 159 | files=\`xmllint --xpath '//mode@<:@@install="yes"@:>@/@name' \@S|@< | sed 's/ *name="\(@<:@^"@:>@*\)"/\1.mode /g'\`; \\ |
126 | 160 | if test -n "\$\$files"; then \\ |
127 | dir=\$(DESTDIR)\$(apertium_modesdir); \$(am__uninstall_files_from_dir) | |
161 | dir=\$(DESTDIR)\$(apertium_modesdir); \\ | |
162 | \$(am__uninstall_files_from_dir); \\ | |
128 | 163 | fi |
129 | 164 | |
130 | 165 | .deps/.d: |
5 | 5 | Name: apertium |
6 | 6 | Description: rule-based machine translation system |
7 | 7 | Version: @VERSION@ |
8 | Libs: -L${libdir} -l@PACKAGE_NAME@@VERSION_MAJOR@ | |
9 | Cflags: -I${includedir}/@PACKAGE_NAME@-@VERSION_API@ -I${libdir}/@PACKAGE_NAME@-@VERSION_API@/include | |
8 | Libs: -L${libdir} -l@PACKAGE_NAME@ |
1 | 1 | |
2 | 2 | m4_define([PKG_VERSION_MAJOR], [3]) |
3 | 3 | m4_define([PKG_VERSION_MINOR], [8]) |
4 | m4_define([PKG_VERSION_PATCH], [1]) | |
4 | m4_define([PKG_VERSION_PATCH], [2]) | |
5 | ||
6 | # Bump if the ABI (not API) changed in a backwards-incompatible manner | |
7 | m4_define([PKG_VERSION_ABI], [3]) | |
5 | 8 | |
6 | 9 | AC_INIT([apertium], [PKG_VERSION_MAJOR.PKG_VERSION_MINOR.PKG_VERSION_PATCH], [apertium-stuff@lists.sourceforge.net], [apertium], [https://wiki.apertium.org/]) |
7 | 10 | AC_CONFIG_HEADER([apertium/apertium_config.h]) |
8 | 11 | |
9 | 12 | VERSION=$PACKAGE_VERSION |
10 | VERSION_MAJOR=PKG_VERSION_MAJOR | |
11 | VERSION_API=PKG_VERSION_MAJOR.PKG_VERSION_MINOR | |
12 | SOVERSION=1:0:0 | |
13 | VERSION_ABI=PKG_VERSION_ABI | |
13 | 14 | |
14 | 15 | AC_SUBST(PACKAGE_NAME) |
15 | 16 | AC_SUBST(PACKAGE_VERSION) |
16 | AC_SUBST(VERSION_MAJOR) | |
17 | AC_SUBST(VERSION_API) | |
18 | AC_SUBST(SOVERSION) | |
17 | AC_SUBST(VERSION_ABI) | |
19 | 18 | |
20 | 19 | AM_INIT_AUTOMAKE |
21 | 20 | AC_CONFIG_MACRO_DIR([m4]) |
79 | 78 | AC_HEADER_STDC |
80 | 79 | AC_CHECK_HEADERS([stdlib.h string.h unistd.h stddef.h filesystem string_view]) |
81 | 80 | AC_CHECK_LIB([stdc++fs], [_ZNSt12experimental10filesystem2v112current_pathEv]) |
82 | AC_CHECK_HEADER([utf8.h], [], [AC_MSG_ERROR([You don't have utfcpp installed.])]) | |
81 | AC_CHECK_HEADER([utf8cpp/utf8.h], [CPPFLAGS="-I/usr/include/utf8cpp/ $CPPFLAGS"], [ | |
82 | AC_CHECK_HEADER([utf8.h], [], [AC_MSG_ERROR([You don't have utfcpp installed.])]) | |
83 | ]) | |
83 | 84 | |
84 | 85 | AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, getopt, getopt_long]) |
85 | 86 | AC_CHECK_FUNCS([setlocale strdup getopt snprintf]) |
89 | 90 | #AS_IF([test x$version_type = xwindows], [AC_DEFINE(HAVE_GETOPT_LONG,0)], []) |
90 | 91 | |
91 | 92 | # Checks for highest supported C++ standard |
92 | AX_CHECK_COMPILE_FLAG([-std=c++20], [CXXFLAGS="$CXXFLAGS -std=c++20"], [ | |
93 | AX_CHECK_COMPILE_FLAG([-std=c++2a], [CXXFLAGS="$CXXFLAGS -std=c++2a"], [ | |
94 | AX_CHECK_COMPILE_FLAG([-std=c++17], [CXXFLAGS="$CXXFLAGS -std=c++17"], [ | |
95 | AX_CHECK_COMPILE_FLAG([-std=c++1z], [CXXFLAGS="$CXXFLAGS -std=c++1z"], [ | |
96 | AX_CHECK_COMPILE_FLAG([-std=c++14], [CXXFLAGS="$CXXFLAGS -std=c++14"], [ | |
97 | AX_CHECK_COMPILE_FLAG([-std=c++1y], [CXXFLAGS="$CXXFLAGS -std=c++1y"], []) | |
98 | ]) | |
99 | ]) | |
100 | ]) | |
101 | ]) | |
93 | for version in 23 2b 20 2a 17 1z 14 1y; do | |
94 | version_flag="-std=c++${version}" | |
95 | AX_CHECK_COMPILE_FLAG([${version_flag}], [break], [version_flag=none]) | |
96 | done | |
97 | AS_IF([test "$version_flag" == none], [ | |
98 | AC_MSG_ERROR([Could not enable at least C++1y (C++14) - upgrade your compiler]) | |
102 | 99 | ]) |
100 | CXXFLAGS="$CXXFLAGS ${version_flag}" | |
103 | 101 | |
104 | 102 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ |
105 | 103 | #include <cstddef> |
18 | 18 | swig_opts=['-c++', '-I..', '-I@top_srcdir@/apertium', '-Wall'], |
19 | 19 | include_dirs=['@top_srcdir@', '@top_srcdir@/apertium'] + '@LIBXML2_CFLAGS@'.replace('-I', '').split(), |
20 | 20 | library_dirs=['@top_srcdir@/apertium/.libs'], |
21 | libraries = ['apertium@VERSION_MAJOR@'], | |
21 | libraries = ['apertium'], | |
22 | 22 | extra_compile_args=compile_args, |
23 | 23 | extra_link_args=link_args, |
24 | 24 | ) |
103 | 103 | </xsl:template> |
104 | 104 | |
105 | 105 | <xsl:template match="lrx|rules|rule|def-seqs|def-seq|match|select|remove|or|repeat|seq|metalrx|def-macros|def-macro|macro|section-def-cats|def-cat|cat-item|section-def-attrs|def-attr|attr-item"> |
106 | <xsl:choose> | |
107 | <xsl:when test="./@v=$lang or count(./@v)=0"> | |
108 | <xsl:copy> | |
109 | <xsl:copy-of select="@*[not(local-name()='v')]"/> | |
110 | <xsl:for-each select="./node()"> | |
111 | <xsl:apply-templates select="."/> | |
112 | </xsl:for-each> | |
113 | </xsl:copy> | |
114 | </xsl:when> | |
115 | <xsl:otherwise/> | |
116 | </xsl:choose> | |
117 | </xsl:template> | |
118 | ||
119 | <xsl:template match="with-param|param"> | |
106 | 120 | <xsl:copy> |
107 | 121 | <xsl:copy-of select="@*"/> |
108 | <xsl:for-each select="./node()"> | |
109 | <xsl:choose> | |
110 | <xsl:when test="./@v=$lang or count(./@v)=0"> | |
111 | <xsl:apply-templates select="."/> | |
112 | </xsl:when> | |
113 | <xsl:otherwise/> | |
114 | </xsl:choose> | |
115 | </xsl:for-each> | |
122 | <xsl:apply-templates select="node()"/> | |
116 | 123 | </xsl:copy> |
117 | 124 | </xsl:template> |
118 | 125 |
0 | <?xml version="1.0" encoding="utf-8"?> | |
1 | <transfer default="chunk"> | |
2 | <section-def-cats> | |
3 | <def-cat n="vblexpassfv"> | |
4 | <cat-item tags="vblex.pres.pasv"/> | |
5 | <cat-item tags="vblex.pret.pasv"/> | |
6 | <cat-item tags="vblex.pstv.pres"/> | |
7 | </def-cat> | |
8 | <def-cat n="prn"> | |
9 | <cat-item tags="prn.*"/> | |
10 | </def-cat> | |
11 | <def-cat n="nacr"> | |
12 | <cat-item tags="n.acr"/> | |
13 | </def-cat> | |
14 | <def-cat n="det"> | |
15 | <cat-item tags="det"/> | |
16 | </def-cat> | |
17 | <def-cat n="adj"> | |
18 | <cat-item tags="adj"/> | |
19 | </def-cat> | |
20 | </section-def-cats> | |
21 | <section-def-attrs> | |
22 | <def-attr n="a_nom"> | |
23 | <attr-item tags="n"/> | |
24 | <attr-item tags="np"/> | |
25 | </def-attr> | |
26 | </section-def-attrs> | |
27 | ||
28 | <section-def-vars> | |
29 | <def-var n="caseFirstWord"/> | |
30 | <def-var n="tmp"/> | |
31 | </section-def-vars> | |
32 | ||
33 | <section-def-lists> | |
34 | <def-list n="foo"> | |
35 | <list-item v="foo"/> | |
36 | </def-list> | |
37 | </section-def-lists> | |
38 | ||
39 | <section-def-macros> | |
40 | <def-macro n="set_caseFirstWord" npar="1" | |
41 | c="Downcases the argument, unless np"> | |
42 | <!-- TODO: Put a marker for acronyms where we don't want to copy case from them; then drop the tag --> | |
43 | <choose><when> | |
44 | <test><not><equal><clip pos="1" side="tl" part="a_nom"/><lit-tag v="np"/></equal></not></test> | |
45 | <modify-case><clip pos="1" side="tl" part="lemh"/><lit v="aa"/></modify-case> | |
46 | </when></choose> | |
47 | <choose> | |
48 | <when c="Proper noun, not first word in sentence: let first word have lowercase"> | |
49 | <test><and> | |
50 | <not><equal><var n="caseFirstWord"/><lit v=""/></equal></not> | |
51 | <begins-with><clip pos="1" side="tl" part="a_nom"/><lit-tag v="np"/></begins-with> | |
52 | </and></test> | |
53 | <let><var n="caseFirstWord"/><lit v="aa"/></let> | |
54 | </when> | |
55 | <otherwise> | |
56 | <let><var n="caseFirstWord"/><get-case-from pos="1"><lit v="aa"/></get-case-from></let> | |
57 | </otherwise> | |
58 | </choose> | |
59 | ||
60 | </def-macro> | |
61 | </section-def-macros> | |
62 | ||
63 | <section-rules> | |
64 | <rule> | |
65 | <pattern> | |
66 | <pattern-item n="prn"/> | |
67 | </pattern> | |
68 | <action> | |
69 | <let> | |
70 | <clip pos="1" side="tl" part="IAMSTUCKINANXMLFILEPLEASEHELP"/> | |
71 | <lit v=""/> | |
72 | </let> | |
73 | </action> | |
74 | </rule> | |
75 | </section-rules> | |
76 | </transfer> |
0 | <?xml version="1.0" encoding="utf-8"?> | |
1 | <transfer default="chunk"> | |
2 | <section-def-cats> | |
3 | <def-cat n="vblexpassfv"> | |
4 | <cat-item tags="vblex.pres.pasv"/> | |
5 | <cat-item tags="vblex.pret.pasv"/> | |
6 | <cat-item tags="vblex.pstv.pres"/> | |
7 | </def-cat> | |
8 | <def-cat n="prn"> | |
9 | <cat-item tags="prn.*"/> | |
10 | </def-cat> | |
11 | <def-cat n="nacr"> | |
12 | <cat-item tags="n.acr"/> | |
13 | </def-cat> | |
14 | <def-cat n="det"> | |
15 | <cat-item tags="det"/> | |
16 | </def-cat> | |
17 | <def-cat n="adj"> | |
18 | <cat-item tags="adj"/> | |
19 | </def-cat> | |
20 | </section-def-cats> | |
21 | <section-def-attrs> | |
22 | <def-attr n="a_nom"> | |
23 | <attr-item tags="n"/> | |
24 | <attr-item tags="np"/> | |
25 | </def-attr> | |
26 | </section-def-attrs> | |
27 | ||
28 | <section-def-vars> | |
29 | <def-var n="caseFirstWord"/> | |
30 | <def-var n="tmp"/> | |
31 | </section-def-vars> | |
32 | ||
33 | <section-def-lists> | |
34 | <def-list n="foo"> | |
35 | <list-item v="foo"/> | |
36 | </def-list> | |
37 | </section-def-lists> | |
38 | ||
39 | <section-def-macros> | |
40 | <def-macro n="set_caseFirstWord" npar="1" | |
41 | c="Downcases the argument, unless np"> | |
42 | <!-- TODO: Put a marker for acronyms where we don't want to copy case from them; then drop the tag --> | |
43 | <choose><when> | |
44 | <test><not><equal><clip pos="1" side="tl" part="a_nom"/><lit-tag v="np"/></equal></not></test> | |
45 | <modify-case><clip pos="1" side="tl" part="lemh"/><lit v="aa"/></modify-case> | |
46 | </when></choose> | |
47 | <choose> | |
48 | <when c="Proper noun, not first word in sentence: let first word have lowercase"> | |
49 | <test><and> | |
50 | <not><equal><var n="caseFirstWord"/><lit v=""/></equal></not> | |
51 | <begins-with><clip pos="1" side="tl" part="a_nom"/><lit-tag v="np"/></begins-with> | |
52 | </and></test> | |
53 | <let><var n="caseFirstWord"/><lit v="aa"/></let> | |
54 | </when> | |
55 | <otherwise> | |
56 | <let><var n="caseFirstWord"/><get-case-from pos="1"><lit v="aa"/></get-case-from></let> | |
57 | </otherwise> | |
58 | </choose> | |
59 | ||
60 | </def-macro> | |
61 | </section-def-macros> | |
62 | ||
63 | <section-rules> | |
64 | </section-rules> | |
65 | </transfer> |
0 | <?xml version="1.0" encoding="UTF-8"?> | |
1 | <!-- -*- nxml -*- --> | |
2 | <interchunk> | |
3 | <section-def-cats> | |
4 | <def-cat n="SN"> | |
5 | <cat-item tags="SN"/> | |
6 | <cat-item tags="SN.*"/> | |
7 | </def-cat> | |
8 | <def-cat n="prn"> | |
9 | <cat-item tags="prn"/> | |
10 | <cat-item tags="prn.*"/> | |
11 | </def-cat> | |
12 | <def-cat n="prn2"> | |
13 | <cat-item tags="prn2"/> | |
14 | <cat-item tags="prn2.*"/> | |
15 | </def-cat> | |
16 | <def-cat n="test1"> | |
17 | <cat-item tags="test1"/> | |
18 | <cat-item tags="test1.*"/> | |
19 | </def-cat> | |
20 | <def-cat n="test2"> | |
21 | <cat-item tags="test2"/> | |
22 | <cat-item tags="test2.*"/> | |
23 | </def-cat> | |
24 | <def-cat n="test3"> | |
25 | <cat-item tags="test3"/> | |
26 | <cat-item tags="test3.*"/> | |
27 | </def-cat> | |
28 | <def-cat n="test4"> | |
29 | <cat-item tags="test4"/> | |
30 | <cat-item tags="test4.*"/> | |
31 | </def-cat> | |
32 | <def-cat n="test5"> | |
33 | <cat-item tags="test5"/> | |
34 | <cat-item tags="test5.*"/> | |
35 | </def-cat> | |
36 | <def-cat n="SENT"> | |
37 | <cat-item tags="SENT"/> | |
38 | </def-cat> | |
39 | </section-def-cats> | |
40 | ||
41 | <section-def-attrs> | |
42 | <def-attr n="a_nbr"> | |
43 | <attr-item tags="sg"/> | |
44 | <attr-item tags="pl"/> | |
45 | <attr-item tags="sp"/> | |
46 | <attr-item tags="ND"/> | |
47 | </def-attr> | |
48 | <def-attr n="a_cas"> | |
49 | <attr-item tags="nom"/> | |
50 | <attr-item tags="acc"/> | |
51 | <attr-item tags="dat"/> | |
52 | <attr-item tags="gen"/> | |
53 | <attr-item tags="ins"/> | |
54 | <attr-item tags="loc"/> | |
55 | <attr-item tags="abl"/> | |
56 | </def-attr> | |
57 | </section-def-attrs> | |
58 | ||
59 | <section-rules> | |
60 | </section-rules> | |
61 | </interchunk> |
0 | <?xml version="1.0" encoding="UTF-8"?> | |
1 | <!-- -*- nxml -*- --> | |
2 | <postchunk> | |
3 | <section-def-cats> | |
4 | <def-cat n="n"> | |
5 | <cat-item name="n"/> | |
6 | </def-cat> | |
7 | <def-cat n="hasmacro"> | |
8 | <cat-item name="hasmacro"/> | |
9 | </def-cat> | |
10 | <def-cat n="thing"> | |
11 | <cat-item name="thing"/> | |
12 | </def-cat> | |
13 | <def-cat n="nomacro"> | |
14 | <cat-item name="nomacro"/> | |
15 | </def-cat> | |
16 | <def-cat n="n_n"> | |
17 | <cat-item name="n_n"/> | |
18 | </def-cat> | |
19 | <def-cat n="n_n2"> | |
20 | <cat-item name="n_n2"/> | |
21 | </def-cat> | |
22 | <def-cat n="n_n3"> | |
23 | <cat-item name="n_n3"/> | |
24 | </def-cat> | |
25 | <def-cat n="n_n4"> | |
26 | <cat-item name="n_n4"/> | |
27 | </def-cat> | |
28 | <def-cat n="thing_wb"> | |
29 | <cat-item name="thing_wb"/> | |
30 | </def-cat> | |
31 | </section-def-cats> | |
32 | ||
33 | ||
34 | <section-def-attrs> | |
35 | <def-attr n="gen"> | |
36 | <attr-item tags="m"/> | |
37 | <attr-item tags="mf"/> | |
38 | <attr-item tags="nt"/> | |
39 | <attr-item tags="f"/> | |
40 | <attr-item tags="GD"/> | |
41 | </def-attr> | |
42 | <def-attr n="art"> | |
43 | <attr-item tags="def"/> | |
44 | <attr-item tags="ind"/> | |
45 | </def-attr> | |
46 | <def-attr n="a_verb"> | |
47 | <attr-item tags="nomacro"/> | |
48 | <attr-item tags="nomacro.pstv"/> | |
49 | </def-attr> | |
50 | <def-attr n="temps"> | |
51 | <attr-item tags="inf"/> | |
52 | <attr-item tags="imp"/> | |
53 | <attr-item tags="pres"/> | |
54 | <attr-item tags="pret"/> | |
55 | <attr-item tags="pp"/> | |
56 | </def-attr> | |
57 | <def-attr n="voice"> | |
58 | <attr-item tags="pasv"/> | |
59 | </def-attr> | |
60 | <def-attr n="nbr"> | |
61 | <attr-item tags="sg"/> | |
62 | <attr-item tags="pl"/> | |
63 | <attr-item tags="sp"/> | |
64 | <attr-item tags="ND"/> | |
65 | </def-attr> | |
66 | </section-def-attrs> | |
67 | ||
68 | <section-def-vars> | |
69 | <def-var n="foo"/> | |
70 | </section-def-vars> | |
71 | ||
72 | ||
73 | <section-def-macros> | |
74 | <def-macro n="out_adj" npar="1"> | |
75 | <choose><when> | |
76 | <test><and> | |
77 | <equal><clip pos="1" part="nbr"/><lit-tag v="sg"/></equal> | |
78 | <equal><clip pos="1" part="art"/><lit v=""/></equal> | |
79 | </and></test> | |
80 | <let><clip pos="1" part="nbr"/><concat><clip pos="1" part="nbr"/><lit-tag v="def"/></concat></let> | |
81 | <let><clip pos="1" part="gen"/><concat><lit v=""/></concat></let> | |
82 | </when></choose> | |
83 | ||
84 | <out><lu><clip pos="1" part="whole"/></lu></out> | |
85 | </def-macro> | |
86 | ||
87 | <def-macro n="default_to_chunk_number" npar="2" | |
88 | c="arg1: lu, arg2: chunk"> | |
89 | <choose><when> | |
90 | <test><and> | |
91 | <not><equal><clip pos="2" part="nbr"/><lit v=""/></equal></not> | |
92 | <equal><clip pos="1" part="nbr"/><lit-tag v="ND"/></equal> | |
93 | </and></test> | |
94 | <let><clip pos="1" part="nbr"/><clip pos="2" part="nbr"/></let> | |
95 | </when></choose> | |
96 | </def-macro> | |
97 | </section-def-macros> | |
98 | ||
99 | ||
100 | <section-rules> | |
101 | </section-rules> | |
102 | </postchunk> |
105 | 105 | "[blank1];; ^test1<test1>{^worda<n><ND><m>$}$ ;[blank2] ^test3<test3>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test2<test2>{^wordc# xyz<n><ND><f>$}$ [blank4];;", #superblankrule4 -> Output rule has one <b/>, print one blank, then flush all after rule output |
106 | 106 | "[blank1];; ^test1<test1>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];; ^test1<test1>{^worda<n><ND><m>$}$ ;[blank5] ^test3<test3>{^wordb# xyz<n><ND><f>$}$ ;[blank6]; ^test2<test2>{^wordc# xyz<n><ND><f>$}$ [blank7];;", #Multiple matching rules -> superblankrule1 & superblankrule4 |
107 | 107 | "[blank1];; ^test1<test1>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test2x<test2z>{^wordc# xyz<n><ND><f>$}$ [blank4];; ^test2<test2x>{^wordb# xyz<n><ND><f>$}$ ;[blank5];"] #Rule followed by unknown |
108 | ||
108 | ||
109 | 109 | expectedOutputs = [ "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank2] ^test1<test1>{^worda<n><ND><m>$}$ ;[blank3]; ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];;", |
110 | 110 | "[blank1];; ^test1<test1x>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2x>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test3<test3x>{^wordc# xyz<n><ND><f>$}$ [blank4];;", |
111 | 111 | "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank2] ^test2<test2>{^worda<n><ND><m>$}$ ;[blank3]; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ^test2<test2>{^worda<n><ND><m>$}$ ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];;", |
112 | 112 | "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$^test3<test3>{^worda<n><ND><m>$}$^test1<test1>{^wordc# xyz<n><ND><f>$}$ ;[blank2] ;[blank3]; [blank4];;", |
113 | 113 | "[blank1];; ^test3<test3>{^wordb# xyz<n><ND><f>$}$^test1<test1>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2>{^wordc# xyz<n><ND><f>$}$ ;[blank3]; [blank4];;", |
114 | 114 | "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank2] ^test1<test1>{^worda<n><ND><m>$}$ ;[blank3]; ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];; ^test3<test3>{^wordb# xyz<n><ND><f>$}$^test1<test1>{^worda<n><ND><m>$}$ ;[blank5] ^test2<test2>{^wordc# xyz<n><ND><f>$}$ ;[blank6]; [blank7];;", |
115 | "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank2] ^test1<test1>{^worda<n><ND><m>$}$ ;[blank3]; ^test2x<test2z>{^wordc# xyz<n><ND><f>$}$ [blank4];; ^test2<test2x>{^wordb# xyz<n><ND><f>$}$ ;[blank5];"] | |
115 | "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank2] ^test1<test1>{^worda<n><ND><m>$}$ ;[blank3]; ^test2x<test2z>{^wordc# xyz<n><ND><f>$}$ [blank4];; ^test2<test2x>{^wordb# xyz<n><ND><f>$}$ ;[blank5];"] | |
116 | 116 | |
117 | 117 | |
118 | 118 | class BincompatTest(SimpleInterchunkTest): |
120 | 120 | |
121 | 121 | def compile(self): |
122 | 122 | pass |
123 | ||
124 | class EmptyTransferTest(InterchunkTest): | |
125 | t2xdata = "data/empty.t1x" | |
126 | inputs = ["^default<default>{^ho<prn><f>$}$"] | |
127 | expectedOutputs = ["^default<default>{^ho<prn><f>$}$"] |
122 | 122 | "[blank1];; ^n_n4<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$ ;[blank3]; ^wordc<n>$}$ ;[blank4]; ", #superblank rule 3 -> Output rule has one <b/>, print one blank, then flush all after rule output |
123 | 123 | "[blank1];; ^n_n<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^n_n4<SN><sg>{^worda<n><ND><m>$ ;[blank4] ^wordb# xyz<n><ND><f>$ ;[blank5]; ^wordc<n>$}$ ;[blank6]; ", #Multiple matching rules |
124 | 124 | "[blank1];; ^n_n2<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$ ;[blank3]; ^wordc<n>$}$ ;[blank4]; ^n_k<SN><sgn>{^worda<nn><NDn><mn>$}$ ;[blank5]" ] #Matching rule followed by unknown word |
125 | ||
125 | ||
126 | 126 | expectedOutputs = [ "[blank1];; ^wordb# xyz<n><ND><f>$ ;[blank2] [blank2.1]; ^worda<n><ND><m>$ ^worda+wordb# xyz$ ;[blank3]; ", |
127 | 127 | "[blank1];; ^worda<nn><NDn><mn>$ ;[blank2] ^wordb# xyz<nn><NDn><fn>$ ;[blank3]; ", |
128 | 128 | "[blank1];; ^wordb# xyz<n><ND><f>$ ;[blank2] ^worda<n><ND><m>$ ;[blank3]; ^worda+wordb# xyz$ ^wordc<n>$ ;[blank4]; ", |
137 | 137 | |
138 | 138 | def compile(self): |
139 | 139 | pass |
140 | ||
141 | class EmptyTransferTest(PostchunkTest): | |
142 | t2xdata = "data/empty.t1x" | |
143 | inputs = ["^default<default>{^ho<prn><f>$}$"] | |
144 | expectedOutputs = ["^ho<prn><f>$"] |
0 | 0 | # This is expected to be built before the parent dir, which depends on |
1 | 1 | # tagger/test-find-similar-ambiguity-class for run_tests.py |
2 | 2 | |
3 | library_includedir = $(includedir)/$(PACKAGE_NAME)-$(VERSION_API)/$(PACKAGE_NAME) | |
3 | library_includedir = $(includedir)/$(PACKAGE_NAME) | |
4 | 4 | |
5 | 5 | check_PROGRAMS = test-find-similar-ambiguity-class |
6 | 6 | check_SCRIPTS = $(GENERATEDSCRIPTS) |
8 | 8 | AM_CPPFLAGS = -I$(top_srcdir) |
9 | 9 | |
10 | 10 | apertiumdir = $(prefix)/share/apertium |
11 | apertiuminclude = $(prefix)/include/apertium-$(VERSION_API) | |
11 | apertiuminclude = $(prefix)/include | |
12 | 12 | apertiumlib = $(prefix)/lib |
13 | 13 | apertiumsysconf = $(prefix)/etc/apertium |
14 | 14 | |
15 | 15 | test_find_similar_ambiguity_class_SOURCES = test_find_similar_ambiguity_classes.cc |
16 | test_find_similar_ambiguity_class_LDADD = -L$(top_srcdir)/$(PACKAGE_NAME)/.libs/ -l$(PACKAGE_NAME)$(VERSION_MAJOR) | |
16 | test_find_similar_ambiguity_class_LDADD = -L$(top_srcdir)/$(PACKAGE_NAME)/.libs/ -l$(PACKAGE_NAME) |
21 | 21 | inputs = [""] |
22 | 22 | expectedOutputs = [""] |
23 | 23 | expectedRetCodeFail = False |
24 | expectedCompRetCodeFail = False | |
24 | 25 | |
25 | 26 | def alarmHandler(self, signum, frame): |
26 | 27 | raise Alarm |
53 | 54 | return b"".join(output).decode('utf-8') |
54 | 55 | |
55 | 56 | def compile(self): |
56 | compileCmd = ["../apertium/apertium-preprocess-transfer", | |
57 | self.t1xdata, | |
58 | self.bindata] | |
59 | self.assertEqual(call(compileCmd), | |
60 | 0) | |
57 | retCode = call(["../apertium/apertium-preprocess-transfer", | |
58 | self.t1xdata, | |
59 | self.bindata]) | |
60 | if self.expectedCompRetCodeFail: | |
61 | self.assertNotEqual(retCode, 0) | |
62 | else: | |
63 | self.assertEqual(retCode, 0) | |
64 | return retCode == 0 | |
61 | 65 | |
62 | 66 | def runTest(self): |
63 | self.compile() | |
67 | if not self.compile(): | |
68 | return | |
64 | 69 | try: |
65 | 70 | cmd = ["../apertium/apertium-transfer"] \ |
66 | 71 | + self.flags \ |
119 | 124 | |
120 | 125 | def compile(self): |
121 | 126 | pass |
127 | ||
128 | ||
129 | class EmptyTransferTest(TransferTest): | |
130 | t1xdata = "data/empty.t1x" | |
131 | inputs = ["^hun<prn><f>/ho<prn><f>$"] | |
132 | expectedOutputs = ["^default<default>{^ho<prn><f>$}$"] | |
133 | ||
134 | ||
135 | class BadAttrTest(TransferTest): | |
136 | t1xdata = "data/bad-attr.t1x" | |
137 | expectedCompRetCodeFail = True |