Imported Upstream version 0.9.1~r343
Tino Didriksen
7 years ago
0 | language: python | |
1 | ||
2 | python: | |
3 | - "3.2" | |
4 | - "3.3" | |
5 | - "3.4" | |
6 | - "3.5" | |
7 | - "3.5-dev" | |
8 | - "nightly" | |
9 | ||
10 | install: pip3 install tornado | |
11 | ||
12 | before_script: | |
13 | - wget http://apertium.projectjj.com/apt/install-nightly.sh -O - | sudo bash | |
14 | - sudo apt-get -f install apertium-all-dev | |
15 | - sudo apt-get -f install apertium-sme-nob apertium-es-en | |
16 | - svn co https://svn.code.sf.net/p/apertium/svn/languages/apertium-nno /tmp/languages/apertium-nno | |
17 | - ( cd /tmp/languages/apertium-nno && ./autogen.sh && make -j2 ) | |
18 | ||
19 | script: | |
20 | - NONPAIRS=/tmp/languages t/run-tests | |
21 | ||
22 | ||
23 | notifications: | |
24 | irc: | |
25 | channels: | |
26 | - secure: "jMRqT7lndd2vTXON34HHdNQ6v+DSOSUQZwKXsJytw9F55pPPS4z+76vTChHKdNcp4nV5nWhCarZR2X4bK7qWO5i25XIXsffFm4TK/XWDdZgLKHCS431mzcSVrYYASC0nn15HgGAl/H6dMksNThuV5gaItSPcKNXC9amBYLbTeSt/H+rxDBuPs+m7yy049EySTWjUoFacel7HDoTEXQyl82Ks5I0oJM+ErgS1VAe6RPiXIH+eBGbPT0YoVYUS1WoKSZYaLhLZ/jOLDnnTiFhTn/f8+uZrbAfLGWzkp9/NbyMCByQFH48HFzjudOD6BgxRumJaDFhImrKrDvPpMulXB1MLl+JD9mK+lioLaqUiUu+2wkBJ90bvOCNqhKLR94upev7Skg52n96Wc91EfiVxuwiLDJOzwFAbZ9VaziNI/Ld3y1qHO84spV7R7AWFhI34xGNkAOCJ1sZZWcH2rGFjsldLNPHeyhNBaZhxGdGdVvrbTm0jbR4G0iD54BLK1IPuJt/eaETWrAnE2XgDnJ3PE9JKtFOe5tpuhF/R9s8qFYGEVmG9SnSxqJax7K6XcQ6utla10qFX0mR5Ub8S+Ryu9fN5+g+U2fCrfJrH44RhDCz961SGcRYVaWDITtDTeAtBGP6G0jOkNv8yi21ha50y4uDxvXq1ETfoHLeYwzzCHmU=" | |
27 | on_failure: always | |
28 | on_success: never | |
29 | # The irc channel is encrypted for goavki/apertium-apy, so build notifications from forks won't show up on the IRC channel | |
30 | # Encrypt with: | |
31 | # $ gem install --user-install travis | |
32 | # $ ~/.gem/ruby/*/bin/travis encrypt -r goavki/apertium-apy 'chat.freenode.net#apertium' |
0 | See NEWS for a per-release summary, this is just the git log. | |
1 | ||
2 | 2016-06-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
3 | ||
4 | * tools/sanity-test-apy.py: test swe-nor | |
5 | ||
6 | 2016-05-31 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
7 | ||
8 | * : commit 72eb4dad68cb8d4a12f2d74a2395cafec3b7af96 Author: Kevin | |
9 | Brubeck Unhammer <unhammer@fsfe.org> Date: Tue May 31 15:37:53 | |
10 | 2016 +0200 | |
11 | ||
12 | 2016-05-27 Kevin Brubeck Unhammer <unhammer+dill@mm.st> | |
13 | ||
14 | * : Merge pull request #21 from Putti/variable Remove unused variable | |
15 | ||
16 | 2016-05-27 Joonas Kylmälä <j.kylmala@gmail.com> | |
17 | ||
18 | * modeSearch.py: Remove unused variable | |
19 | ||
20 | 2016-05-27 Joonas Kylmälä <j.kylmala@gmail.com> | |
21 | ||
22 | * modeSearch.py: Move mode logging to its own function | |
23 | ||
24 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
25 | ||
26 | * .travis.yml: travis: lint | |
27 | ||
28 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
29 | ||
30 | * .travis.yml: travis: irc notify | |
31 | ||
32 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
33 | ||
34 | * .gitignore: ignores | |
35 | ||
36 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
37 | ||
38 | * .gitignore, servlet.py, systemd.py, tools/systemd/apy.service: | |
39 | systemd watchdog; only used if started by systemd and the .service file has e.g. WatchdogSec=10s | |
40 | ||
41 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
42 | ||
43 | * servlet.py: don't try to close missingFreqsDb unless open | |
44 | ||
45 | 2016-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
46 | ||
47 | * translation_py32.py: rm arbitrary diff from translation.py | |
48 | ||
49 | 2016-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
50 | ||
51 | * translation_py32.py: merge 8f8ffda to py32 as well | |
52 | ||
53 | 2016-05-14 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
54 | ||
55 | * translation.py: allow space around '|' in .mode files really, we should just make all language pairs install modes.xml | |
56 | though | |
57 | ||
58 | 2016-04-29 Kevin Brubeck Unhammer <unhammer+dill@mm.st> | |
59 | ||
60 | * : Merge pull request #16 from wolfgangth/master Update apertiumlangs.sql | |
61 | ||
62 | 2016-04-29 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
63 | ||
64 | * missingdb.py: missingdb: close cursor each time as well | |
65 | ||
66 | 2016-04-29 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
67 | ||
68 | * missingdb.py, servlet.py, util.py: rewrote missingFreqs-handling | |
69 | to fix locking in its own class, all relevant variables kept there, only one | |
70 | function that commits, lock at the right spot | |
71 | ||
72 | 2016-04-03 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
73 | ||
74 | * servlet.py: getPairOrError callers expect None if error should close #14 | |
75 | ||
76 | 2016-03-01 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
77 | ||
78 | * tools/sanity-test-apy.py: dan→swe in tests | |
79 | ||
80 | 2016-02-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
81 | ||
82 | * ChangeLog, NEWS: NEWS with readable summaries; git2cl > ChangeLog should close #13 | |
83 | ||
84 | 2016-02-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
85 | ||
86 | * servlet.py: ANN: New APY release! New features include: - version number | |
87 | ||
88 | 2016-02-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
89 | ||
90 | * servlet.py: usecounts rather useless without uptime | |
91 | ||
92 | 2016-02-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
93 | ||
94 | * servlet.py: hopefully more readable, informative /stats | |
95 | ||
96 | 2016-02-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
97 | ||
98 | * servlet.py: send len(q), not q to logAfterTranslation the joy of dynamic typing | |
99 | ||
100 | 2016-02-03 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
101 | ||
102 | * : commit 57960f74a878d65cd1b500c79ac8f6da5b6a9c44 Author: Kevin | |
103 | Brubeck Unhammer <unhammer@fsfe.org> Date: Wed Feb 3 11:33:10 2016 | |
104 | +0100 | |
105 | ||
106 | 2016-02-02 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
107 | ||
108 | * : commit 0fe763d71eb00b6dd0c977c9813301d9fa621955 Author: Kevin | |
109 | Brubeck Unhammer <unhammer@fsfe.org> Date: Tue Feb 2 10:42:19 2016 | |
110 | +0100 | |
111 | ||
112 | 2016-02-01 Kevin Brubeck Unhammer <unhammer+dill@mm.st> | |
113 | ||
114 | * : Merge pull request #11 from danielmartinez/master Add option for unknown marks in translateDoc | |
115 | ||
116 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
117 | ||
118 | * servlet.py: no 'yield from' in py32 | |
119 | ||
120 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
121 | ||
122 | * : commit 77b2b1b1ff8923fef392d7bba0f53182a1a7cc09 Author: Kevin | |
123 | Brubeck Unhammer <unhammer@fsfe.org> Date: Mon Jan 18 11:03:14 | |
124 | 2016 +0100 | |
125 | ||
126 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
127 | ||
128 | * servlet.py: fix: /translatePage shouldn't block while fetching the | |
129 | page | |
130 | ||
131 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
132 | ||
133 | * servlet.py: let translatepagehandler use more of translatehandler | |
134 | ||
135 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
136 | ||
137 | * servlet.py, util.py: less redundant scalemt calls (get_status will | |
138 | DTRT if called after send_error) | |
139 | ||
140 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
141 | ||
142 | * t/run-tests: test for 400's | |
143 | ||
144 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
145 | ||
146 | * t/run-tests: run_tests { run_test () } | |
147 | ||
148 | 2016-01-17 Sai Vineet <saivineet89@gmail.com> | |
149 | ||
150 | * : commit 5ffe1ce2a1fd5afe2825c31a6cb9357ff8b42ca3 Author: Sai | |
151 | Vineet <saivineet89@gmail.com> Date: Sun Jan 17 16:05:30 2016 | |
152 | +0530 | |
153 | ||
154 | 2016-01-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
155 | ||
156 | * servlet.py: warnings consistency | |
157 | ||
158 | 2016-01-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
159 | ||
160 | * translation.py: don't split if we're not going to use it | |
161 | ||
162 | 2016-01-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
163 | ||
164 | * tools/sanity-test-apy.py: arg-cat tests | |
165 | ||
166 | 2016-01-17 Sai Vineet <saivineet89@gmail.com> | |
167 | ||
168 | * servlet.py: Add more stats | |
169 | ||
170 | 2016-01-16 Kevin Brubeck Unhammer <unhammer+dill@mm.st> | |
171 | ||
172 | * : Merge pull request #9 from sabertooth-cat/master Add page translation capability :) (waiting a bit with the html-side | |
173 | of things) | |
174 | ||
175 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
176 | ||
177 | * servlet.py, translation.py: Fix splitting error | |
178 | ||
179 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
180 | ||
181 | * servlet.py, translation.py: Subclassed TranslateHandler | |
182 | ||
183 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
184 | ||
185 | * servlet.py: Modified TranslatePageHandler to subclass | |
186 | TranslateHandler | |
187 | ||
188 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
189 | ||
190 | * servlet.py, translation.py: Modified page translation feature to | |
191 | be asynchronous | |
192 | ||
193 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
194 | ||
195 | * : Fix conflict in servlet.py | |
196 | ||
197 | 2016-01-14 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
198 | ||
199 | * servlet.py, translation.py, translation_py32.py: support both py32 | |
200 | and py35+ by splitting translation.py into two modules (try-except won't work, | |
201 | since py32 calls return-from-gen a SyntaxError on _reading_ the | |
202 | file; StopIteration deprecated in py35) | |
203 | ||
204 | 2016-01-14 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
205 | ||
206 | * README.org: travis: fix link | |
207 | ||
208 | 2016-01-13 E <ethanchi@ethanchi.attlocal.net> | |
209 | ||
210 | * servlet.py, translation.py: Add page translation capability | |
211 | ||
212 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
213 | ||
214 | * t/run-tests: show logs on error; clear before run | |
215 | ||
216 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
217 | ||
218 | * .travis.yml: travis: try to stay in source dir | |
219 | ||
220 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
221 | ||
222 | * .travis.yml: travis: no --user | |
223 | ||
224 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
225 | ||
226 | * README, README.org: travis img | |
227 | ||
228 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
229 | ||
230 | * .travis.yml: travis: lang python, pip install tornado | |
231 | ||
232 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
233 | ||
234 | * .travis.yml: travis | |
235 | ||
236 | 2016-01-12 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
237 | ||
238 | * tools/sanity-test-apy.py: test for ara-mlt_translit | |
239 | ||
240 | 2016-01-12 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
241 | ||
242 | * tools/sanity-test-apy.py: test for dan-nno | |
243 | ||
244 | 2016-01-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
245 | ||
246 | * translation.py: comment the return-from-generator, since it looks | |
247 | a bit weird | |
248 | ||
249 | 2016-01-05 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
250 | ||
251 | * translation.py: Made compatible with Python <=3.3 Squashed from commit dd129f7ce3fe996566b97c3d90b5480e45c1fb8d Author: | |
252 | jatinluthra14 <jatinluthra14@gmail.com> Date: Tue Jan 5 20:05:22 | |
253 | 2016 +0530 see http://stackoverflow.com/a/16780113/69663 | |
254 | ||
255 | 2016-01-04 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
256 | ||
257 | * t/README: how to get data needed for t/run-tests | |
258 | ||
259 | 2016-01-04 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
260 | ||
261 | * t/run-tests: better installed-check | |
262 | ||
263 | 2016-01-04 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
264 | ||
265 | * t/run-tests: allow overriding data paths before running and better errors if data is simply not installed | |
266 | ||
267 | 2015-12-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
268 | ||
269 | * tools/apertium-viewer.html: translates as you type, gci work by | |
270 | ethan a chi | |
271 | ||
272 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
273 | ||
274 | * tools/apertiumlangs.sql: eus→eu | |
275 | ||
276 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
277 | ||
278 | * tools/apertiumlangs.sql: more crh | |
279 | ||
280 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
281 | ||
282 | * tools/apertiumlangs.sql: a few more kaz fixes | |
283 | ||
284 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
285 | ||
286 | * tools/apertiumlangs.sql: agh, kazakh | |
287 | ||
288 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
289 | ||
290 | * tools/apertiumlangs.sql: more langs | |
291 | ||
292 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
293 | ||
294 | * tools/apertiumlangs.sql, tools/turkic.sql, util.py: some updates, | |
295 | including crh name in some Turkic languages | |
296 | ||
297 | 2015-10-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
298 | ||
299 | * translation.py: flush lrx-proc | |
300 | ||
301 | 2015-10-04 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
302 | ||
303 | * translation.py: don't keep pipes open for ca-oc@aran pair Seems this one keeps getting clogged, presumably transfer isn't | |
304 | outputting some <b pos=N/> | |
305 | ||
306 | 2015-09-25 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
307 | ||
308 | * t/run-tests: allow passing port as first arg | |
309 | ||
310 | 2015-09-25 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
311 | ||
312 | * translation.py: use flushing even if hfst-proc (lrx-proc still | |
313 | TODO) | |
314 | ||
315 | 2015-09-25 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
316 | ||
317 | * t/run-tests: don't run tests if port taken | |
318 | ||
319 | 2015-08-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
320 | ||
321 | * t/run-tests: extremely simple regression tests | |
322 | ||
323 | 2015-08-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
324 | ||
325 | * servlet.py: -old debug output | |
326 | ||
327 | 2015-08-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
328 | ||
329 | * servlet.py, util.py: much simplified GenerateHandler as well using | |
330 | translateSimple +cleanup | |
331 | ||
332 | 2015-08-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
333 | ||
334 | * servlet.py: much simplified AnalyzeHandler using translateSimple | |
335 | ||
336 | 2015-08-21 Jonathan Washington <jonathan.north.washington@gmail.com> | |
337 | ||
338 | * tools/apertiumlangs.sql: some Turkic name defaults | |
339 | ||
340 | 2015-08-21 Jonathan Washington <jonathan.north.washington@gmail.com> | |
341 | ||
342 | * tools/apertiumlangs.sql: a few qaraqalpaq additions | |
343 | ||
344 | 2015-08-21 Jonathan Washington <jonathan.north.washington@gmail.com> | |
345 | ||
346 | * tools/apertiumlangs.sql: copy-pasta fix | |
347 | ||
348 | 2015-08-21 Jonathan Washington <jonathan.north.washington@gmail.com> | |
349 | ||
350 | * tools/apertiumlangs.sql: uzbek names for some Turkic languages | |
351 | ||
352 | 2015-08-20 Jonathan Washington <jonathan.north.washington@gmail.com> | |
353 | ||
354 | * tools/apertiumlangs.sql: some language names in Kyrgyz | |
355 | ||
356 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
357 | ||
358 | * translation.py: tornado3: use gen.Task wrapper for translateSimple | |
359 | as well | |
360 | ||
361 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
362 | ||
363 | * toro.py: add a license header for toro | |
364 | ||
365 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
366 | ||
367 | * translation.py: read_until requires a callback in tornado3, | |
368 | gen.Task wrapper turns callbacks into yieldables | |
369 | ||
370 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
371 | ||
372 | * servlet.py, translation.py: use toro if no tornado.locks | |
373 | ||
374 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
375 | ||
376 | * toro.py: s/Queue/queue for py3 | |
377 | ||
378 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
379 | ||
380 | * toro.py: __init__.py from 524fbe5b8f of | |
381 | https://github.com/ajdavis/toro/ | |
382 | ||
383 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
384 | ||
385 | * servlet.py: doc more | |
386 | ||
387 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
388 | ||
389 | * servlet.py, translation.py: --restart-pipe-after it has had that | |
390 | many requests Pipelines to be shut down now put in a holding area until there are | |
391 | no more users of that object, so we can restart high-traffic | |
392 | pipelines | |
393 | ||
394 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
395 | ||
396 | * servlet.py: --min-pipes-per-pair implemented e.g. to keep at least one pipeline per pair open | |
397 | ||
398 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
399 | ||
400 | * servlet.py, translation.py: implement --max-pipes-per-pair and | |
401 | --max-users-per-pipe Pipeline is now an object, subclasses: Simple~ and Flushing~ locks | |
402 | and use-count initialised in Pipeline FlushingPipeline opens pipes | |
403 | on init and closes on del translate is a method of a Pipeline http server has a list of pipelines per pair, default max-len 1, but | |
404 | e.g. -i3 does help with high loads even on my 3-core test machine | |
405 | ||
406 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
407 | ||
408 | * servlet.py, translation.py: use tornado 4.2 instead of toro so that was easy | |
409 | ||
410 | 2015-08-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
411 | ||
412 | * servlet.py: bump lastUsage _before_ translating, decrease chances | |
413 | of concurrent requests running shutdownPair on us --max-idle-secs is still a bit dangerous in the case where a long | |
414 | request takes more time to process than max_idle_secs, but that | |
415 | value should hopefully be larger than the time taken to translate | |
416 | the largest possible request … Or we can keep a semaphore for each pair counting how many requests | |
417 | are waiting for it, and only shutdown if none are (but should switch | |
418 | from toro to tornado.lock first) | |
419 | ||
420 | 2015-08-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
421 | ||
422 | * servlet.py: default to -j1 since we don't want to default to | |
423 | eating all your ram Use -j0 (or -j -- ) to run one http server per core, but note that | |
424 | for each http server you will have all available language pairs in | |
425 | memory (and each pair uses around 7 processes). | |
426 | ||
427 | 2015-08-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
428 | ||
429 | * translation.py: put the dot/space found when splitting long | |
430 | strings at end of prev (not beginning of next) string | |
431 | ||
432 | 2015-08-01 Xavi Ivars <xavi.ivars@gmail.com> | |
433 | ||
434 | * translation.py: [apy] Fixing bug incorrectly splitting short | |
435 | strings | |
436 | ||
437 | 2015-08-01 Xavi Ivars <xavi.ivars@gmail.com> | |
438 | ||
439 | * translation.py: [apy] Fixing bug incorrectly splitting short | |
440 | strings | |
441 | ||
442 | 2015-07-11 Jonathan Washington <jonathan.north.washington@gmail.com> | |
443 | ||
444 | * tools/apertiumlangs.sql: some minor fixes to Turkic language names | |
445 | ||
446 | 2015-07-03 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
447 | ||
448 | * servlet.py: bail out if it doesn't seem we have a utf-8 locale | |
449 | ||
450 | 2015-04-27 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
451 | ||
452 | * tools/sanity-test-apy.py: skip some | |
453 | ||
454 | 2015-04-27 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
455 | ||
456 | * translation.py: each arg gets quoted individually (did this ever | |
457 | work?) | |
458 | ||
459 | 2015-03-19 Xavi Ivars <xavi.ivars@gmail.com> | |
460 | ||
461 | * servlet.py, tools/apertium-viewer.html, translation.py: [APY] | |
462 | Adding /pipedebug mode | |
463 | ||
464 | 2014-12-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
465 | ||
466 | * tools/apertium-viewer.html: apertium-viewer.html by GCI student | |
467 | Rap /pipedebug not implemented in apy yet, so this is only using a fake | |
468 | hardcoded response | |
469 | http://www.google-melange.com/gci/task/view/google/gci2014/6466660768677888 | |
470 | ||
471 | 2014-11-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
472 | ||
473 | * translation.py: hardbreakFn now sends up to PIPE_BUF bytes; | |
474 | rush-hour test checks if lock.locked() | |
475 | ||
476 | 2014-11-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
477 | ||
478 | * servlet.py: / redirect to wiki/apy | |
479 | ||
480 | 2014-11-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
481 | ||
482 | * servlet.py, translation.py: select/poll-style translation instead | |
483 | of threading We now yield a Future from a PipeIOStream in translate, using | |
484 | tornado.process.Subprocess instead of Popen. Depends on toro for the locking, so do "pip3 install toro" | |
485 | (unfortunately not in Debian yet). Much cleanup, e.g. startPipeline really belongs in translation.py; | |
486 | pipeline_cmds now contains the do_flush bool and commands string. Shows vmsize increases if verbosity>1. | |
487 | ||
488 | 2014-11-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
489 | ||
490 | * tools/README: deps for langNamesScraper | |
491 | ||
492 | 2014-11-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
493 | ||
494 | * README, servlet.py: servlet doesn't actually use lxml.etree | |
495 | ||
496 | 2014-11-05 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
497 | ||
498 | * COPYING: copying | |
499 | ||
500 | 2014-11-04 Xavi Ivars <xavi.ivars@gmail.com> | |
501 | ||
502 | * servlet.py, util.py: [apy] Better handling DB | |
503 | ||
504 | 2014-11-04 Xavi Ivars <xavi.ivars@gmail.com> | |
505 | ||
506 | * servlet.py: Only close the DB once | |
507 | ||
508 | 2014-10-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
509 | ||
510 | * servlet.py: let missingFreqs.db=None by default since it really | |
511 | shouldn't be required for apy to work | |
512 | ||
513 | 2014-10-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
514 | ||
515 | * tools/sanity-test-apy.py: test hin-urd, hbs-eng; timeout 15 | |
516 | ||
517 | 2014-10-17 Xavi Ivars <xavi.ivars@gmail.com> | |
518 | ||
519 | * servlet.py: [APY] New mode -M to keep unknown words in memory | |
520 | until a threshold is achieved (fix indent) | |
521 | ||
522 | 2014-10-15 Xavi Ivars <xavi.ivars@gmail.com> | |
523 | ||
524 | * util.py: [APY] New mode -M to keep unknown words in memory until a | |
525 | threshold is achieved (missing file) | |
526 | ||
527 | 2014-10-15 Xavi Ivars <xavi.ivars@gmail.com> | |
528 | ||
529 | * servlet.py: [APY] New mode -M to keep unknown words in memory | |
530 | until a threshold is achieved | |
531 | ||
532 | 2014-10-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
533 | ||
534 | * Makefile: rm -f | |
535 | ||
536 | 2014-10-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
537 | ||
538 | * Makefile: a clean target | |
539 | ||
540 | 2014-10-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
541 | ||
542 | * translation.py: https://sourceforge.net/p/apertium/tickets/45/ | |
543 | don't flush with lrx-proc yet | |
544 | ||
545 | 2014-10-01 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
546 | ||
547 | * tools/sanity-test-apy.py: exit(1) if anything failed | |
548 | ||
549 | 2014-09-16 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
550 | ||
551 | * servlet.py: simply remove async_callback | |
552 | https://github.com/tornadoweb/tornado/blob/branch3.1/tornado/web.py#L1036 says async_callback is Obsolete and unnecessary since Tornado 1.1 | |
553 | ||
554 | 2014-09-07 Xavi Ivars <xavi.ivars@gmail.com> | |
555 | ||
556 | * util.py: [fix] Real IP behind a proxy | |
557 | ||
558 | 2014-08-18 Sushain Cherivirala <sushain@skc.name> | |
559 | ||
560 | * servlet.py: odt support | |
561 | ||
562 | 2014-08-05 Sushain Cherivirala <sushain@skc.name> | |
563 | ||
564 | * servlet.py: check extracted file list if detected as | |
565 | application/zip | |
566 | ||
567 | 2014-08-03 Sushain Cherivirala <sushain@skc.name> | |
568 | ||
569 | * servlet.py: break | |
570 | ||
571 | 2014-08-01 Sushain Cherivirala <sushain@skc.name> | |
572 | ||
573 | * servlet.py: mimetype > xdg-mime > file | |
574 | ||
575 | 2014-08-01 Sushain Cherivirala <sushain@skc.name> | |
576 | ||
577 | * servlet.py, translation.py: fixes | |
578 | ||
579 | 2014-07-28 Sushain Cherivirala <sushain@skc.name> | |
580 | ||
581 | * servlet.py: Better file command parsing | |
582 | ||
583 | 2014-07-28 Sushain Cherivirala <sushain@skc.name> | |
584 | ||
585 | * servlet.py: Switch errors | |
586 | ||
587 | 2014-07-27 Sushain Cherivirala <sushain@skc.name> | |
588 | ||
589 | * servlet.py: Add download headers to /translateDoc | |
590 | ||
591 | 2014-07-27 Sushain Cherivirala <sushain@skc.name> | |
592 | ||
593 | * servlet.py, translation.py: Use file command to detect MIME type | |
594 | ||
595 | 2014-07-27 Sushain Cherivirala <sushain@skc.name> | |
596 | ||
597 | * servlet.py, translation.py: Functional document translation -- | |
598 | still needs threading and logging | |
599 | ||
600 | 2014-07-26 Sushain Cherivirala <sushain@skc.name> | |
601 | ||
602 | * servlet.py, translation.py: cleanup | |
603 | ||
604 | 2014-07-26 Sushain Cherivirala <sushain@skc.name> | |
605 | ||
606 | * servlet.py: Start on document translation support | |
607 | ||
608 | 2014-07-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
609 | ||
610 | * tools/apertiumlangs.sql: better ʻ | |
611 | ||
612 | 2014-07-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
613 | ||
614 | * tools/apertiumlangs.sql: a couple kaa names | |
615 | ||
616 | 2014-07-24 Sushain Cherivirala <sushain@skc.name> | |
617 | ||
618 | * servlet.py: '?mode' -> '?lang', '/coverage' -> '/calcCoverage' NOT | |
619 | BACKWARDS COMPATIBILITY | |
620 | ||
621 | 2014-07-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
622 | ||
623 | * servlet.py: possibly stop complaining about unicode chars in | |
624 | source code | |
625 | ||
626 | 2014-07-06 Xavi Ivars <xavi.ivars@gmail.com> | |
627 | ||
628 | * servlet.py, translation.py: Code review feedback | |
629 | ||
630 | 2014-07-06 Xavi Ivars <xavi.ivars@gmail.com> | |
631 | ||
632 | * keys.py, servlet.py, translation.py, util.py: [Softcatalà] | |
633 | Integrate ScaleMT-like logs into APY | |
634 | ||
635 | 2014-06-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
636 | ||
637 | * servlet.py: pretty sure a star before a space can't be an unknown | |
638 | word mark, that'd only work if spaces were in <alphabet> this should hopefully fix the overly greedy noteUnknownTokens | |
639 | ||
640 | 2014-06-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
641 | ||
642 | * tools/sanity-test-apy.py: some output while running so it doesn't | |
643 | seem like we've stalled | |
644 | ||
645 | 2014-06-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
646 | ||
647 | * tools/langNamesScraper.py: s/unicode/langNames/ | |
648 | ||
649 | 2014-06-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
650 | ||
651 | * Makefile: WARNING: unicode.db now called langNames.db (fix your | |
652 | apy startup scripts) | |
653 | ||
654 | 2014-06-27 Sushain Cherivirala <sushain@skc.name> | |
655 | ||
656 | * Makefile, servlet.py, util.py: Add missing freqs database (ticket | |
657 | #30) | |
658 | ||
659 | 2014-06-26 Sushain Cherivirala <sushain@skc.name> | |
660 | ||
661 | * servlet.py, util.py: formatting/style changes | |
662 | ||
663 | 2014-06-19 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
664 | ||
665 | * tools/systemd/README, tools/systemd/apy.service: systemd example | |
666 | service file | |
667 | ||
668 | 2014-06-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
669 | ||
670 | * servlet.py, translation.py: use -f html-noent instead of -f html | |
671 | ||
672 | 2014-06-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
673 | ||
674 | * servlet.py: rename confusing fn name | |
675 | ||
676 | 2014-06-17 Sushain Cherivirala <sushain@skc.name> | |
677 | ||
678 | * servlet.py: Make all modes accept ISO 639-1 codes | |
679 | ||
680 | 2014-06-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
681 | ||
682 | * servlet.py: /listPairs?include_deprecated_codes to include | |
683 | two-letter codes in output | |
684 | ||
685 | 2014-06-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
686 | ||
687 | * servlet.py: silently fail to fail if client passes two-letter | |
688 | codes to /translate | |
689 | ||
690 | 2014-06-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
691 | ||
692 | * translation.py: -unused fluff | |
693 | ||
694 | 2014-06-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
695 | ||
696 | * servlet.py: stripUnknownMarks in a fn | |
697 | ||
698 | 2014-06-09 Sushain Cherivirala <sushain@skc.name> | |
699 | ||
700 | * servlet.py: sub-optimal implementation of markUnknown for | |
701 | /translate | |
702 | ||
703 | 2014-06-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
704 | ||
705 | * tools/apertiumlangs.sql: -چا to چە | |
706 | ||
707 | 2014-06-05 Jonathan Washington <jonathan.north.washington@gmail.com> | |
708 | ||
709 | * tools/apertiumlangs.sql: db updates | |
710 | ||
711 | 2014-06-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
712 | ||
713 | * tools/turkic.sql: uy>ug | |
714 | ||
715 | 2014-05-30 Jonathan Washington <jonathan.north.washington@gmail.com> | |
716 | ||
717 | * tools/apertiumlangs.sql: reversed | |
718 | ||
719 | 2014-05-30 Jonathan Washington <jonathan.north.washington@gmail.com> | |
720 | ||
721 | * tools/apertiumlangs.sql, tools/turkic.sql: updated databases or | |
722 | something | |
723 | ||
724 | 2014-05-15 Sushain Cherivirala <sushain@skc.name> | |
725 | ||
726 | * Makefile: don't try deleting non-existent unicode.db | |
727 | ||
728 | 2014-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
729 | ||
730 | * servlet.py: def isFlushable in case other cmds are unflushable as | |
731 | well | |
732 | ||
733 | 2014-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
734 | ||
735 | * servlet.py, translation.py: No flushing for hfst-proc pairs | |
736 | (restart on each translation) Still uses a lock, so should at least not spawn a several processes | |
737 | per pipeline at once. Entries in BaseHandler.pipelines now have a third arg do_flush Closes https://sourceforge.net/p/apertium/tickets/22/ | |
738 | ||
739 | 2014-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
740 | ||
741 | * servlet.py: start pipeline (if necessary) in the _worker, since | |
742 | this is the function that starts translation | |
743 | ||
744 | 2014-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
745 | ||
746 | * servlet.py: cleanPairs/notePairUsage only used by /translate, | |
747 | don't need to be in BaseHandler | |
748 | ||
749 | 2014-05-13 Sushain Cherivirala <sushain@skc.name> | |
750 | ||
751 | * servlet.py: attempt to fix gen.coroutine import error | |
752 | ||
753 | 2014-05-13 Sushain Cherivirala <sushain@skc.name> | |
754 | ||
755 | * servlet.py: python 3.3 doesn't like generators with return None | |
756 | ||
757 | 2014-05-09 Sushain Cherivirala <sushain@skc.name> | |
758 | ||
759 | * servlet.py: 400 -> 408 HTTP errors | |
760 | ||
761 | 2014-05-09 Sushain Cherivirala <sushain@skc.name> | |
762 | ||
763 | * servlet.py: Enable CORS | |
764 | ||
765 | 2014-05-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
766 | ||
767 | * servlet.py: json error responses, closes | |
768 | https://sourceforge.net/p/apertium/tickets/19/ | |
769 | ||
770 | 2014-05-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
771 | ||
772 | * servlet.py: fixed: /listPairs?q=adsf was treated as /listPairs | |
773 | ||
774 | 2014-05-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
775 | ||
776 | * servlet.py: enable_pretty_logging doesn't seem to work if | |
777 | logging's been used once already | |
778 | ||
779 | 2014-05-08 Sushain Cherivirala <sushain@skc.name> | |
780 | ||
781 | * servlet.py: Make /analyze, /generate, /perWord and /coverage | |
782 | non-blocking | |
783 | ||
784 | 2014-05-08 Sushain Cherivirala <sushain@skc.name> | |
785 | ||
786 | * servlet.py: Use CLD for language detection and fallback to naive | |
787 | approach if CLD not installed | |
788 | ||
789 | 2014-05-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
790 | ||
791 | * servlet.py: /stats and -mN to shut down pipelines that haven't | |
792 | been used in the last N secs (not too useful, but was so easy to | |
793 | implement …) | |
794 | ||
795 | 2014-05-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
796 | ||
797 | * tools/sanity-test-apy.py: more tests | |
798 | ||
799 | 2014-05-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
800 | ||
801 | * tools/sanity-test-apy.py: output some stuff | |
802 | ||
803 | 2014-05-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
804 | ||
805 | * tools/sanity-test-apy.py: more tests, still just kaz-tat/sme-nob | |
806 | failing | |
807 | ||
808 | 2014-05-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
809 | ||
810 | * tools/sanity-test-apy.py: insanity-untest | |
811 | ||
812 | 2014-04-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
813 | ||
814 | * Makefile: rm unicode.db before running sqlite in case it already | |
815 | exists | |
816 | ||
817 | 2014-04-11 Francis Tyers <ftyers@users.noreply.github.com> | |
818 | ||
819 | * tools/apertiumlangs.sql: add some avar translations | |
820 | ||
821 | 2014-04-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
822 | ||
823 | * servlet.py: return 400 on modefile parse fail (better than | |
824 | nothing) | |
825 | ||
826 | 2014-04-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
827 | ||
828 | * servlet.py, translation.py: per-pipeline locks! zomg. | |
829 | ||
830 | 2014-04-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
831 | ||
832 | * servlet.py: doh! use sendResponse, not just finish, got wrong | |
833 | headers etc | |
834 | ||
835 | 2014-04-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
836 | ||
837 | * servlet.py, translation.py: Start translate() in a separate thread | |
838 | while still reusing pipelines, using ThreadableMixin Made it split on fairly short segments, might want to tweak | |
839 | "hardbreak" constants | |
840 | ||
841 | 2014-04-09 Sushain Cherivirala <sushain@skc.name> | |
842 | ||
843 | * servlet.py: Non-blocking requests | |
844 | ||
845 | 2014-04-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
846 | ||
847 | * servlet.py: wops :) | |
848 | ||
849 | 2014-04-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
850 | ||
851 | * modeSearch.py, servlet.py: Main arg to servlet.py now includes | |
852 | _all_ .mode files under that path. This now finds all my 88 /usr/local/share/apertium/modes, vs only 14 | |
853 | before … To specify a path for non-pair modes only, pass the | |
854 | -s/--nonpairs-path argument: ./servlet.py /path/to/include/all/modes/from -s | |
855 | /path/to/include/tagger/analyser/generator/modes/from Also has real symlink loop checking. Pass -v2 to show the discovered modes on startup. | |
856 | ||
857 | 2014-04-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
858 | ||
859 | * Makefile: how to make unicode.db, rather important … | |
860 | ||
861 | 2014-04-07 Sushain Cherivirala <sushain@skc.name> | |
862 | ||
863 | * util.py: oops, precedence rules are weird | |
864 | ||
865 | 2014-04-07 Sushain Cherivirala <sushain@skc.name> | |
866 | ||
867 | * modeSearch.py, util.py: Support variations in mode search | |
868 | ||
869 | 2014-04-07 Francis Tyers <ftyers@users.noreply.github.com> | |
870 | ||
871 | * modeSearch.py: add support for finding variants | |
872 | ||
873 | 2014-04-05 selimcan <selimcan@72bbbca6-d526-0410-a7d9-f06f51895060> | |
874 | ||
875 | * tools/apertiumlangs.sql: add tatar words for 'english' and | |
876 | 'spanish' | |
877 | ||
878 | 2014-04-05 Francis Tyers <ftyers@users.noreply.github.com> | |
879 | ||
880 | * tools/apertiumlangs.sql: add new langs in aragonese | |
881 | ||
882 | 2014-04-04 Sushain Cherivirala <sushain@skc.name> | |
883 | ||
884 | * tools/apertiumlangs.sql: Regenerate language name DB | |
885 | ||
886 | 2014-04-04 Sushain Cherivirala <sushain@skc.name> | |
887 | ||
888 | * tools/langNamesScraper.py: Fix bug with language list generation, | |
889 | add some languages to list manually | |
890 | ||
891 | 2014-04-04 Francis Tyers <ftyers@users.noreply.github.com> | |
892 | ||
893 | * tools/apertiumlangs.sql: new langs | |
894 | ||
895 | 2014-02-02 Jonathan Washington <jonathan.north.washington@gmail.com> | |
896 | ||
897 | * modeSearch.py, translation.py: linux line breaks on all py files | |
898 | ||
899 | 2014-02-02 Jonathan Washington <jonathan.north.washington@gmail.com> | |
900 | ||
901 | * util.py: vim modeline | |
902 | ||
903 | 2014-02-02 Sushain Cherivirala <sushain@skc.name> | |
904 | ||
905 | * util.py: Fix indentation and add error message | |
906 | ||
907 | 2014-02-02 Jonathan Washington <jonathan.north.washington@gmail.com> | |
908 | ||
909 | * util.py: keep global track of dbConn | |
910 | ||
911 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
912 | ||
913 | * tools/apertiumlangs.sql: fixes: az in tt, ky in ru | |
914 | ||
915 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
916 | ||
917 | * tools/apertiumlangs.sql: Tatar names for languages | |
918 | ||
919 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
920 | ||
921 | * tools/sysvinit/apy.sh: updated script, points to new db | |
922 | ||
923 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
924 | ||
925 | * tools/apertiumlangs.sql: line to update languageNames table with | |
926 | fixes table | |
927 | ||
928 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
929 | ||
930 | * tools/apertiumlangs.sql: fixes table | |
931 | ||
932 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
933 | ||
934 | * tools/apertiumlangs.sql: apertium languages, gonna fuċ more later | |
935 | ||
936 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
937 | ||
938 | * tools/turkic.sql: two tables | |
939 | ||
940 | 2014-01-26 Jonathan Washington <jonathan.north.washington@gmail.com> | |
941 | ||
942 | * tools/turkic.sql: a few names for Chuvash | |
943 | ||
944 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
945 | ||
946 | * modeSearch.py: Follow symlinks when searching for modes Impose | |
947 | limit on search depth | |
948 | ||
949 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
950 | ||
951 | * modeSearch.py, servlet.py, tools.py, tools/langNamesScraper.py, | |
952 | util.py: Add a few ISO code mappings Make lang name scraper depend | |
953 | on util for ISO code conversion Rename tools.py to util.py | |
954 | ||
955 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
956 | ||
957 | * modeSearch.py, translation.py: Optimize mode search performance | |
958 | (even more) | |
959 | ||
960 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
961 | ||
962 | * modeSearch.py: Optimize mode search performance (more) | |
963 | ||
964 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
965 | ||
966 | * modeSearch.py: Optimize mode search performance (hopefully) | |
967 | ||
968 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
969 | ||
970 | * modeSearch.py, servlet.py: Improve mode searching functionality | |
971 | (walk through all dirs recursively) | |
972 | ||
973 | 2014-01-24 Sushain Cherivirala <sushain@skc.name> | |
974 | ||
975 | * tools/langNamesScraper.py: Make usage appear only when no | |
976 | arguments provided | |
977 | ||
978 | 2014-01-23 Sushain Cherivirala <sushain@skc.name> | |
979 | ||
980 | * modeSearch.py, servlet.py, tools.py: Regularize all APY language | |
981 | listings to 3 alpha language codes | |
982 | ||
983 | 2014-01-20 Jonathan Washington <jonathan.north.washington@gmail.com> | |
984 | ||
985 | * tools/turkic.sql: use ша/ше, not тілі :\ | |
986 | ||
987 | 2014-01-20 Jonathan Washington <jonathan.north.washington@gmail.com> | |
988 | ||
989 | * translation.py: extra DEBUG lines, but all commented out | |
990 | ||
991 | 2014-01-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
992 | ||
993 | * servlet.py: removed unused constant | |
994 | ||
995 | 2014-01-14 Jonathan Washington <jonathan.north.washington@gmail.com> | |
996 | ||
997 | * tools/sysvinit/apy.sh: init script | |
998 | ||
999 | 2014-01-14 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1000 | ||
1001 | * servlet.py: logging | |
1002 | ||
1003 | 2014-01-14 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1004 | ||
1005 | * servlet.py: well ioloop, if you're too spineless to kill them I'll | |
1006 | have to do it myself | |
1007 | ||
1008 | 2014-01-13 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1009 | ||
1010 | * tools/turkic.sql: Kyrgyz and Karakalpak spelling in English | |
1011 | ||
1012 | 2014-01-12 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1013 | ||
1014 | * servlet.py: signal stuff | |
1015 | ||
1016 | 2014-01-12 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1017 | ||
1018 | * tools/turkic.sql: bak in bak | |
1019 | ||
1020 | 2014-01-11 Sushain Cherivirala <sushain@skc.name> | |
1021 | ||
1022 | * servlet.py: Prevent APY 500 on empty text for /identifyLang and | |
1023 | /coverage | |
1024 | ||
1025 | 2014-01-11 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1026 | ||
1027 | * tools/turkic.sql: bak = ba | |
1028 | ||
1029 | 2014-01-11 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1030 | ||
1031 | * tools/turkic.sql: bak, kum | |
1032 | ||
1033 | 2014-01-09 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1034 | ||
1035 | * tools/turkic.sql: some fixes that shouldn't've been necessary. | |
1036 | native speakers need to go through and clean this stuff up :( | |
1037 | ||
1038 | 2014-01-09 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1039 | ||
1040 | * tools/langNamesScraper.py, tools/turkic.sql: Apertium language | |
1041 | names in Turkic languages, English, and Russian Proper header for | |
1042 | langNamesScraper.py | |
1043 | ||
1044 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1045 | ||
1046 | * tools/upstart/apertium-apy.conf: fixed logging | |
1047 | ||
1048 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1049 | ||
1050 | * tools/upstart/apertium-all.conf, | |
1051 | tools/upstart/apertium-apy-gateway.conf, | |
1052 | tools/upstart/apertium-apy.conf, | |
1053 | tools/upstart/apertium-html-tools.conf: updated so apertium-all | |
1054 | controls stuff | |
1055 | ||
1056 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1057 | ||
1058 | * tools/upstart/apertium-all.conf: oh, and this too | |
1059 | ||
1060 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1061 | ||
1062 | * tools/upstart/apertium-apy-gateway.conf, | |
1063 | tools/upstart/apertium-apy.conf, | |
1064 | tools/upstart/apertium-html-tools.conf: new scripts | |
1065 | ||
1066 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1067 | ||
1068 | * tools/upstart/apertium-all.conf, | |
1069 | tools/upstart/apertium-apy-gateway.conf, | |
1070 | tools/upstart/apertium-apy.conf, | |
1071 | tools/upstart/apertium-html-tools.conf: logging added | |
1072 | ||
1073 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1074 | ||
1075 | * tools/upstart/apertium, tools/upstart/apertium-all.conf, | |
1076 | tools/upstart/apertium-apy-gateway.conf, | |
1077 | tools/upstart/apertium-apy.conf, | |
1078 | tools/upstart/apertium-html-tools.conf: all apertium upstart configs | |
1079 | ||
1080 | 2014-01-07 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1081 | ||
1082 | * tools/upstart/apertium-apy-gateway.conf, | |
1083 | tools/upstart/apertium-apy.conf: upstart scripts | |
1084 | ||
1085 | 2014-01-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1086 | ||
1087 | * langNamesScraper.py, tools/langNamesScraper.py: here's the right | |
1088 | one | |
1089 | ||
1090 | 2014-01-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1091 | ||
1092 | * tools.py, tools/lgNameScraper.py: GAAH | |
1093 | ||
1094 | 2014-01-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1095 | ||
1096 | * tools.py, tools/lgNameScraper.py: hmm | |
1097 | ||
1098 | 2014-01-04 Sushain Cherivirala <sushain@skc.name> | |
1099 | ||
1100 | * tools.py: Fix bug in /getLocalizedLanguages in APY when both two | |
1101 | and three character code for same language are requested | |
1102 | ||
1103 | 2014-01-04 Sushain Cherivirala <sushain@skc.name> | |
1104 | ||
1105 | * gateway.py: Simplify APY gateway routing and fix bug with | |
1106 | /getLocale | |
1107 | ||
1108 | 2014-01-03 Sushain Cherivirala <sushain@skc.name> | |
1109 | ||
1110 | * gateway.py: Switch APY Gateway load balancer | |
1111 | ||
1112 | 2014-01-02 Sushain Cherivirala <sushain@skc.name> | |
1113 | ||
1114 | * gateway.py: Fix bug with translation request routing in APY | |
1115 | gateway | |
1116 | ||
1117 | 2014-01-01 Sushain Cherivirala <sushain@skc.name> | |
1118 | ||
1119 | * servlet.py: Restore compatibility with Python 3.2 | |
1120 | ||
1121 | 2013-12-31 Ng Wei En <wei2912@users.noreply.github.com> | |
1122 | ||
1123 | * servlet.py, tools.py: Fix high memory usage issue. | |
1124 | ||
1125 | 2013-12-30 Sushain Cherivirala <sushain@skc.name> | |
1126 | ||
1127 | * gateway.py, tools.py: Make Fastest paradigm balancer work with all | |
1128 | APY modes Fix minor bugs in gateway request handling (evident on | |
1129 | 304s) Fix minor bugs in APY coverage mode | |
1130 | ||
1131 | 2013-12-28 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1132 | ||
1133 | * gateway.py, serverlist-example: Added example serverlist, | |
1134 | commented what needs to be done for gateway-scaleMT compatibility | |
1135 | ||
1136 | 2013-12-28 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1137 | ||
1138 | * gateway.py: Beginning support for scaleMT | |
1139 | ||
1140 | 2013-12-28 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1141 | ||
1142 | * gateway.py: Fixed /perWord | |
1143 | ||
1144 | 2013-12-27 Sushain Cherivirala <sushain@skc.name> | |
1145 | ||
1146 | * gateway.py: Improve Fastest paradigm load balancer in APY gateway | |
1147 | ||
1148 | 2013-12-27 Sushain Cherivirala <sushain@skc.name> | |
1149 | ||
1150 | * gateway.py: Make some APY gateway balancers compatible with | |
1151 | changes to get_server call | |
1152 | ||
1153 | 2013-12-27 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1154 | ||
1155 | * gateway.py: Fixed JS bug | |
1156 | ||
1157 | 2013-12-27 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1158 | ||
1159 | * gateway.py: Changed structure of capabilities dictionary, more | |
1160 | documentation | |
1161 | ||
1162 | 2013-12-27 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1163 | ||
1164 | * gateway.py: Smarter balancing -- going for servers with the | |
1165 | correct langpairs | |
1166 | ||
1167 | 2013-12-26 Sushain Cherivirala <sushain@skc.name> | |
1168 | ||
1169 | * servlet.py, tools.py: Add coverage mode to APY Add preliminary | |
1170 | language identification mode to APY | |
1171 | ||
1172 | 2013-12-25 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1173 | ||
1174 | * gateway.py: Resolved the conflict, moved things around | |
1175 | ||
1176 | 2013-12-25 Sushain Cherivirala <sushain@skc.name> | |
1177 | ||
1178 | * gateway.py, servlet.py: Add Fastest paradigm load balancer for APY | |
1179 | Make pool terminate properly to prevent blocking | |
1180 | ||
1181 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1182 | ||
1183 | * servlet.py, translation.py: Modularize translation functionality | |
1184 | in APY Make translation calls asynchronous with timeout Handle | |
1185 | invalid translation pair errors properly (not in translateSplitting | |
1186 | and with 400) | |
1187 | ||
1188 | 2013-12-23 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1189 | ||
1190 | * gateway.py: now dropping dead servers, checking against empty | |
1191 | serverlists | |
1192 | ||
1193 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1194 | ||
1195 | * servlet.py: Fix bug in APY translate mode from port to Tornado | |
1196 | ||
1197 | 2013-12-23 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1198 | ||
1199 | * gateway.py: forwarding request headers | |
1200 | ||
1201 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1202 | ||
1203 | * servlet.py: Reject /getLocale requests without proper headers in | |
1204 | APY Send back proper HTTP 408 response for timed out requests | |
1205 | ||
1206 | 2013-12-23 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1207 | ||
1208 | * gateway.py: should keep going now | |
1209 | ||
1210 | 2013-12-23 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1211 | ||
1212 | * gateway.py: if our port's being used, use the next | |
1213 | ||
1214 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1215 | ||
1216 | * gateway.py: Change server list file format and add debug mode to | |
1217 | APY gateway Remove empty folder | |
1218 | ||
1219 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1220 | ||
1221 | * langNamesScraper.py: Move language name scraper | |
1222 | ||
1223 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1224 | ||
1225 | * gateway.py: Generalize balancer class in APY gateway Use generator | |
1226 | for Round Robin balancer Add Least Connections balancer (incomplete) | |
1227 | Add Random balancer Improve logging Add testing interval support | |
1228 | ||
1229 | 2013-12-22 Sushain Cherivirala <sushain@skc.name> | |
1230 | ||
1231 | * gateway.py: Add a few preliminary tests to APY Gateway | |
1232 | ||
1233 | 2013-12-22 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1234 | ||
1235 | * servlet.py: Compatibility with python3.2 and earlier by not using | |
1236 | with statements | |
1237 | ||
1238 | 2013-12-22 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1239 | ||
1240 | * gateway.py, servlet.py: including support for tornado 3.1.1, a bit | |
1241 | for python3.2 | |
1242 | ||
1243 | 2013-12-22 Sushain Cherivirala <sushain@skc.name> | |
1244 | ||
1245 | * gateway.py, servlet.py: Modify APY gateway arguments Add | |
1246 | preliminary testing functionality to gateway Remove unnecessary | |
1247 | self.finish() in post handlers | |
1248 | ||
1249 | 2013-12-21 Sushain Cherivirala <sushain@skc.name> | |
1250 | ||
1251 | * modeSearch.py, servlet.py, tools.py: Make more parts of APY | |
1252 | asynchronous Make APY arguments more user-friendly Allow APY to run | |
1253 | with multiple request serving threads Start modularizing APY | |
1254 | ||
1255 | 2013-12-21 Sushain Cherivirala <sushain@skc.name> | |
1256 | ||
1257 | * servlet.py: Add alternative approaches to /perWord output in APY | |
1258 | Fix response headers Improve logging | |
1259 | ||
1260 | 2013-12-21 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1261 | ||
1262 | * gateway.py: With headers now | |
1263 | ||
1264 | 2013-12-21 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1265 | ||
1266 | * gateway.py: a very unintelligent round-robin gateway | |
1267 | ||
1268 | 2013-12-21 Sushain Cherivirala <sushain@skc.name> | |
1269 | ||
1270 | * servlet.py: Make analyzation and generation in APY non-blocking | |
1271 | ||
1272 | 2013-12-20 Sushain Cherivirala <sushain@skc.name> | |
1273 | ||
1274 | * servlet.py: Clean up BaseHandler in APY Add pos argument to | |
1275 | /perWords in APY | |
1276 | ||
1277 | 2013-12-20 Sushain Cherivirala <sushain@skc.name> | |
1278 | ||
1279 | * servlet.py: Improve /perWord functionality in APY | |
1280 | ||
1281 | 2013-12-20 Sushain Cherivirala <sushain@skc.name> | |
1282 | ||
1283 | * servlet.py: Make it clearer when APY launches Prevent extra token | |
1284 | in /perWord requests Fix SSL support bug | |
1285 | ||
1286 | 2013-12-19 Sushain Cherivirala <sushain@skc.name> | |
1287 | ||
1288 | * servlet.py: Fix bugs in APY | |
1289 | ||
1290 | 2013-12-19 Francis Tyers <ftyers@users.noreply.github.com> | |
1291 | ||
1292 | * servlet.py: updates | |
1293 | ||
1294 | 2013-12-19 Sushain Cherivirala <sushain@skc.name> | |
1295 | ||
1296 | * servlet.py: Port Apertium APY to Tornado web framework | |
1297 | ||
1298 | 2013-12-19 Sushain Cherivirala <sushain@skc.name> | |
1299 | ||
1300 | * servlet.py: Simplify output of list analyzers/generators/taggers | |
1301 | in APY Fix bug with POST to APY | |
1302 | ||
1303 | 2013-12-18 Sushain Cherivirala <sushain@skc.name> | |
1304 | ||
1305 | * servlet.py: Add alias for /list?q=taggers and fix input value in | |
1306 | /perWords outputs | |
1307 | ||
1308 | 2013-12-18 Sushain Cherivirala <sushain@skc.name> | |
1309 | ||
1310 | * servlet.py: Condense list URL mappings Add perWord URL mapping | |
1311 | with functionality General cleanup Search for taggers in Apertium | |
1312 | path | |
1313 | ||
1314 | 2013-12-17 Sushain Cherivirala <sushain@skc.name> | |
1315 | ||
1316 | * servlet.py: Add translation by word support to APY | |
1317 | ||
1318 | 2013-12-15 Sushain Cherivirala <sushain@skc.name> | |
1319 | ||
1320 | * servlet.py: Make html-tools translation interface auto-detect | |
1321 | locale correctly | |
1322 | ||
1323 | 2013-12-14 Sushain Cherivirala <sushain@skc.name> | |
1324 | ||
1325 | * servlet.py: Make localized language names database optional | |
1326 | argument Allow ISO 639-2 codes in localized language name requests | |
1327 | ||
1328 | 2013-12-12 Sushain Cherivirala <sushain@skc.name> | |
1329 | ||
1330 | * servlet.py: Add HTTP access to APY localized language names | |
1331 | functionality | |
1332 | ||
1333 | 2013-12-10 Sushain Cherivirala <sushain@skc.name> | |
1334 | ||
1335 | * servlet.py: Improve Unicode.org language name scraper and APY | |
1336 | localized language function | |
1337 | ||
1338 | 2013-12-10 Sushain Cherivirala <sushain@skc.name> | |
1339 | ||
1340 | * servlet.py: Add localized languages function to APY Add | |
1341 | Unicode.org localized language name scraper | |
1342 | ||
1343 | 2013-12-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1344 | ||
1345 | * servlet.py: support en_APERTIUM | |
1346 | ||
1347 | 2013-12-07 Sushain Cherivirala <sushain@skc.name> | |
1348 | ||
1349 | * servlet.py: Add SSL support to Apertium APY | |
1350 | ||
1351 | 2013-12-04 Sushain Cherivirala <sushain@skc.name> | |
1352 | ||
1353 | * servlet.py: Fix morphological analyzer mode detection regexp in | |
1354 | APY | |
1355 | ||
1356 | 2013-12-01 Sushain Cherivirala <sushain@skc.name> | |
1357 | ||
1358 | * servlet.py: Make morphological generator work for single input | |
1359 | without ^...$ | |
1360 | ||
1361 | 2013-12-01 Sushain Cherivirala <sushain@skc.name> | |
1362 | ||
1363 | * servlet.py: Add preliminary morphological analyzer and generator | |
1364 | support to simple-html interface | |
1365 | ||
1366 | 2013-12-01 Sushain Cherivirala <sushain@skc.name> | |
1367 | ||
1368 | * servlet.py: Add list generators/analyzers functions to APY | |
1369 | ||
1370 | 2013-12-01 Sushain Cherivirala <sushain@skc.name> | |
1371 | ||
1372 | * servlet.py: Add morphological analysis and generation support to | |
1373 | APY | |
1374 | ||
1375 | 2013-10-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1376 | ||
1377 | * servlet.py: Threading request handler, to handle multiple users Uses TCPServer inheriting ThreadingMixIn. A lock on translateMode | |
1378 | (which has to have at most one thread per pipeline) ensures that | |
1379 | part stays single-threaded (to avoid Alice getting Bob's text). http://stackoverflow.com/a/487281/69663 recommends select/polling | |
1380 | over threading (http://docs.python.org/3.3/library/socketserver.html | |
1381 | for diffs) but requires either lots of manually written dispatching | |
1382 | code (http://pymotw.com/2/select/) or a framework like Twisted. Try testing with e.g. python3 servlet "$APERTIUMPATH" 2737 & curl -s --data-urlencode 'langpair=nb|nn' --data-urlencode \ | |
1383 | 'q@/tmp/reallybigfile' 'http://localhost:2737/translate' | |
1384 | >/tmp/output & curl 'http://localhost:2737/translate?langpair=nb|nn&q=men+ikke+den' | |
1385 | curl 'http://localhost:2737/translate?langpair=nb|nn&q=men+ikke+den' | |
1386 | curl 'http://localhost:2737/translate?langpair=nb|nn&q=men+ikke+den' And see how the last three (after a slight wait) start outputting | |
1387 | before the first request is done. | |
1388 | ||
1389 | 2013-09-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1390 | ||
1391 | * servlet.py: doh, typo in do_GET | |
1392 | ||
1393 | 2013-09-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1394 | ||
1395 | * servlet.py: split up big requests before sending to pipeline so we | |
1396 | don't fill up buffers This doesn't handle parallel requests, simply to avoid deadlocking | |
1397 | on read/write. Also, support POST, e.g. curl --data-urlencode | |
1398 | 'langpair=nb|nn' --data-urlencode 'q@file' localhost:2737/translate | |
1399 | ||
1400 | 2013-09-20 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1401 | ||
1402 | * servlet.py: a little debugging | |
1403 | ||
1404 | 2013-09-19 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1405 | ||
1406 | * servlet.py: fflush after writing the NUL (instead of writing it | |
1407 | twice); use deformat; read apertiumpath/port from command line | |
1408 | arguments | |
1409 | ||
1410 | 2013-09-14 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1411 | ||
1412 | * servlet.py: proper handling of apertium-pretransfer | |
1413 | ||
1414 | 2013-09-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1415 | ||
1416 | * servlet.py: a small bug fix and some more debugs | |
1417 | ||
1418 | 2013-09-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1419 | ||
1420 | * servlet.py: skip pretransfer | |
1421 | ||
1422 | 2013-09-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1423 | ||
1424 | * servlet.py: apertium-transfer -z fails. also, script needs to be | |
1425 | async / threaded | |
1426 | ||
1427 | 2013-09-03 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1428 | ||
1429 | * servlet.py: another test to prevent failure | |
1430 | ||
1431 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1432 | ||
1433 | * servlet.py: return False stack could be collapsed to a single line | |
1434 | ||
1435 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1436 | ||
1437 | * servlet.py: getting stuck at procOut.stdout.read(1) | |
1438 | ||
1439 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1440 | ||
1441 | * servlet.py: now requires r46823 for lrx-proc | |
1442 | ||
1443 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1444 | ||
1445 | * servlet.py: three different ways to translate: modes file direct, | |
1446 | modes file deep, and apertium direct | |
1447 | ||
1448 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1449 | ||
1450 | * servlet.py: uses .mode files | |
1451 | ||
1452 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1453 | ||
1454 | * servlet.py: programming fail | |
1455 | ||
1456 | 2013-08-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1457 | ||
1458 | * servlet.py: now with callback support, I think | |
1459 | ||
1460 | 2013-08-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1461 | ||
1462 | * servlet.py: some cleaning up of commented-out code | |
1463 | ||
1464 | 2013-08-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1465 | ||
1466 | * apertium-apy, where apy=APy, and stands for API in Python | |
1467 |
0 | # -*- mode:org -*- | |
1 | #+STARTUP: showall | |
2 | ||
3 | * Version 0.9.1, 2016-06-10 | |
4 | Git rev: 3c536b37def552d073ddda4d27d9358103e304c4 | |
5 | ||
6 | Changes since 0.9.0: | |
7 | ||
8 | - Various minor cleanups and lints | |
9 | ||
10 | - systemd watchdog – if APY was started by systemd and the .service | |
11 | file has e.g. WatchdogSec=10s, we send a watchdog ping every <10s. | |
12 | See tools/systemd for example .service file. | |
13 | ||
14 | - rewrote missingFreqs-handling to fix locking (in its own class | |
15 | now); and don't try to close missingFreqsDb unless it was actually | |
16 | open | |
17 | ||
18 | - Handle .mode files from newest Apertium (which now quotes paths | |
19 | correctly there) | |
20 | ||
21 | - fix #14: false was sent by getPairOrError instead of None | |
22 | ||
23 | ||
24 | * Version 0.9.0, 2016-02-11 | |
25 | Git rev: 1d0e110cd1e9a74f0099138823ec31f4c145ea73 | |
26 | ||
27 | Changes since r61425 / 0dd2cd: | |
28 | ||
29 | - First official release with a version number :-) | |
30 | ||
31 | - Should now be compatible with all Python versions from 3.2 to | |
32 | 3.5-dev (raises StopIteration in 3.2, returns from generator in | |
33 | higher versions) | |
34 | ||
35 | - APY now has some simple regression tests and Continuous | |
36 | Integration. See t/README if you want to run them yourself, but | |
37 | they are also run at https://travis-ci.org/goavki/apertium-apy on | |
38 | every pull request or push to | |
39 | https://github.com/goavki/apertium-apy | |
40 | ||
41 | - /translateDoc now accepts optional argument &markUnknown which can | |
42 | be either "yes" or "no", as /translate already does. Defaults to | |
43 | "yes", which will put a "*" in front of any unknown word. | |
44 | ||
45 | - New, *experimental* endpoint | |
46 | /translatePage?url=URL&langpair=FROM|TO which will retrieve an URL | |
47 | and return HTML with translated text. Like /translate, accepts | |
48 | optional argument &markUnknown which can be either "yes" or "no", | |
49 | and does not block other requests (but if there are few pipelines | |
50 | per pair, long page translations may currently hold up shorter | |
51 | requests to the same language pair, since we translate the whole | |
52 | page in one go instead of splitting it up). | |
53 | ||
54 | - New/improved language name localisations (Kyrgyz, Uzbek, | |
55 | Qaraqalpaq, Kazakh, Basque, Crimean Tatar, …) | |
56 | ||
57 | - More information in /stats: Now shows number of requests, chars | |
58 | and time spent actively translating during the last N seconds | |
59 | (defaults to 3600, changeable with new optional argument -T / | |
60 | --stat-period-max-age), and total server uptime. | |
61 | ||
62 | - Now does NUL-flushing (keeps pipelines open) even if the pair uses | |
63 | hfst-proc or lrx-proc (NUL-flushing works in those programs as of | |
64 | 2015-10-15). | |
65 | ||
66 | - Analysis/generation now both use translateSimple, less duplicate | |
67 | code. | |
68 | ||
69 | - tools/apertium-viewer.html – a simple one-page HTML interface to | |
70 | APY translation, meant for debugging – now does translate as you | |
71 | type. | |
72 | ||
73 | - tools/sanity-test-apy.py updated to do a healthcheck on all pairs | |
74 | currently running on https://apertium.org | |
75 | ||
76 | - Minor cleanups suggested by flake8. | |
77 | ||
78 | * r61425, 2015-08-13 | |
79 | Git rev: 0dd2cdba544e228b6268f0c0babbdf1698af27f8 | |
80 | ||
81 | - Unofficial release, but in Debian sid as 0.1.0~r61425-1 | |
82 | ||
83 | - See http://wiki.apertium.org/wiki/Apy for an overview of what APY | |
84 | can do. |
0 | See http://wiki.apertium.org/wiki/Apertium-apy | |
1 | ||
2 | #+CAPTION: Build Status | |
3 | [[https://travis-ci.org/goavki/apertium-apy][https://travis-ci.org/goavki/apertium-apy.svg]] | |
4 |
0 | #!/usr/bin/env python3 | |
1 | # vim: set ts=4 sw=4 sts=4 et : | |
2 | ||
3 | import sqlite3, logging | |
4 | from datetime import datetime | |
5 | import threading | |
6 | from collections import defaultdict | |
7 | from contextlib import closing | |
8 | ||
class MissingDb(object):
    """Accumulates counts of unknown words per language pair in memory,
    periodically flushing them to an sqlite database.

    Counts are buffered in self.words until more than wordmemlimit words
    have been noted, at which point they are committed to the database
    and the in-memory buffer is cleared.
    """

    def __init__(self, dbPath, wordmemlimit):
        # RLock (reentrant) so commit() can be called both directly and
        # from noteUnknown() on the same thread without deadlocking.
        self.lock = threading.RLock()
        self.conn = None  # sqlite3 connection, opened lazily on first commit()
        self.dbPath = dbPath
        # pair -> token -> number of times the token was seen unknown
        self.words = defaultdict(lambda: defaultdict(int))
        self.wordcount = 0  # total buffered occurrences, across all pairs
        self.wordmemlimit = wordmemlimit

    def noteUnknown(self, token, pair):
        """Record one occurrence of unknown `token` for language `pair`,
        flushing the buffer to the DB once it exceeds wordmemlimit."""
        # Hold the lock for the whole note-and-maybe-flush sequence:
        # otherwise words added by another thread between commit() and
        # words.clear() would be silently dropped.
        with self.lock:
            self.words[pair][token] += 1
            self.wordcount += 1
            # so if wordmemlimit is 0, we commit on each word
            if self.wordcount > self.wordmemlimit:
                self.commit()
                self.words.clear()
                self.wordcount = 0

    def commit(self):
        """Write all buffered counts to the database, creating the
        connection and table on first use."""
        timeBefore = datetime.now()
        with self.lock:
            if not self.conn:
                self.conn = sqlite3.connect(self.dbPath)

            with closing(self.conn.cursor()) as c:
                c.execute("PRAGMA synchronous = NORMAL")
                c.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))')
                # Upsert: add each buffered amount to any frequency
                # already stored for that (pair, token).
                c.executemany('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + :amount)',
                              ({'pair': pair,
                                'token': token,
                                'amount': self.words[pair][token]}
                               for pair in self.words
                               for token in self.words[pair]))
            self.conn.commit()
        ms = self._elapsedMs(datetime.now() - timeBefore)
        logging.info("\tSaving %s unknown words to the DB (%s ms)", self.wordcount, ms)

    def closeDb(self):
        """Commit and close the connection, if one was ever opened."""
        if not self.conn:
            logging.warning('no connection on closeDb')
            return
        logging.warning('closing connection')
        self.conn.commit()
        self.conn.close()
        self.conn = None

    @staticmethod
    def _elapsedMs(td):
        """Milliseconds in a timedelta, truncated to an int."""
        return td.days * 86400000 + td.seconds * 1000 + int(td.microseconds / 1000)
54 | ||
55 | ||
def timedeltaToMilliseconds(td):
    """Convert a datetime.timedelta to whole milliseconds (truncated).

    timedelta components are normalized (0 <= microseconds < 10**6), so
    flooring the microsecond part matches truncation toward zero.
    """
    whole_second_ms = (td.days * 86400 + td.seconds) * 1000
    return whole_second_ms + td.microseconds // 1000
48 | 48 | if mtype != 'pair': |
49 | 49 | modename = m.group(1) # e.g. en-es-anmorph |
50 | 50 | langlist = [toAlpha3Code(l) for l in m.group(2).split('-')] |
51 | lang_src = langlist[0] # e.g. en | |
52 | 51 | lang_pair = '-'.join(langlist) # e.g. en-es |
53 | 52 | dir_of_modes = os.path.dirname(dirpath) |
54 | 53 | mode = (dir_of_modes, |
63 | 62 | toAlpha3Code(lang_trg)) |
64 | 63 | modes[mtype].append(mode) |
65 | 64 | |
66 | if verbosity>1: | |
67 | for mtype in modes: | |
68 | if modes[mtype]: | |
69 | logging.info("\"%s\" modes found:\n%s" % ( | |
70 | mtype, | |
71 | "\n".join(["\t".join(m) for m in modes[mtype]]))) | |
72 | ||
65 | if verbosity > 1: | |
66 | _log_modes(modes) | |
73 | 67 | |
74 | 68 | return modes |
69 | ||
70 | def _log_modes(modes): | |
71 | """Print given modes to log.""" | |
72 | for mtype in modes: | |
73 | if modes[mtype]: | |
74 | logging.info("\"%s\" modes found:\n%s" % ( | |
75 | mtype, | |
76 | "\n".join(["\t".join(m) for m in modes[mtype]]))) |
2 | 2 | # coding=utf-8 |
3 | 3 | # -*- encoding: utf-8 -*- |
4 | 4 | |
5 | import sys, os, re, ssl, argparse, logging, time, signal, tempfile, zipfile | |
5 | import sys, os, re, argparse, logging, time, signal, tempfile, zipfile | |
6 | 6 | from subprocess import Popen, PIPE |
7 | 7 | from multiprocessing import Pool, TimeoutError |
8 | 8 | from functools import wraps |
9 | 9 | from threading import Thread |
10 | from datetime import datetime | |
10 | from datetime import datetime, timedelta | |
11 | import heapq | |
11 | 12 | |
12 | 13 | import tornado, tornado.web, tornado.httpserver, tornado.process, tornado.iostream |
13 | from tornado import escape, gen | |
14 | from tornado import httpclient | |
15 | from tornado import gen | |
16 | from tornado import escape | |
14 | 17 | from tornado.escape import utf8 |
15 | try: #3.1 | |
18 | try: # 3.1 | |
16 | 19 | from tornado.log import enable_pretty_logging |
17 | except ImportError: #2.1 | |
20 | except ImportError: # 2.1 | |
18 | 21 | from tornado.options import enable_pretty_logging |
19 | 22 | |
20 | import toro | |
21 | ||
22 | 23 | from modeSearch import searchPath |
23 | from util import getLocalizedLanguages, apertium, bilingualTranslate, removeLast, stripTags, processPerWord, getCoverage, getCoverages, toAlpha3Code, toAlpha2Code, noteUnknownToken, scaleMtLog, TranslationInfo, closeDb, flushUnknownWords, inMemoryUnknownToken | |
24 | import translation | |
24 | from util import getLocalizedLanguages, stripTags, processPerWord, getCoverage, getCoverages, toAlpha3Code, toAlpha2Code, scaleMtLog, TranslationInfo | |
25 | ||
26 | import systemd | |
27 | import missingdb | |
28 | ||
29 | from urllib.parse import urlparse | |
30 | ||
31 | if sys.version_info.minor < 3: | |
32 | import translation_py32 as translation | |
33 | else: | |
34 | import translation | |
35 | ||
36 | import util | |
25 | 37 | from keys import getKey |
26 | 38 | |
27 | 39 | try: |
29 | 41 | except: |
30 | 42 | cld2 = None |
31 | 43 | |
44 | try: | |
45 | import chardet | |
46 | except: | |
47 | chardet = None | |
48 | ||
49 | __version__ = "0.9.1" | |
50 | ||
32 | 51 | def run_async_thread(func): |
33 | 52 | @wraps(func) |
34 | 53 | def async_func(*args, **kwargs): |
35 | func_hl = Thread(target = func, args = args, kwargs = kwargs) | |
54 | func_hl = Thread(target=func, args=args, kwargs=kwargs) | |
36 | 55 | func_hl.start() |
37 | 56 | return func_hl |
38 | 57 | |
39 | 58 | return async_func |
40 | 59 | |
60 | ||
61 | missingFreqsDb = None # has to be global for sig_handler :-/ | |
62 | ||
41 | 63 | def sig_handler(sig, frame): |
42 | 64 | global missingFreqsDb |
43 | if missingFreqsDb: | |
65 | if missingFreqsDb is not None: | |
44 | 66 | if 'children' in frame.f_locals: |
45 | 67 | for child in frame.f_locals['children']: |
46 | 68 | os.kill(child, signal.SIGTERM) |
47 | flushUnknownWords(missingFreqsDb) | |
48 | else: # we are one of the children | |
49 | flushUnknownWords(missingFreqsDb) | |
69 | missingFreqsDb.commit() | |
70 | else: | |
71 | # we are one of the children | |
72 | missingFreqsDb.commit() | |
73 | missingFreqsDb.closeDb() | |
50 | 74 | logging.warning('Caught signal: %s', sig) |
51 | closeDb() | |
52 | 75 | exit() |
76 | ||
53 | 77 | |
54 | 78 | class BaseHandler(tornado.web.RequestHandler): |
55 | 79 | pairs = {} |
56 | 80 | analyzers = {} |
57 | 81 | generators = {} |
58 | 82 | taggers = {} |
59 | pipelines = {} # (l1, l2): (inpipe, outpipe), only contains flushing pairs! | |
83 | pipelines = {} # (l1, l2): [translation.Pipeline], only contains flushing pairs! | |
84 | pipelines_holding = [] | |
60 | 85 | callback = None |
61 | 86 | timeout = None |
62 | 87 | scaleMtLogs = False |
63 | inMemoryUnknown = False | |
64 | inMemoryLimit = -1 | |
65 | 88 | verbosity = 0 |
66 | 89 | |
67 | 90 | stats = { |
91 | 'startdate': datetime.now(), | |
68 | 92 | 'useCount': {}, |
69 | 'lastUsage': {}, | |
70 | 93 | 'vmsize': 0, |
94 | 'timing': [] | |
71 | 95 | } |
72 | 96 | |
73 | # The lock is needed so we don't let two coroutines write | |
74 | # simultaneously to a pipeline; then the first call to read might | |
75 | # read translations of text put there by the second call … | |
76 | pipeline_locks = {} # (l1, l2): lock for (l1, l2) in pairs | |
77 | pipeline_cmds = {} # (l1, l2): (do_flush, commands) | |
97 | pipeline_cmds = {} # (l1, l2): translation.ParsedModes | |
98 | max_pipes_per_pair = 1 | |
99 | min_pipes_per_pair = 0 | |
100 | max_users_per_pipe = 5 | |
101 | max_idle_secs = 0 | |
102 | restart_pipe_after = 1000 | |
78 | 103 | |
79 | 104 | def initialize(self): |
80 | 105 | self.callback = self.get_argument('callback', default=None) |
150 | 175 | self.set_status(204) |
151 | 176 | self.finish() |
152 | 177 | |
178 | ||
153 | 179 | class ListHandler(BaseHandler): |
154 | 180 | @tornado.web.asynchronous |
155 | 181 | def get(self): |
172 | 198 | else: |
173 | 199 | self.send_error(400, explanation='Expecting q argument to be one of analysers, generators, disambiguators or pairs') |
174 | 200 | |
201 | ||
175 | 202 | class StatsHandler(BaseHandler): |
176 | 203 | @tornado.web.asynchronous |
177 | 204 | def get(self): |
205 | numRequests = self.get_argument('requests', 1000) | |
206 | try: | |
207 | numRequests = int(numRequests) | |
208 | except ValueError: | |
209 | numRequests = 1000 | |
210 | ||
211 | periodStats = self.stats['timing'][-numRequests:] | |
212 | times = sum([x[1]-x[0] for x in periodStats], | |
213 | timedelta()) | |
214 | chars = sum(x[2] for x in periodStats) | |
215 | if times.total_seconds() != 0: | |
216 | charsPerSec = round(chars/times.total_seconds(), 2) | |
217 | else: | |
218 | charsPerSec = 0.0 | |
219 | nrequests = len(periodStats) | |
220 | maxAge = (datetime.now()-periodStats[0][0]).total_seconds() if periodStats else 0 | |
221 | ||
222 | uptime = int((datetime.now()-self.stats['startdate']).total_seconds()) | |
223 | useCount = { '%s-%s' % pair: useCount | |
224 | for pair, useCount in self.stats['useCount'].items() } | |
225 | runningPipes = { '%s-%s' % pair: len(pipes) | |
226 | for pair, pipes in self.pipelines.items() | |
227 | if pipes != [] } | |
228 | holdingPipes = len(self.pipelines_holding) | |
229 | ||
178 | 230 | self.sendResponse({ |
179 | 'responseData': { '%s-%s' % pair: useCount for pair, useCount in self.stats['useCount'].items() }, | |
231 | 'responseData': { | |
232 | 'uptime': uptime, | |
233 | 'useCount': useCount, | |
234 | 'runningPipes': runningPipes, | |
235 | 'holdingPipes': holdingPipes, | |
236 | 'periodStats': { | |
237 | 'charsPerSec': charsPerSec, | |
238 | 'totChars': chars, | |
239 | 'totTimeSpent': times.total_seconds(), | |
240 | 'requests': nrequests, | |
241 | 'ageFirstRequest': maxAge | |
242 | } | |
243 | }, | |
180 | 244 | 'responseDetails': None, |
181 | 245 | 'responseStatus': 200 |
182 | 246 | }) |
183 | 247 | |
248 | ||
184 | 249 | class RootHandler(BaseHandler): |
185 | 250 | @tornado.web.asynchronous |
186 | 251 | def get(self): |
189 | 254 | class TranslateHandler(BaseHandler): |
190 | 255 | def notePairUsage(self, pair): |
191 | 256 | self.stats['useCount'][pair] = 1 + self.stats['useCount'].get(pair, 0) |
192 | if self.max_idle_secs: | |
193 | self.stats['lastUsage'][pair] = time.time() | |
194 | 257 | |
195 | 258 | unknownMarkRE = re.compile(r'\*([^.,;:\t\* ]+)') |
196 | def maybeStripMarks(self, markUnknown, l1, l2, translated): | |
197 | self.noteUnknownTokens("%s-%s" % (l1, l2), translated) | |
259 | def maybeStripMarks(self, markUnknown, pair, translated): | |
260 | self.noteUnknownTokens("%s-%s" % pair, translated) | |
198 | 261 | if markUnknown: |
199 | 262 | return translated |
200 | 263 | else: |
201 | 264 | return re.sub(self.unknownMarkRE, r'\1', translated) |
202 | 265 | |
203 | 266 | def noteUnknownTokens(self, pair, text): |
204 | if self.missingFreqs: | |
267 | global missingFreqsDb | |
268 | if missingFreqsDb is not None: | |
205 | 269 | for token in re.findall(self.unknownMarkRE, text): |
206 | if self.inMemoryUnknown: | |
207 | inMemoryUnknownToken(token, pair, self.missingFreqs, self.inMemoryLimit) | |
208 | else: | |
209 | noteUnknownToken(token, pair, self.missingFreqs) | |
210 | ||
211 | def shutdownPair(self, pair): | |
212 | logging.info("shutting down") | |
213 | self.pipelines[pair][0].stdin.close() | |
214 | self.pipelines[pair][0].stdout.close() | |
215 | self.pipelines.pop(pair) | |
270 | missingFreqsDb.noteUnknown(token, pair) | |
271 | ||
272 | def cleanable(self, i, pair, pipe): | |
273 | if pipe.useCount > self.restart_pipe_after: | |
274 | # Not affected by min_pipes_per_pair | |
275 | logging.info('A pipe for pair %s-%s has handled %d requests, scheduling restart', | |
276 | pair[0], pair[1], self.restart_pipe_after) | |
277 | return True | |
278 | elif (i >= self.min_pipes_per_pair | |
279 | and self.max_idle_secs != 0 | |
280 | and time.time() - pipe.lastUsage > self.max_idle_secs): | |
281 | logging.info("A pipe for pair %s-%s hasn't been used in %d secs, scheduling shutdown", | |
282 | pair[0], pair[1], self.max_idle_secs) | |
283 | return True | |
284 | else: | |
285 | return False | |
216 | 286 | |
217 | 287 | def cleanPairs(self): |
218 | if self.max_idle_secs: | |
219 | for pair, lastUsage in self.stats['lastUsage'].items(): | |
220 | if pair in self.pipelines and time.time() - lastUsage > self.max_idle_secs: | |
221 | logging.info('Shutting down pair %s-%s since it has not been used in %d seconds' % ( | |
222 | pair[0], pair[1], self.max_idle_secs)) | |
223 | self.shutdownPair(pair) | |
224 | ||
225 | def getPipeLock(self, l1, l2): | |
226 | if (l1, l2) not in self.pipeline_locks: | |
227 | self.pipeline_locks[(l1, l2)] = toro.Lock() | |
228 | return self.pipeline_locks[(l1, l2)] | |
288 | for pair in self.pipelines: | |
289 | pipes = self.pipelines[pair] | |
290 | to_clean = set(p for i, p in enumerate(pipes) | |
291 | if self.cleanable(i, pair, p)) | |
292 | self.pipelines_holding += to_clean | |
293 | pipes[:] = [p for p in pipes if p not in to_clean] | |
294 | heapq.heapify(pipes) | |
295 | # The holding area lets us restart pipes after n usages next | |
296 | # time round, since with lots of traffic an active pipe may | |
297 | # never reach 0 users | |
298 | self.pipelines_holding[:] = [p for p in self.pipelines_holding | |
299 | if p.users > 0] | |
300 | if self.pipelines_holding: | |
301 | logging.info("%d pipelines still scheduled for shutdown", len(self.pipelines_holding)) | |
229 | 302 | |
230 | 303 | def getPipeCmds(self, l1, l2): |
231 | 304 | if (l1, l2) not in self.pipeline_cmds: |
233 | 306 | self.pipeline_cmds[(l1, l2)] = translation.parseModeFile(mode_path) |
234 | 307 | return self.pipeline_cmds[(l1, l2)] |
235 | 308 | |
236 | def getPipeline(self, l1, l2): | |
237 | do_flush, commands = self.getPipeCmds(l1, l2) | |
238 | if not do_flush: | |
239 | return None | |
240 | if (l1, l2) not in self.pipelines: | |
241 | logging.info('%s-%s not in pipelines of this process, starting …' % (l1, l2)) | |
242 | self.pipelines[(l1, l2)] = translation.startPipeline(commands) | |
243 | return self.pipelines[(l1, l2)] | |
309 | def shouldStartPipe(self, l1, l2): | |
310 | pipes = self.pipelines.get((l1, l2), []) | |
311 | if pipes == []: | |
312 | logging.info("%s-%s not in pipelines of this process", | |
313 | l1, l2) | |
314 | return True | |
315 | else: | |
316 | min_p = pipes[0] | |
317 | if len(pipes) < self.max_pipes_per_pair and min_p.users > self.max_users_per_pipe: | |
318 | logging.info("%s-%s has ≥%d users per pipe but only %d pipes", | |
319 | l1, l2, min_p.users, len(pipes)) | |
320 | return True | |
321 | else: | |
322 | return False | |
323 | ||
324 | def getPipeline(self, pair): | |
325 | (l1, l2) = pair | |
326 | if self.shouldStartPipe(l1, l2): | |
327 | logging.info("Starting up a new pipeline for %s-%s …", l1, l2) | |
328 | if pair not in self.pipelines: | |
329 | self.pipelines[pair] = [] | |
330 | p = translation.makePipeline(self.getPipeCmds(l1, l2)) | |
331 | heapq.heappush(self.pipelines[pair], p) | |
332 | return self.pipelines[pair][0] | |
244 | 333 | |
245 | 334 | def logBeforeTranslation(self): |
335 | return datetime.now() | |
336 | ||
337 | def logAfterTranslation(self, before, length): | |
338 | after = datetime.now() | |
246 | 339 | if self.scaleMtLogs: |
247 | return datetime.now() | |
248 | return | |
249 | ||
250 | def logAfterTranslation(self, before, toTranslate): | |
251 | if self.scaleMtLogs: | |
252 | after = datetime.now() | |
253 | 340 | tInfo = TranslationInfo(self) |
254 | 341 | key = getKey(tInfo.key) |
255 | scaleMtLog(self.get_status(), after-before, tInfo, key, len(toTranslate)) | |
256 | ||
257 | @gen.coroutine | |
258 | def get(self): | |
259 | toTranslate = self.get_argument('q') | |
260 | markUnknown = self.get_argument('markUnknown', default='yes') in ['yes', 'true', '1'] | |
261 | ||
342 | scaleMtLog(self.get_status(), after-before, tInfo, key, length) | |
343 | ||
344 | if self.get_status() == 200: | |
345 | oldest = self.stats['timing'][0][0] if self.stats['timing'] else datetime.now() | |
346 | if datetime.now() - oldest > self.STAT_PERIOD_MAX_AGE: | |
347 | self.stats['timing'].pop(0) | |
348 | self.stats['timing'].append( | |
349 | (before, after, length)) | |
350 | ||
351 | def getPairOrError(self, langpair, text_length): | |
262 | 352 | try: |
263 | l1, l2 = map(toAlpha3Code, self.get_argument('langpair').split('|')) | |
353 | l1, l2 = map(toAlpha3Code, langpair.split('|')) | |
264 | 354 | except ValueError: |
265 | 355 | self.send_error(400, explanation='That pair is invalid, use e.g. eng|spa') |
266 | if self.scaleMtLogs: | |
267 | before = datetime.now() | |
268 | tInfo = TranslationInfo(self) | |
269 | key = getKey(tInfo.key) | |
270 | after = datetime.now() | |
271 | scaleMtLog(400, after-before, tInfo, key, len(toTranslate)) | |
272 | return | |
273 | ||
274 | if '%s-%s' % (l1, l2) in self.pairs: | |
275 | before = self.logBeforeTranslation() | |
276 | lock = self.getPipeLock(l1, l2) | |
277 | _, commands = self.getPipeCmds(l1, l2) | |
278 | pipeline = self.getPipeline(l1, l2) | |
279 | translated = yield translation.translate(toTranslate, lock, pipeline, commands) | |
280 | self.logAfterTranslation(before, toTranslate) | |
281 | self.sendResponse({ | |
282 | 'responseData': { | |
283 | 'translatedText': self.maybeStripMarks(markUnknown, l1, l2, translated) | |
284 | }, | |
285 | 'responseDetails': None, | |
286 | 'responseStatus': 200 | |
287 | }) | |
288 | self.notePairUsage((l1, l2)) | |
289 | self.cleanPairs() | |
290 | else: | |
356 | self.logAfterTranslation(self.logBeforeTranslation(), text_length) | |
357 | return None | |
358 | if '%s-%s' % (l1, l2) not in self.pairs: | |
291 | 359 | self.send_error(400, explanation='That pair is not installed') |
292 | if self.scaleMtLogs: | |
293 | before = datetime.now() | |
294 | tInfo = TranslationInfo(self) | |
295 | key = getKey(tInfo.key) | |
296 | after = datetime.now() | |
297 | scaleMtLog(400, after-before, tInfo, key, len(toTranslate)) | |
360 | self.logAfterTranslation(self.logBeforeTranslation(), text_length) | |
361 | return None | |
362 | else: | |
363 | return (l1, l2) | |
364 | ||
365 | @gen.coroutine | |
366 | def translateAndRespond(self, pair, pipeline, toTranslate, markUnknown, nosplit=False): | |
367 | markUnknown = markUnknown in ['yes', 'true', '1'] | |
368 | self.notePairUsage(pair) | |
369 | before = self.logBeforeTranslation() | |
370 | translated = yield pipeline.translate(toTranslate, nosplit) | |
371 | self.logAfterTranslation(before, len(toTranslate)) | |
372 | self.sendResponse({ | |
373 | 'responseData': { | |
374 | 'translatedText': self.maybeStripMarks(markUnknown, pair, translated) | |
375 | }, | |
376 | 'responseDetails': None, | |
377 | 'responseStatus': 200 | |
378 | }) | |
379 | self.cleanPairs() | |
380 | ||
381 | @gen.coroutine | |
382 | def get(self): | |
383 | pair = self.getPairOrError(self.get_argument('langpair'), | |
384 | len(self.get_argument('q'))) | |
385 | if pair is not None: | |
386 | pipeline = self.getPipeline(pair) | |
387 | yield self.translateAndRespond(pair, | |
388 | pipeline, | |
389 | self.get_argument('q'), | |
390 | self.get_argument('markUnknown', default='yes')) | |
391 | ||
392 | ||
393 | class TranslatePageHandler(TranslateHandler): | |
394 | def htmlToText(self, html, url): | |
395 | if chardet: | |
396 | encoding = chardet.detect(html).get("encoding", "utf-8") | |
397 | else: | |
398 | encoding = "utf-8" | |
399 | text = html.decode(encoding) | |
400 | text = text.replace('href="/', 'href="{uri.scheme}://{uri.netloc}/'.format(uri=urlparse(url))) | |
401 | text = re.sub(r'a([^>]+)href=[\'"]?([^\'" >]+)', 'a \\1 href="#" onclick=\'window.parent.translateLink("\\2");\'', text) | |
402 | return text | |
403 | ||
404 | @gen.coroutine | |
405 | def get(self): | |
406 | pair = self.getPairOrError(self.get_argument('langpair'), | |
407 | # Don't yet know the size of the text, and don't want to fetch it unnecessarily: | |
408 | -1) | |
409 | if pair is not None: | |
410 | pipeline = self.getPipeline(pair) | |
411 | http_client = httpclient.AsyncHTTPClient() | |
412 | url = self.get_argument('url') | |
413 | request = httpclient.HTTPRequest(url=url, | |
414 | # TODO: tweak | |
415 | connect_timeout=20.0, | |
416 | request_timeout=20.0) | |
417 | response = yield http_client.fetch(request) | |
418 | toTranslate = self.htmlToText(response.body, url) | |
419 | yield self.translateAndRespond(pair, | |
420 | pipeline, | |
421 | toTranslate, | |
422 | self.get_argument('markUnknown', default='yes'), | |
423 | nosplit=True) | |
424 | ||
298 | 425 | |
299 | 426 | class TranslateDocHandler(TranslateHandler): |
300 | 427 | mimeTypeCommand = None |
333 | 460 | else: |
334 | 461 | return mimeType |
335 | 462 | |
463 | # TODO: Some kind of locking. Although we can't easily re-use open | |
464 | # pairs here (would have to reimplement lots of | |
465 | # /usr/bin/apertium), we still want some limits on concurrent doc | |
466 | # translation. | |
336 | 467 | @tornado.web.asynchronous |
337 | 468 | def get(self): |
338 | 469 | try: |
339 | 470 | l1, l2 = map(toAlpha3Code, self.get_argument('langpair').split('|')) |
340 | 471 | except ValueError: |
341 | 472 | self.send_error(400, explanation='That pair is invalid, use e.g. eng|spa') |
473 | ||
474 | markUnknown = self.get_argument('markUnknown', default='yes') in ['yes', 'true', '1'] | |
342 | 475 | |
343 | 476 | allowedMimeTypes = { |
344 | 477 | 'text/plain': 'txt', |
368 | 501 | self.request.headers['Content-Type'] = 'application/octet-stream' |
369 | 502 | self.request.headers['Content-Disposition'] = 'attachment' |
370 | 503 | |
371 | self.write(translation.translateDoc(tempFile, allowedMimeTypes[mtype], self.pairs['%s-%s' % (l1, l2)])) | |
504 | if markUnknown: | |
505 | self.write(translation.translateDoc(tempFile, allowedMimeTypes[mtype], self.pairs['%s-%s' % (l1, l2)],True)) | |
506 | else: | |
507 | self.write(translation.translateDoc(tempFile, allowedMimeTypes[mtype], self.pairs['%s-%s' % (l1, l2)],False)) | |
372 | 508 | self.finish() |
373 | 509 | else: |
374 | 510 | self.send_error(400, explanation='Invalid file type %s' % mtype) |
375 | 511 | else: |
376 | 512 | self.send_error(400, explanation='That pair is not installed') |
377 | 513 | |
514 | ||
378 | 515 | class AnalyzeHandler(BaseHandler): |
379 | @tornado.web.asynchronous | |
380 | @tornado.gen.coroutine | |
381 | def get(self): | |
382 | mode = toAlpha3Code(self.get_argument('lang')) | |
383 | toAnalyze = self.get_argument('q') | |
384 | ||
385 | def handleAnalysis(analysis): | |
386 | if analysis is None: | |
387 | self.send_error(408, explanation='Request timed out') | |
388 | else: | |
389 | lexicalUnits = removeLast(toAnalyze, re.findall(r'\^([^\$]*)\$([^\^]*)', analysis)) | |
390 | self.sendResponse([(lexicalUnit[0], lexicalUnit[0].split('/')[0] + lexicalUnit[1]) for lexicalUnit in lexicalUnits]) | |
391 | ||
392 | if mode in self.analyzers: | |
393 | pool = Pool(processes=1) | |
394 | result = pool.apply_async(apertium, [toAnalyze, self.analyzers[mode][0], self.analyzers[mode][1]]) | |
395 | pool.close() | |
396 | ||
397 | @run_async_thread | |
398 | def worker(callback): | |
399 | try: | |
400 | callback(result.get(timeout=self.timeout)) | |
401 | except TimeoutError: | |
402 | pool.terminate() | |
403 | callback(None) | |
404 | ||
405 | analysis = yield tornado.gen.Task(worker) | |
406 | handleAnalysis(analysis) | |
516 | def postproc_text(self, in_text, result): | |
517 | lexical_units = util.removeDotFromDeformat(in_text, re.findall(r'\^([^\$]*)\$([^\^]*)', result)) | |
518 | return [(lu[0], lu[0].split('/')[0] + lu[1]) | |
519 | for lu | |
520 | in lexical_units] | |
521 | ||
522 | @tornado.web.asynchronous | |
523 | @gen.coroutine | |
524 | def get(self): | |
525 | in_text = self.get_argument('q') | |
526 | in_mode = toAlpha3Code(self.get_argument('lang')) | |
527 | if in_mode in self.analyzers: | |
528 | [path, mode] = self.analyzers[in_mode] | |
529 | formatting = 'txt' | |
530 | commands = [['apertium', '-d', path, '-f', formatting, mode]] | |
531 | result = yield translation.translateSimple(in_text, commands) | |
532 | self.sendResponse(self.postproc_text(in_text, result)) | |
407 | 533 | else: |
408 | 534 | self.send_error(400, explanation='That mode is not installed') |
409 | 535 | |
536 | ||
410 | 537 | class GenerateHandler(BaseHandler): |
538 | def preproc_text(self, in_text): | |
539 | lexical_units = re.findall(r'(\^[^\$]*\$[^\^]*)', in_text) | |
540 | if len(lexical_units) == 0: | |
541 | lexical_units = ['^%s$' % (in_text,)] | |
542 | return lexical_units, '[SEP]'.join(lexical_units) | |
543 | ||
544 | def postproc_text(self, lexical_units, result): | |
545 | return [(generation, lexical_units[i]) | |
546 | for (i, generation) | |
547 | in enumerate(result.split('[SEP]'))] | |
548 | ||
411 | 549 | @tornado.web.asynchronous |
412 | 550 | @gen.coroutine |
413 | 551 | def get(self): |
414 | mode = toAlpha3Code(self.get_argument('lang')) | |
415 | toGenerate = self.get_argument('q') | |
416 | ||
417 | def handleGeneration(generated): | |
418 | if generated is None: | |
419 | self.send_error(408, explanation='Request timed out') | |
420 | else: | |
421 | generated = removeLast(toGenerate, generated) | |
422 | self.sendResponse([(generation, lexicalUnits[index]) for (index, generation) in enumerate(generated.split('[SEP]'))]) | |
423 | ||
424 | if mode in self.generators: | |
425 | lexicalUnits = re.findall(r'(\^[^\$]*\$[^\^]*)', toGenerate) | |
426 | if len(lexicalUnits) == 0: | |
427 | lexicalUnits = ['^%s$' % toGenerate] | |
428 | pool = Pool(processes=1) | |
429 | result = pool.apply_async(apertium, ('[SEP]'.join(lexicalUnits), self.generators[mode][0], self.generators[mode][1]), {'formatting': 'none'}) | |
430 | pool.close() | |
431 | ||
432 | @run_async_thread | |
433 | def worker(callback): | |
434 | try: | |
435 | callback(result.get(timeout=self.timeout)) | |
436 | except TimeoutError: | |
437 | pool.terminate() | |
438 | callback(None) | |
439 | ||
440 | generated = yield tornado.gen.Task(worker) | |
441 | handleGeneration(generated) | |
552 | in_text = self.get_argument('q') | |
553 | in_mode = toAlpha3Code(self.get_argument('lang')) | |
554 | if in_mode in self.generators: | |
555 | [path, mode] = self.generators[in_mode] | |
556 | formatting = 'none' | |
557 | commands = [['apertium', '-d', path, '-f', formatting, mode]] | |
558 | lexical_units, to_generate = self.preproc_text(in_text) | |
559 | result = yield translation.translateSimple(to_generate, commands) | |
560 | self.sendResponse(self.postproc_text(lexical_units, result)) | |
442 | 561 | else: |
443 | 562 | self.send_error(400, explanation='That mode is not installed') |
563 | ||
444 | 564 | |
445 | 565 | class ListLanguageNamesHandler(BaseHandler): |
446 | 566 | @tornado.web.asynchronous |
467 | 587 | else: |
468 | 588 | self.sendResponse({}) |
469 | 589 | |
590 | ||
470 | 591 | class PerWordHandler(BaseHandler): |
471 | 592 | @tornado.web.asynchronous |
472 | 593 | @gen.coroutine |
536 | 657 | output = yield tornado.gen.Task(worker) |
537 | 658 | handleOutput(output) |
538 | 659 | |
660 | ||
539 | 661 | class CoverageHandler(BaseHandler): |
540 | 662 | @tornado.web.asynchronous |
541 | 663 | @gen.coroutine |
570 | 692 | else: |
571 | 693 | self.send_error(400, explanation='That mode is not installed') |
572 | 694 | |
695 | ||
573 | 696 | class IdentifyLangHandler(BaseHandler): |
574 | 697 | @tornado.web.asynchronous |
575 | 698 | def get(self): |
583 | 706 | possibleLangs = filter(lambda x: x[1] != 'un', cldResults[2]) |
584 | 707 | self.sendResponse({toAlpha3Code(possibleLang[1]): possibleLang[2] for possibleLang in possibleLangs}) |
585 | 708 | else: |
586 | self.sendResponse({'nob': 100}) # TODO: Some more reasonable response | |
709 | self.sendResponse({'nob': 100}) # TODO: Some more reasonable response | |
587 | 710 | else: |
588 | 711 | def handleCoverages(coverages): |
589 | 712 | self.sendResponse(coverages) |
593 | 716 | pool.close() |
594 | 717 | try: |
595 | 718 | coverages = result.get(timeout=self.timeout) |
719 | # TODO: Coverages are not actually sent!! | |
596 | 720 | except TimeoutError: |
597 | 721 | self.send_error(408, explanation='Request timed out') |
598 | 722 | pool.terminate() |
599 | 723 | |
724 | ||
600 | 725 | class GetLocaleHandler(BaseHandler): |
601 | 726 | @tornado.web.asynchronous |
602 | 727 | def get(self): |
605 | 730 | self.sendResponse(locales) |
606 | 731 | else: |
607 | 732 | self.send_error(400, explanation='Accept-Language missing from request headers') |
733 | ||
608 | 734 | |
609 | 735 | class PipeDebugHandler(BaseHandler): |
610 | 736 | |
634 | 760 | |
635 | 761 | self.sendResponse({ |
636 | 762 | 'responseData': {'output': output, 'pipeline': pipeline}, |
637 | 'responseDetails': None, | |
638 | 'responseStatus': 200 | |
763 | 'responseDetails': None, | |
764 | 'responseStatus': 200 | |
639 | 765 | }) |
640 | 766 | |
641 | missingFreqsDb = '' | |
642 | ||
643 | def setupHandler(port, pairs_path, nonpairs_path, langNames, missingFreqs, timeout, max_idle_secs, verbosity=0, scaleMtLogs=False, memory=0): | |
767 | ||
768 | def setupHandler(port, pairs_path, nonpairs_path, langNames, missingFreqsPath, timeout, max_pipes_per_pair, min_pipes_per_pair, max_users_per_pipe, max_idle_secs, restart_pipe_after, verbosity=0, scaleMtLogs=False, memory=1000): | |
644 | 769 | |
645 | 770 | global missingFreqsDb |
646 | missingFreqsDb= missingFreqs | |
771 | if missingFreqsPath: | |
772 | missingFreqsDb = missingdb.MissingDb(missingFreqsPath, memory) | |
647 | 773 | |
648 | 774 | Handler = BaseHandler |
649 | 775 | Handler.langNames = langNames |
650 | Handler.missingFreqs = missingFreqs | |
651 | 776 | Handler.timeout = timeout |
777 | Handler.max_pipes_per_pair = max_pipes_per_pair | |
778 | Handler.min_pipes_per_pair = min_pipes_per_pair | |
779 | Handler.max_users_per_pipe = max_users_per_pipe | |
652 | 780 | Handler.max_idle_secs = max_idle_secs |
781 | Handler.restart_pipe_after = restart_pipe_after | |
653 | 782 | Handler.scaleMtLogs = scaleMtLogs |
654 | Handler.inMemoryUnknown = True if memory > 0 else False | |
655 | Handler.inMemoryLimit = memory | |
656 | 783 | Handler.verbosity = verbosity |
657 | 784 | |
658 | 785 | modes = searchPath(pairs_path, verbosity=verbosity) |
673 | 800 | Handler.taggers[lang_pair] = (dirpath, modename) |
674 | 801 | |
675 | 802 | |
803 | def sanity_check(): | |
804 | locale_vars = ["LANG", "LC_ALL"] | |
805 | u8 = re.compile("UTF-?8", re.IGNORECASE) | |
806 | if not any(re.search(u8, os.environ.get(key, "")) | |
807 | for key in locale_vars): | |
808 | print("servlet.py: error: APY needs a UTF-8 locale, please set LANG or LC_ALL", | |
809 | file=sys.stderr) | |
810 | sys.exit(1) | |
811 | ||
676 | 812 | if __name__ == '__main__': |
677 | parser = argparse.ArgumentParser(description='Start Apertium APY') | |
813 | sanity_check() | |
814 | parser = argparse.ArgumentParser(description='Apertium APY -- API server for machine translation and language analysis') | |
678 | 815 | parser.add_argument('pairs_path', help='path to Apertium installed pairs (all modes files in this path are included)') |
679 | 816 | parser.add_argument('-s', '--nonpairs-path', help='path to Apertium SVN (only non-translator debug modes are included from this path)') |
680 | 817 | parser.add_argument('-l', '--lang-names', help='path to localised language names sqlite database (default = langNames.db)', default='langNames.db') |
683 | 820 | parser.add_argument('-c', '--ssl-cert', help='path to SSL Certificate', default=None) |
684 | 821 | parser.add_argument('-k', '--ssl-key', help='path to SSL Key File', default=None) |
685 | 822 | parser.add_argument('-t', '--timeout', help='timeout for requests (default = 10)', type=int, default=10) |
686 | parser.add_argument('-j', '--num-processes', help='number of processes to run (default = number of cores)', type=int, default=0) | |
823 | parser.add_argument('-j', '--num-processes', help='number of processes to run (default = 1; use 0 to run one http server per core, where each http server runs all available language pairs)', nargs='?', type=int, default=1) | |
687 | 824 | parser.add_argument('-d', '--daemon', help='daemon mode: redirects stdout and stderr to files apertium-apy.log and apertium-apy.err ; use with --log-path', action='store_true') |
688 | 825 | parser.add_argument('-P', '--log-path', help='path to log output files to in daemon mode; defaults to local directory', default='./') |
689 | parser.add_argument('-m', '--max-idle-secs', help='shut down pipelines it have not been used in this many seconds', type=int, default=0) | |
826 | parser.add_argument('-i', '--max-pipes-per-pair', help='how many pipelines we can spin up per language pair (default = 1)', type=int, default=1) | |
827 | parser.add_argument('-n', '--min-pipes-per-pair', help='when shutting down pipelines, keep at least this many open per language pair (default = 0)', type=int, default=0) | |
828 | parser.add_argument('-u', '--max-users-per-pipe', help='how many concurrent requests per pipeline before we consider spinning up a new one (default = 5)', type=int, default=5) | |
829 | parser.add_argument('-m', '--max-idle-secs', help='if specified, shut down pipelines that have not been used in this many seconds', type=int, default=0) | |
830 | parser.add_argument('-r', '--restart-pipe-after', help='restart a pipeline if it has had this many requests (default = 1000)', type=int, default=1000) | |
690 | 831 | parser.add_argument('-v', '--verbosity', help='logging verbosity', type=int, default=0) |
832 | parser.add_argument('-V', '--version', help='show APY version', action='version', version="%(prog)s version " + __version__) | |
691 | 833 | parser.add_argument('-S', '--scalemt-logs', help='generates ScaleMT-like logs; use with --log-path; disables', action='store_true') |
692 | parser.add_argument('-M', '--unknown-memory-limit', help="keeps unknown words in memory until a limit is reached", type=int, default=0) | |
834 | parser.add_argument('-M', '--unknown-memory-limit', help="keeps unknown words in memory until a limit is reached (default = 1000)", type=int, default=1000) | |
835 | parser.add_argument('-T', '--stat-period-max-age', help="How many seconds back to keep track request timing stats (default = 3600)", type=int, default=3600) | |
693 | 836 | args = parser.parse_args() |
694 | 837 | |
695 | 838 | if args.daemon: |
706 | 849 | logger = logging.getLogger('scale-mt') |
707 | 850 | logger.propagate = False |
708 | 851 | smtlog = os.path.join(args.log_path, 'ScaleMTRequests.log') |
709 | loggingHandler = logging.handlers.TimedRotatingFileHandler(smtlog,'midnight',0) | |
852 | loggingHandler = logging.handlers.TimedRotatingFileHandler(smtlog, 'midnight', 0) | |
710 | 853 | loggingHandler.suffix = "%Y-%m-%d" |
711 | 854 | logger.addHandler(loggingHandler) |
712 | 855 | |
714 | 857 | if(args.daemon): |
715 | 858 | logging.getLogger("tornado.access").propagate = False |
716 | 859 | |
860 | if args.stat_period_max_age: | |
861 | BaseHandler.STAT_PERIOD_MAX_AGE = timedelta(0, args.stat_period_max_age, 0) | |
862 | ||
717 | 863 | if not cld2: |
718 | logging.warning('Unable to import CLD2, continuing using naive method of language detection') | |
719 | ||
720 | setupHandler(args.port, args.pairs_path, args.nonpairs_path, args.lang_names, args.missing_freqs, args.timeout, args.max_idle_secs, args.verbosity, args.scalemt_logs, args.unknown_memory_limit) | |
864 | logging.warning("Unable to import CLD2, continuing using naive method of language detection") | |
865 | if not chardet: | |
866 | logging.warning("Unable to import chardet, assuming utf-8 encoding for all websites") | |
867 | ||
868 | setupHandler(args.port, args.pairs_path, args.nonpairs_path, args.lang_names, args.missing_freqs, args.timeout, args.max_pipes_per_pair, args.min_pipes_per_pair, args.max_users_per_pipe, args.max_idle_secs, args.restart_pipe_after, args.verbosity, args.scalemt_logs, args.unknown_memory_limit) | |
721 | 869 | |
722 | 870 | application = tornado.web.Application([ |
723 | 871 | (r'/', RootHandler), |
726 | 874 | (r'/stats', StatsHandler), |
727 | 875 | (r'/translate', TranslateHandler), |
728 | 876 | (r'/translateDoc', TranslateDocHandler), |
877 | (r'/translatePage', TranslatePageHandler), | |
729 | 878 | (r'/analy[sz]e', AnalyzeHandler), |
730 | 879 | (r'/generate', GenerateHandler), |
731 | 880 | (r'/listLanguageNames', ListLanguageNamesHandler), |
738 | 887 | |
739 | 888 | global http_server |
740 | 889 | if args.ssl_cert and args.ssl_key: |
741 | http_server = tornado.httpserver.HTTPServer(application, ssl_options = { | |
890 | http_server = tornado.httpserver.HTTPServer(application, ssl_options={ | |
742 | 891 | 'certfile': args.ssl_cert, |
743 | 892 | 'keyfile': args.ssl_key, |
744 | 893 | }) |
752 | 901 | |
753 | 902 | http_server.bind(args.port) |
754 | 903 | http_server.start(args.num_processes) |
755 | tornado.ioloop.IOLoop.instance().start() | |
904 | ||
905 | loop = tornado.ioloop.IOLoop.instance() | |
906 | wd = systemd.setup_watchdog() | |
907 | if wd is not None: | |
908 | wd.systemd_ready() | |
909 | logging.info("Initialised systemd watchdog, pinging every {}s".format(1000*wd.period)) | |
910 | tornado.ioloop.PeriodicCallback(wd.watchdog_ping, 1000*wd.period, loop).start() | |
911 | loop.start() |
0 | #!/usr/bin/env python3 | |
1 | # -*- indent-tabs-mode: nil -*- | |
2 | # coding=utf-8 | |
3 | # -*- encoding: utf-8 -*- | |
4 | ||
5 | """ | |
6 | Copyright (C) 2016 Kevin Brubeck Unhammer | |
7 | based on https://gist.github.com/Spindel/1d07533ef94a4589d348 / watchdogged.py | |
8 | Copyright (C) 2015 D.S. Ljungmark, Modio AB | |
9 | This program is free software: you can redistribute it and/or modify | |
10 | it under the terms of the GNU General Public License as published by | |
11 | the Free Software Foundation, either version 3 of the License, or | |
12 | (at your option) any later version. | |
13 | This program is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | You should have received a copy of the GNU General Public License | |
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | """ | |
20 | ||
21 | import logging | |
22 | import socket | |
23 | import os | |
24 | ||
25 | # All singletons are prefixed "the" | |
26 | theLog = logging.getLogger(__name__) | |
27 | ||
28 | ||
def watchdog_period():
    """Return the watchdog interval in seconds, or None when unset.

    systemd communicates the interval through the WATCHDOG_USEC
    environment variable (in microseconds); an absent or empty value
    yields None.
    """
    usec = os.environ.get("WATCHDOG_USEC")
    return int(usec) / 1000000 if usec else None
35 | ||
36 | ||
def notify_socket(clean_environment=True):
    """Return a tuple of (address, socket) for systemd notifications.

    Reads NOTIFY_SOCKET from the environment and returns (None, None)
    when it is unset or malformed.  When clean_environment is True the
    variable is removed from os.environ so child processes do not
    inherit it and try to notify systemd themselves.
    """
    _empty = None, None
    # Read the variable exactly once; the original code did a get()
    # followed by a pop(), the first read being redundant.
    if clean_environment:
        address = os.environ.pop("NOTIFY_SOCKET", None)
    else:
        address = os.environ.get("NOTIFY_SOCKET", None)

    # A valid address has at least two characters and starts with "@"
    # (abstract namespace socket) or "/" (filesystem socket).
    if not address or len(address) == 1 or address[0] not in ("@", "/"):
        return _empty

    if address[0] == "@":
        # systemd spells abstract sockets with a leading "@"; the real
        # AF_UNIX address uses a NUL byte instead.
        address = "\0" + address[1:]

    # SOCK_CLOEXEC was added in Python 3.2 and requires Linux >= 2.6.27.
    # It means "close this socket after fork/exec()".
    try:
        sock = socket.socket(socket.AF_UNIX,
                             socket.SOCK_DGRAM | socket.SOCK_CLOEXEC)
    except AttributeError:
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)

    return address, sock
68 | ||
69 | ||
class Watchdog(object):
    """Sends readiness/watchdog/stop notifications to systemd over a socket."""

    def __init__(self, period, address, sock):
        # "The daemon should then issue sd_notify("WATCHDOG=1") calls every half of that interval."
        self.period = float(period) / 2.0
        self.address = address
        self.sock = sock

    def __del__(self):
        # Best effort: tell systemd we are going away.
        self.systemd_stop()

    def sd_message(self, message):
        """Send `message` (bytes) to the systemd socket; True on success."""
        if not (self.address and self.sock and message):
            theLog.info("Couldn't message! {} {} {}".format(self.address, self.sock, message))
            return False
        assert isinstance(message, bytes)

        try:
            sent = self.sock.sendto(message, self.address)
        except socket.error:
            return False
        return sent > 0

    def watchdog_ping(self):
        """Ping the systemd watchdog (WATCHDOG=1)."""
        return self.sd_message(b"WATCHDOG=1")

    def systemd_ready(self):
        """Signal that the service is up (READY=1)."""
        return self.sd_message(b"READY=1")

    def systemd_stop(self):
        """Signal that the service is stopping (STOPPING=1)."""
        return self.sd_message(b"STOPPING=1")
106 | ||
107 | ||
def setup_watchdog():
    """Build a Watchdog from the systemd environment, or return None.

    Returns None when we were not launched under systemd (no
    notification socket) or when the unit file does not set a watchdog
    period (WatchdogSec).
    """
    notify = notify_socket()
    period = watchdog_period()
    if not notify[0]:
        theLog.info("No notification socket, not launched via systemd?")
        return None
    if not period:
        theLog.warning("Found systemd notification socket, but no watchdog period set in the unit file!")
        return None
    return Watchdog(period, *notify)
0 | The tests require some test data. | |
1 | ||
2 | To install the test data on Debian-based systems, first install core | |
tools as shown at http://wiki.apertium.org/wiki/Debian and then do
4 | ||
5 | sudo apt-get install apertium-sme-nob apertium-es-en | |
6 | mkdir ~/apy-testdata | |
7 | cd ~/apy-testdata | |
8 | svn co https://svn.code.sf.net/p/apertium/svn/languages/apertium-nno | |
9 | cd apertium-nno | |
10 | ./autogen.sh | |
11 | make -j4 | |
12 | ||
13 | Now go back to the apy directory, and do | |
14 | ||
15 | NONPAIRS=~/apy-testdata t/run-tests | |
16 | ||
17 | to run the tests. |
#!/bin/bash

### Put e.g. in
#
# #!/bin/sh
# NONPAIRS=/path/to/apertium-nno t/run-tests
#
### in your .git/hooks/pre-commit and chmod +x .git/hooks/pre-commit
###
### Optional first argument is a free port number to use.

### Tests (TODO: get these from a file instead):
### The arrays below are parallel: test i sends INPUTS[i], expects
### OUTPUTS[i], queries mode MODES[i] on endpoint TYPES[i], and, when
### EXTRACTS[i] is non-empty, extracts the response with that jq filter.
declare -ar INPUTS=( "government" "ja" "ikkje" "ja<ij>" "^ja<ij>$" "ignored" "ignored")
declare -ar OUTPUTS=( "Gobierno" "og" "ikkje/ikkje<adv>" "ja" "ja" "400" "400")
declare -ar MODES=( "eng|spa" "sme|nob" "nno" "nno" "nno" "typomode" "non|mod")
declare -ar TYPES=( "translate" "translate" "analyse" "generate" "generate" "translate?" "translate?")
declare -ar EXTRACTS=( "" "" "" "" "" ".code" ".code")

### Paths to apertium test data:
### The tests assume you have apertium-sme-nob and apertium-en-es
### installed, and apertium-nno checked out from SVN and compiled and
### available from INSTALLEDPAIRS and NONPAIRS directories
### respectively:
declare -r INSTALLEDPAIRS=${INSTALLEDPAIRS:-/usr/share/apertium}
declare -r NONPAIRS=${NONPAIRS:-/l/a/languages}
### You don't have to change these variables here, instead run the
### script like this:
# $ NONPAIRS=/path/to/apertium-nno t/run-tests
### to set the path to apertium-nno before running, or
# $ NONPAIRS=/path/to/apertium-nno INSTALLEDPAIRS=~/local t/run-tests
### to set both paths before running.


### Actual script follows:
set -e -u -o pipefail

# Port the test APY instance listens on (optional first argument).
PORT="${1:-2737}"
APYPID=
# Kill the background APY server (if we started one) on script exit.
cleanup () {
    [[ -n ${APYPID} ]] && kill "${APYPID}"
}
trap cleanup EXIT

# ANSI sequences: clear the line, return to column 1, then a green check
# mark or a red cross.
declare -r CHECK="[2K[999D[1;32m✓[00m"
declare -r CROSS="[2K[999D[1;31m❌[00m"
45 | ||
# Poll the APY root URL once a second until it responds, for at most
# max_secs seconds.  Returns 0 on success, 1 if APY never answered.
wait_for_startup () {
    local -i max_secs=10
    local -i i=0
    while [[ $i -lt ${max_secs} ]]; do
        echo -n "."
        sleep 1
        if curl -s "http://localhost:${PORT}" >/dev/null; then
            echo "${CHECK} APY seems to have started up"
            return 0
        fi
        # `|| true` guards against set -e: (( i++ )) exits non-zero
        # when the pre-increment value is 0.
        (( i++ )) || true
    done
    echo "${CROSS} Waited ${max_secs} secs without any response from APY"
    return 1
}
61 | ||
# Read an APY JSON response on stdin and print the value under test.
# $1 is the test index; when EXTRACTS[$1] is non-empty it is used as a
# jq filter, otherwise a default filter is chosen from the test type.
extract_response () {
    local type=${TYPES[$1]}
    local extract=${EXTRACTS[$1]}
    if [[ ${extract} = "" ]]; then
        case ${type} in
            translate)
                jq -r .responseData.translatedText
                ;;
            generate|analyse)
                # Responses are arrays of pairs; keep every odd field,
                # i.e. the analysis/generation outputs.
                jq -r .[][] | awk 'NR%2==1'
                ;;
            *)
                echo "Unknown test type ${type} and no method given in EXTRACTS" >&2
                exit 1
                ;;
        esac
    else
        jq -r "${extract}"
    fi
}
82 | ||
# Check that the mode needed by test $1 is available from the running
# APY by querying the relevant /list endpoint.  Unknown test types are
# not checked (we let them slide).
ensure_installed () {
    local type=${TYPES[$1]}
    local mode=${MODES[$1]}
    case ${type} in
        translate)
            curl -s "http://localhost:${PORT}/list?q=pairs" \
                | jq -e ".responseData|map(.sourceLanguage+\"|\"+.targetLanguage)|index(\"$mode\")" &>/dev/null
            ;;
        generate)
            curl -s "http://localhost:${PORT}/list?q=generators" \
                | jq -e "has(\"${mode}\")" &>/dev/null
            ;;
        analyse)
            curl -s "http://localhost:${PORT}/list?q=analysers" \
                | jq -e "has(\"${mode}\")" &>/dev/null
            ;;
        # Anything else we let slide
    esac
}
102 | ||
# Run test number $1: query APY, extract the response, and compare it
# with the expected output.  Returns 0 on success, 1 on failure.
run_test () {
    local -ri i=$1
    local -r in=${INPUTS[$i]}
    local -r mode=${MODES[$i]}
    local -r type=${TYPES[$i]}
    local url="http://localhost:${PORT}/${type}?lang=${mode}&q=${in}"
    if [[ ${type} = translate ]]; then
        # The /translate endpoint takes langpair= instead of lang=
        url="http://localhost:${PORT}/translate?langpair=${mode}&q=${in}"
    fi
    if ! ensure_installed "$i"; then
        cat <<EOF
[1;31m❌[00m TEST FAILED FOR ${mode} ${type}
It seems like ${mode} (${type}) is not installed; install ${mode} and
set NONPAIRS/INSTALLEDPAIRS variables before running this script.

EOF
        return 1
    fi
    local -r got=$(curl -s "${url}" | extract_response "$i")
    local -r want=${OUTPUTS[$i]}
    # Quote the right-hand side so ${want} is compared literally; an
    # unquoted RHS inside [[ != ]] is treated as a glob pattern.
    if [[ ${got} != "${want}" ]]; then
        cat <<EOF

[1;31m❌[00m TEST FAILED FOR ${mode} ${type}
WANTED: "${want}"
GOT: "${got}"

EOF
        return 1
    fi
    return 0
}
135 | ||
# Run every test in the INPUTS table, plus a final sanity check that a
# bogus language pair yields error code 400.  Returns the number of
# failed tests (0 on full success).
run_tests () {
    local -i failures=0

    for (( i=0; i<${#INPUTS[@]}; i++ )); do
        if ! run_test "$i"; then
            (( failures++ )) || true
        fi
    done

    local got=$(curl -s "http://localhost:${PORT}/translate?langpair=typo&q=whatever" | jq -r .code)
    # String comparison: with -ne, a non-numeric response (e.g. "null"
    # from jq) would make [[ ]] error out instead of counting a failure.
    if [[ "${got}" != 400 ]]; then
        (( failures++ )) || true
    fi

    if [[ ${failures} -eq 0 ]]; then
        cat <<EOF
${CHECK} All $i tests passed
EOF
    else
        cat <<EOF
${CROSS} Ran $i tests, ${failures} failures
EOF
        return "${failures}"
    fi
    return 0
}
162 | ||
163 | ||
# Sanity checks, server start-up and test execution.
if ! command -V jq &>/dev/null; then
    # jq is required by extract_response/ensure_installed.  Write the
    # hint to stderr (the original redirected echo's own stderr to
    # /dev/null) and bail out instead of failing confusingly later.
    echo "Please install jq, e.g. 'sudo apt-get install jq'" >&2
    exit 1
fi

if netstat -lnt|awk "\$4~/:${PORT}\$/"|grep -q .; then
    lsof -i :"${PORT}"
    echo >&2
    echo "Port ${PORT} seems taken, can't run tests" >&2
    echo "(you can pass an alternative port as first argument to this script)" >&2
    exit 10
fi

cd "$(dirname "$0")"
rm -f apertium-apy.log apertium-apy.err
../servlet.py -p "${PORT}" -d -j1 -i3 -u1 -n1 -m3 -s "${NONPAIRS}" -- "${INSTALLEDPAIRS}" & APYPID=$!
wait_for_startup
if run_tests; then
    exit $?
else
    result=$?
    # Show the server logs to help diagnose the failures.
    tail -n 999 apertium-apy.log apertium-apy.err
    exit "${result}"
fi

# TODO: concurrency tests
80 | 80 | <select name="langpair"> |
81 | 81 | </select> |
82 | 82 | <br> |
83 | <input type="checkbox" id="instant"> Instant translation | |
84 | <br> | |
83 | 85 | <label>Type text here.</label> |
84 | <textarea name='q' class='block'></textarea> | |
86 | <textarea name='q' id='q' class='block'></textarea> | |
85 | 87 | <br> |
86 | 88 | <input type="button" value="Submit" onClick="sendRequest()"> |
87 | 89 | </form> |
88 | 90 | <div id="responsediv"> |
89 | 91 | </div> |
92 | ||
93 | <script> | |
94 | ||
var timer, lastPunct = false, punct = [46, 33, 58, 63, 47, 45, 190, 171, 49], timeoutPunct = 500, timeoutOther = 1000;
var q = document.getElementById("q");
// Schedule an instant-translation request a short while after the user
// stops typing; punctuation keys use a shorter delay than other keys.
function timeout (event) {
    // Parenthesize the OR: without it, `&&` binds tighter and a plain
    // Enter (13) would bail out even when lastPunct is false.
    if(lastPunct && (event.keyCode === 32 || event.keyCode === 13)) {
        // Don't override the short timeout for simple space-after-punctuation
        return;
    }

    if(timer && document.getElementById("instant").checked) {
        clearTimeout(timer);
    }

    // Renamed from `timeout` to avoid shadowing this function's name.
    var delay;

    if (punct.indexOf(event.keyCode) !== -1) {
        delay = timeoutPunct;
        lastPunct = true;
    }
    else {
        delay = timeoutOther;
        lastPunct = false;
    }

    timer = setTimeout(function () {
        if(document.getElementById("instant").checked) {
            sendRequest();
        }
    }, delay);
}
q.addEventListener("keyup", timeout);
q.addEventListener("paste", timeout);
127 | ||
128 | ||
129 | ||
130 | ||
131 | ||
132 | </script> | |
90 | 133 | </body> |
91 | </html>⏎ | |
134 | </html> |
2720 | 2720 | INSERT INTO "languageNames" VALUES(2735,'en','nl','Dutch'); |
2721 | 2721 | INSERT INTO "languageNames" VALUES(2736,'en','nn','Norwegian Nynorsk'); |
2722 | 2722 | INSERT INTO "languageNames" VALUES(2737,'en','no','Norwegian'); |
2723 | INSERT INTO "languageNames" VALUES(2738,'en','nog','Nogai'); | |
2723 | INSERT INTO "languageNames" VALUES(2738,'en','nog','Nogay'); | |
2724 | 2724 | INSERT INTO "languageNames" VALUES(2739,'en','oc','Occitan'); |
2725 | 2725 | INSERT INTO "languageNames" VALUES(2740,'en','os','Ossetic'); |
2726 | 2726 | INSERT INTO "languageNames" VALUES(2741,'en','pa','Punjabi'); |
2755 | 2755 | INSERT INTO "languageNames" VALUES(2770,'en','tl','Tagalog'); |
2756 | 2756 | INSERT INTO "languageNames" VALUES(2771,'en','tr','Turkish'); |
2757 | 2757 | INSERT INTO "languageNames" VALUES(2772,'en','tt','Tatar'); |
2758 | INSERT INTO "languageNames" VALUES(2773,'en','tyv','Tuvinian'); | |
2758 | INSERT INTO "languageNames" VALUES(2773,'en','tyv','Tuvan'); | |
2759 | 2759 | INSERT INTO "languageNames" VALUES(2774,'en','udm','Udmurt'); |
2760 | 2760 | INSERT INTO "languageNames" VALUES(2775,'en','uk','Ukrainian'); |
2761 | 2761 | INSERT INTO "languageNames" VALUES(2776,'en','ur','Urdu'); |
8893 | 8893 | INSERT INTO "fixes" VALUES(NULL,'ky','tk','түркмөнчө'); |
8894 | 8894 | INSERT INTO "fixes" VALUES(NULL,'ky','tr','түркчө'); |
8895 | 8895 | INSERT INTO "fixes" VALUES(NULL,'ky','uz','өзбекче'); |
8896 | INSERT INTO "fixes" VALUES(NULL,'ky','kaa','каракалпакча'); | |
8897 | INSERT INTO "fixes" VALUES(NULL,'ky','tyv','тывача'); | |
8898 | INSERT INTO "fixes" VALUES(NULL,'ky','sah','сахача (якутча)'); | |
8899 | INSERT INTO "fixes" VALUES(NULL,'ky','nog','ногойчо'); | |
8896 | 8900 | INSERT INTO "fixes" VALUES(NULL,'kk','az','әзірбайжан тілі'); |
8897 | 8901 | INSERT INTO "fixes" VALUES(NULL,'kk','ba','башқортша'); |
8898 | 8902 | INSERT INTO "fixes" VALUES(NULL,'kk','en','ағылшынша'); |
9109 | 9113 | INSERT INTO "fixes" VALUES(NULL,'kaa','kk','qazaqsha'); |
9110 | 9114 | INSERT INTO "fixes" VALUES(NULL,'kaa','kaa','qaraqalpaqsha'); |
9111 | 9115 | INSERT INTO "fixes" VALUES(NULL,'kaa','uz','oʻzbekshe'); |
9116 | INSERT INTO "fixes" VALUES(NULL,'kaa','tt','tatarsha'); | |
9117 | INSERT INTO "fixes" VALUES(NULL,'kaa','ky','qırgʻızsha'); | |
9118 | INSERT INTO "fixes" VALUES(NULL,'uz','kaa','qoraqalpoqcha'); | |
9119 | INSERT INTO "fixes" VALUES(NULL,'uz','nog','noʻgʻaycha'); | |
9120 | INSERT INTO "fixes" VALUES(NULL,'uz','sah','saxacha (yoqutcha)'); | |
9121 | INSERT INTO "fixes" VALUES(NULL,'uz','chv','chuvashcha'); | |
9122 | INSERT INTO "fixes" VALUES(NULL,'uz','tyv','tuvacha'); | |
9123 | INSERT INTO "fixes" VALUES(NULL,'uz','kum','qoʻmiqcha'); | |
9124 | INSERT INTO "fixes" VALUES(NULL,'uz','bak','boshqircha'); | |
9125 | INSERT INTO "fixes" VALUES(NULL,'tyv','tyv','тыва дылда'); | |
9126 | INSERT INTO "fixes" VALUES(NULL,'nog','nog','ногъайша'); | |
9127 | INSERT INTO "fixes" VALUES(NULL,'sah','sah','сахалыы'); | |
9128 | INSERT INTO "fixes" VALUES(NULL,'en','crh','Crimean Tatar'); | |
9129 | INSERT INTO "fixes" VALUES(NULL,'uz','crh','qrimtatarcha'); | |
9130 | INSERT INTO "fixes" VALUES(NULL,'az','crh','krımtatarca'); | |
9131 | INSERT INTO "fixes" VALUES(NULL,'bak','crh','Ҡырымтатарса'); | |
9132 | INSERT INTO "fixes" VALUES(NULL,'chv','crh','крымтутарла'); | |
9133 | INSERT INTO "fixes" VALUES(NULL,'crh','crh','qırımtatarca'); | |
9134 | INSERT INTO "fixes" VALUES(NULL,'kaa','crh','qırımtatarsha'); | |
9135 | INSERT INTO "fixes" VALUES(NULL,'tt','crh','кырымтатарча'); | |
9136 | INSERT INTO "fixes" VALUES(NULL,'ky','crh','кырымтатарча'); | |
9137 | INSERT INTO "fixes" VALUES(NULL,'kk','crh','қырымтатарша'); | |
9138 | INSERT INTO "fixes" VALUES(NULL,'tr','crh','Kırımtatarca'); | |
9139 | INSERT INTO "fixes" VALUES(NULL,'uig','crh','قرىمتاتارچا'); | |
9140 | INSERT INTO "fixes" VALUES(NULL,'kk','sah','сахаша (якутша)'); | |
9141 | INSERT INTO "fixes" VALUES(NULL,'kk','kaa','қарақалпақша'); | |
9142 | INSERT INTO "fixes" VALUES(NULL,'kk','tyv','тываша'); | |
9143 | INSERT INTO "fixes" VALUES(NULL,'ca','crh','tàtar de Crimea'); | |
9144 | INSERT INTO "fixes" VALUES(NULL,'eu','crh','Krimeako tatarera'); | |
9145 | INSERT INTO "fixes" VALUES(NULL,'en','oct_aran','Occitan Aranese'); | |
9146 | INSERT INTO "fixes" VALUES(NULL,'de','oct_aran','Okzitanisch Aranesisch'); | |
9112 | 9147 | |
9113 | 9148 | INSERT INTO "languageNames" (lg, inLg, name) select lg, inLg, name from "fixes"; |
9114 | 9149 | COMMIT; |
15 | 15 | "urd-hin": ("تحریر ہیں", "लेख हैं"), |
16 | 16 | "afr-nld": ("ek", "ik"), |
17 | 17 | "ara-mlt": ("و", "u"), |
18 | "ara-mlt_translit": ("و", "u"), | |
19 | "arg-cat": ("e", "és"), | |
20 | "cat-arg": ("la", "a"), | |
18 | 21 | "arg-spa": ("e", "es"), |
19 | 22 | "spa-arg": ("la", "a"), |
20 | 23 | "ast-spa": ("nin", "ni"), |
53 | 56 | "mkd-eng": ("триесет", "thirty"), |
54 | 57 | "mlt-ara": ("u", "و"), |
55 | 58 | "nld-afr": ("ik", "ek"), |
59 | "nno-swe": ("kva", "vad"), | |
60 | "swe-nno": ("vad", "kva"), | |
61 | "swe-nob": ("vad", "hva"), | |
56 | 62 | "nno-dan": ("kva", "hvad"), |
63 | "dan-nno": ("hvad", "kva"), | |
64 | "dan-nob": ("hvad", "hva"), | |
57 | 65 | "nno_e-nno": ("korleis", "korleis"), |
58 | 66 | "nno-nob": ("korleis", "hvordan"), |
59 | 67 | "nno-nno_e": ("korleis", "korleis"), |
88 | 96 | "spa-por": ("tengo", "tenho"), |
89 | 97 | "spa-por_BR": ("tengo", "tenho"), |
90 | 98 | "swe-dan": ("vad", "hvad"), |
99 | "dan-swe": ("hvad", "vad"), | |
91 | 100 | "swe-isl": ("Av", "Af"), |
92 | 101 | "tat-kaz": ("ул", "ол"), |
93 | 102 | } |
11 | 11 | # By default, if it restarts >10 times within 5 secs, it marks it as failed and gives up: |
12 | 12 | Restart=always |
13 | 13 | |
14 | # Restart the service if it doesn't do a watchdog ping within 10 seconds: | |
15 | WatchdogSec=10s | |
16 | ||
14 | 17 | # No reason to have access to shared tmp files: |
15 | 18 | PrivateTmp=yes |
16 | 19 |
51 | 51 | INSERT INTO "fixes" VALUES(NULL,'uz','ug','uyg\'urcha'); |
52 | 52 | INSERT INTO "fixes" VALUES(NULL,'kk','ug','ұйғұрша'); |
53 | 53 | INSERT INTO "fixes" VALUES(NULL,'ky','ug','уйгурча'); |
54 | INSERT INTO "fixes" VALUES(NULL,'en','crh','Crimean Tatar'); | |
54 | 55 | CREATE TABLE languageNames (id integer primary key, lg text, inLg text, name text, unique(lg, inLg) on conflict replace); |
55 | 56 | INSERT INTO "languageNames" VALUES(NULL,'sah','af','Аппырыкааныстыы'); |
56 | 57 | INSERT INTO "languageNames" VALUES(NULL,'sah','ar','Араабтыы'); |
0 | # From https://github.com/ajdavis/toro/ | |
1 | ||
2 | # Toro Copyright (c) 2012 A. Jesse Jiryu Davis | |
3 | ||
4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may | |
5 | # not use this file except in compliance with the License. You may obtain | |
6 | # a copy of the License at | |
7 | ||
8 | # http://www.apache.org/licenses/LICENSE-2.0 | |
9 | ||
10 | # Unless required by applicable law or agreed to in writing, software | |
11 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | |
12 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | |
13 | # License for the specific language governing permissions and limitations | |
14 | # under the License. | |
15 | ||
16 | ||
17 | import contextlib | |
18 | import heapq | |
19 | import collections | |
20 | from functools import partial | |
21 | from queue import Full, Empty | |
22 | ||
23 | from tornado import ioloop | |
24 | from tornado import gen | |
25 | from tornado.concurrent import Future | |
26 | ||
27 | ||
28 | version_tuple = (0, 8, '+') | |
29 | ||
30 | version = '.'.join(map(str, version_tuple)) | |
31 | """Current version of Toro.""" | |
32 | ||
33 | ||
34 | __all__ = [ | |
35 | # Exceptions | |
36 | 'NotReady', 'AlreadySet', 'Full', 'Empty', 'Timeout', | |
37 | ||
38 | # Primitives | |
39 | 'AsyncResult', 'Event', 'Condition', 'Semaphore', 'BoundedSemaphore', | |
40 | 'Lock', | |
41 | ||
42 | # Queues | |
43 | 'Queue', 'PriorityQueue', 'LifoQueue', 'JoinableQueue' | |
44 | ] | |
45 | ||
46 | ||
class NotReady(Exception):
    """Raised when accessing an :class:`AsyncResult` that has no value yet."""
50 | ||
51 | ||
class AlreadySet(Exception):
    """Raised when setting a value on an :class:`AsyncResult` that already
    has one."""
56 | ||
57 | ||
class Timeout(Exception):
    """Raised when a deadline passes before a Future is ready."""

    def __str__(self):
        """Fixed textual representation; the deadline is not recorded."""
        return "Timeout"
63 | ||
64 | ||
class _TimeoutFuture(Future):
    """A tornado Future that fails with :exc:`Timeout` once its deadline passes."""

    def __init__(self, deadline, io_loop):
        """Create a Future with optional deadline.

        If deadline is not None, it may be a number denoting a unix timestamp
        (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` object
        for a deadline relative to the current time.

        set_exception(toro.Timeout()) is executed after a timeout.
        """

        super(_TimeoutFuture, self).__init__()
        self.io_loop = io_loop
        if deadline is not None:
            # Schedule the timeout; resolving the future cancels it below.
            callback = partial(self.set_exception, Timeout())
            self._timeout_handle = io_loop.add_timeout(deadline, callback)
        else:
            self._timeout_handle = None

    def set_result(self, result):
        # Cancel the pending timeout before delivering the result.
        self._cancel_timeout()
        super(_TimeoutFuture, self).set_result(result)

    def set_exception(self, exception):
        # Also called by the timeout callback itself; cancelling is then a no-op.
        self._cancel_timeout()
        super(_TimeoutFuture, self).set_exception(exception)

    def _cancel_timeout(self):
        # Remove the scheduled timeout callback, if any, so a completed
        # future is not later failed with Timeout.
        if self._timeout_handle:
            self.io_loop.remove_timeout(self._timeout_handle)
            self._timeout_handle = None
97 | ||
98 | ||
class _ContextManagerList(list):
    """A list of context managers that can itself be used in a `with` block.

    Entering/exiting the list enters/exits every member, in list order.
    """

    def __enter__(self, *args, **kwargs):
        for member in self:
            member.__enter__(*args, **kwargs)

    def __exit__(self, *args, **kwargs):
        for member in self:
            member.__exit__(*args, **kwargs)
107 | ||
108 | ||
class _ContextManagerFuture(Future):
    """A Future that can be used with the "with" statement.

    When a coroutine yields this Future, the return value is a context manager
    that can be used like:

    with (yield future):
        pass

    At the end of the block, the Future's exit callback is run. Used for
    Lock.acquire() and Semaphore.acquire().
    """
    def __init__(self, wrapped, exit_callback):
        super(_ContextManagerFuture, self).__init__()
        # Mirror the wrapped future's outcome onto this one.
        wrapped.add_done_callback(self._done_callback)
        self.exit_callback = exit_callback

    def _done_callback(self, wrapped):
        # Propagate either the exception or the result of the inner future.
        if wrapped.exception():
            self.set_exception(wrapped.exception())
        else:
            self.set_result(wrapped.result())

    def result(self):
        # Re-raise if the wrapped future failed; the caller never gets a
        # context manager in that case.
        if self.exception():
            raise self.exception()

        # Otherwise return a context manager that cleans up after the block.
        @contextlib.contextmanager
        def f():
            try:
                yield
            finally:
                # Runs when the caller's `with` block ends, however it ends.
                self.exit_callback()
        return f()
144 | ||
145 | ||
def _consume_expired_waiters(waiters):
    """Drop already-done (timed-out) futures from the head of the deque."""
    while waiters and waiters[0].done():
        waiters.popleft()
150 | ||
151 | ||
152 | _null_result = object() | |
153 | ||
154 | ||
class AsyncResult(object):
    """A one-time event that stores a value or an exception.

    The only distinction between AsyncResult and a simple Future is that
    AsyncResult lets coroutines wait with a deadline. The deadline can be
    configured separately for each waiter.

    An :class:`AsyncResult` instance cannot be reset.

    :Parameters:
      - `io_loop`: Optional custom IOLoop.
    """

    def __init__(self, io_loop=None):
        self.io_loop = io_loop or ioloop.IOLoop.current()
        self.value = _null_result  # sentinel: "no value set yet"
        self.waiters = []

    def __str__(self):
        state = 'value=%r' % self.value if self.ready() else 'unset'
        suffix = ' waiters[%s]' % len(self.waiters) if self.waiters else ''
        return '<%s %s%s>' % (self.__class__.__name__, state, suffix)

    def set(self, value):
        """Set a value and wake up all the waiters."""
        if self.ready():
            raise AlreadySet

        self.value = value
        pending, self.waiters = self.waiters, []
        for fut in pending:
            if not fut.done():  # Might have timed out
                fut.set_result(value)

    def ready(self):
        """Whether :meth:`set` has been called."""
        return self.value is not _null_result

    def get(self, deadline=None):
        """Get a value once :meth:`set` is called. Returns a Future.

        The Future's result will be the value. The Future raises
        :exc:`toro.Timeout` if no value is set before the deadline.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for
            a deadline relative to the current time.
        """
        future = _TimeoutFuture(deadline, self.io_loop)
        if self.ready():
            future.set_result(self.value)
            return future
        self.waiters.append(future)
        return future

    def get_nowait(self):
        """Get the value if ready, or raise :class:`NotReady`."""
        if not self.ready():
            raise NotReady
        return self.value
223 | ||
224 | ||
class Condition(object):
    """A condition allows one or more coroutines to wait until notified.

    Like a standard Condition_, but does not need an underlying lock that
    is acquired and released.

    .. _Condition: http://docs.python.org/library/threading.html#threading.Condition

    :Parameters:
      - `io_loop`: Optional custom IOLoop.
    """

    def __init__(self, io_loop=None):
        self.io_loop = io_loop or ioloop.IOLoop.current()
        # FIFO of _TimeoutFuture objects, one per blocked wait() call.
        self.waiters = collections.deque()

    def __str__(self):
        result = '<%s' % (self.__class__.__name__, )
        if self.waiters:
            result += ' waiters[%s]' % len(self.waiters)
        return result + '>'

    def wait(self, deadline=None):
        """Wait for :meth:`notify`. Returns a Future.

        :exc:`~toro.Timeout` is executed after a timeout.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        future = _TimeoutFuture(deadline, self.io_loop)
        self.waiters.append(future)
        return future

    def notify(self, n=1):
        """Wake up `n` waiters.

        :Parameters:
          - `n`: The number of waiters to awaken (default: 1)
        """
        # Collect up to n live waiters first, then resolve them, so a
        # callback fired by set_result cannot disturb the scan.
        awakened = []
        while self.waiters and n:
            candidate = self.waiters.popleft()
            if candidate.done():
                continue  # Already timed out; does not count against n
            awakened.append(candidate)
            n -= 1

        for future in awakened:
            future.set_result(None)

    def notify_all(self):
        """Wake up all waiters."""
        self.notify(len(self.waiters))
280 | ||
281 | ||
282 | # TODO: show correct examples that avoid thread / process issues w/ concurrent.futures.Future | |
class Event(object):
    """An event blocks coroutines until its internal flag is set to True.

    Similar to threading.Event_.

    .. _threading.Event: http://docs.python.org/library/threading.html#threading.Event

    .. seealso:: :doc:`examples/event_example`

    :Parameters:
      - `io_loop`: Optional custom IOLoop.
    """

    def __init__(self, io_loop=None):
        self.io_loop = io_loop or ioloop.IOLoop.current()
        # Waiters block on this condition until the flag becomes true.
        self.condition = Condition(io_loop=io_loop)
        self._flag = False

    def __str__(self):
        return '<%s %s>' % (
            self.__class__.__name__, 'set' if self._flag else 'clear')

    def is_set(self):
        """Return ``True`` if and only if the internal flag is true."""
        return self._flag

    def set(self):
        """Set the internal flag to ``True``. All waiters are awakened.
        Calling :meth:`wait` once the flag is true will not block.
        """
        self._flag = True
        self.condition.notify_all()

    def clear(self):
        """Reset the internal flag to ``False``. Calls to :meth:`wait`
        will block until :meth:`set` is called.
        """
        self._flag = False

    def wait(self, deadline=None):
        """Block until the internal flag is true. Returns a Future.

        The Future raises :exc:`~toro.Timeout` after a timeout.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        # Fast path: flag already set, so hand back an already-resolved
        # Future instead of registering with the condition.
        if self._flag:
            future = _TimeoutFuture(None, self.io_loop)
            future.set_result(None)
            return future
        else:
            return self.condition.wait(deadline)
339 | ||
340 | ||
class Queue(object):
    """Create a queue object with a given maximum size.

    If `maxsize` is 0 (the default) the queue size is unbounded.

    Unlike the `standard Queue`_, you can reliably know this Queue's size
    with :meth:`qsize`, since your single-threaded Tornado application won't
    be interrupted between calling :meth:`qsize` and doing an operation on the
    Queue.

    **Examples:**

    :doc:`examples/producer_consumer_example`

    :doc:`examples/web_spider_example`

    :Parameters:
      - `maxsize`: Optional size limit (no limit by default).
      - `io_loop`: Optional custom IOLoop.

    .. _`Gevent's Queue`: http://www.gevent.org/gevent.queue.html

    .. _`standard Queue`: http://docs.python.org/library/queue.html#Queue.Queue
    """
    def __init__(self, maxsize=0, io_loop=None):
        self.io_loop = io_loop or ioloop.IOLoop.current()
        # Reject None explicitly rather than letting the < 0 comparison
        # produce a confusing TypeError.
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize

        # _TimeoutFutures of coroutines blocked in get(), oldest first.
        self.getters = collections.deque([])
        # (item, _TimeoutFuture) pairs of coroutines blocked in put().
        self.putters = collections.deque([])
        self._init(maxsize)

    def _init(self, maxsize):
        # Container hook: subclasses override to use a different structure
        # (e.g. a heap in PriorityQueue); `maxsize` is unused here.
        self.queue = collections.deque()

    def _get(self):
        # Retrieval hook: FIFO by default; subclasses override.
        return self.queue.popleft()

    def _put(self, item):
        # Insertion hook: subclasses override (e.g. JoinableQueue counts
        # unfinished tasks here).
        self.queue.append(item)

    def __repr__(self):
        return '<%s at %s %s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._format())

    def _format(self):
        # Human-readable state summary shared by __repr__ and __str__.
        result = 'maxsize=%r' % (self.maxsize, )
        if getattr(self, 'queue', None):
            result += ' queue=%r' % self.queue
        if self.getters:
            result += ' getters[%s]' % len(self.getters)
        if self.putters:
            result += ' putters[%s]' % len(self.putters)
        return result

    def _consume_expired_putters(self):
        # Delete waiters at the head of the queue who've timed out
        while self.putters and self.putters[0][1].done():
            self.putters.popleft()

    def qsize(self):
        """Number of items in the queue"""
        return len(self.queue)

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def empty(self):
        """Return ``True`` if the queue is empty, ``False`` otherwise."""
        return not self.queue

    def full(self):
        """Return ``True`` if there are `maxsize` items in the queue.

        .. note:: if the Queue was initialized with `maxsize=0`
            (the default), then :meth:`full` is never ``True``.
        """
        if self.maxsize == 0:
            return False
        else:
            return self.maxsize <= self.qsize()

    def put(self, item, deadline=None):
        """Put an item into the queue. Returns a Future.

        The Future blocks until a free slot is available for `item`, or raises
        :exc:`toro.Timeout`.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        _consume_expired_waiters(self.getters)
        future = _TimeoutFuture(deadline, self.io_loop)
        if self.getters:
            # Hand the item straight through to the oldest waiting get().
            assert not self.queue, "queue non-empty, why are getters waiting?"
            getter = self.getters.popleft()

            # Use _put and _get instead of passing item straight to getter, in
            # case a subclass has logic that must run (e.g. JoinableQueue).
            self._put(item)
            getter.set_result(self._get())
            future.set_result(None)
        else:
            if self.maxsize and self.maxsize <= self.qsize():
                # Queue is full: park (item, future) until a get() frees a slot.
                self.putters.append((item, future))
            else:
                self._put(item)
                future.set_result(None)

        return future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise queue.Full.
        """
        _consume_expired_waiters(self.getters)
        if self.getters:
            # Same hand-off as put(), but with no future to resolve.
            assert not self.queue, "queue non-empty, why are getters waiting?"
            getter = self.getters.popleft()

            self._put(item)
            getter.set_result(self._get())
        elif self.maxsize and self.maxsize <= self.qsize():
            raise Full
        else:
            self._put(item)

    def get(self, deadline=None):
        """Remove and return an item from the queue. Returns a Future.

        The Future blocks until an item is available, or raises
        :exc:`toro.Timeout`.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        self._consume_expired_putters()
        future = _TimeoutFuture(deadline, self.io_loop)
        if self.putters:
            # A put() was parked waiting for space: accept its item now,
            # resolve its future, and return an item to this getter.
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self.putters.popleft()
            self._put(item)
            putter.set_result(None)
            future.set_result(self._get())
        elif self.qsize():
            future.set_result(self._get())
        else:
            self.getters.append(future)

        return future

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        :exc:`queue.Empty`.
        """
        self._consume_expired_putters()
        if self.putters:
            # Same parked-putter hand-off as get(), returning synchronously.
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self.putters.popleft()
            self._put(item)
            putter.set_result(None)
            return self._get()
        elif self.qsize():
            return self._get()
        else:
            raise Empty
527 | ||
528 | ||
class PriorityQueue(Queue):
    """A subclass of :class:`Queue` that retrieves entries in priority order
    (lowest first).

    Entries are typically tuples of the form: ``(priority number, data)``.

    :Parameters:
      - `maxsize`: Optional size limit (no limit by default).
      - `io_loop`: Optional custom IOLoop.
    """
    def _init(self, maxsize):
        # Use a plain list as a binary heap; `maxsize` is enforced by Queue.
        self.queue = []

    def _put(self, item, heappush=heapq.heappush):
        # heappush bound as a default arg: a fast local lookup per call.
        heappush(self.queue, item)

    def _get(self, heappop=heapq.heappop):
        return heappop(self.queue)
548 | ||
549 | ||
class LifoQueue(Queue):
    """A subclass of :class:`Queue` that retrieves most recently added entries
    first.

    :Parameters:
      - `maxsize`: Optional size limit (no limit by default).
      - `io_loop`: Optional custom IOLoop.
    """
    def _init(self, maxsize):
        # A list used as a stack; `maxsize` is enforced by Queue.
        self.queue = []

    def _put(self, item):
        self.queue.append(item)

    def _get(self):
        # Pop from the end: last in, first out.
        return self.queue.pop()
567 | ||
568 | ||
class JoinableQueue(Queue):
    """A subclass of :class:`Queue` that additionally has :meth:`task_done`
    and :meth:`join` methods.

    .. seealso:: :doc:`examples/web_spider_example`

    :Parameters:
      - `maxsize`: Optional size limit (no limit by default).
      - `io_loop`: Optional custom IOLoop.
    """
    def __init__(self, maxsize=0, io_loop=None):
        Queue.__init__(self, maxsize=maxsize, io_loop=io_loop)
        # Count of items put() but not yet acknowledged via task_done().
        self.unfinished_tasks = 0
        # Set whenever unfinished_tasks reaches zero; join() waits on it.
        self._finished = Event(io_loop)
        self._finished.set()

    def _format(self):
        result = Queue._format(self)
        if self.unfinished_tasks:
            result += ' tasks=%s' % self.unfinished_tasks
        return result

    def _put(self, item):
        # Every enqueued item adds an unfinished task and blocks join().
        self.unfinished_tasks += 1
        self._finished.clear()
        Queue._put(self, item)

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each :meth:`get <Queue.get>` used to
        fetch a task, a subsequent call to :meth:`task_done` tells the queue
        that the processing on the task is complete.

        If a :meth:`join` is currently blocking, it will resume when all
        items have been processed (meaning that a :meth:`task_done` call was
        received for every item that had been :meth:`put <Queue.put>` into the
        queue).

        Raises ``ValueError`` if called more times than there were items
        placed in the queue.
        """
        if self.unfinished_tasks <= 0:
            raise ValueError('task_done() called too many times')
        self.unfinished_tasks -= 1
        if self.unfinished_tasks == 0:
            self._finished.set()

    def join(self, deadline=None):
        """Block until all items in the queue are processed. Returns a Future.

        The count of unfinished tasks goes up whenever an item is added to
        the queue. The count goes down whenever a consumer calls
        :meth:`task_done` to indicate that all work on the item is complete.
        When the count of unfinished tasks drops to zero, :meth:`join`
        unblocks.

        The Future raises :exc:`toro.Timeout` if the count is not zero before
        the deadline.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        return self._finished.wait(deadline)
636 | ||
637 | ||
class Semaphore(object):
    """A lock that can be acquired a fixed number of times before blocking.

    A Semaphore manages a counter representing the number of release() calls
    minus the number of acquire() calls, plus an initial value. The acquire()
    method blocks if necessary until it can return without making the counter
    negative.

    If not given, value defaults to 1.

    :meth:`acquire` supports the context manager protocol:

    >>> from tornado import gen
    >>> import toro
    >>> semaphore = toro.Semaphore()
    >>>
    >>> @gen.coroutine
    ... def f():
    ...    with (yield semaphore.acquire()):
    ...        assert semaphore.locked()
    ...
    ...    assert not semaphore.locked()

    .. note:: Unlike the standard threading.Semaphore_, a :class:`Semaphore`
      can tell you the current value of its :attr:`counter`, because code in a
      single-threaded Tornado app can check these values and act upon them
      without fear of interruption from another thread.

    .. _threading.Semaphore: http://docs.python.org/library/threading.html#threading.Semaphore

    .. seealso:: :doc:`examples/web_spider_example`

    :Parameters:
      - `value`: An int, the initial value (default 1).
      - `io_loop`: Optional custom IOLoop.
    """
    def __init__(self, value=1, io_loop=None):
        if value < 0:
            raise ValueError('semaphore initial value must be >= 0')

        # Implemented as a Queue pre-loaded with `value` tokens: acquire()
        # takes a token off the queue, release() puts one back.
        self.q = Queue(io_loop=io_loop)
        for _ in range(value):
            self.q.put_nowait(None)

        self._unlocked = Event(io_loop=io_loop)
        if value:
            self._unlocked.set()

    def __repr__(self):
        return '<%s at %s%s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s%s>' % (
            self.__class__.__name__, self._format())

    def _format(self):
        # Shared state summary for __repr__ and __str__.
        return ' counter=%s' % self.counter

    @property
    def counter(self):
        """An integer, the current semaphore value"""
        return self.q.qsize()

    def locked(self):
        """True if :attr:`counter` is zero"""
        return self.q.empty()

    def release(self):
        """Increment :attr:`counter` and wake one waiter.
        """
        self.q.put(None)
        if not self.locked():
            # The token was not handed straight to a waiting acquire(), so
            # the counter is now positive and the semaphore is unlocked.
            self._unlocked.set()

    def wait(self, deadline=None):
        """Wait for :attr:`locked` to be False. Returns a Future.

        The Future raises :exc:`toro.Timeout` after the deadline.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        return self._unlocked.wait(deadline)

    def acquire(self, deadline=None):
        """Decrement :attr:`counter`. Returns a Future.

        Block if the counter is zero and wait for a :meth:`release`. The
        Future raises :exc:`toro.Timeout` after the deadline.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        token_future = self.q.get(deadline)
        if self.q.empty():
            self._unlocked.clear()
        return _ContextManagerFuture(token_future, self.release)

    def __enter__(self):
        raise RuntimeError(
            "Use Semaphore like 'with (yield semaphore)', not like"
            " 'with semaphore'")

    __exit__ = __enter__
750 | ||
751 | ||
class BoundedSemaphore(Semaphore):
    """A semaphore that prevents release() being called too often.

    A bounded semaphore verifies that its current value never climbs above
    the value it started with; if it would, ``ValueError`` is raised.
    Semaphores usually guard resources with limited capacity, so releasing
    one more often than it was acquired is a sign of a bug.

    If not given, *value* defaults to 1.

    .. seealso:: :doc:`examples/web_spider_example`
    """
    def __init__(self, value=1, io_loop=None):
        # Remember the ceiling before delegating construction upward.
        self._initial_value = value
        super(BoundedSemaphore, self).__init__(value=value, io_loop=io_loop)

    def release(self):
        # Refuse to push the counter beyond its starting value.
        if self.counter >= self._initial_value:
            raise ValueError("Semaphore released too many times")
        return super(BoundedSemaphore, self).release()
772 | ||
773 | ||
class Lock(object):
    """A lock for coroutines.

    It is created unlocked. When unlocked, :meth:`acquire` changes the state
    to locked. When the state is locked, yielding :meth:`acquire` waits until
    a call to :meth:`release`.

    The :meth:`release` method should only be called in the locked state;
    an attempt to release an unlocked lock raises RuntimeError.

    When more than one coroutine is waiting for the lock, the first one
    registered is awakened by :meth:`release`.

    :meth:`acquire` supports the context manager protocol:

    >>> from tornado import gen
    >>> import toro
    >>> lock = toro.Lock()
    >>>
    >>> @gen.coroutine
    ... def f():
    ...    with (yield lock.acquire()):
    ...        assert lock.locked()
    ...
    ...    assert not lock.locked()

    .. note:: Unlike with the standard threading.Lock_, code in a
      single-threaded Tornado application can check if a :class:`Lock`
      is :meth:`locked`, and act on that information without fear that another
      thread has grabbed the lock, provided you do not yield to the IOLoop
      between checking :meth:`locked` and using a protected resource.

    .. _threading.Lock: http://docs.python.org/2/library/threading.html#lock-objects

    .. seealso:: :doc:`examples/lock_example`

    :Parameters:
      - `io_loop`: Optional custom IOLoop.
    """
    def __init__(self, io_loop=None):
        # A Lock is just a binary bounded semaphore: a single holder, and
        # releasing it while unheld is an error.
        self._block = BoundedSemaphore(value=1, io_loop=io_loop)

    def __str__(self):
        return "<%s _block=%s>" % (
            self.__class__.__name__,
            self._block)

    def acquire(self, deadline=None):
        """Attempt to lock. Returns a Future.

        The Future raises :exc:`toro.Timeout` if the deadline passes.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        return self._block.acquire(deadline)

    def release(self):
        """Unlock.

        If any coroutines are waiting for :meth:`acquire`,
        the first in line is awakened.

        If not locked, raise a RuntimeError.
        """
        if self.locked():
            self._block.release()
        else:
            raise RuntimeError('release unlocked lock')

    def locked(self):
        """``True`` if the lock has been acquired"""
        return self._block.locked()

    def __enter__(self):
        raise RuntimeError(
            "Use Lock like 'with (yield lock)', not like"
            " 'with lock'")

    __exit__ = __enter__
855 | ||
856 | ||
class RWLock(object):
    """A reader-writer lock for coroutines.

    It is created unlocked. When unlocked, :meth:`acquire_write` always changes
    the state to locked. When unlocked, :meth:`acquire_read` can change the
    state to locked, if :meth:`acquire_read` was called max_readers times. When
    the state is locked, yielding :meth:`acquire_read`/:meth:`acquire_write`
    waits until a call to :meth:`release_write` in case of locking on write, or
    :meth:`release_read` in case of locking on read.

    The :meth:`release_read` method should only be called in the locked-on-read
    state; an attempt to release an unlocked lock raises RuntimeError.

    The :meth:`release_write` method should only be called in the locked on
    write state; an attempt to release an unlocked lock raises RuntimeError.

    When more than one coroutine is waiting for the lock, the first one
    registered is awakened by :meth:`release_read`/:meth:`release_write`.

    :meth:`acquire_read`/:meth:`acquire_write` support the context manager
    protocol:

    >>> from tornado import gen
    >>> import toro
    >>> lock = toro.RWLock(max_readers=10)
    >>>
    >>> @gen.coroutine
    ... def f():
    ...    with (yield lock.acquire_read()):
    ...        assert not lock.locked()
    ...
    ...    with (yield lock.acquire_write()):
    ...        assert lock.locked()
    ...
    ...    assert not lock.locked()

    .. note:: Unlike with the standard threading.Lock_, code in a
      single-threaded Tornado application can check if a :class:`RWLock`
      is :meth:`locked`, and act on that information without fear that another
      thread has grabbed the lock, provided you do not yield to the IOLoop
      between checking :meth:`locked` and using a protected resource.

    .. _threading.Lock: http://docs.python.org/2/library/threading.html#lock-objects

    :Parameters:
      - `max_readers`: Optional max readers value, default 1.
      - `io_loop`: Optional custom IOLoop.
    """
    def __init__(self, max_readers=1, io_loop=None):
        self._max_readers = max_readers
        # A write lock is modeled as holding every reader slot at once.
        self._block = BoundedSemaphore(value=max_readers, io_loop=io_loop)

    def __str__(self):
        return "<%s _block=%s>" % (
            self.__class__.__name__,
            self._block)

    def acquire_read(self, deadline=None):
        """Attempt to lock for read. Returns a Future.

        The Future raises :exc:`toro.Timeout` if the deadline passes.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for
            a deadline relative to the current time.
        """
        return self._block.acquire(deadline)

    @gen.coroutine
    def acquire_write(self, deadline=None):
        """Attempt to lock for write. Returns a Future.

        The Future raises :exc:`toro.Timeout` if the deadline passes.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for
            a deadline relative to the current time.
        """
        # Writing requires acquiring every reader slot. Use range, not
        # xrange: xrange was removed in Python 3 and raised NameError here.
        futures = [self._block.acquire(deadline) for _ in
                   range(self._max_readers)]
        try:
            managers = yield futures
        except Timeout:
            for f in futures:
                # Avoid traceback logging.
                f.exception()
            raise

        raise gen.Return(_ContextManagerList(managers))

    def release_read(self):
        """Releases one reader.

        If any coroutines are waiting for :meth:`acquire_read` (in case of full
        readers queue), the first in line is awakened.

        If not locked, raise a RuntimeError.
        """
        if not self.locked():
            raise RuntimeError('release unlocked lock')
        self._block.release()

    def release_write(self):
        """Releases after write.

        The first in queue will be awakened after release.

        If not locked, raise a RuntimeError.
        """
        if not self.locked():
            raise RuntimeError('release unlocked lock')
        # Return all reader slots taken by acquire_write (range, not xrange,
        # for Python 3).
        for i in range(self._max_readers):
            self._block.release()

    def locked(self):
        """``True`` if the lock has been acquired"""
        return self._block.locked()

    def __enter__(self):
        raise RuntimeError(
            "Use RWLock like 'with (yield lock)', not like"
            " 'with lock'")

    __exit__ = __enter__
0 | import re, os, tempfile | |
0 | import re, os | |
1 | 1 | from subprocess import Popen, PIPE |
2 | 2 | from tornado import gen |
3 | 3 | import tornado.process, tornado.iostream |
4 | try: # >=4.2 | |
5 | import tornado.locks as locks | |
6 | except ImportError: | |
7 | import toro as locks | |
4 | 8 | import logging |
5 | 9 | from select import PIPE_BUF |
10 | from contextlib import contextmanager | |
11 | from collections import namedtuple | |
12 | from time import time | |
13 | ||
14 | ||
class Pipeline(object):
    """Base class for a translation pipeline.

    Tracks concurrent usage and serializes writers; subclasses implement
    :meth:`translate`.
    """
    def __init__(self):
        # The lock is needed so we don't let two coroutines write
        # simultaneously to a pipeline; then the first call to read might
        # read translations of text put there by the second call …
        self.lock = locks.Lock()
        # The users count is how many requests have picked this
        # pipeline for translation. If this is 0, we can safely shut
        # down the pipeline.
        self.users = 0
        # Timestamp (time.time()) of the most recent use; 0 means never used.
        self.lastUsage = 0
        # Total number of completed uses, for logging/statistics.
        self.useCount = 0

    @contextmanager
    def use(self):
        # Mark this pipeline as in use for the duration of the block,
        # updating the usage bookkeeping on entry and on exit.
        self.lastUsage = time()
        self.users += 1
        try:
            yield
        finally:
            self.users -= 1
            self.lastUsage = time()
            self.useCount += 1

    def __lt__(self, other):
        # Order pipelines by current load so the least-busy one sorts first.
        return self.users < other.users

    @gen.coroutine
    def translate(self, toTranslate, nosplit):
        # Abstract: subclasses must override.
        raise Exception("Not implemented, subclass me!")
45 | ||
46 | ||
class FlushingPipeline(Pipeline):
    """Pipeline around a long-lived process chain that is flushed between
    requests instead of being restarted."""

    def __init__(self, commands, *args, **kwargs):
        self.inpipe, self.outpipe = startPipeline(commands)
        super().__init__(*args, **kwargs)

    def __del__(self):
        logging.debug("shutting down FlushingPipeline that was used %d times", self.useCount)
        self.inpipe.stdin.close()
        self.inpipe.stdout.close()
        # TODO: It seems the process immediately becomes <defunct>,
        # but only completely removed after a second request to the
        # server – why?

    @gen.coroutine
    def translate(self, toTranslate, nosplit=False):
        """Translate the text, splitting it into chunks unless nosplit."""
        with self.use():
            if not nosplit:
                chunks = splitForTranslation(toTranslate, n_users=self.users)
                translated = yield [translateNULFlush(chunk, self)
                                    for chunk in chunks]
                return "".join(translated)
            whole = yield translateNULFlush(toTranslate, self)
            return whole
70 | ||
class SimplePipeline(Pipeline):
    """Pipeline that runs a fresh process chain for every request."""

    def __init__(self, commands, *args, **kwargs):
        self.commands = list(commands)
        super().__init__(*args, **kwargs)

    @gen.coroutine
    def translate(self, toTranslate, nosplit="ignored"):
        """Translate the whole text in one go; `nosplit` is irrelevant here."""
        with self.use():
            with (yield self.lock.acquire()):
                translated = yield translateSimple(toTranslate, self.commands)
                return translated
82 | ||
83 | ||
84 | ParsedModes = namedtuple('ParsedModes', 'do_flush commands') | |
85 | ||
86 | def makePipeline(modes_parsed): | |
87 | if modes_parsed.do_flush: | |
88 | return FlushingPipeline(modes_parsed.commands) | |
89 | else: | |
90 | return SimplePipeline(modes_parsed.commands) | |
91 | ||
6 | 92 | |
7 | 93 | def startPipeline(commands): |
8 | 94 | procs = [] |
18 | 104 | procs.append(tornado.process.Subprocess(cmd, |
19 | 105 | stdin=in_from, |
20 | 106 | stdout=out_from)) |
21 | ||
22 | 107 | return procs[0], procs[-1] |
108 | ||
23 | 109 | |
24 | 110 | def parseModeFile(mode_path): |
25 | 111 | mode_str = open(mode_path, 'r').read().strip() |
26 | 112 | if mode_str: |
27 | if 'hfst-proc ' in mode_str or 'lrx-proc ' in mode_str: | |
113 | if 'ca-oc@aran' in mode_str: | |
28 | 114 | do_flush = False |
29 | 115 | modes_parentdir = os.path.dirname(os.path.dirname(mode_path)) |
30 | 116 | mode_name = os.path.splitext(os.path.basename(mode_path))[0] |
39 | 125 | do_flush = True |
40 | 126 | commands = [] |
41 | 127 | for cmd in mode_str.strip().split('|'): |
128 | # TODO: we should make language pairs install | |
129 | # modes.xml instead; this is brittle (what if a path | |
130 | # has | or " in it?) | |
42 | 131 | cmd = cmd.replace('$2', '').replace('$1', '-g') |
43 | cmd = re.sub('^(\S*)', '\g<1> -z', cmd) | |
44 | commands.append(cmd.split()) | |
45 | return do_flush, commands | |
46 | else: | |
47 | logging.error('Could not parse mode file %s' % mode_path) | |
48 | raise Exception('Could not parse mode file %s' % mode_path) | |
132 | cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) | |
133 | commands.append([c.strip("'") | |
134 | for c in cmd.split()]) | |
135 | return ParsedModes(do_flush, commands) | |
136 | else: | |
137 | logging.error('Could not parse mode file %s', mode_path) | |
138 | raise Exception('Could not parse mode file %s', mode_path) | |
49 | 139 | |
50 | 140 | |
51 | 141 | def upToBytes(string, max_bytes): |
55 | 145 | bytes of each char. |
56 | 146 | |
57 | 147 | """ |
58 | b = bytes(string,'utf-8') | |
148 | b = bytes(string, 'utf-8') | |
59 | 149 | l = max_bytes |
60 | 150 | while l: |
61 | 151 | try: |
65 | 155 | l -= 1 |
66 | 156 | return 0 |
67 | 157 | |
68 | def hardbreakFn(string, rush_hour): | |
158 | def hardbreakFn(string, n_users): | |
69 | 159 | """If others are queueing up to translate at the same time, we send |
70 | 160 | short requests, otherwise we try to minimise the number of |
71 | 161 | requests, but without letting buffers fill up. |
73 | 163 | These numbers could probably be tweaked a lot. |
74 | 164 | |
75 | 165 | """ |
76 | if rush_hour: | |
166 | if n_users > 2: | |
77 | 167 | return 1000 |
78 | 168 | else: |
79 | 169 | return upToBytes(string, PIPE_BUF) |
80 | 170 | |
81 | 171 | def preferPunctBreak(string, last, hardbreak): |
82 | 172 | """We would prefer to split on a period or space seen before the |
83 | hardbreak, if we can. | |
173 | hardbreak, if we can. If the remaining string is smaller or equal | |
174 | than the hardbreak, return end of the string | |
84 | 175 | |
85 | 176 | """ |
177 | ||
178 | if(len(string[last:]) <= hardbreak): | |
179 | return last+hardbreak+1 | |
180 | ||
86 | 181 | softbreak = int(hardbreak/2)+1 |
87 | 182 | softnext = last + softbreak |
88 | 183 | hardnext = last + hardbreak |
89 | 184 | dot = string.rfind(".", softnext, hardnext) |
90 | if dot>-1: | |
91 | return dot | |
185 | if dot > -1: | |
186 | return dot+1 | |
92 | 187 | else: |
93 | 188 | space = string.rfind(" ", softnext, hardnext) |
94 | if space>-1: | |
95 | return space | |
189 | if space > -1: | |
190 | return space+1 | |
96 | 191 | else: |
97 | 192 | return hardnext |
98 | 193 | |
99 | def splitForTranslation(toTranslate, rush_hour): | |
194 | ||
195 | def splitForTranslation(toTranslate, n_users): | |
100 | 196 | """Splitting it up a bit ensures we don't fill up FIFO buffers (leads |
101 | 197 | to processes hanging on read/write).""" |
102 | allSplit = [] # [].append and join faster than str += | |
103 | last=0 | |
104 | rounds=0 | |
105 | while last < len(toTranslate) and rounds<10: | |
106 | rounds+=1 | |
107 | hardbreak = hardbreakFn(toTranslate[last:], rush_hour) | |
198 | allSplit = [] # [].append and join faster than str += | |
199 | last = 0 | |
200 | rounds = 0 | |
201 | while last < len(toTranslate) and rounds < 10: | |
202 | rounds += 1 | |
203 | hardbreak = hardbreakFn(toTranslate[last:], n_users) | |
108 | 204 | next = preferPunctBreak(toTranslate, last, hardbreak) |
109 | 205 | allSplit.append(toTranslate[last:next]) |
206 | #logging.getLogger().setLevel(logging.DEBUG) | |
207 | logging.debug("splitForTranslation: last:%s hardbreak:%s next:%s appending:%s"%(last,hardbreak,next,toTranslate[last:next])) | |
110 | 208 | last = next |
111 | 209 | return allSplit |
112 | 210 | |
211 | ||
113 | 212 | @gen.coroutine |
114 | def translateNULFlush(toTranslate, lock, pipeline): | |
115 | with (yield lock.acquire()): | |
116 | proc_in, proc_out = pipeline | |
213 | def translateNULFlush(toTranslate, pipeline): | |
214 | with (yield pipeline.lock.acquire()): | |
215 | proc_in, proc_out = pipeline.inpipe, pipeline.outpipe | |
117 | 216 | |
118 | 217 | proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE) |
119 | 218 | proc_deformat.stdin.write(bytes(toTranslate, 'utf-8')) |
124 | 223 | # TODO: PipeIOStream has no flush, but seems to work anyway? |
125 | 224 | #proc_in.stdin.flush() |
126 | 225 | |
127 | output = yield proc_out.stdout.read_until(bytes('\0', 'utf-8')) | |
226 | output = yield gen.Task(proc_out.stdout.read_until, bytes('\0', 'utf-8')) | |
128 | 227 | |
129 | 228 | proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE) |
130 | 229 | proc_reformat.stdin.write(output) |
131 | 230 | return proc_reformat.communicate()[0].decode('utf-8') |
132 | 231 | |
133 | 232 | |
134 | def translateWithoutFlush(toTranslate, lock, pipeline): | |
233 | def translateWithoutFlush(toTranslate, proc_in, proc_out): | |
135 | 234 | proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE) |
136 | 235 | proc_deformat.stdin.write(bytes(toTranslate, 'utf-8')) |
137 | 236 | deformatted = proc_deformat.communicate()[0] |
150 | 249 | proc_reformat.stdin.write(b"".join(output)) |
151 | 250 | return proc_reformat.communicate()[0].decode('utf-8') |
152 | 251 | |
252 | ||
153 | 253 | @gen.coroutine |
154 | 254 | def translatePipeline(toTranslate, commands): |
155 | 255 | |
163 | 263 | output.append(toTranslate) |
164 | 264 | output.append(towrite.decode('utf-8')) |
165 | 265 | |
166 | pipeline = [] | |
167 | pipeline.append("apertium-deshtml") | |
266 | all_cmds = [] | |
267 | all_cmds.append("apertium-deshtml") | |
168 | 268 | |
169 | 269 | for cmd in commands: |
170 | 270 | proc = Popen(cmd, stdin=PIPE, stdout=PIPE) |
172 | 272 | towrite = proc.communicate()[0] |
173 | 273 | |
174 | 274 | output.append(towrite.decode('utf-8')) |
175 | pipeline.append(cmd) | |
275 | all_cmds.append(cmd) | |
176 | 276 | |
177 | 277 | proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE) |
178 | 278 | proc_reformat.stdin.write(towrite) |
179 | 279 | towrite = proc_reformat.communicate()[0].decode('utf-8') |
180 | 280 | |
181 | 281 | output.append(towrite) |
182 | pipeline.append("apertium-rehtml-noent") | |
183 | ||
184 | return output, pipeline | |
282 | all_cmds.append("apertium-rehtml-noent") | |
283 | ||
284 | return output, all_cmds | |
285 | ||
185 | 286 | |
186 | 287 | @gen.coroutine |
187 | 288 | def translateSimple(toTranslate, commands): |
188 | 289 | proc_in, proc_out = startPipeline(commands) |
189 | assert(proc_in==proc_out) | |
190 | yield proc_in.stdin.write(bytes(toTranslate, 'utf-8')) | |
290 | assert proc_in == proc_out | |
291 | yield gen.Task(proc_in.stdin.write, bytes(toTranslate, 'utf-8')) | |
191 | 292 | proc_in.stdin.close() |
192 | translated = yield proc_out.stdout.read_until_close() | |
293 | translated = yield gen.Task(proc_out.stdout.read_until_close) | |
193 | 294 | proc_in.stdout.close() |
194 | 295 | return translated.decode('utf-8') |
195 | 296 | |
196 | def translateDoc(fileToTranslate, format, modeFile): | |
197 | modesdir=os.path.dirname(os.path.dirname(modeFile)) | |
198 | mode=os.path.splitext(os.path.basename(modeFile))[0] | |
199 | return Popen(['apertium', '-f', format, '-d', modesdir, mode], | |
200 | stdin=fileToTranslate, stdout=PIPE).communicate()[0] | |
201 | ||
202 | @gen.coroutine | |
203 | def translate(toTranslate, lock, pipeline, commands): | |
204 | if pipeline: | |
205 | allSplit = splitForTranslation(toTranslate, rush_hour = lock.locked()) | |
206 | parts = yield [translateNULFlush(part, lock, pipeline) for part in allSplit] | |
207 | return "".join(parts) | |
208 | else: | |
209 | with (yield lock.acquire()): | |
210 | res = yield translateSimple(toTranslate, commands) | |
211 | return res | |
297 | ||
298 | def translateDoc(fileToTranslate, fmt, modeFile, unknownMarks = False): | |
299 | modesdir = os.path.dirname(os.path.dirname(modeFile)) | |
300 | mode = os.path.splitext(os.path.basename(modeFile))[0] | |
301 | if unknownMarks: | |
302 | return Popen(['apertium', '-f', fmt, '-d', modesdir, mode], | |
303 | stdin=fileToTranslate, stdout=PIPE).communicate()[0] | |
304 | else: | |
305 | return Popen(['apertium', '-f', fmt, '-u', '-d', modesdir, mode], | |
306 | stdin=fileToTranslate, stdout=PIPE).communicate()[0] |
0 | import re, os | |
1 | from subprocess import Popen, PIPE | |
2 | from tornado import gen | |
3 | import tornado.process, tornado.iostream | |
4 | try: # >=4.2 | |
5 | import tornado.locks as locks | |
6 | except ImportError: | |
7 | import toro as locks | |
8 | import logging | |
9 | from select import PIPE_BUF | |
10 | from contextlib import contextmanager | |
11 | from collections import namedtuple | |
12 | from time import time | |
13 | ||
14 | ||
class Pipeline(object):
    """Base class for a translation pipeline.

    Tracks usage statistics and serialises writes with a lock; concrete
    subclasses implement translate().
    """

    def __init__(self):
        # The lock is needed so we don't let two coroutines write
        # simultaneously to a pipeline; then the first call to read might
        # read translations of text put there by the second call …
        self.lock = locks.Lock()
        # The users count is how many requests have picked this
        # pipeline for translation. If this is 0, we can safely shut
        # down the pipeline.
        self.users = 0
        # Timestamp of the most recent use (0 = never used):
        self.lastUsage = 0
        # Total number of completed uses:
        self.useCount = 0

    @contextmanager
    def use(self):
        """Mark this pipeline as in-use for the duration of the block."""
        self.lastUsage = time()
        self.users += 1
        try:
            yield
        finally:
            self.users -= 1
            self.lastUsage = time()
            self.useCount += 1

    def __lt__(self, other):
        # Order pipelines by current load, so the least-busy sorts first.
        return self.users < other.users

    @gen.coroutine
    def translate(self, toTranslate, nosplit):
        # NotImplementedError is the idiomatic marker for an abstract
        # method; it subclasses RuntimeError/Exception, so existing
        # `except Exception` handlers still catch it.
        raise NotImplementedError("Not implemented, subclass me!")
45 | ||
46 | ||
class FlushingPipeline(Pipeline):
    """Pipeline for modes supporting NUL-flushing (-z): one long-lived
    process chain is shared and reused across requests."""

    def __init__(self, commands, *args, **kwargs):
        self.inpipe, self.outpipe = startPipeline(commands)
        super().__init__(*args, **kwargs)

    def __del__(self):
        logging.debug("shutting down FlushingPipeline that was used %d times", self.useCount)
        self.inpipe.stdin.close()
        self.inpipe.stdout.close()
        # TODO: It seems the process immediately becomes <defunct>,
        # but only completely removed after a second request to the
        # server – why?

    @gen.coroutine
    def translate(self, toTranslate, nosplit=False):
        with self.use():
            if nosplit:
                res = yield translateNULFlush(toTranslate, self)
                # Plain `return value` is the PEP 479-safe way to finish
                # a coroutine; `raise StopIteration(value)` becomes a
                # RuntimeError inside generators on Python 3.7+.
                return res
            else:
                all_split = splitForTranslation(toTranslate, n_users=self.users)
                parts = yield [translateNULFlush(part, self) for part in all_split]
                return "".join(parts)
70 | ||
class SimplePipeline(Pipeline):
    """Pipeline for modes without NUL-flushing: a fresh process chain is
    started for every request."""

    def __init__(self, commands, *args, **kwargs):
        self.commands = list(commands)
        super().__init__(*args, **kwargs)

    @gen.coroutine
    def translate(self, toTranslate, nosplit="ignored"):
        with self.use():
            with (yield self.lock.acquire()):
                res = yield translateSimple(toTranslate, self.commands)
                # PEP 479: use `return`, not `raise StopIteration(...)`,
                # to return a value from a coroutine (RuntimeError on 3.7+).
                return res
82 | ||
83 | ||
# Result of parsing a mode file: whether the pipeline supports
# NUL-flushing, plus the list of argv lists to run.
ParsedModes = namedtuple('ParsedModes', 'do_flush commands')


def makePipeline(modes_parsed):
    """Build the appropriate Pipeline subclass for a parsed mode file."""
    pipeline_cls = FlushingPipeline if modes_parsed.do_flush else SimplePipeline
    return pipeline_cls(modes_parsed.commands)
91 | ||
92 | ||
def startPipeline(commands):
    """Spawn one Subprocess per command, chaining each stdout into the
    next command's stdin.

    Only the first process's stdin and the last process's stdout are
    tornado STREAMs; intermediate links are plain OS pipes. Returns the
    (first, last) processes.
    """
    procs = []
    last_index = len(commands) - 1
    for index, cmd in enumerate(commands):
        stdin_src = (tornado.process.Subprocess.STREAM
                     if index == 0
                     else procs[-1].stdout)
        stdout_dst = (tornado.process.Subprocess.STREAM
                      if index == last_index
                      else PIPE)
        procs.append(tornado.process.Subprocess(cmd,
                                                stdin=stdin_src,
                                                stdout=stdout_dst))
    return procs[0], procs[-1]
108 | ||
109 | ||
def parseModeFile(mode_path):
    """Parse an apertium .mode file into a ParsedModes tuple.

    Returns ParsedModes(do_flush, commands), where commands is a list of
    argv lists. Raises Exception if the mode file is empty.
    """
    # Use a context manager so the file handle isn't leaked:
    with open(mode_path, 'r') as f:
        mode_str = f.read().strip()
    if not mode_str:
        logging.error('Could not parse mode file %s', mode_path)
        # Unlike logging, Exception() does no %-interpolation, so format
        # the path into the message before raising:
        raise Exception('Could not parse mode file %s' % mode_path)
    if 'ca-oc@aran' in mode_str:
        # This pair is known not to support NUL-flushing; run it through
        # plain `apertium` instead:
        do_flush = False
        modes_parentdir = os.path.dirname(os.path.dirname(mode_path))
        mode_name = os.path.splitext(os.path.basename(mode_path))[0]
        commands = [[
            'apertium',
            '-f', 'html-noent',
            # Get the _parent_ dir of the mode file:
            '-d', modes_parentdir,
            mode_name
        ]]
    else:
        do_flush = True
        commands = []
        for cmd in mode_str.strip().split('|'):
            # TODO: we should make language pairs install
            # modes.xml instead; this is brittle (what if a path
            # has | or " in it?)
            cmd = cmd.replace('$2', '').replace('$1', '-g')
            # Add -z (NUL-flush mode) right after each program name:
            cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd)
            commands.append([c.strip("'")
                             for c in cmd.split()])
    return ParsedModes(do_flush, commands)
139 | ||
140 | ||
def upToBytes(string, max_bytes):
    """Find the unicode string length of the first up-to-max_bytes bytes.

    At least it's much faster than going through the string adding
    bytes of each char.

    """
    encoded = bytes(string, 'utf-8')
    limit = max_bytes
    # Back off one byte at a time until the prefix is valid UTF-8:
    while limit:
        try:
            return len(encoded[:limit].decode('utf-8'))
        except UnicodeDecodeError:
            limit -= 1
    return 0
157 | ||
def hardbreakFn(string, n_users):
    """If others are queueing up to translate at the same time, we send
    short requests, otherwise we try to minimise the number of
    requests, but without letting buffers fill up.

    These numbers could probably be tweaked a lot.

    """
    # Busy (>2 concurrent users): short fixed chunks; otherwise fill up
    # to the pipe's atomic-write size.
    return 1000 if n_users > 2 else upToBytes(string, PIPE_BUF)
170 | ||
def preferPunctBreak(string, last, hardbreak):
    """We would prefer to split on a period or space seen before the
    hardbreak, if we can. If the remaining string is smaller or equal
    than the hardbreak, return end of the string

    """
    # Everything that's left fits in one chunk:
    if len(string) - last <= hardbreak:
        return last + hardbreak + 1

    # Look for a separator in the second half of the chunk window,
    # preferring a period over a space:
    window_start = last + int(hardbreak / 2) + 1
    window_end = last + hardbreak
    for separator in (".", " "):
        found = string.rfind(separator, window_start, window_end)
        if found > -1:
            return found + 1
    return window_end
193 | ||
194 | ||
def splitForTranslation(toTranslate, n_users):
    """Splitting it up a bit ensures we don't fill up FIFO buffers (leads
    to processes hanging on read/write).

    Returns a list of chunks; at most 10 chunks are produced, so very
    long input beyond the tenth chunk is dropped (pre-existing cap).
    """
    allSplit = []  # [].append and join faster than str +=
    last = 0
    rounds = 0
    while last < len(toTranslate) and rounds < 10:
        rounds += 1
        hardbreak = hardbreakFn(toTranslate[last:], n_users)
        # Renamed from `next` so we don't shadow the builtin:
        split_at = preferPunctBreak(toTranslate, last, hardbreak)
        allSplit.append(toTranslate[last:split_at])
        # Lazy %-args: the message is only formatted when DEBUG is enabled.
        logging.debug("splitForTranslation: last:%s hardbreak:%s next:%s appending:%s",
                      last, hardbreak, split_at, toTranslate[last:split_at])
        last = split_at
    return allSplit
210 | ||
211 | ||
@gen.coroutine
def translateNULFlush(toTranslate, pipeline):
    """Translate one chunk through the pipeline's long-lived process
    chain, using a NUL byte to delimit and flush this request's output.

    Holds the pipeline lock for the whole round-trip so concurrent
    requests can't interleave their writes/reads.
    """
    with (yield pipeline.lock.acquire()):
        proc_in, proc_out = pipeline.inpipe, pipeline.outpipe

        proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE)
        proc_deformat.stdin.write(bytes(toTranslate, 'utf-8'))
        deformatted = proc_deformat.communicate()[0]

        proc_in.stdin.write(deformatted)
        proc_in.stdin.write(bytes('\0', "utf-8"))
        # TODO: PipeIOStream has no flush, but seems to work anyway?
        #proc_in.stdin.flush()

        output = yield gen.Task(proc_out.stdout.read_until, bytes('\0', 'utf-8'))

        proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE)
        proc_reformat.stdin.write(output)
        # PEP 479: `return value`, not `raise StopIteration(value)` —
        # the latter becomes RuntimeError inside generators on 3.7+.
        return proc_reformat.communicate()[0].decode('utf-8')
231 | ||
232 | ||
def translateWithoutFlush(toTranslate, proc_in, proc_out):
    """Translate one chunk through an already-started process chain,
    reading byte-by-byte until the NUL terminator.

    This is a plain (blocking) function, not a coroutine.
    """
    proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE)
    proc_deformat.stdin.write(bytes(toTranslate, 'utf-8'))
    deformatted = proc_deformat.communicate()[0]

    proc_in.stdin.write(deformatted)
    proc_in.stdin.write(bytes('\0', "utf-8"))
    proc_in.stdin.flush()

    d = proc_out.stdout.read(1)
    output = []
    while d and d != b'\x00':
        output.append(d)
        d = proc_out.stdout.read(1)

    proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE)
    proc_reformat.stdin.write(b"".join(output))
    # Bug fix: this is not a generator, so `raise StopIteration(...)`
    # would propagate an exception to the caller instead of returning.
    return proc_reformat.communicate()[0].decode('utf-8')
251 | ||
252 | ||
@gen.coroutine
def translatePipeline(toTranslate, commands):
    """Run the translation chain step by step, collecting each
    intermediate output together with the command that produced it
    (useful for debugging a pipeline)."""

    proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE)
    proc_deformat.stdin.write(bytes(toTranslate, 'utf-8'))
    towrite = proc_deformat.communicate()[0]

    # Parallel lists: output[i] was produced by all_cmds[i-1]
    # (output[0] is the raw input text).
    output = [toTranslate, towrite.decode('utf-8')]
    all_cmds = ["apertium-deshtml"]

    for cmd in commands:
        proc = Popen(cmd, stdin=PIPE, stdout=PIPE)
        proc.stdin.write(towrite)
        towrite = proc.communicate()[0]
        output.append(towrite.decode('utf-8'))
        all_cmds.append(cmd)

    proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE)
    proc_reformat.stdin.write(towrite)
    towrite = proc_reformat.communicate()[0].decode('utf-8')
    output.append(towrite)
    all_cmds.append("apertium-rehtml-noent")

    return output, all_cmds
285 | ||
286 | ||
@gen.coroutine
def translateSimple(toTranslate, commands):
    """Translate by spawning a fresh one-shot pipeline for this request
    (used for modes that don't support NUL-flushing)."""
    proc_in, proc_out = startPipeline(commands)
    # Non-flushing modes are a single `apertium` invocation, so the
    # chain's first and last process are the same object:
    assert proc_in == proc_out
    yield gen.Task(proc_in.stdin.write, bytes(toTranslate, 'utf-8'))
    proc_in.stdin.close()
    translated = yield gen.Task(proc_out.stdout.read_until_close)
    proc_in.stdout.close()
    # PEP 479: `return value`, not `raise StopIteration(value)` —
    # the latter becomes RuntimeError inside generators on 3.7+.
    return translated.decode('utf-8')
296 | ||
297 | ||
def translateDoc(fileToTranslate, fmt, modeFile, unknownMarks = False):
    """Translate a whole document with `apertium -f <fmt>`.

    When unknownMarks is False, -u is passed (which, per apertium's
    docs, suppresses the '*' marks on unknown words).
    """
    modesdir = os.path.dirname(os.path.dirname(modeFile))
    mode = os.path.splitext(os.path.basename(modeFile))[0]
    # Build the argv once instead of duplicating the Popen call:
    args = ['apertium', '-f', fmt]
    if not unknownMarks:
        args.append('-u')
    args.extend(['-d', modesdir, mode])
    return Popen(args, stdin=fileToTranslate, stdout=PIPE).communicate()[0]
3 | 3 | import sqlite3, re, os, logging |
4 | 4 | from subprocess import Popen, PIPE |
5 | 5 | from datetime import datetime |
6 | import threading | |
7 | from collections import defaultdict | |
8 | 6 | |
9 | 7 | iso639Codes = {"abk":"ab","aar":"aa","afr":"af","aka":"ak","sqi":"sq","amh":"am","ara":"ar","arg":"an","hye":"hy","asm":"as","ava":"av","ave":"ae","aym":"ay","aze":"az","bam":"bm","bak":"ba","eus":"eu","bel":"be","ben":"bn","bih":"bh","bis":"bi","bos":"bs","bre":"br","bul":"bg","mya":"my","cat":"ca","cha":"ch","che":"ce","nya":"ny","zho":"zh","chv":"cv","cor":"kw","cos":"co","cre":"cr","hrv":"hr","ces":"cs","dan":"da","div":"dv","nld":"nl","dzo":"dz","eng":"en","epo":"eo","est":"et","ewe":"ee","fao":"fo","fij":"fj","fin":"fi","fra":"fr","ful":"ff","glg":"gl","kat":"ka","deu":"de","ell":"el","grn":"gn","guj":"gu","hat":"ht","hau":"ha","heb":"he","her":"hz","hin":"hi","hmo":"ho","hun":"hu","ina":"ia","ind":"id","ile":"ie","gle":"ga","ibo":"ig","ipk":"ik","ido":"io","isl":"is","ita":"it","iku":"iu","jpn":"ja","jav":"jv","kal":"kl","kan":"kn","kau":"kr","kas":"ks","kaz":"kk","khm":"km","kik":"ki","kin":"rw","kir":"ky","kom":"kv","kon":"kg","kor":"ko","kur":"ku","kua":"kj","lat":"la","ltz":"lb","lug":"lg","lim":"li","lin":"ln","lao":"lo","lit":"lt","lub":"lu","lav":"lv","glv":"gv","mkd":"mk","mlg":"mg","msa":"ms","mal":"ml","mlt":"mt","mri":"mi","mar":"mr","mah":"mh","mon":"mn","nau":"na","nav":"nv","nob":"nb","nde":"nd","nep":"ne","ndo":"ng","nno":"nn","nor":"no","iii":"ii","nbl":"nr","oci":"oc","oji":"oj","chu":"cu","orm":"om","ori":"or","oss":"os","pan":"pa","pli":"pi","fas":"fa","pol":"pl","pus":"ps","por":"pt","que":"qu","roh":"rm","run":"rn","ron":"ro","rus":"ru","san":"sa","srd":"sc","snd":"sd","sme":"se","smo":"sm","sag":"sg","srp":"sr","gla":"gd","sna":"sn","sin":"si","slk":"sk","slv":"sl","som":"so","sot":"st","azb":"az","spa":"es","sun":"su","swa":"sw","ssw":"ss","swe":"sv","tam":"ta","tel":"te","tgk":"tg","tha":"th","tir":"ti","bod":"bo","tuk":"tk","tgl":"tl","tsn":"tn","ton":"to","tur":"tr","tso":"ts","tat":"tt","twi":"tw","tah":"ty","uig":"ug","ukr":"uk","urd":"ur","uzb":"uz","ven":"ve","vie":"vi","vol":"vo","wln":"wa","cym":"cy","wol":"wo","fry":"f
y","xho":"xh","yid":"yi","yor":"yo","zha":"za","zul":"zu", "hbs":"sh", "arg":"an", "pes":"fa"} |
10 | 8 | ''' |
14 | 12 | JSON.stringify(out); |
15 | 13 | ''' |
16 | 14 | |
15 | # TODO: does this need a lock? | |
17 | 16 | langNamesDBConn = None |
18 | missingFreqsDBConn = None | |
19 | 17 | |
20 | 18 | def toAlpha2Code(code): |
21 | 19 | if '_' in code: |
68 | 66 | output[languageResult[2]] = languageResult[3] |
69 | 67 | return output |
70 | 68 | |
71 | def noteUnknownToken(token, pair, dbPath): | |
72 | global missingFreqsDBConn | |
73 | if not missingFreqsDBConn: | |
74 | missingFreqsDBConn = sqlite3.connect(dbPath) | |
75 | c = missingFreqsDBConn.cursor() | |
76 | ||
77 | c.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))') | |
78 | c.execute('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + 1)', {'pair': pair, 'token': token}) | |
79 | missingFreqsDBConn.commit() | |
80 | ||
81 | ||
82 | unknownLock = threading.RLock() | |
83 | unknownWords = defaultdict(lambda: defaultdict(lambda: 0)) | |
84 | unknownCount = 0 | |
85 | ||
86 | def inMemoryUnknownToken(token, pair, dbPath, limit): | |
87 | global unknownLock | |
88 | global unknownCount | |
89 | global unknownWords | |
90 | ||
91 | try: | |
92 | unknownLock.acquire() | |
93 | unknownWords[pair][token] += 1 | |
94 | unknownCount += 1 | |
95 | ||
96 | if unknownCount > limit: | |
97 | flushUnknownWords(dbPath) | |
98 | unknownWords.clear() | |
99 | unknownCount = 0 | |
100 | finally: | |
101 | unknownLock.release() | |
102 | ||
103 | ||
104 | def flushUnknownWords(dbPath): | |
105 | global unknownWords | |
106 | global missingFreqsDBConn | |
107 | ||
108 | timeBefore = datetime.now() | |
109 | ||
110 | if not missingFreqsDBConn: | |
111 | missingFreqsDBConn = sqlite3.connect(dbPath) | |
112 | ||
113 | c = missingFreqsDBConn.cursor() | |
114 | c.execute("PRAGMA synchronous = NORMAL") | |
115 | ||
116 | c.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))') | |
117 | ||
118 | c.executemany('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + :amount)', | |
119 | ({'pair': pair, 'token': token, 'amount' : unknownWords[pair][token]} for pair in unknownWords for token in unknownWords[pair])) | |
120 | ||
121 | missingFreqsDBConn.commit() | |
122 | ||
123 | ms = timedeltaToMilliseconds(datetime.now() - timeBefore) | |
124 | logging.info("\tSaving %s unknown words to the DB (%s ms)", unknownCount, ms) | |
125 | ||
126 | def closeDb(): | |
127 | global missingFreqsDBConn | |
128 | if not missingFreqsDBConn: | |
129 | logging.warning('no connection') | |
130 | return | |
131 | logging.warning('closing connection') | |
132 | missingFreqsDBConn.close() | |
133 | missingFreqsDBConn = False | |
134 | 69 | |
135 | 70 | def apertium(input, dir, mode, formatting=None): |
136 | 71 | p1 = Popen(['echo', input], stdout=PIPE) |
72 | print(input, dir, mode, formatting) | |
137 | 73 | if formatting: |
138 | 74 | p2 = Popen(['apertium', '-d . -f %s' % formatting, mode], stdin=p1.stdout, stdout=PIPE, cwd=dir) |
139 | 75 | else: |
140 | p2 = Popen(['apertium', '-d .', mode], stdin=p1.stdout, stdout=PIPE, cwd=dir) | |
76 | p2 = Popen(['apertium', '-d {}'.format(dir), mode], stdin=p1.stdout, stdout=PIPE) | |
141 | 77 | p1.stdout.close() |
142 | 78 | output = p2.communicate()[0].decode('utf-8') |
143 | 79 | return output |
149 | 85 | output = p2.communicate()[0].decode('utf-8') |
150 | 86 | return output |
151 | 87 | |
152 | def removeLast(query, analyses): | |
88 | def removeDotFromDeformat(query, analyses): | |
89 | """When using the txt format, a dot is added at EOF (also, double line | |
90 | breaks) if the last part of the query isn't itself a dot""" | |
153 | 91 | if not query[-1] == '.': |
154 | 92 | return analyses[:-1] |
155 | 93 | else: |
169 | 107 | |
170 | 108 | def getCoverage(text, mode, modeDir, penalize=False): |
171 | 109 | analysis = apertium(text, mode, modeDir) |
172 | lexicalUnits = removeLast(text, re.findall(r'\^([^\$]*)\$([^\^]*)', analysis)) | |
110 | lexicalUnits = removeDotFromDeformat(text, re.findall(r'\^([^\$]*)\$([^\^]*)', analysis)) | |
173 | 111 | analyzedLexicalUnits = list(filter(lambda x: not x[0].split('/')[1][0] in '*&#', lexicalUnits)) |
174 | 112 | if len(lexicalUnits) and not penalize: |
175 | 113 | return len(analyzedLexicalUnits) / len(lexicalUnits) |
188 | 126 | if lang in analyzers: |
189 | 127 | modeInfo = analyzers[lang] |
190 | 128 | analysis = apertium(query, modeInfo[0], modeInfo[1]) |
191 | morph_lexicalUnits = removeLast(query, re.findall(lexicalUnitRE, analysis)) | |
129 | morph_lexicalUnits = removeDotFromDeformat(query, re.findall(lexicalUnitRE, analysis)) | |
192 | 130 | outputs['morph'] = [lexicalUnit.split('/')[1:] for lexicalUnit in morph_lexicalUnits] |
193 | 131 | outputs['morph_inputs'] = [stripTags(lexicalUnit.split('/')[0]) for lexicalUnit in morph_lexicalUnits] |
194 | 132 | else: |
198 | 136 | if lang in taggers: |
199 | 137 | modeInfo = taggers[lang] |
200 | 138 | analysis = apertium(query, modeInfo[0], modeInfo[1]) |
201 | tagger_lexicalUnits = removeLast(query, re.findall(lexicalUnitRE, analysis)) | |
139 | tagger_lexicalUnits = removeDotFromDeformat(query, re.findall(lexicalUnitRE, analysis)) | |
202 | 140 | outputs['tagger'] = [lexicalUnit.split('/')[1:] if '/' in lexicalUnit else lexicalUnit for lexicalUnit in tagger_lexicalUnits] |
203 | 141 | outputs['tagger_inputs'] = [stripTags(lexicalUnit.split('/')[0]) for lexicalUnit in tagger_lexicalUnits] |
204 | 142 | else: |
231 | 169 | return |
232 | 170 | |
233 | 171 | return (outputs, tagger_lexicalUnits, morph_lexicalUnits) |
234 | ||
172 | ||
235 | 173 | def getTimestamp(): |
236 | 174 | return datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] |
237 | 175 | |
238 | def timedeltaToMilliseconds(td): | |
239 | return td.days*86400000 + td.seconds*1000 + int(td.microseconds/1000) | |
240 | ||
241 | 176 | def scaleMtLog(status, time, tInfo, key, length): |
242 | logging.getLogger('scale-mt').error("%s %s %s html %s %s %s %s %s %s", | |
243 | getTimestamp(), | |
244 | timedeltaToMilliseconds(time), | |
245 | tInfo.langpair, | |
246 | key, | |
247 | tInfo.ip, | |
248 | tInfo.referer, | |
249 | status, | |
250 | length, | |
251 | 'null' | |
252 | ) | |
177 | logging.getLogger('scale-mt').error("%s %s %s html %s %s %s %s %s %s", | |
178 | getTimestamp(), | |
179 | timedeltaToMilliseconds(time), | |
180 | tInfo.langpair, | |
181 | key, | |
182 | tInfo.ip, | |
183 | tInfo.referer, | |
184 | status, | |
185 | length, | |
186 | 'null' | |
187 | ) | |
253 | 188 | |
254 | 189 | |
255 | 190 | class TranslationInfo: |