Imported Upstream version 0.9.1~r343
Tino Didriksen
7 years ago
0 | language: python | |
1 | ||
2 | python: | |
3 | - "3.2" | |
4 | - "3.3" | |
5 | - "3.4" | |
6 | - "3.5" | |
7 | - "3.5-dev" | |
8 | - "nightly" | |
9 | ||
10 | install: pip3 install tornado | |
11 | ||
12 | before_script: | |
13 | - wget http://apertium.projectjj.com/apt/install-nightly.sh -O - | sudo bash | |
14 | - sudo apt-get -f install apertium-all-dev | |
15 | - sudo apt-get -f install apertium-sme-nob apertium-es-en | |
16 | - svn co https://svn.code.sf.net/p/apertium/svn/languages/apertium-nno /tmp/languages/apertium-nno | |
17 | - ( cd /tmp/languages/apertium-nno && ./autogen.sh && make -j2 ) | |
18 | ||
19 | script: | |
20 | - NONPAIRS=/tmp/languages t/run-tests | |
21 | ||
22 | ||
23 | notifications: | |
24 | irc: | |
25 | channels: | |
26 | - secure: "jMRqT7lndd2vTXON34HHdNQ6v+DSOSUQZwKXsJytw9F55pPPS4z+76vTChHKdNcp4nV5nWhCarZR2X4bK7qWO5i25XIXsffFm4TK/XWDdZgLKHCS431mzcSVrYYASC0nn15HgGAl/H6dMksNThuV5gaItSPcKNXC9amBYLbTeSt/H+rxDBuPs+m7yy049EySTWjUoFacel7HDoTEXQyl82Ks5I0oJM+ErgS1VAe6RPiXIH+eBGbPT0YoVYUS1WoKSZYaLhLZ/jOLDnnTiFhTn/f8+uZrbAfLGWzkp9/NbyMCByQFH48HFzjudOD6BgxRumJaDFhImrKrDvPpMulXB1MLl+JD9mK+lioLaqUiUu+2wkBJ90bvOCNqhKLR94upev7Skg52n96Wc91EfiVxuwiLDJOzwFAbZ9VaziNI/Ld3y1qHO84spV7R7AWFhI34xGNkAOCJ1sZZWcH2rGFjsldLNPHeyhNBaZhxGdGdVvrbTm0jbR4G0iD54BLK1IPuJt/eaETWrAnE2XgDnJ3PE9JKtFOe5tpuhF/R9s8qFYGEVmG9SnSxqJax7K6XcQ6utla10qFX0mR5Ub8S+Ryu9fN5+g+U2fCrfJrH44RhDCz961SGcRYVaWDITtDTeAtBGP6G0jOkNv8yi21ha50y4uDxvXq1ETfoHLeYwzzCHmU=" | |
27 | on_failure: always | |
28 | on_success: never | |
29 | # The irc channel is encrypted for goavki/apertium-apy, so build notifications from forks won't show up on the IRC channel | |
30 | # Encrypt with: | |
31 | # $ gem install --user-install travis | |
32 | # $ ~/.gem/ruby/*/bin/travis encrypt -r goavki/apertium-apy 'chat.freenode.net#apertium' |
0 | See NEWS for a per-release summary, this is just the git log. | |
1 | ||
2 | 2016-06-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
3 | ||
4 | * tools/sanity-test-apy.py: test swe-nor | |
5 | ||
6 | 2016-05-31 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
7 | ||
8 | * : commit 72eb4dad68cb8d4a12f2d74a2395cafec3b7af96 Author: Kevin | |
9 | Brubeck Unhammer <unhammer@fsfe.org> Date: Tue May 31 15:37:53 | |
10 | 2016 +0200 | |
11 | ||
12 | 2016-05-27 Kevin Brubeck Unhammer <unhammer+dill@mm.st> | |
13 | ||
14 | * : Merge pull request #21 from Putti/variable Remove unused variable | |
15 | ||
16 | 2016-05-27 Joonas Kylmälä <j.kylmala@gmail.com> | |
17 | ||
18 | * modeSearch.py: Remove unused variable | |
19 | ||
20 | 2016-05-27 Joonas Kylmälä <j.kylmala@gmail.com> | |
21 | ||
22 | * modeSearch.py: Move mode logging to its own function | |
23 | ||
24 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
25 | ||
26 | * .travis.yml: travis: lint | |
27 | ||
28 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
29 | ||
30 | * .travis.yml: travis: irc notify | |
31 | ||
32 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
33 | ||
34 | * .gitignore: ignores | |
35 | ||
36 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
37 | ||
38 | * .gitignore, servlet.py, systemd.py, tools/systemd/apy.service: | |
39 | systemd watchdog; only used if started by systemd and the .service file has e.g. WatchdogSec=10s | |
40 | ||
41 | 2016-05-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
42 | ||
43 | * servlet.py: don't try to close missingFreqsDb unless open | |
44 | ||
45 | 2016-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
46 | ||
47 | * translation_py32.py: rm arbitrary diff from translation.py | |
48 | ||
49 | 2016-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
50 | ||
51 | * translation_py32.py: merge 8f8ffda to py32 as well | |
52 | ||
53 | 2016-05-14 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
54 | ||
55 | * translation.py: allow space around '|' in .mode files really, we should just make all language pairs install modes.xml | |
56 | though | |
57 | ||
58 | 2016-04-29 Kevin Brubeck Unhammer <unhammer+dill@mm.st> | |
59 | ||
60 | * : Merge pull request #16 from wolfgangth/master Update apertiumlangs.sql | |
61 | ||
62 | 2016-04-29 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
63 | ||
64 | * missingdb.py: missingdb: close cursor each time as well | |
65 | ||
66 | 2016-04-29 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
67 | ||
68 | * missingdb.py, servlet.py, util.py: rewrote missingFreqs-handling | |
69 | to fix locking in its own class, all relevant variables kept there, only one | |
70 | function that commits, lock at the right spot | |
71 | ||
72 | 2016-04-03 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
73 | ||
74 | * servlet.py: getPairOrError callers expect None if error should close #14 | |
75 | ||
76 | 2016-03-01 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
77 | ||
78 | * tools/sanity-test-apy.py: dan→swe in tests | |
79 | ||
80 | 2016-02-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
81 | ||
82 | * ChangeLog, NEWS: NEWS with readable summaries; git2cl > ChangeLog should close #13 | |
83 | ||
84 | 2016-02-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
85 | ||
86 | * servlet.py: ANN: New APY release! New features include: - version number | |
87 | ||
88 | 2016-02-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
89 | ||
90 | * servlet.py: usecounts rather useless without uptime | |
91 | ||
92 | 2016-02-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
93 | ||
94 | * servlet.py: hopefully more readable, informative /stats | |
95 | ||
96 | 2016-02-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
97 | ||
98 | * servlet.py: send len(q), not q to logAfterTranslation the joy of dynamic typing | |
99 | ||
100 | 2016-02-03 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
101 | ||
102 | * : commit 57960f74a878d65cd1b500c79ac8f6da5b6a9c44 Author: Kevin | |
103 | Brubeck Unhammer <unhammer@fsfe.org> Date: Wed Feb 3 11:33:10 2016 | |
104 | +0100 | |
105 | ||
106 | 2016-02-02 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
107 | ||
108 | * : commit 0fe763d71eb00b6dd0c977c9813301d9fa621955 Author: Kevin | |
109 | Brubeck Unhammer <unhammer@fsfe.org> Date: Tue Feb 2 10:42:19 2016 | |
110 | +0100 | |
111 | ||
112 | 2016-02-01 Kevin Brubeck Unhammer <unhammer+dill@mm.st> | |
113 | ||
114 | * : Merge pull request #11 from danielmartinez/master Add option for unknown marks in translateDoc | |
115 | ||
116 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
117 | ||
118 | * servlet.py: no 'yield from' in py32 | |
119 | ||
120 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
121 | ||
122 | * : commit 77b2b1b1ff8923fef392d7bba0f53182a1a7cc09 Author: Kevin | |
123 | Brubeck Unhammer <unhammer@fsfe.org> Date: Mon Jan 18 11:03:14 | |
124 | 2016 +0100 | |
125 | ||
126 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
127 | ||
128 | * servlet.py: fix: /translatePage shouldn't block while fetching the | |
129 | page | |
130 | ||
131 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
132 | ||
133 | * servlet.py: let translatepagehandler use more of translatehandler | |
134 | ||
135 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
136 | ||
137 | * servlet.py, util.py: less redundant scalemt calls (get_status will | |
138 | DTRT if called after send_error) | |
139 | ||
140 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
141 | ||
142 | * t/run-tests: test for 400's | |
143 | ||
144 | 2016-01-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
145 | ||
146 | * t/run-tests: run_tests { run_test () } | |
147 | ||
148 | 2016-01-17 Sai Vineet <saivineet89@gmail.com> | |
149 | ||
150 | * : commit 5ffe1ce2a1fd5afe2825c31a6cb9357ff8b42ca3 Author: Sai | |
151 | Vineet <saivineet89@gmail.com> Date: Sun Jan 17 16:05:30 2016 | |
152 | +0530 | |
153 | ||
154 | 2016-01-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
155 | ||
156 | * servlet.py: warnings consistency | |
157 | ||
158 | 2016-01-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
159 | ||
160 | * translation.py: don't split if we're not going to use it | |
161 | ||
162 | 2016-01-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
163 | ||
164 | * tools/sanity-test-apy.py: arg-cat tests | |
165 | ||
166 | 2016-01-17 Sai Vineet <saivineet89@gmail.com> | |
167 | ||
168 | * servlet.py: Add more stats | |
169 | ||
170 | 2016-01-16 Kevin Brubeck Unhammer <unhammer+dill@mm.st> | |
171 | ||
172 | * : Merge pull request #9 from sabertooth-cat/master Add page translation capability :) (waiting a bit with the html-side | |
173 | of things) | |
174 | ||
175 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
176 | ||
177 | * servlet.py, translation.py: Fix splitting error | |
178 | ||
179 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
180 | ||
181 | * servlet.py, translation.py: Subclassed TranslateHandler | |
182 | ||
183 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
184 | ||
185 | * servlet.py: Modified TranslatePageHandler to subclass | |
186 | TranslateHandler | |
187 | ||
188 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
189 | ||
190 | * servlet.py, translation.py: Modified page translation feature to | |
191 | be asynchronous | |
192 | ||
193 | 2016-01-15 E <ethanchi@ethanchi.attlocal.net> | |
194 | ||
195 | * : Fix conflict in servlet.py | |
196 | ||
197 | 2016-01-14 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
198 | ||
199 | * servlet.py, translation.py, translation_py32.py: support both py32 | |
200 | and py35+ by splitting translation.py into two modules (try-except won't work, | |
201 | since py32 calls return-from-gen a SyntaxError on _reading_ the | |
202 | file; StopIteration deprecated in py35) | |
203 | ||
204 | 2016-01-14 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
205 | ||
206 | * README.org: travis: fix link | |
207 | ||
208 | 2016-01-13 E <ethanchi@ethanchi.attlocal.net> | |
209 | ||
210 | * servlet.py, translation.py: Add page translation capability | |
211 | ||
212 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
213 | ||
214 | * t/run-tests: show logs on error; clear before run | |
215 | ||
216 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
217 | ||
218 | * .travis.yml: travis: try to stay in source dir | |
219 | ||
220 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
221 | ||
222 | * .travis.yml: travis: no --user | |
223 | ||
224 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
225 | ||
226 | * README, README.org: travis img | |
227 | ||
228 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
229 | ||
230 | * .travis.yml: travis: lang python, pip install tornado | |
231 | ||
232 | 2016-01-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
233 | ||
234 | * .travis.yml: travis | |
235 | ||
236 | 2016-01-12 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
237 | ||
238 | * tools/sanity-test-apy.py: test for ara-mlt_translit | |
239 | ||
240 | 2016-01-12 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
241 | ||
242 | * tools/sanity-test-apy.py: test for dan-nno | |
243 | ||
244 | 2016-01-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
245 | ||
246 | * translation.py: comment the return-from-generator, since it looks | |
247 | a bit weird | |
248 | ||
249 | 2016-01-05 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
250 | ||
251 | * translation.py: Made compatible with Python <=3.3 Squashed from commit dd129f7ce3fe996566b97c3d90b5480e45c1fb8d Author: | |
252 | jatinluthra14 <jatinluthra14@gmail.com> Date: Tue Jan 5 20:05:22 | |
253 | 2016 +0530 see http://stackoverflow.com/a/16780113/69663 | |
254 | ||
255 | 2016-01-04 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
256 | ||
257 | * t/README: how to get data needed for t/run-tests | |
258 | ||
259 | 2016-01-04 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
260 | ||
261 | * t/run-tests: better installed-check | |
262 | ||
263 | 2016-01-04 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
264 | ||
265 | * t/run-tests: allow overriding data paths before running and better errors if data is simply not installed | |
266 | ||
267 | 2015-12-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
268 | ||
269 | * tools/apertium-viewer.html: translates as you type, gci work by | |
270 | ethan a chi | |
271 | ||
272 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
273 | ||
274 | * tools/apertiumlangs.sql: eus→eu | |
275 | ||
276 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
277 | ||
278 | * tools/apertiumlangs.sql: more crh | |
279 | ||
280 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
281 | ||
282 | * tools/apertiumlangs.sql: a few more kaz fixes | |
283 | ||
284 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
285 | ||
286 | * tools/apertiumlangs.sql: agh, kazakh | |
287 | ||
288 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
289 | ||
290 | * tools/apertiumlangs.sql: more langs | |
291 | ||
292 | 2015-11-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
293 | ||
294 | * tools/apertiumlangs.sql, tools/turkic.sql, util.py: some updates, | |
295 | including crh name in some Turkic languages | |
296 | ||
297 | 2015-10-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
298 | ||
299 | * translation.py: flush lrx-proc | |
300 | ||
301 | 2015-10-04 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
302 | ||
303 | * translation.py: don't keep pipes open for ca-oc@aran pair Seems this one keeps getting clogged, presumably transfer isn't | |
304 | outputting some <b pos=N/> | |
305 | ||
306 | 2015-09-25 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
307 | ||
308 | * t/run-tests: allow passing port as first arg | |
309 | ||
310 | 2015-09-25 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
311 | ||
312 | * translation.py: use flushing even if hfst-proc (lrx-proc still | |
313 | TODO) | |
314 | ||
315 | 2015-09-25 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
316 | ||
317 | * t/run-tests: don't run tests if port taken | |
318 | ||
319 | 2015-08-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
320 | ||
321 | * t/run-tests: extremely simple regression tests | |
322 | ||
323 | 2015-08-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
324 | ||
325 | * servlet.py: -old debug output | |
326 | ||
327 | 2015-08-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
328 | ||
329 | * servlet.py, util.py: much simplified GenerateHandler as well using | |
330 | translateSimple +cleanup | |
331 | ||
332 | 2015-08-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
333 | ||
334 | * servlet.py: much simplified AnalyzeHandler using translateSimple | |
335 | ||
336 | 2015-08-21 Jonathan Washington <jonathan.north.washington@gmail.com> | |
337 | ||
338 | * tools/apertiumlangs.sql: some Turkic name defaults | |
339 | ||
340 | 2015-08-21 Jonathan Washington <jonathan.north.washington@gmail.com> | |
341 | ||
342 | * tools/apertiumlangs.sql: a few qaraqalpaq additions | |
343 | ||
344 | 2015-08-21 Jonathan Washington <jonathan.north.washington@gmail.com> | |
345 | ||
346 | * tools/apertiumlangs.sql: copy-pasta fix | |
347 | ||
348 | 2015-08-21 Jonathan Washington <jonathan.north.washington@gmail.com> | |
349 | ||
350 | * tools/apertiumlangs.sql: uzbek names for some Turkic languages | |
351 | ||
352 | 2015-08-20 Jonathan Washington <jonathan.north.washington@gmail.com> | |
353 | ||
354 | * tools/apertiumlangs.sql: some language names in Kyrgyz | |
355 | ||
356 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
357 | ||
358 | * translation.py: tornado3: use gen.Task wrapper for translateSimple | |
359 | as well | |
360 | ||
361 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
362 | ||
363 | * toro.py: add a license header for toro | |
364 | ||
365 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
366 | ||
367 | * translation.py: read_until requires a callback in tornado3, | |
368 | gen.Task wrapper turns callbacks into yieldables | |
369 | ||
370 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
371 | ||
372 | * servlet.py, translation.py: use toro if no tornado.locks | |
373 | ||
374 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
375 | ||
376 | * toro.py: s/Queue/queue for py3 | |
377 | ||
378 | 2015-08-13 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
379 | ||
380 | * toro.py: __init__.py from 524fbe5b8f of | |
381 | https://github.com/ajdavis/toro/ | |
382 | ||
383 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
384 | ||
385 | * servlet.py: doc more | |
386 | ||
387 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
388 | ||
389 | * servlet.py, translation.py: --restart-pipe-after it has had that | |
390 | many requests Pipelines to be shut down now put in a holding area until there are | |
391 | no more users of that object, so we can restart high-traffic | |
392 | pipelines | |
393 | ||
394 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
395 | ||
396 | * servlet.py: --min-pipes-per-pair implemented e.g. to keep at least one pipeline per pair open | |
397 | ||
398 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
399 | ||
400 | * servlet.py, translation.py: implement --max-pipes-per-pair and | |
401 | --max-users-per-pipe Pipeline is now an object, subclasses: Simple~ and Flushing~ locks | |
402 | and use-count initialised in Pipeline FlushingPipeline opens pipes | |
403 | on init and closes on del translate is a method of a Pipeline http server has a list of pipelines per pair, default max-len 1, but | |
404 | e.g. -i3 does help with high loads even on my 3-core test machine | |
405 | ||
406 | 2015-08-11 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
407 | ||
408 | * servlet.py, translation.py: use tornado 4.2 instead of toro so that was easy | |
409 | ||
410 | 2015-08-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
411 | ||
412 | * servlet.py: bump lastUsage _before_ translating, decrease chances | |
413 | of concurrent requests running shutdownPair on us --max-idle-secs is still a bit dangerous in the case where a long | |
414 | request takes more time to process than max_idle_secs, but that | |
415 | value should hopefully be larger than the time taken to translate | |
416 | the largest possible request … Or we can keep a semaphore for each pair counting how many requests | |
417 | are waiting for it, and only shutdown if none are (but should switch | |
418 | from toro to tornado.lock first) | |
419 | ||
420 | 2015-08-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
421 | ||
422 | * servlet.py: default to -j1 since we don't want to default to | |
423 | eating all your ram Use -j0 (or -j -- ) to run one http server per core, but note that | |
424 | for each http server you will have all available language pairs in | |
425 | memory (and each pair uses around 7 processes). | |
426 | ||
427 | 2015-08-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
428 | ||
429 | * translation.py: put the dot/space found when splitting long | |
430 | strings at end of prev (not beginning of next) string | |
431 | ||
432 | 2015-08-01 Xavi Ivars <xavi.ivars@gmail.com> | |
433 | ||
434 | * translation.py: [apy] Fixing bug incorrectly splitting short | |
435 | strings | |
436 | ||
437 | 2015-08-01 Xavi Ivars <xavi.ivars@gmail.com> | |
438 | ||
439 | * translation.py: [apy] Fixing bug incorrectly splitting short | |
440 | strings | |
441 | ||
442 | 2015-07-11 Jonathan Washington <jonathan.north.washington@gmail.com> | |
443 | ||
444 | * tools/apertiumlangs.sql: some minor fixes to Turkic language names | |
445 | ||
446 | 2015-07-03 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
447 | ||
448 | * servlet.py: bail out if it doesn't seem we have a utf-8 locale | |
449 | ||
450 | 2015-04-27 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
451 | ||
452 | * tools/sanity-test-apy.py: skip some | |
453 | ||
454 | 2015-04-27 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
455 | ||
456 | * translation.py: each arg gets quoted individually (did this ever | |
457 | work?) | |
458 | ||
459 | 2015-03-19 Xavi Ivars <xavi.ivars@gmail.com> | |
460 | ||
461 | * servlet.py, tools/apertium-viewer.html, translation.py: [APY] | |
462 | Adding /pipedebug mode | |
463 | ||
464 | 2014-12-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
465 | ||
466 | * tools/apertium-viewer.html: apertium-viewer.html by GCI student | |
467 | Rap /pipedebug not implemented in apy yet, so this is only using a fake | |
468 | hardcoded response | |
469 | http://www.google-melange.com/gci/task/view/google/gci2014/6466660768677888 | |
470 | ||
471 | 2014-11-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
472 | ||
473 | * translation.py: hardbreakFn now sends up to PIPE_BUF bytes; | |
474 | rush-hour test checks if lock.locked() | |
475 | ||
476 | 2014-11-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
477 | ||
478 | * servlet.py: / redirect to wiki/apy | |
479 | ||
480 | 2014-11-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
481 | ||
482 | * servlet.py, translation.py: select/poll-style translation instead | |
483 | of threading We now yield a Future from a PipeIOStream in translate, using | |
484 | tornado.process.Subprocess instead of Popen. Depends on toro for the locking, so do "pip3 install toro" | |
485 | (unfortunately not in Debian yet). Much cleanup, e.g. startPipeline really belongs in translation.py; | |
486 | pipeline_cmds now contains the do_flush bool and commands string. Shows vmsize increases if verbosity>1. | |
487 | ||
488 | 2014-11-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
489 | ||
490 | * tools/README: deps for langNamesScraper | |
491 | ||
492 | 2014-11-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
493 | ||
494 | * README, servlet.py: servlet doesn't actually use lxml.etree | |
495 | ||
496 | 2014-11-05 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
497 | ||
498 | * COPYING: copying | |
499 | ||
500 | 2014-11-04 Xavi Ivars <xavi.ivars@gmail.com> | |
501 | ||
502 | * servlet.py, util.py: [apy] Better handling DB | |
503 | ||
504 | 2014-11-04 Xavi Ivars <xavi.ivars@gmail.com> | |
505 | ||
506 | * servlet.py: Only close the DB once | |
507 | ||
508 | 2014-10-21 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
509 | ||
510 | * servlet.py: let missingFreqs.db=None by default since it really | |
511 | shouldn't be required for apy to work | |
512 | ||
513 | 2014-10-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
514 | ||
515 | * tools/sanity-test-apy.py: test hin-urd, hbs-eng; timeout 15 | |
516 | ||
517 | 2014-10-17 Xavi Ivars <xavi.ivars@gmail.com> | |
518 | ||
519 | * servlet.py: [APY] New mode -M to keep unknown words in memory | |
520 | until a threshold is achieved (fix indent) | |
521 | ||
522 | 2014-10-15 Xavi Ivars <xavi.ivars@gmail.com> | |
523 | ||
524 | * util.py: [APY] New mode -M to keep unknown words in memory until a | |
525 | threshold is achieved (missing file) | |
526 | ||
527 | 2014-10-15 Xavi Ivars <xavi.ivars@gmail.com> | |
528 | ||
529 | * servlet.py: [APY] New mode -M to keep unknown words in memory | |
530 | until a threshold is achieved | |
531 | ||
532 | 2014-10-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
533 | ||
534 | * Makefile: rm -f | |
535 | ||
536 | 2014-10-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
537 | ||
538 | * Makefile: a clean target | |
539 | ||
540 | 2014-10-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
541 | ||
542 | * translation.py: https://sourceforge.net/p/apertium/tickets/45/ | |
543 | don't flush with lrx-proc yet | |
544 | ||
545 | 2014-10-01 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
546 | ||
547 | * tools/sanity-test-apy.py: exit(1) if anything failed | |
548 | ||
549 | 2014-09-16 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
550 | ||
551 | * servlet.py: simply remove async_callback | |
552 | https://github.com/tornadoweb/tornado/blob/branch3.1/tornado/web.py#L1036 says async_callback is Obsolete and unnecessary since Tornado 1.1 | |
553 | ||
554 | 2014-09-07 Xavi Ivars <xavi.ivars@gmail.com> | |
555 | ||
556 | * util.py: [fix] Real IP behind a proxy | |
557 | ||
558 | 2014-08-18 Sushain Cherivirala <sushain@skc.name> | |
559 | ||
560 | * servlet.py: odt support | |
561 | ||
562 | 2014-08-05 Sushain Cherivirala <sushain@skc.name> | |
563 | ||
564 | * servlet.py: check extracted file list if detected as | |
565 | application/zip | |
566 | ||
567 | 2014-08-03 Sushain Cherivirala <sushain@skc.name> | |
568 | ||
569 | * servlet.py: break | |
570 | ||
571 | 2014-08-01 Sushain Cherivirala <sushain@skc.name> | |
572 | ||
573 | * servlet.py: mimetype > xdg-mime > file | |
574 | ||
575 | 2014-08-01 Sushain Cherivirala <sushain@skc.name> | |
576 | ||
577 | * servlet.py, translation.py: fixes | |
578 | ||
579 | 2014-07-28 Sushain Cherivirala <sushain@skc.name> | |
580 | ||
581 | * servlet.py: Better file command parsing | |
582 | ||
583 | 2014-07-28 Sushain Cherivirala <sushain@skc.name> | |
584 | ||
585 | * servlet.py: Switch errors | |
586 | ||
587 | 2014-07-27 Sushain Cherivirala <sushain@skc.name> | |
588 | ||
589 | * servlet.py: Add download headers to /translateDoc | |
590 | ||
591 | 2014-07-27 Sushain Cherivirala <sushain@skc.name> | |
592 | ||
593 | * servlet.py, translation.py: Use file command to detect MIME type | |
594 | ||
595 | 2014-07-27 Sushain Cherivirala <sushain@skc.name> | |
596 | ||
597 | * servlet.py, translation.py: Functional document translation -- | |
598 | still needs threading and logging | |
599 | ||
600 | 2014-07-26 Sushain Cherivirala <sushain@skc.name> | |
601 | ||
602 | * servlet.py, translation.py: cleanup | |
603 | ||
604 | 2014-07-26 Sushain Cherivirala <sushain@skc.name> | |
605 | ||
606 | * servlet.py: Start on document translation support | |
607 | ||
608 | 2014-07-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
609 | ||
610 | * tools/apertiumlangs.sql: better ʻ | |
611 | ||
612 | 2014-07-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
613 | ||
614 | * tools/apertiumlangs.sql: a couple kaa names | |
615 | ||
616 | 2014-07-24 Sushain Cherivirala <sushain@skc.name> | |
617 | ||
618 | * servlet.py: '?mode' -> '?lang', '/coverage' -> '/calcCoverage' NOT | |
619 | BACKWARDS COMPATIBILITY | |
620 | ||
621 | 2014-07-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
622 | ||
623 | * servlet.py: possibly stop complaining about unicode chars in | |
624 | source code | |
625 | ||
626 | 2014-07-06 Xavi Ivars <xavi.ivars@gmail.com> | |
627 | ||
628 | * servlet.py, translation.py: Code review feedback | |
629 | ||
630 | 2014-07-06 Xavi Ivars <xavi.ivars@gmail.com> | |
631 | ||
632 | * keys.py, servlet.py, translation.py, util.py: [Softcatalà] | |
633 | Integrate ScaleMT-like logs into APY | |
634 | ||
635 | 2014-06-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
636 | ||
637 | * servlet.py: pretty sure a star before a space can't be an unknown | |
638 | word mark, that'd only work if spaces were in <alphabet> this should hopefully fix the overly greedy noteUnknownTokens | |
639 | ||
640 | 2014-06-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
641 | ||
642 | * tools/sanity-test-apy.py: some output while running so it doesn't | |
643 | seem like we've stalled | |
644 | ||
645 | 2014-06-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
646 | ||
647 | * tools/langNamesScraper.py: s/unicode/langNames/ | |
648 | ||
649 | 2014-06-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
650 | ||
651 | * Makefile: WARNING: unicode.db now called langNames.db (fix your | |
652 | apy startup scripts) | |
653 | ||
654 | 2014-06-27 Sushain Cherivirala <sushain@skc.name> | |
655 | ||
656 | * Makefile, servlet.py, util.py: Add missing freqs database (ticket | |
657 | #30) | |
658 | ||
659 | 2014-06-26 Sushain Cherivirala <sushain@skc.name> | |
660 | ||
661 | * servlet.py, util.py: formatting/style changes | |
662 | ||
663 | 2014-06-19 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
664 | ||
665 | * tools/systemd/README, tools/systemd/apy.service: systemd example | |
666 | service file | |
667 | ||
668 | 2014-06-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
669 | ||
670 | * servlet.py, translation.py: use -f html-noent instead of -f html | |
671 | ||
672 | 2014-06-18 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
673 | ||
674 | * servlet.py: rename confusing fn name | |
675 | ||
676 | 2014-06-17 Sushain Cherivirala <sushain@skc.name> | |
677 | ||
678 | * servlet.py: Make all modes accept ISO 639-1 codes | |
679 | ||
680 | 2014-06-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
681 | ||
682 | * servlet.py: /listPairs?include_deprecated_codes to include | |
683 | two-letter codes in output | |
684 | ||
685 | 2014-06-17 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
686 | ||
687 | * servlet.py: silently fail to fail if client passes two-letter | |
688 | codes to /translate | |
689 | ||
690 | 2014-06-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
691 | ||
692 | * translation.py: -unused fluff | |
693 | ||
694 | 2014-06-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
695 | ||
696 | * servlet.py: stripUnknownMarks in a fn | |
697 | ||
698 | 2014-06-09 Sushain Cherivirala <sushain@skc.name> | |
699 | ||
700 | * servlet.py: sub-optimal implementation of markUnknown for | |
701 | /translate | |
702 | ||
703 | 2014-06-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
704 | ||
705 | * tools/apertiumlangs.sql: -چا to چە | |
706 | ||
707 | 2014-06-05 Jonathan Washington <jonathan.north.washington@gmail.com> | |
708 | ||
709 | * tools/apertiumlangs.sql: db updates | |
710 | ||
711 | 2014-06-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
712 | ||
713 | * tools/turkic.sql: uy>ug | |
714 | ||
715 | 2014-05-30 Jonathan Washington <jonathan.north.washington@gmail.com> | |
716 | ||
717 | * tools/apertiumlangs.sql: reversed | |
718 | ||
719 | 2014-05-30 Jonathan Washington <jonathan.north.washington@gmail.com> | |
720 | ||
721 | * tools/apertiumlangs.sql, tools/turkic.sql: updated databases or | |
722 | something | |
723 | ||
724 | 2014-05-15 Sushain Cherivirala <sushain@skc.name> | |
725 | ||
726 | * Makefile: don't try deleting non-existent unicode.db | |
727 | ||
728 | 2014-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
729 | ||
730 | * servlet.py: def isFlushable in case other cmds are unflushable as | |
731 | well | |
732 | ||
733 | 2014-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
734 | ||
735 | * servlet.py, translation.py: No flushing for hfst-proc pairs | |
736 | (restart on each translation) Still uses a lock, so should at least not spawn a several processes | |
737 | per pipeline at once. Entries in BaseHandler.pipelines now have a third arg do_flush Closes https://sourceforge.net/p/apertium/tickets/22/ | |
738 | ||
739 | 2014-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
740 | ||
741 | * servlet.py: start pipeline (if necessary) in the _worker, since | |
742 | this is the function that starts translation | |
743 | ||
744 | 2014-05-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
745 | ||
746 | * servlet.py: cleanPairs/notePairUsage only used by /translate, | |
747 | don't need to be in BaseHandler | |
748 | ||
749 | 2014-05-13 Sushain Cherivirala <sushain@skc.name> | |
750 | ||
751 | * servlet.py: attempt to fix gen.coroutine import error | |
752 | ||
753 | 2014-05-13 Sushain Cherivirala <sushain@skc.name> | |
754 | ||
755 | * servlet.py: python 3.3 doesn't like generators with return None | |
756 | ||
757 | 2014-05-09 Sushain Cherivirala <sushain@skc.name> | |
758 | ||
759 | * servlet.py: 400 -> 408 HTTP errors | |
760 | ||
761 | 2014-05-09 Sushain Cherivirala <sushain@skc.name> | |
762 | ||
763 | * servlet.py: Enable CORS | |
764 | ||
765 | 2014-05-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
766 | ||
767 | * servlet.py: json error responses, closes | |
768 | https://sourceforge.net/p/apertium/tickets/19/ | |
769 | ||
770 | 2014-05-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
771 | ||
772 | * servlet.py: fixed: /listPairs?q=adsf was treated as /listPairs | |
773 | ||
774 | 2014-05-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
775 | ||
776 | * servlet.py: enable_pretty_logging doesn't seem to work if | |
777 | logging's been used once already | |
778 | ||
779 | 2014-05-08 Sushain Cherivirala <sushain@skc.name> | |
780 | ||
781 | * servlet.py: Make /analyze, /generate, /perWord and /coverage | |
782 | non-blocking | |
783 | ||
784 | 2014-05-08 Sushain Cherivirala <sushain@skc.name> | |
785 | ||
786 | * servlet.py: Use CLD for language detection and fallback to naive | |
787 | approach if CLD not installed | |
788 | ||
789 | 2014-05-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
790 | ||
791 | * servlet.py: /stats and -mN to shut down pipelines that haven't | |
792 | been used in the last N secs (not too useful, but was so easy to | |
793 | implement …) | |
794 | ||
795 | 2014-05-07 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
796 | ||
797 | * tools/sanity-test-apy.py: more tests | |
798 | ||
799 | 2014-05-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
800 | ||
801 | * tools/sanity-test-apy.py: output some stuff | |
802 | ||
803 | 2014-05-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
804 | ||
805 | * tools/sanity-test-apy.py: more tests, still just kaz-tat/sme-nob | |
806 | failing | |
807 | ||
808 | 2014-05-06 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
809 | ||
810 | * tools/sanity-test-apy.py: insanity-untest | |
811 | ||
812 | 2014-04-28 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
813 | ||
814 | * Makefile: rm unicode.db before running sqlite in case it already | |
815 | exists | |
816 | ||
817 | 2014-04-11 Francis Tyers <ftyers@users.noreply.github.com> | |
818 | ||
819 | * tools/apertiumlangs.sql: add some avar translations | |
820 | ||
821 | 2014-04-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
822 | ||
823 | * servlet.py: return 400 on modefile parse fail (better than | |
824 | nothing) | |
825 | ||
826 | 2014-04-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
827 | ||
828 | * servlet.py, translation.py: per-pipeline locks! zomg. | |
829 | ||
830 | 2014-04-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
831 | ||
832 | * servlet.py: doh! use sendResponse, not just finish, got wrong | |
833 | headers etc | |
834 | ||
835 | 2014-04-09 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
836 | ||
837 | * servlet.py, translation.py: Start translate() in a separate thread | |
838 | while still reusing pipelines, using ThreadableMixin Made it split on fairly short segments, might want to tweak | |
839 | "hardbreak" constants | |
840 | ||
841 | 2014-04-09 Sushain Cherivirala <sushain@skc.name> | |
842 | ||
843 | * servlet.py: Non-blocking requests | |
844 | ||
845 | 2014-04-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
846 | ||
847 | * servlet.py: wops :) | |
848 | ||
849 | 2014-04-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
850 | ||
851 | * modeSearch.py, servlet.py: Main arg to servlet.py now includes | |
852 | _all_ .mode files under that path. This now finds all my 88 /usr/local/share/apertium/modes, vs only 14 | |
853 | before … To specify a path for non-pair modes only, pass the | |
854 | -s/--nonpairs-path argument: ./servlet.py /path/to/include/all/modes/from -s | |
855 | /path/to/include/tagger/analyser/generator/modes/from Also has real symlink loop checking. Pass -v2 to show the discovered modes on startup. | |
856 | ||
857 | 2014-04-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
858 | ||
859 | * Makefile: how to make unicode.db, rather important … | |
860 | ||
861 | 2014-04-07 Sushain Cherivirala <sushain@skc.name> | |
862 | ||
863 | * util.py: oops, precedence rules are weird | |
864 | ||
865 | 2014-04-07 Sushain Cherivirala <sushain@skc.name> | |
866 | ||
867 | * modeSearch.py, util.py: Support variations in mode search | |
868 | ||
869 | 2014-04-07 Francis Tyers <ftyers@users.noreply.github.com> | |
870 | ||
871 | * modeSearch.py: add support for finding variants | |
872 | ||
873 | 2014-04-05 selimcan <selimcan@72bbbca6-d526-0410-a7d9-f06f51895060> | |
874 | ||
875 | * tools/apertiumlangs.sql: add tatar words for 'english' and | |
876 | 'spanish' | |
877 | ||
878 | 2014-04-05 Francis Tyers <ftyers@users.noreply.github.com> | |
879 | ||
880 | * tools/apertiumlangs.sql: add new langs in aragonese | |
881 | ||
882 | 2014-04-04 Sushain Cherivirala <sushain@skc.name> | |
883 | ||
884 | * tools/apertiumlangs.sql: Regenerate language name DB | |
885 | ||
886 | 2014-04-04 Sushain Cherivirala <sushain@skc.name> | |
887 | ||
888 | * tools/langNamesScraper.py: Fix bug with language list generation, | |
889 | add some languages to list manually | |
890 | ||
891 | 2014-04-04 Francis Tyers <ftyers@users.noreply.github.com> | |
892 | ||
893 | * tools/apertiumlangs.sql: new langs | |
894 | ||
895 | 2014-02-02 Jonathan Washington <jonathan.north.washington@gmail.com> | |
896 | ||
897 | * modeSearch.py, translation.py: linux line breaks on all py files | |
898 | ||
899 | 2014-02-02 Jonathan Washington <jonathan.north.washington@gmail.com> | |
900 | ||
901 | * util.py: vim modeline | |
902 | ||
903 | 2014-02-02 Sushain Cherivirala <sushain@skc.name> | |
904 | ||
905 | * util.py: Fix indentation and add error message | |
906 | ||
907 | 2014-02-02 Jonathan Washington <jonathan.north.washington@gmail.com> | |
908 | ||
909 | * util.py: keep global track of dbConn | |
910 | ||
911 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
912 | ||
913 | * tools/apertiumlangs.sql: fixes: az in tt, ky in ru | |
914 | ||
915 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
916 | ||
917 | * tools/apertiumlangs.sql: Tatar names for languages | |
918 | ||
919 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
920 | ||
921 | * tools/sysvinit/apy.sh: updated script, points to new db | |
922 | ||
923 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
924 | ||
925 | * tools/apertiumlangs.sql: line to update languageNames table with | |
926 | fixes table | |
927 | ||
928 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
929 | ||
930 | * tools/apertiumlangs.sql: fixes table | |
931 | ||
932 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
933 | ||
934 | * tools/apertiumlangs.sql: apertium languages, gonna fuċ more later | |
935 | ||
936 | 2014-02-01 Jonathan Washington <jonathan.north.washington@gmail.com> | |
937 | ||
938 | * tools/turkic.sql: two tables | |
939 | ||
940 | 2014-01-26 Jonathan Washington <jonathan.north.washington@gmail.com> | |
941 | ||
942 | * tools/turkic.sql: a few names for Chuvash | |
943 | ||
944 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
945 | ||
946 | * modeSearch.py: Follow symlinks when searching for modes Impose | |
947 | limit on search depth | |
948 | ||
949 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
950 | ||
951 | * modeSearch.py, servlet.py, tools.py, tools/langNamesScraper.py, | |
952 | util.py: Add a few ISO code mappings Make lang name scraper depend | |
953 | on util for ISO code conversion Rename tools.py to util.py | |
954 | ||
955 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
956 | ||
957 | * modeSearch.py, translation.py: Optimize mode search performance | |
958 | (even more) | |
959 | ||
960 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
961 | ||
962 | * modeSearch.py: Optimize mode search performance (more) | |
963 | ||
964 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
965 | ||
966 | * modeSearch.py: Optimize mode search performance (hopefully) | |
967 | ||
968 | 2014-01-25 Sushain Cherivirala <sushain@skc.name> | |
969 | ||
970 | * modeSearch.py, servlet.py: Improve mode searching functionality | |
971 | (walk through all dirs recursively) | |
972 | ||
973 | 2014-01-24 Sushain Cherivirala <sushain@skc.name> | |
974 | ||
975 | * tools/langNamesScraper.py: Make usage appear only when no | |
976 | arguments provided | |
977 | ||
978 | 2014-01-23 Sushain Cherivirala <sushain@skc.name> | |
979 | ||
980 | * modeSearch.py, servlet.py, tools.py: Regularize all APY language | |
981 | listings to 3 alpha language codes | |
982 | ||
983 | 2014-01-20 Jonathan Washington <jonathan.north.washington@gmail.com> | |
984 | ||
985 | * tools/turkic.sql: use ша/ше, not тілі :\ | |
986 | ||
987 | 2014-01-20 Jonathan Washington <jonathan.north.washington@gmail.com> | |
988 | ||
989 | * translation.py: extra DEBUG lines, but all commented out | |
990 | ||
991 | 2014-01-15 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
992 | ||
993 | * servlet.py: removed unused constant | |
994 | ||
995 | 2014-01-14 Jonathan Washington <jonathan.north.washington@gmail.com> | |
996 | ||
997 | * tools/sysvinit/apy.sh: init script | |
998 | ||
999 | 2014-01-14 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1000 | ||
1001 | * servlet.py: logging | |
1002 | ||
1003 | 2014-01-14 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1004 | ||
1005 | * servlet.py: well ioloop, if you're too spineless to kill them I'll | |
1006 | have to do it myself | |
1007 | ||
1008 | 2014-01-13 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1009 | ||
1010 | * tools/turkic.sql: Kyrgyz and Karakalpak spelling in English | |
1011 | ||
1012 | 2014-01-12 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1013 | ||
1014 | * servlet.py: signal stuff | |
1015 | ||
1016 | 2014-01-12 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1017 | ||
1018 | * tools/turkic.sql: bak in bak | |
1019 | ||
1020 | 2014-01-11 Sushain Cherivirala <sushain@skc.name> | |
1021 | ||
1022 | * servlet.py: Prevent APY 500 on empty text for /identifyLang and | |
1023 | /coverage | |
1024 | ||
1025 | 2014-01-11 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1026 | ||
1027 | * tools/turkic.sql: bak = ba | |
1028 | ||
1029 | 2014-01-11 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1030 | ||
1031 | * tools/turkic.sql: bak, kum | |
1032 | ||
1033 | 2014-01-09 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1034 | ||
1035 | * tools/turkic.sql: some fixes that shouldn't've been necessary. | |
1036 | native speakers need to go through and clean this stuff up :( | |
1037 | ||
1038 | 2014-01-09 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1039 | ||
1040 | * tools/langNamesScraper.py, tools/turkic.sql: Apertium language | |
1041 | names in Turkic languages, English, and Russian Proper header for | |
1042 | langNamesScraper.py | |
1043 | ||
1044 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1045 | ||
1046 | * tools/upstart/apertium-apy.conf: fixed logging | |
1047 | ||
1048 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1049 | ||
1050 | * tools/upstart/apertium-all.conf, | |
1051 | tools/upstart/apertium-apy-gateway.conf, | |
1052 | tools/upstart/apertium-apy.conf, | |
1053 | tools/upstart/apertium-html-tools.conf: updated so apertium-all | |
1054 | controls stuff | |
1055 | ||
1056 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1057 | ||
1058 | * tools/upstart/apertium-all.conf: oh, and this too | |
1059 | ||
1060 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1061 | ||
1062 | * tools/upstart/apertium-apy-gateway.conf, | |
1063 | tools/upstart/apertium-apy.conf, | |
1064 | tools/upstart/apertium-html-tools.conf: new scripts | |
1065 | ||
1066 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1067 | ||
1068 | * tools/upstart/apertium-all.conf, | |
1069 | tools/upstart/apertium-apy-gateway.conf, | |
1070 | tools/upstart/apertium-apy.conf, | |
1071 | tools/upstart/apertium-html-tools.conf: logging added | |
1072 | ||
1073 | 2014-01-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1074 | ||
1075 | * tools/upstart/apertium, tools/upstart/apertium-all.conf, | |
1076 | tools/upstart/apertium-apy-gateway.conf, | |
1077 | tools/upstart/apertium-apy.conf, | |
1078 | tools/upstart/apertium-html-tools.conf: all apertium upstart configs | |
1079 | ||
1080 | 2014-01-07 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1081 | ||
1082 | * tools/upstart/apertium-apy-gateway.conf, | |
1083 | tools/upstart/apertium-apy.conf: upstart scripts | |
1084 | ||
1085 | 2014-01-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1086 | ||
1087 | * langNamesScraper.py, tools/langNamesScraper.py: here's the right | |
1088 | one | |
1089 | ||
1090 | 2014-01-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1091 | ||
1092 | * tools.py, tools/lgNameScraper.py: GAAH | |
1093 | ||
1094 | 2014-01-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1095 | ||
1096 | * tools.py, tools/lgNameScraper.py: hmm | |
1097 | ||
1098 | 2014-01-04 Sushain Cherivirala <sushain@skc.name> | |
1099 | ||
1100 | * tools.py: Fix bug in /getLocalizedLanguages in APY when both two | |
1101 | and three character code for same language are requested | |
1102 | ||
1103 | 2014-01-04 Sushain Cherivirala <sushain@skc.name> | |
1104 | ||
1105 | * gateway.py: Simplify APY gateway routing and fix bug with | |
1106 | /getLocale | |
1107 | ||
1108 | 2014-01-03 Sushain Cherivirala <sushain@skc.name> | |
1109 | ||
1110 | * gateway.py: Switch APY Gateway load balancer | |
1111 | ||
1112 | 2014-01-02 Sushain Cherivirala <sushain@skc.name> | |
1113 | ||
1114 | * gateway.py: Fix bug with translation request routing in APY | |
1115 | gateway | |
1116 | ||
1117 | 2014-01-01 Sushain Cherivirala <sushain@skc.name> | |
1118 | ||
1119 | * servlet.py: Restore compatibility with Python 3.2 | |
1120 | ||
1121 | 2013-12-31 Ng Wei En <wei2912@users.noreply.github.com> | |
1122 | ||
1123 | * servlet.py, tools.py: Fix high memory usage issue. | |
1124 | ||
1125 | 2013-12-30 Sushain Cherivirala <sushain@skc.name> | |
1126 | ||
1127 | * gateway.py, tools.py: Make Fastest paradigm balancer work with all | |
1128 | APY modes Fix minor bugs in gateway request handling (evident on | |
1129 | 304s) Fix minor bugs in APY coverage mode | |
1130 | ||
1131 | 2013-12-28 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1132 | ||
1133 | * gateway.py, serverlist-example: Added example serverlist, | |
1134 | commented what needs to be done for gateway-scaleMT compatibility | |
1135 | ||
1136 | 2013-12-28 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1137 | ||
1138 | * gateway.py: Beginning support for scaleMT | |
1139 | ||
1140 | 2013-12-28 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1141 | ||
1142 | * gateway.py: Fixed /perWord | |
1143 | ||
1144 | 2013-12-27 Sushain Cherivirala <sushain@skc.name> | |
1145 | ||
1146 | * gateway.py: Improve Fastest paradigm load balancer in APY gateway | |
1147 | ||
1148 | 2013-12-27 Sushain Cherivirala <sushain@skc.name> | |
1149 | ||
1150 | * gateway.py: Make some APY gateway balancers compatible with | |
1151 | changes to get_server call | |
1152 | ||
1153 | 2013-12-27 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1154 | ||
1155 | * gateway.py: Fixed JS bug | |
1156 | ||
1157 | 2013-12-27 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1158 | ||
1159 | * gateway.py: Changed structure of capabilities dictionary, more | |
1160 | documentation | |
1161 | ||
1162 | 2013-12-27 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1163 | ||
1164 | * gateway.py: Smarter balancing -- going for servers with the | |
1165 | correct langpairs | |
1166 | ||
1167 | 2013-12-26 Sushain Cherivirala <sushain@skc.name> | |
1168 | ||
1169 | * servlet.py, tools.py: Add coverage mode to APY Add preliminary | |
1170 | language identification mode to APY | |
1171 | ||
1172 | 2013-12-25 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1173 | ||
1174 | * gateway.py: Resolved the conflict, moved things around | |
1175 | ||
1176 | 2013-12-25 Sushain Cherivirala <sushain@skc.name> | |
1177 | ||
1178 | * gateway.py, servlet.py: Add Fastest paradigm load balancer for APY | |
1179 | Make pool terminate properly to prevent blocking | |
1180 | ||
1181 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1182 | ||
1183 | * servlet.py, translation.py: Modularize translation functionality | |
1184 | in APY Make translation calls asynchronous with timeout Handle | |
1185 | invalid translation pair errors properly (not in translateSplitting | |
1186 | and with 400) | |
1187 | ||
1188 | 2013-12-23 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1189 | ||
1190 | * gateway.py: now dropping dead servers, checking against empty | |
1191 | serverlists | |
1192 | ||
1193 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1194 | ||
1195 | * servlet.py: Fix bug in APY translate mode from port to Tornado | |
1196 | ||
1197 | 2013-12-23 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1198 | ||
1199 | * gateway.py: forwarding request headers | |
1200 | ||
1201 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1202 | ||
1203 | * servlet.py: Reject /getLocale requests without proper headers in | |
1204 | APY Send back proper HTTP 408 response for timed out requests | |
1205 | ||
1206 | 2013-12-23 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1207 | ||
1208 | * gateway.py: should keep going now | |
1209 | ||
1210 | 2013-12-23 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1211 | ||
1212 | * gateway.py: if our port's being used, use the next | |
1213 | ||
1214 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1215 | ||
1216 | * gateway.py: Change server list file format and add debug mode to | |
1217 | APY gateway Remove empty folder | |
1218 | ||
1219 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1220 | ||
1221 | * langNamesScraper.py: Move language name scraper | |
1222 | ||
1223 | 2013-12-23 Sushain Cherivirala <sushain@skc.name> | |
1224 | ||
1225 | * gateway.py: Generalize balancer class in APY gateway Use generator | |
1226 | for Round Robin balancer Add Least Connections balancer (incomplete) | |
1227 | Add Random balancer Improve logging Add testing interval support | |
1228 | ||
1229 | 2013-12-22 Sushain Cherivirala <sushain@skc.name> | |
1230 | ||
1231 | * gateway.py: Add a few preliminary tests to APY Gateway | |
1232 | ||
1233 | 2013-12-22 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1234 | ||
1235 | * servlet.py: Compatibility with python3.2 and earlier by not using | |
1236 | with statements | |
1237 | ||
1238 | 2013-12-22 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1239 | ||
1240 | * gateway.py, servlet.py: including support for tornado 3.1.1, a bit | |
1241 | for python3.2 | |
1242 | ||
1243 | 2013-12-22 Sushain Cherivirala <sushain@skc.name> | |
1244 | ||
1245 | * gateway.py, servlet.py: Modify APY gateway arguments Add | |
1246 | preliminary testing functionality to gateway Remove unnecessary | |
1247 | self.finish() in post handlers | |
1248 | ||
1249 | 2013-12-21 Sushain Cherivirala <sushain@skc.name> | |
1250 | ||
1251 | * modeSearch.py, servlet.py, tools.py: Make more parts of APY | |
1252 | asynchronous Make APY arguments more user-friendly Allow APY to run | |
1253 | with multiple request serving threads Start modularizing APY | |
1254 | ||
1255 | 2013-12-21 Sushain Cherivirala <sushain@skc.name> | |
1256 | ||
1257 | * servlet.py: Add alternative approaches to /perWord output in APY | |
1258 | Fix response headers Improve logging | |
1259 | ||
1260 | 2013-12-21 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1261 | ||
1262 | * gateway.py: With headers now | |
1263 | ||
1264 | 2013-12-21 ericye16 <ericye16@72bbbca6-d526-0410-a7d9-f06f51895060> | |
1265 | ||
1266 | * gateway.py: a very unintelligent round-robin gateway | |
1267 | ||
1268 | 2013-12-21 Sushain Cherivirala <sushain@skc.name> | |
1269 | ||
1270 | * servlet.py: Make analyzation and generation in APY non-blocking | |
1271 | ||
1272 | 2013-12-20 Sushain Cherivirala <sushain@skc.name> | |
1273 | ||
1274 | * servlet.py: Clean up BaseHandler in APY Add pos argument to | |
1275 | /perWords in APY | |
1276 | ||
1277 | 2013-12-20 Sushain Cherivirala <sushain@skc.name> | |
1278 | ||
1279 | * servlet.py: Improve /perWord functionality in APY | |
1280 | ||
1281 | 2013-12-20 Sushain Cherivirala <sushain@skc.name> | |
1282 | ||
1283 | * servlet.py: Make it clearer when APY launches Prevent extra token | |
1284 | in /perWord requests Fix SSL support bug | |
1285 | ||
1286 | 2013-12-19 Sushain Cherivirala <sushain@skc.name> | |
1287 | ||
1288 | * servlet.py: Fix bugs in APY | |
1289 | ||
1290 | 2013-12-19 Francis Tyers <ftyers@users.noreply.github.com> | |
1291 | ||
1292 | * servlet.py: updates | |
1293 | ||
1294 | 2013-12-19 Sushain Cherivirala <sushain@skc.name> | |
1295 | ||
1296 | * servlet.py: Port Apertium APY to Tornado web framework | |
1297 | ||
1298 | 2013-12-19 Sushain Cherivirala <sushain@skc.name> | |
1299 | ||
1300 | * servlet.py: Simplify output of list analyzers/generators/taggers | |
1301 | in APY Fix bug with POST to APY | |
1302 | ||
1303 | 2013-12-18 Sushain Cherivirala <sushain@skc.name> | |
1304 | ||
1305 | * servlet.py: Add alias for /list?q=taggers and fix input value in | |
1306 | /perWords outputs | |
1307 | ||
1308 | 2013-12-18 Sushain Cherivirala <sushain@skc.name> | |
1309 | ||
1310 | * servlet.py: Condense list URL mappings Add perWord URL mapping | |
1311 | with functionality General cleanup Search for taggers in Apertium | |
1312 | path | |
1313 | ||
1314 | 2013-12-17 Sushain Cherivirala <sushain@skc.name> | |
1315 | ||
1316 | * servlet.py: Add translation by word support to APY | |
1317 | ||
1318 | 2013-12-15 Sushain Cherivirala <sushain@skc.name> | |
1319 | ||
1320 | * servlet.py: Make html-tools translation interface auto-detect | |
1321 | locale correctly | |
1322 | ||
1323 | 2013-12-14 Sushain Cherivirala <sushain@skc.name> | |
1324 | ||
1325 | * servlet.py: Make localized language names database optional | |
1326 | argument Allow ISO 639-2 codes in localized language name requests | |
1327 | ||
1328 | 2013-12-12 Sushain Cherivirala <sushain@skc.name> | |
1329 | ||
1330 | * servlet.py: Add HTTP access to APY localized language names | |
1331 | functionality | |
1332 | ||
1333 | 2013-12-10 Sushain Cherivirala <sushain@skc.name> | |
1334 | ||
1335 | * servlet.py: Improve Unicode.org language name scraper and APY | |
1336 | localized language function | |
1337 | ||
1338 | 2013-12-10 Sushain Cherivirala <sushain@skc.name> | |
1339 | ||
1340 | * servlet.py: Add localized languages function to APY Add | |
1341 | Unicode.org localized language name scraper | |
1342 | ||
1343 | 2013-12-08 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1344 | ||
1345 | * servlet.py: support en_APERTIUM | |
1346 | ||
1347 | 2013-12-07 Sushain Cherivirala <sushain@skc.name> | |
1348 | ||
1349 | * servlet.py: Add SSL support to Apertium APY | |
1350 | ||
1351 | 2013-12-04 Sushain Cherivirala <sushain@skc.name> | |
1352 | ||
1353 | * servlet.py: Fix morphological analyzer mode detection regexp in | |
1354 | APY | |
1355 | ||
1356 | 2013-12-01 Sushain Cherivirala <sushain@skc.name> | |
1357 | ||
1358 | * servlet.py: Make morphological generator work for single input | |
1359 | without ^...$ | |
1360 | ||
1361 | 2013-12-01 Sushain Cherivirala <sushain@skc.name> | |
1362 | ||
1363 | * servlet.py: Add preliminary morphological analyzer and generator | |
1364 | support to simple-html interface | |
1365 | ||
1366 | 2013-12-01 Sushain Cherivirala <sushain@skc.name> | |
1367 | ||
1368 | * servlet.py: Add list generators/analyzers functions to APY | |
1369 | ||
1370 | 2013-12-01 Sushain Cherivirala <sushain@skc.name> | |
1371 | ||
1372 | * servlet.py: Add morphological analysis and generation support to | |
1373 | APY | |
1374 | ||
1375 | 2013-10-10 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1376 | ||
1377 | * servlet.py: Threading request handler, to handle multiple users Uses TCPServer inheriting ThreadingMixIn. A lock on translateMode | |
1378 | (which has to have at most one thread per pipeline) ensures that | |
1379 | part stays single-threaded (to avoid Alice getting Bob's text). http://stackoverflow.com/a/487281/69663 recommends select/polling | |
1380 | over threading (http://docs.python.org/3.3/library/socketserver.html | |
1381 | for diffs) but requires either lots of manually written dispatching | |
1382 | code (http://pymotw.com/2/select/) or a framework like Twisted. Try testing with e.g. python3 servlet "$APERTIUMPATH" 2737 & curl -s --data-urlencode 'langpair=nb|nn' --data-urlencode \ | |
1383 | 'q@/tmp/reallybigfile' 'http://localhost:2737/translate' | |
1384 | >/tmp/output & curl 'http://localhost:2737/translate?langpair=nb|nn&q=men+ikke+den' | |
1385 | curl 'http://localhost:2737/translate?langpair=nb|nn&q=men+ikke+den' | |
1386 | curl 'http://localhost:2737/translate?langpair=nb|nn&q=men+ikke+den' And see how the last three (after a slight wait) start outputting | |
1387 | before the first request is done. | |
1388 | ||
1389 | 2013-09-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1390 | ||
1391 | * servlet.py: doh, typo in do_GET | |
1392 | ||
1393 | 2013-09-20 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1394 | ||
1395 | * servlet.py: split up big requests before sending to pipeline so we | |
1396 | don't fill up buffers This doesn't handle parallel requests, simply to avoid deadlocking | |
1397 | on read/write. Also, support POST, e.g. curl --data-urlencode | |
1398 | 'langpair=nb|nn' --data-urlencode 'q@file' localhost:2737/translate | |
1399 | ||
1400 | 2013-09-20 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1401 | ||
1402 | * servlet.py: a little debugging | |
1403 | ||
1404 | 2013-09-19 Kevin Brubeck Unhammer <unhammer@fsfe.org> | |
1405 | ||
1406 | * servlet.py: fflush after writing the NUL (instead of writing it | |
1407 | twice); use deformat; read apertiumpath/port from command line | |
1408 | arguments | |
1409 | ||
1410 | 2013-09-14 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1411 | ||
1412 | * servlet.py: proper handling of apertium-pretransfer | |
1413 | ||
1414 | 2013-09-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1415 | ||
1416 | * servlet.py: a small bug fix and some more debugs | |
1417 | ||
1418 | 2013-09-08 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1419 | ||
1420 | * servlet.py: skip pretransfer | |
1421 | ||
1422 | 2013-09-04 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1423 | ||
1424 | * servlet.py: apertium-transfer -z fails. also, script needs to be | |
1425 | async / threaded | |
1426 | ||
1427 | 2013-09-03 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1428 | ||
1429 | * servlet.py: another test to prevent failure | |
1430 | ||
1431 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1432 | ||
1433 | * servlet.py: return False stack could be collapsed to a single line | |
1434 | ||
1435 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1436 | ||
1437 | * servlet.py: getting stuck at procOut.stdout.read(1) | |
1438 | ||
1439 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1440 | ||
1441 | * servlet.py: now requires r46823 for lrx-proc | |
1442 | ||
1443 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1444 | ||
1445 | * servlet.py: three different ways to translate: modes file direct, | |
1446 | modes file deep, and apertium direct | |
1447 | ||
1448 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1449 | ||
1450 | * servlet.py: uses .mode files | |
1451 | ||
1452 | 2013-08-25 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1453 | ||
1454 | * servlet.py: programming fail | |
1455 | ||
1456 | 2013-08-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1457 | ||
1458 | * servlet.py: now with callback support, I think | |
1459 | ||
1460 | 2013-08-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1461 | ||
1462 | * servlet.py: some cleaning up of commented-out code | |
1463 | ||
1464 | 2013-08-24 Jonathan Washington <jonathan.north.washington@gmail.com> | |
1465 | ||
1466 | * apertium-apy, where apy=APy, and stands for API in Python | |
1467 |
0 | # -*- mode:org -*- | |
1 | #+STARTUP: showall | |
2 | ||
3 | * Version 0.9.1, 2016-06-10 | |
4 | Git rev: 3c536b37def552d073ddda4d27d9358103e304c4 | |
5 | ||
6 | Changes since 0.9.0: | |
7 | ||
8 | - Various minor cleanups and lints | |
9 | ||
10 | - systemd watchdog – if APY was started by systemd and the .service | |
11 | file has e.g. WatchdogSec=10s, we send a watchdog ping every <10s. | |
12 | See tools/systemd for example .service file. | |
13 | ||
14 | - rewrote missingFreqs-handling to fix locking (in its own class | |
15 | now); and don't try to close missingFreqsDb unless it was actually | |
16 | open | |
17 | ||
18 | - Handle .mode files from newest Apertium (which now quotes paths | |
19 | correctly there) | |
20 | ||
21 | - fix #14: false was sent by getPairOrError instead of None | |
22 | ||
23 | ||
24 | * Version 0.9.0, 2016-02-11 | |
25 | Git rev: 1d0e110cd1e9a74f0099138823ec31f4c145ea73 | |
26 | ||
27 | Changes since r61425 / 0dd2cd: | |
28 | ||
29 | - First official release with a version number :-) | |
30 | ||
31 | - Should now be compatible with all Python versions from 3.2 to | |
32 | 3.5-dev (raises StopIteration in 3.2, returns from generator in | |
33 | higher versions) | |
34 | ||
35 | - APY now has some simple regression tests and Continuous | |
36 | Integration. See t/README if you want to run them yourself, but | |
37 | they are also run at https://travis-ci.org/goavki/apertium-apy on | |
38 | every pull request or push to | |
39 | https://github.com/goavki/apertium-apy | |
40 | ||
41 | - /translateDoc now accepts optional argument &markUnknown which can | |
42 | be either "yes" or "no", as /translate already does. Defaults to | |
43 | "yes", which will put a "*" in front of any unknown word. | |
44 | ||
45 | - New, *experimental* endpoint | |
46 | /translatePage?url=URL&langpair=FROM|TO which will retrieve an URL | |
47 | and return HTML with translated text. Like /translate, accepts | |
48 | optional argument &markUnknown which can be either "yes" or "no", | |
49 | and does not block other requests (but if there are few pipelines | |
50 | per pair, long page translations may currently hold up shorter | |
51 | requests to the same language pair, since we translate the whole | |
52 | page in one go instead of splitting it up). | |
53 | ||
54 | - New/improved language name localisations (Kyrgyz, Uzbek, | |
55 | Qaraqalpaq, Kazakh, Basque, Crimean Tatar, …) | |
56 | ||
57 | - More information in /stats: Now shows number of requests, chars | |
58 | and time spent actively translating during the last N seconds | |
59 | (defaults to 3600, changeable with new optional argument -T / | |
60 | --stat-period-max-age), and total server uptime. | |
61 | ||
62 | - Now does NUL-flushing (keeps pipelines open) even if the pair uses | |
63 | hfst-proc or lrx-proc (NUL-flushing works in those programs as of | |
64 | 2015-10-15). | |
65 | ||
66 | - Analysis/generation now both use translateSimple, less duplicate | |
67 | code. | |
68 | ||
69 | - tools/apertium-viewer.html – a simple one-page HTML interface to | |
70 | APY translation, meant for debugging – now does translate as you | |
71 | type. | |
72 | ||
73 | - tools/sanity-test-apy.py updated to do a healthcheck on all pairs | |
74 | currently running on https://apertium.org | |
75 | ||
76 | - Minor cleanups suggested by flake8. | |
77 | ||
78 | * r61425, 2015-08-13 | |
79 | Git rev: 0dd2cdba544e228b6268f0c0babbdf1698af27f8 | |
80 | ||
81 | - Unofficial release, but in Debian sid as 0.1.0~r61425-1 | |
82 | ||
83 | - See http://wiki.apertium.org/wiki/Apy for an overview of what APY | |
84 | can do. |
0 | See http://wiki.apertium.org/wiki/Apertium-apy | |
1 | ||
2 | #+CAPTION: Build Status | |
3 | [[https://travis-ci.org/goavki/apertium-apy][https://travis-ci.org/goavki/apertium-apy.svg]] | |
4 |
0 | #!/usr/bin/env python3 | |
1 | # vim: set ts=4 sw=4 sts=4 et : | |
2 | ||
3 | import sqlite3, logging | |
4 | from datetime import datetime | |
5 | import threading | |
6 | from collections import defaultdict | |
7 | from contextlib import closing | |
8 | ||
class MissingDb(object):
    """Accumulates counts of unknown words per language pair in memory,
    periodically flushing them to an sqlite database.

    Counts are buffered in self.words until more than wordmemlimit words
    have been noted, at which point they are committed to the database
    and the in-memory buffer is cleared.
    """

    def __init__(self, dbPath, wordmemlimit):
        # RLock (reentrant) so commit() can be called both directly and
        # from noteUnknown() on the same thread without deadlocking.
        self.lock = threading.RLock()
        self.conn = None  # sqlite3 connection, opened lazily on first commit()
        self.dbPath = dbPath
        # pair -> token -> number of times the token was seen unknown
        self.words = defaultdict(lambda: defaultdict(int))
        self.wordcount = 0  # total buffered occurrences, across all pairs
        self.wordmemlimit = wordmemlimit

    def noteUnknown(self, token, pair):
        """Record one occurrence of unknown `token` for language `pair`,
        flushing the buffer to the DB once it exceeds wordmemlimit."""
        # Hold the lock for the whole note-and-maybe-flush sequence:
        # otherwise words added by another thread between commit() and
        # words.clear() would be silently dropped.
        with self.lock:
            self.words[pair][token] += 1
            self.wordcount += 1
            # so if wordmemlimit is 0, we commit on each word
            if self.wordcount > self.wordmemlimit:
                self.commit()
                self.words.clear()
                self.wordcount = 0

    def commit(self):
        """Write all buffered counts to the database, creating the
        connection and table on first use."""
        timeBefore = datetime.now()
        with self.lock:
            if not self.conn:
                self.conn = sqlite3.connect(self.dbPath)

            with closing(self.conn.cursor()) as c:
                c.execute("PRAGMA synchronous = NORMAL")
                c.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))')
                # Upsert: add each buffered amount to any frequency
                # already stored for that (pair, token).
                c.executemany('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + :amount)',
                              ({'pair': pair,
                                'token': token,
                                'amount': self.words[pair][token]}
                               for pair in self.words
                               for token in self.words[pair]))
            self.conn.commit()
        ms = self._elapsedMs(datetime.now() - timeBefore)
        logging.info("\tSaving %s unknown words to the DB (%s ms)", self.wordcount, ms)

    def closeDb(self):
        """Commit and close the connection, if one was ever opened."""
        if not self.conn:
            logging.warning('no connection on closeDb')
            return
        logging.warning('closing connection')
        self.conn.commit()
        self.conn.close()
        self.conn = None

    @staticmethod
    def _elapsedMs(td):
        """Milliseconds in a timedelta, truncated to an int."""
        return td.days * 86400000 + td.seconds * 1000 + int(td.microseconds / 1000)
54 | ||
55 | ||
def timedeltaToMilliseconds(td):
    """Convert a datetime.timedelta to whole milliseconds (truncated).

    timedelta components are normalized (0 <= microseconds < 10**6), so
    flooring the microsecond part matches truncation toward zero.
    """
    whole_second_ms = (td.days * 86400 + td.seconds) * 1000
    return whole_second_ms + td.microseconds // 1000
48 | 48 | if mtype != 'pair': |
49 | 49 | modename = m.group(1) # e.g. en-es-anmorph |
50 | 50 | langlist = [toAlpha3Code(l) for l in m.group(2).split('-')] |
51 | lang_src = langlist[0] # e.g. en | |
52 | 51 | lang_pair = '-'.join(langlist) # e.g. en-es |
53 | 52 | dir_of_modes = os.path.dirname(dirpath) |
54 | 53 | mode = (dir_of_modes, |
63 | 62 | toAlpha3Code(lang_trg)) |
64 | 63 | modes[mtype].append(mode) |
65 | 64 | |
66 | if verbosity>1: | |
67 | for mtype in modes: | |
68 | if modes[mtype]: | |
69 | logging.info("\"%s\" modes found:\n%s" % ( | |
70 | mtype, | |
71 | "\n".join(["\t".join(m) for m in modes[mtype]]))) | |
72 | ||
65 | if verbosity > 1: | |
66 | _log_modes(modes) | |
73 | 67 | |
74 | 68 | return modes |
69 | ||
70 | def _log_modes(modes): | |
71 | """Print given modes to log.""" | |
72 | for mtype in modes: | |
73 | if modes[mtype]: | |
74 | logging.info("\"%s\" modes found:\n%s" % ( | |
75 | mtype, | |
76 | "\n".join(["\t".join(m) for m in modes[mtype]]))) |
2 | 2 | # coding=utf-8 |
3 | 3 | # -*- encoding: utf-8 -*- |
4 | 4 | |
5 | import sys, os, re, ssl, argparse, logging, time, signal, tempfile, zipfile | |
5 | import sys, os, re, argparse, logging, time, signal, tempfile, zipfile | |
6 | 6 | from subprocess import Popen, PIPE |
7 | 7 | from multiprocessing import Pool, TimeoutError |
8 | 8 | from functools import wraps |
9 | 9 | from threading import Thread |
10 | from datetime import datetime | |
10 | from datetime import datetime, timedelta | |
11 | import heapq | |
11 | 12 | |
12 | 13 | import tornado, tornado.web, tornado.httpserver, tornado.process, tornado.iostream |
13 | from tornado import escape, gen | |
14 | from tornado import httpclient | |
15 | from tornado import gen | |
16 | from tornado import escape | |
14 | 17 | from tornado.escape import utf8 |
15 | try: #3.1 | |
18 | try: # 3.1 | |
16 | 19 | from tornado.log import enable_pretty_logging |
17 | except ImportError: #2.1 | |
20 | except ImportError: # 2.1 | |
18 | 21 | from tornado.options import enable_pretty_logging |
19 | 22 | |
20 | import toro | |
21 | ||
22 | 23 | from modeSearch import searchPath |
23 | from util import getLocalizedLanguages, apertium, bilingualTranslate, removeLast, stripTags, processPerWord, getCoverage, getCoverages, toAlpha3Code, toAlpha2Code, noteUnknownToken, scaleMtLog, TranslationInfo, closeDb, flushUnknownWords, inMemoryUnknownToken | |
24 | import translation | |
24 | from util import getLocalizedLanguages, stripTags, processPerWord, getCoverage, getCoverages, toAlpha3Code, toAlpha2Code, scaleMtLog, TranslationInfo | |
25 | ||
26 | import systemd | |
27 | import missingdb | |
28 | ||
29 | from urllib.parse import urlparse | |
30 | ||
31 | if sys.version_info.minor < 3: | |
32 | import translation_py32 as translation | |
33 | else: | |
34 | import translation | |
35 | ||
36 | import util | |
25 | 37 | from keys import getKey |
26 | 38 | |
27 | 39 | try: |
29 | 41 | except: |
30 | 42 | cld2 = None |
31 | 43 | |
44 | try: | |
45 | import chardet | |
46 | except: | |
47 | chardet = None | |
48 | ||
49 | __version__ = "0.9.1" | |
50 | ||
32 | 51 | def run_async_thread(func): |
33 | 52 | @wraps(func) |
34 | 53 | def async_func(*args, **kwargs): |
35 | func_hl = Thread(target = func, args = args, kwargs = kwargs) | |
54 | func_hl = Thread(target=func, args=args, kwargs=kwargs) | |
36 | 55 | func_hl.start() |
37 | 56 | return func_hl |
38 | 57 | |
39 | 58 | return async_func |
40 | 59 | |
60 | ||
61 | missingFreqsDb = None # has to be global for sig_handler :-/ | |
62 | ||
41 | 63 | def sig_handler(sig, frame): |
42 | 64 | global missingFreqsDb |
43 | if missingFreqsDb: | |
65 | if missingFreqsDb is not None: | |
44 | 66 | if 'children' in frame.f_locals: |
45 | 67 | for child in frame.f_locals['children']: |
46 | 68 | os.kill(child, signal.SIGTERM) |
47 | flushUnknownWords(missingFreqsDb) | |
48 | else: # we are one of the children | |
49 | flushUnknownWords(missingFreqsDb) | |
69 | missingFreqsDb.commit() | |
70 | else: | |
71 | # we are one of the children | |
72 | missingFreqsDb.commit() | |
73 | missingFreqsDb.closeDb() | |
50 | 74 | logging.warning('Caught signal: %s', sig) |
51 | closeDb() | |
52 | 75 | exit() |
76 | ||
53 | 77 | |
54 | 78 | class BaseHandler(tornado.web.RequestHandler): |
55 | 79 | pairs = {} |
56 | 80 | analyzers = {} |
57 | 81 | generators = {} |
58 | 82 | taggers = {} |
59 | pipelines = {} # (l1, l2): (inpipe, outpipe), only contains flushing pairs! | |
83 | pipelines = {} # (l1, l2): [translation.Pipeline], only contains flushing pairs! | |
84 | pipelines_holding = [] | |
60 | 85 | callback = None |
61 | 86 | timeout = None |
62 | 87 | scaleMtLogs = False |
63 | inMemoryUnknown = False | |
64 | inMemoryLimit = -1 | |
65 | 88 | verbosity = 0 |
66 | 89 | |
67 | 90 | stats = { |
91 | 'startdate': datetime.now(), | |
68 | 92 | 'useCount': {}, |
69 | 'lastUsage': {}, | |
70 | 93 | 'vmsize': 0, |
94 | 'timing': [] | |
71 | 95 | } |
72 | 96 | |
73 | # The lock is needed so we don't let two coroutines write | |
74 | # simultaneously to a pipeline; then the first call to read might | |
75 | # read translations of text put there by the second call … | |
76 | pipeline_locks = {} # (l1, l2): lock for (l1, l2) in pairs | |
77 | pipeline_cmds = {} # (l1, l2): (do_flush, commands) | |
97 | pipeline_cmds = {} # (l1, l2): translation.ParsedModes | |
98 | max_pipes_per_pair = 1 | |
99 | min_pipes_per_pair = 0 | |
100 | max_users_per_pipe = 5 | |
101 | max_idle_secs = 0 | |
102 | restart_pipe_after = 1000 | |
78 | 103 | |
79 | 104 | def initialize(self): |
80 | 105 | self.callback = self.get_argument('callback', default=None) |
150 | 175 | self.set_status(204) |
151 | 176 | self.finish() |
152 | 177 | |
178 | ||
153 | 179 | class ListHandler(BaseHandler): |
154 | 180 | @tornado.web.asynchronous |
155 | 181 | def get(self): |
172 | 198 | else: |
173 | 199 | self.send_error(400, explanation='Expecting q argument to be one of analysers, generators, disambiguators or pairs') |
174 | 200 | |
201 | ||
175 | 202 | class StatsHandler(BaseHandler): |
176 | 203 | @tornado.web.asynchronous |
177 | 204 | def get(self): |
205 | numRequests = self.get_argument('requests', 1000) | |
206 | try: | |
207 | numRequests = int(numRequests) | |
208 | except ValueError: | |
209 | numRequests = 1000 | |
210 | ||
211 | periodStats = self.stats['timing'][-numRequests:] | |
212 | times = sum([x[1]-x[0] for x in periodStats], | |
213 | timedelta()) | |
214 | chars = sum(x[2] for x in periodStats) | |
215 | if times.total_seconds() != 0: | |
216 | charsPerSec = round(chars/times.total_seconds(), 2) | |
217 | else: | |
218 | charsPerSec = 0.0 | |
219 | nrequests = len(periodStats) | |
220 | maxAge = (datetime.now()-periodStats[0][0]).total_seconds() if periodStats else 0 | |
221 | ||
222 | uptime = int((datetime.now()-self.stats['startdate']).total_seconds()) | |
223 | useCount = { '%s-%s' % pair: useCount | |
224 | for pair, useCount in self.stats['useCount'].items() } | |
225 | runningPipes = { '%s-%s' % pair: len(pipes) | |
226 | for pair, pipes in self.pipelines.items() | |
227 | if pipes != [] } | |
228 | holdingPipes = len(self.pipelines_holding) | |
229 | ||
178 | 230 | self.sendResponse({ |
179 | 'responseData': { '%s-%s' % pair: useCount for pair, useCount in self.stats['useCount'].items() }, | |
231 | 'responseData': { | |
232 | 'uptime': uptime, | |
233 | 'useCount': useCount, | |
234 | 'runningPipes': runningPipes, | |
235 | 'holdingPipes': holdingPipes, | |
236 | 'periodStats': { | |
237 | 'charsPerSec': charsPerSec, | |
238 | 'totChars': chars, | |
239 | 'totTimeSpent': times.total_seconds(), | |
240 | 'requests': nrequests, | |
241 | 'ageFirstRequest': maxAge | |
242 | } | |
243 | }, | |
180 | 244 | 'responseDetails': None, |
181 | 245 | 'responseStatus': 200 |
182 | 246 | }) |
183 | 247 | |
248 | ||
184 | 249 | class RootHandler(BaseHandler): |
185 | 250 | @tornado.web.asynchronous |
186 | 251 | def get(self): |
189 | 254 | class TranslateHandler(BaseHandler): |
190 | 255 | def notePairUsage(self, pair): |
191 | 256 | self.stats['useCount'][pair] = 1 + self.stats['useCount'].get(pair, 0) |
192 | if self.max_idle_secs: | |
193 | self.stats['lastUsage'][pair] = time.time() | |
194 | 257 | |
195 | 258 | unknownMarkRE = re.compile(r'\*([^.,;:\t\* ]+)') |
196 | def maybeStripMarks(self, markUnknown, l1, l2, translated): | |
197 | self.noteUnknownTokens("%s-%s" % (l1, l2), translated) | |
259 | def maybeStripMarks(self, markUnknown, pair, translated): | |
260 | self.noteUnknownTokens("%s-%s" % pair, translated) | |
198 | 261 | if markUnknown: |
199 | 262 | return translated |
200 | 263 | else: |
201 | 264 | return re.sub(self.unknownMarkRE, r'\1', translated) |
202 | 265 | |
203 | 266 | def noteUnknownTokens(self, pair, text): |
204 | if self.missingFreqs: | |
267 | global missingFreqsDb | |
268 | if missingFreqsDb is not None: | |
205 | 269 | for token in re.findall(self.unknownMarkRE, text): |
206 | if self.inMemoryUnknown: | |
207 | inMemoryUnknownToken(token, pair, self.missingFreqs, self.inMemoryLimit) | |
208 | else: | |
209 | noteUnknownToken(token, pair, self.missingFreqs) | |
210 | ||
211 | def shutdownPair(self, pair): | |
212 | logging.info("shutting down") | |
213 | self.pipelines[pair][0].stdin.close() | |
214 | self.pipelines[pair][0].stdout.close() | |
215 | self.pipelines.pop(pair) | |
270 | missingFreqsDb.noteUnknown(token, pair) | |
271 | ||
272 | def cleanable(self, i, pair, pipe): | |
273 | if pipe.useCount > self.restart_pipe_after: | |
274 | # Not affected by min_pipes_per_pair | |
275 | logging.info('A pipe for pair %s-%s has handled %d requests, scheduling restart', | |
276 | pair[0], pair[1], self.restart_pipe_after) | |
277 | return True | |
278 | elif (i >= self.min_pipes_per_pair | |
279 | and self.max_idle_secs != 0 | |
280 | and time.time() - pipe.lastUsage > self.max_idle_secs): | |
281 | logging.info("A pipe for pair %s-%s hasn't been used in %d secs, scheduling shutdown", | |
282 | pair[0], pair[1], self.max_idle_secs) | |
283 | return True | |
284 | else: | |
285 | return False | |
216 | 286 | |
217 | 287 | def cleanPairs(self): |
218 | if self.max_idle_secs: | |
219 | for pair, lastUsage in self.stats['lastUsage'].items(): | |
220 | if pair in self.pipelines and time.time() - lastUsage > self.max_idle_secs: | |
221 | logging.info('Shutting down pair %s-%s since it has not been used in %d seconds' % ( | |
222 | pair[0], pair[1], self.max_idle_secs)) | |
223 | self.shutdownPair(pair) | |
224 | ||
225 | def getPipeLock(self, l1, l2): | |
226 | if (l1, l2) not in self.pipeline_locks: | |
227 | self.pipeline_locks[(l1, l2)] = toro.Lock() | |
228 | return self.pipeline_locks[(l1, l2)] | |
288 | for pair in self.pipelines: | |
289 | pipes = self.pipelines[pair] | |
290 | to_clean = set(p for i, p in enumerate(pipes) | |
291 | if self.cleanable(i, pair, p)) | |
292 | self.pipelines_holding += to_clean | |
293 | pipes[:] = [p for p in pipes if p not in to_clean] | |
294 | heapq.heapify(pipes) | |
295 | # The holding area lets us restart pipes after n usages next | |
296 | # time round, since with lots of traffic an active pipe may | |
297 | # never reach 0 users | |
298 | self.pipelines_holding[:] = [p for p in self.pipelines_holding | |
299 | if p.users > 0] | |
300 | if self.pipelines_holding: | |
301 | logging.info("%d pipelines still scheduled for shutdown", len(self.pipelines_holding)) | |
229 | 302 | |
230 | 303 | def getPipeCmds(self, l1, l2): |
231 | 304 | if (l1, l2) not in self.pipeline_cmds: |
233 | 306 | self.pipeline_cmds[(l1, l2)] = translation.parseModeFile(mode_path) |
234 | 307 | return self.pipeline_cmds[(l1, l2)] |
235 | 308 | |
236 | def getPipeline(self, l1, l2): | |
237 | do_flush, commands = self.getPipeCmds(l1, l2) | |
238 | if not do_flush: | |
239 | return None | |
240 | if (l1, l2) not in self.pipelines: | |
241 | logging.info('%s-%s not in pipelines of this process, starting …' % (l1, l2)) | |
242 | self.pipelines[(l1, l2)] = translation.startPipeline(commands) | |
243 | return self.pipelines[(l1, l2)] | |
309 | def shouldStartPipe(self, l1, l2): | |
310 | pipes = self.pipelines.get((l1, l2), []) | |
311 | if pipes == []: | |
312 | logging.info("%s-%s not in pipelines of this process", | |
313 | l1, l2) | |
314 | return True | |
315 | else: | |
316 | min_p = pipes[0] | |
317 | if len(pipes) < self.max_pipes_per_pair and min_p.users > self.max_users_per_pipe: | |
318 | logging.info("%s-%s has ≥%d users per pipe but only %d pipes", | |
319 | l1, l2, min_p.users, len(pipes)) | |
320 | return True | |
321 | else: | |
322 | return False | |
323 | ||
324 | def getPipeline(self, pair): | |
325 | (l1, l2) = pair | |
326 | if self.shouldStartPipe(l1, l2): | |
327 | logging.info("Starting up a new pipeline for %s-%s …", l1, l2) | |
328 | if pair not in self.pipelines: | |
329 | self.pipelines[pair] = [] | |
330 | p = translation.makePipeline(self.getPipeCmds(l1, l2)) | |
331 | heapq.heappush(self.pipelines[pair], p) | |
332 | return self.pipelines[pair][0] | |
244 | 333 | |
245 | 334 | def logBeforeTranslation(self): |
335 | return datetime.now() | |
336 | ||
337 | def logAfterTranslation(self, before, length): | |
338 | after = datetime.now() | |
246 | 339 | if self.scaleMtLogs: |
247 | return datetime.now() | |
248 | return | |
249 | ||
250 | def logAfterTranslation(self, before, toTranslate): | |
251 | if self.scaleMtLogs: | |
252 | after = datetime.now() | |
253 | 340 | tInfo = TranslationInfo(self) |
254 | 341 | key = getKey(tInfo.key) |
255 | scaleMtLog(self.get_status(), after-before, tInfo, key, len(toTranslate)) | |
256 | ||
257 | @gen.coroutine | |
258 | def get(self): | |
259 | toTranslate = self.get_argument('q') | |
260 | markUnknown = self.get_argument('markUnknown', default='yes') in ['yes', 'true', '1'] | |
261 | ||
342 | scaleMtLog(self.get_status(), after-before, tInfo, key, length) | |
343 | ||
344 | if self.get_status() == 200: | |
345 | oldest = self.stats['timing'][0][0] if self.stats['timing'] else datetime.now() | |
346 | if datetime.now() - oldest > self.STAT_PERIOD_MAX_AGE: | |
347 | self.stats['timing'].pop(0) | |
348 | self.stats['timing'].append( | |
349 | (before, after, length)) | |
350 | ||
351 | def getPairOrError(self, langpair, text_length): | |
262 | 352 | try: |
263 | l1, l2 = map(toAlpha3Code, self.get_argument('langpair').split('|')) | |
353 | l1, l2 = map(toAlpha3Code, langpair.split('|')) | |
264 | 354 | except ValueError: |
265 | 355 | self.send_error(400, explanation='That pair is invalid, use e.g. eng|spa') |
266 | if self.scaleMtLogs: | |
267 | before = datetime.now() | |
268 | tInfo = TranslationInfo(self) | |
269 | key = getKey(tInfo.key) | |
270 | after = datetime.now() | |
271 | scaleMtLog(400, after-before, tInfo, key, len(toTranslate)) | |
272 | return | |
273 | ||
274 | if '%s-%s' % (l1, l2) in self.pairs: | |
275 | before = self.logBeforeTranslation() | |
276 | lock = self.getPipeLock(l1, l2) | |
277 | _, commands = self.getPipeCmds(l1, l2) | |
278 | pipeline = self.getPipeline(l1, l2) | |
279 | translated = yield translation.translate(toTranslate, lock, pipeline, commands) | |
280 | self.logAfterTranslation(before, toTranslate) | |
281 | self.sendResponse({ | |
282 | 'responseData': { | |
283 | 'translatedText': self.maybeStripMarks(markUnknown, l1, l2, translated) | |
284 | }, | |
285 | 'responseDetails': None, | |
286 | 'responseStatus': 200 | |
287 | }) | |
288 | self.notePairUsage((l1, l2)) | |
289 | self.cleanPairs() | |
290 | else: | |
356 | self.logAfterTranslation(self.logBeforeTranslation(), text_length) | |
357 | return None | |
358 | if '%s-%s' % (l1, l2) not in self.pairs: | |
291 | 359 | self.send_error(400, explanation='That pair is not installed') |
292 | if self.scaleMtLogs: | |
293 | before = datetime.now() | |
294 | tInfo = TranslationInfo(self) | |
295 | key = getKey(tInfo.key) | |
296 | after = datetime.now() | |
297 | scaleMtLog(400, after-before, tInfo, key, len(toTranslate)) | |
360 | self.logAfterTranslation(self.logBeforeTranslation(), text_length) | |
361 | return None | |
362 | else: | |
363 | return (l1, l2) | |
364 | ||
365 | @gen.coroutine | |
366 | def translateAndRespond(self, pair, pipeline, toTranslate, markUnknown, nosplit=False): | |
367 | markUnknown = markUnknown in ['yes', 'true', '1'] | |
368 | self.notePairUsage(pair) | |
369 | before = self.logBeforeTranslation() | |
370 | translated = yield pipeline.translate(toTranslate, nosplit) | |
371 | self.logAfterTranslation(before, len(toTranslate)) | |
372 | self.sendResponse({ | |
373 | 'responseData': { | |
374 | 'translatedText': self.maybeStripMarks(markUnknown, pair, translated) | |
375 | }, | |
376 | 'responseDetails': None, | |
377 | 'responseStatus': 200 | |
378 | }) | |
379 | self.cleanPairs() | |
380 | ||
381 | @gen.coroutine | |
382 | def get(self): | |
383 | pair = self.getPairOrError(self.get_argument('langpair'), | |
384 | len(self.get_argument('q'))) | |
385 | if pair is not None: | |
386 | pipeline = self.getPipeline(pair) | |
387 | yield self.translateAndRespond(pair, | |
388 | pipeline, | |
389 | self.get_argument('q'), | |
390 | self.get_argument('markUnknown', default='yes')) | |
391 | ||
392 | ||
393 | class TranslatePageHandler(TranslateHandler): | |
394 | def htmlToText(self, html, url): | |
395 | if chardet: | |
396 | encoding = chardet.detect(html).get("encoding", "utf-8") | |
397 | else: | |
398 | encoding = "utf-8" | |
399 | text = html.decode(encoding) | |
400 | text = text.replace('href="/', 'href="{uri.scheme}://{uri.netloc}/'.format(uri=urlparse(url))) | |
401 | text = re.sub(r'a([^>]+)href=[\'"]?([^\'" >]+)', 'a \\1 href="#" onclick=\'window.parent.translateLink("\\2");\'', text) | |
402 | return text | |
403 | ||
404 | @gen.coroutine | |
405 | def get(self): | |
406 | pair = self.getPairOrError(self.get_argument('langpair'), | |
407 | # Don't yet know the size of the text, and don't want to fetch it unnecessarily: | |
408 | -1) | |
409 | if pair is not None: | |
410 | pipeline = self.getPipeline(pair) | |
411 | http_client = httpclient.AsyncHTTPClient() | |
412 | url = self.get_argument('url') | |
413 | request = httpclient.HTTPRequest(url=url, | |
414 | # TODO: tweak | |
415 | connect_timeout=20.0, | |
416 | request_timeout=20.0) | |
417 | response = yield http_client.fetch(request) | |
418 | toTranslate = self.htmlToText(response.body, url) | |
419 | yield self.translateAndRespond(pair, | |
420 | pipeline, | |
421 | toTranslate, | |
422 | self.get_argument('markUnknown', default='yes'), | |
423 | nosplit=True) | |
424 | ||
298 | 425 | |
299 | 426 | class TranslateDocHandler(TranslateHandler): |
300 | 427 | mimeTypeCommand = None |
333 | 460 | else: |
334 | 461 | return mimeType |
335 | 462 | |
463 | # TODO: Some kind of locking. Although we can't easily re-use open | |
464 | # pairs here (would have to reimplement lots of | |
465 | # /usr/bin/apertium), we still want some limits on concurrent doc | |
466 | # translation. | |
336 | 467 | @tornado.web.asynchronous |
337 | 468 | def get(self): |
338 | 469 | try: |
339 | 470 | l1, l2 = map(toAlpha3Code, self.get_argument('langpair').split('|')) |
340 | 471 | except ValueError: |
341 | 472 | self.send_error(400, explanation='That pair is invalid, use e.g. eng|spa') |
473 | ||
474 | markUnknown = self.get_argument('markUnknown', default='yes') in ['yes', 'true', '1'] | |
342 | 475 | |
343 | 476 | allowedMimeTypes = { |
344 | 477 | 'text/plain': 'txt', |
368 | 501 | self.request.headers['Content-Type'] = 'application/octet-stream' |
369 | 502 | self.request.headers['Content-Disposition'] = 'attachment' |
370 | 503 | |
371 | self.write(translation.translateDoc(tempFile, allowedMimeTypes[mtype], self.pairs['%s-%s' % (l1, l2)])) | |
504 | if markUnknown: | |
505 | self.write(translation.translateDoc(tempFile, allowedMimeTypes[mtype], self.pairs['%s-%s' % (l1, l2)],True)) | |
506 | else: | |
507 | self.write(translation.translateDoc(tempFile, allowedMimeTypes[mtype], self.pairs['%s-%s' % (l1, l2)],False)) | |
372 | 508 | self.finish() |
373 | 509 | else: |
374 | 510 | self.send_error(400, explanation='Invalid file type %s' % mtype) |
375 | 511 | else: |
376 | 512 | self.send_error(400, explanation='That pair is not installed') |
377 | 513 | |
514 | ||
378 | 515 | class AnalyzeHandler(BaseHandler): |
379 | @tornado.web.asynchronous | |
380 | @tornado.gen.coroutine | |
381 | def get(self): | |
382 | mode = toAlpha3Code(self.get_argument('lang')) | |
383 | toAnalyze = self.get_argument('q') | |
384 | ||
385 | def handleAnalysis(analysis): | |
386 | if analysis is None: | |
387 | self.send_error(408, explanation='Request timed out') | |
388 | else: | |
389 | lexicalUnits = removeLast(toAnalyze, re.findall(r'\^([^\$]*)\$([^\^]*)', analysis)) | |
390 | self.sendResponse([(lexicalUnit[0], lexicalUnit[0].split('/')[0] + lexicalUnit[1]) for lexicalUnit in lexicalUnits]) | |
391 | ||
392 | if mode in self.analyzers: | |
393 | pool = Pool(processes=1) | |
394 | result = pool.apply_async(apertium, [toAnalyze, self.analyzers[mode][0], self.analyzers[mode][1]]) | |
395 | pool.close() | |
396 | ||
397 | @run_async_thread | |
398 | def worker(callback): | |
399 | try: | |
400 | callback(result.get(timeout=self.timeout)) | |
401 | except TimeoutError: | |
402 | pool.terminate() | |
403 | callback(None) | |
404 | ||
405 | analysis = yield tornado.gen.Task(worker) | |
406 | handleAnalysis(analysis) | |
516 | def postproc_text(self, in_text, result): | |
517 | lexical_units = util.removeDotFromDeformat(in_text, re.findall(r'\^([^\$]*)\$([^\^]*)', result)) | |
518 | return [(lu[0], lu[0].split('/')[0] + lu[1]) | |
519 | for lu | |
520 | in lexical_units] | |
521 | ||
522 | @tornado.web.asynchronous | |
523 | @gen.coroutine | |
524 | def get(self): | |
525 | in_text = self.get_argument('q') | |
526 | in_mode = toAlpha3Code(self.get_argument('lang')) | |
527 | if in_mode in self.analyzers: | |
528 | [path, mode] = self.analyzers[in_mode] | |
529 | formatting = 'txt' | |
530 | commands = [['apertium', '-d', path, '-f', formatting, mode]] | |
531 | result = yield translation.translateSimple(in_text, commands) | |
532 | self.sendResponse(self.postproc_text(in_text, result)) | |
407 | 533 | else: |
408 | 534 | self.send_error(400, explanation='That mode is not installed') |
409 | 535 | |
536 | ||
410 | 537 | class GenerateHandler(BaseHandler): |
538 | def preproc_text(self, in_text): | |
539 | lexical_units = re.findall(r'(\^[^\$]*\$[^\^]*)', in_text) | |
540 | if len(lexical_units) == 0: | |
541 | lexical_units = ['^%s$' % (in_text,)] | |
542 | return lexical_units, '[SEP]'.join(lexical_units) | |
543 | ||
544 | def postproc_text(self, lexical_units, result): | |
545 | return [(generation, lexical_units[i]) | |
546 | for (i, generation) | |
547 | in enumerate(result.split('[SEP]'))] | |
548 | ||
411 | 549 | @tornado.web.asynchronous |
412 | 550 | @gen.coroutine |
413 | 551 | def get(self): |
414 | mode = toAlpha3Code(self.get_argument('lang')) | |
415 | toGenerate = self.get_argument('q') | |
416 | ||
417 | def handleGeneration(generated): | |
418 | if generated is None: | |
419 | self.send_error(408, explanation='Request timed out') | |
420 | else: | |
421 | generated = removeLast(toGenerate, generated) | |
422 | self.sendResponse([(generation, lexicalUnits[index]) for (index, generation) in enumerate(generated.split('[SEP]'))]) | |
423 | ||
424 | if mode in self.generators: | |
425 | lexicalUnits = re.findall(r'(\^[^\$]*\$[^\^]*)', toGenerate) | |
426 | if len(lexicalUnits) == 0: | |
427 | lexicalUnits = ['^%s$' % toGenerate] | |
428 | pool = Pool(processes=1) | |
429 | result = pool.apply_async(apertium, ('[SEP]'.join(lexicalUnits), self.generators[mode][0], self.generators[mode][1]), {'formatting': 'none'}) | |
430 | pool.close() | |
431 | ||
432 | @run_async_thread | |
433 | def worker(callback): | |
434 | try: | |
435 | callback(result.get(timeout=self.timeout)) | |
436 | except TimeoutError: | |
437 | pool.terminate() | |
438 | callback(None) | |
439 | ||
440 | generated = yield tornado.gen.Task(worker) | |
441 | handleGeneration(generated) | |
552 | in_text = self.get_argument('q') | |
553 | in_mode = toAlpha3Code(self.get_argument('lang')) | |
554 | if in_mode in self.generators: | |
555 | [path, mode] = self.generators[in_mode] | |
556 | formatting = 'none' | |
557 | commands = [['apertium', '-d', path, '-f', formatting, mode]] | |
558 | lexical_units, to_generate = self.preproc_text(in_text) | |
559 | result = yield translation.translateSimple(to_generate, commands) | |
560 | self.sendResponse(self.postproc_text(lexical_units, result)) | |
442 | 561 | else: |
443 | 562 | self.send_error(400, explanation='That mode is not installed') |
563 | ||
444 | 564 | |
445 | 565 | class ListLanguageNamesHandler(BaseHandler): |
446 | 566 | @tornado.web.asynchronous |
467 | 587 | else: |
468 | 588 | self.sendResponse({}) |
469 | 589 | |
590 | ||
470 | 591 | class PerWordHandler(BaseHandler): |
471 | 592 | @tornado.web.asynchronous |
472 | 593 | @gen.coroutine |
536 | 657 | output = yield tornado.gen.Task(worker) |
537 | 658 | handleOutput(output) |
538 | 659 | |
660 | ||
539 | 661 | class CoverageHandler(BaseHandler): |
540 | 662 | @tornado.web.asynchronous |
541 | 663 | @gen.coroutine |
570 | 692 | else: |
571 | 693 | self.send_error(400, explanation='That mode is not installed') |
572 | 694 | |
695 | ||
573 | 696 | class IdentifyLangHandler(BaseHandler): |
574 | 697 | @tornado.web.asynchronous |
575 | 698 | def get(self): |
583 | 706 | possibleLangs = filter(lambda x: x[1] != 'un', cldResults[2]) |
584 | 707 | self.sendResponse({toAlpha3Code(possibleLang[1]): possibleLang[2] for possibleLang in possibleLangs}) |
585 | 708 | else: |
586 | self.sendResponse({'nob': 100}) # TODO: Some more reasonable response | |
709 | self.sendResponse({'nob': 100}) # TODO: Some more reasonable response | |
587 | 710 | else: |
588 | 711 | def handleCoverages(coverages): |
589 | 712 | self.sendResponse(coverages) |
593 | 716 | pool.close() |
594 | 717 | try: |
595 | 718 | coverages = result.get(timeout=self.timeout) |
719 | # TODO: Coverages are not actually sent!! | |
596 | 720 | except TimeoutError: |
597 | 721 | self.send_error(408, explanation='Request timed out') |
598 | 722 | pool.terminate() |
599 | 723 | |
724 | ||
600 | 725 | class GetLocaleHandler(BaseHandler): |
601 | 726 | @tornado.web.asynchronous |
602 | 727 | def get(self): |
605 | 730 | self.sendResponse(locales) |
606 | 731 | else: |
607 | 732 | self.send_error(400, explanation='Accept-Language missing from request headers') |
733 | ||
608 | 734 | |
609 | 735 | class PipeDebugHandler(BaseHandler): |
610 | 736 | |
634 | 760 | |
635 | 761 | self.sendResponse({ |
636 | 762 | 'responseData': {'output': output, 'pipeline': pipeline}, |
637 | 'responseDetails': None, | |
638 | 'responseStatus': 200 | |
763 | 'responseDetails': None, | |
764 | 'responseStatus': 200 | |
639 | 765 | }) |
640 | 766 | |
641 | missingFreqsDb = '' | |
642 | ||
643 | def setupHandler(port, pairs_path, nonpairs_path, langNames, missingFreqs, timeout, max_idle_secs, verbosity=0, scaleMtLogs=False, memory=0): | |
767 | ||
768 | def setupHandler(port, pairs_path, nonpairs_path, langNames, missingFreqsPath, timeout, max_pipes_per_pair, min_pipes_per_pair, max_users_per_pipe, max_idle_secs, restart_pipe_after, verbosity=0, scaleMtLogs=False, memory=1000): | |
644 | 769 | |
645 | 770 | global missingFreqsDb |
646 | missingFreqsDb= missingFreqs | |
771 | if missingFreqsPath: | |
772 | missingFreqsDb = missingdb.MissingDb(missingFreqsPath, memory) | |
647 | 773 | |
648 | 774 | Handler = BaseHandler |
649 | 775 | Handler.langNames = langNames |
650 | Handler.missingFreqs = missingFreqs | |
651 | 776 | Handler.timeout = timeout |
777 | Handler.max_pipes_per_pair = max_pipes_per_pair | |
778 | Handler.min_pipes_per_pair = min_pipes_per_pair | |
779 | Handler.max_users_per_pipe = max_users_per_pipe | |
652 | 780 | Handler.max_idle_secs = max_idle_secs |
781 | Handler.restart_pipe_after = restart_pipe_after | |
653 | 782 | Handler.scaleMtLogs = scaleMtLogs |
654 | Handler.inMemoryUnknown = True if memory > 0 else False | |
655 | Handler.inMemoryLimit = memory | |
656 | 783 | Handler.verbosity = verbosity |
657 | 784 | |
658 | 785 | modes = searchPath(pairs_path, verbosity=verbosity) |
673 | 800 | Handler.taggers[lang_pair] = (dirpath, modename) |
674 | 801 | |
675 | 802 | |
803 | def sanity_check(): | |
804 | locale_vars = ["LANG", "LC_ALL"] | |
805 | u8 = re.compile("UTF-?8", re.IGNORECASE) | |
806 | if not any(re.search(u8, os.environ.get(key, "")) | |
807 | for key in locale_vars): | |
808 | print("servlet.py: error: APY needs a UTF-8 locale, please set LANG or LC_ALL", | |
809 | file=sys.stderr) | |
810 | sys.exit(1) | |
811 | ||
676 | 812 | if __name__ == '__main__': |
677 | parser = argparse.ArgumentParser(description='Start Apertium APY') | |
813 | sanity_check() | |
814 | parser = argparse.ArgumentParser(description='Apertium APY -- API server for machine translation and language analysis') | |
678 | 815 | parser.add_argument('pairs_path', help='path to Apertium installed pairs (all modes files in this path are included)') |
679 | 816 | parser.add_argument('-s', '--nonpairs-path', help='path to Apertium SVN (only non-translator debug modes are included from this path)') |
680 | 817 | parser.add_argument('-l', '--lang-names', help='path to localised language names sqlite database (default = langNames.db)', default='langNames.db') |
683 | 820 | parser.add_argument('-c', '--ssl-cert', help='path to SSL Certificate', default=None) |
684 | 821 | parser.add_argument('-k', '--ssl-key', help='path to SSL Key File', default=None) |
685 | 822 | parser.add_argument('-t', '--timeout', help='timeout for requests (default = 10)', type=int, default=10) |
686 | parser.add_argument('-j', '--num-processes', help='number of processes to run (default = number of cores)', type=int, default=0) | |
823 | parser.add_argument('-j', '--num-processes', help='number of processes to run (default = 1; use 0 to run one http server per core, where each http server runs all available language pairs)', nargs='?', type=int, default=1) | |
687 | 824 | parser.add_argument('-d', '--daemon', help='daemon mode: redirects stdout and stderr to files apertium-apy.log and apertium-apy.err ; use with --log-path', action='store_true') |
688 | 825 | parser.add_argument('-P', '--log-path', help='path to log output files to in daemon mode; defaults to local directory', default='./') |
689 | parser.add_argument('-m', '--max-idle-secs', help='shut down pipelines it have not been used in this many seconds', type=int, default=0) | |
826 | parser.add_argument('-i', '--max-pipes-per-pair', help='how many pipelines we can spin up per language pair (default = 1)', type=int, default=1) | |
827 | parser.add_argument('-n', '--min-pipes-per-pair', help='when shutting down pipelines, keep at least this many open per language pair (default = 0)', type=int, default=0) | |
828 | parser.add_argument('-u', '--max-users-per-pipe', help='how many concurrent requests per pipeline before we consider spinning up a new one (default = 5)', type=int, default=5) | |
829 | parser.add_argument('-m', '--max-idle-secs', help='if specified, shut down pipelines that have not been used in this many seconds', type=int, default=0) | |
830 | parser.add_argument('-r', '--restart-pipe-after', help='restart a pipeline if it has had this many requests (default = 1000)', type=int, default=1000) | |
690 | 831 | parser.add_argument('-v', '--verbosity', help='logging verbosity', type=int, default=0) |
832 | parser.add_argument('-V', '--version', help='show APY version', action='version', version="%(prog)s version " + __version__) | |
691 | 833 | parser.add_argument('-S', '--scalemt-logs', help='generates ScaleMT-like logs; use with --log-path; disables', action='store_true') |
692 | parser.add_argument('-M', '--unknown-memory-limit', help="keeps unknown words in memory until a limit is reached", type=int, default=0) | |
834 | parser.add_argument('-M', '--unknown-memory-limit', help="keeps unknown words in memory until a limit is reached (default = 1000)", type=int, default=1000) | |
835 | parser.add_argument('-T', '--stat-period-max-age', help="How many seconds back to keep track request timing stats (default = 3600)", type=int, default=3600) | |
693 | 836 | args = parser.parse_args() |
694 | 837 | |
695 | 838 | if args.daemon: |
706 | 849 | logger = logging.getLogger('scale-mt') |
707 | 850 | logger.propagate = False |
708 | 851 | smtlog = os.path.join(args.log_path, 'ScaleMTRequests.log') |
709 | loggingHandler = logging.handlers.TimedRotatingFileHandler(smtlog,'midnight',0) | |
852 | loggingHandler = logging.handlers.TimedRotatingFileHandler(smtlog, 'midnight', 0) | |
710 | 853 | loggingHandler.suffix = "%Y-%m-%d" |
711 | 854 | logger.addHandler(loggingHandler) |
712 | 855 | |
714 | 857 | if(args.daemon): |
715 | 858 | logging.getLogger("tornado.access").propagate = False |
716 | 859 | |
860 | if args.stat_period_max_age: | |
861 | BaseHandler.STAT_PERIOD_MAX_AGE = timedelta(0, args.stat_period_max_age, 0) | |
862 | ||
717 | 863 | if not cld2: |
718 | logging.warning('Unable to import CLD2, continuing using naive method of language detection') | |
719 | ||
720 | setupHandler(args.port, args.pairs_path, args.nonpairs_path, args.lang_names, args.missing_freqs, args.timeout, args.max_idle_secs, args.verbosity, args.scalemt_logs, args.unknown_memory_limit) | |
864 | logging.warning("Unable to import CLD2, continuing using naive method of language detection") | |
865 | if not chardet: | |
866 | logging.warning("Unable to import chardet, assuming utf-8 encoding for all websites") | |
867 | ||
868 | setupHandler(args.port, args.pairs_path, args.nonpairs_path, args.lang_names, args.missing_freqs, args.timeout, args.max_pipes_per_pair, args.min_pipes_per_pair, args.max_users_per_pipe, args.max_idle_secs, args.restart_pipe_after, args.verbosity, args.scalemt_logs, args.unknown_memory_limit) | |
721 | 869 | |
722 | 870 | application = tornado.web.Application([ |
723 | 871 | (r'/', RootHandler), |
726 | 874 | (r'/stats', StatsHandler), |
727 | 875 | (r'/translate', TranslateHandler), |
728 | 876 | (r'/translateDoc', TranslateDocHandler), |
877 | (r'/translatePage', TranslatePageHandler), | |
729 | 878 | (r'/analy[sz]e', AnalyzeHandler), |
730 | 879 | (r'/generate', GenerateHandler), |
731 | 880 | (r'/listLanguageNames', ListLanguageNamesHandler), |
738 | 887 | |
739 | 888 | global http_server |
740 | 889 | if args.ssl_cert and args.ssl_key: |
741 | http_server = tornado.httpserver.HTTPServer(application, ssl_options = { | |
890 | http_server = tornado.httpserver.HTTPServer(application, ssl_options={ | |
742 | 891 | 'certfile': args.ssl_cert, |
743 | 892 | 'keyfile': args.ssl_key, |
744 | 893 | }) |
752 | 901 | |
753 | 902 | http_server.bind(args.port) |
754 | 903 | http_server.start(args.num_processes) |
755 | tornado.ioloop.IOLoop.instance().start() | |
904 | ||
905 | loop = tornado.ioloop.IOLoop.instance() | |
906 | wd = systemd.setup_watchdog() | |
907 | if wd is not None: | |
908 | wd.systemd_ready() | |
909 | logging.info("Initialised systemd watchdog, pinging every {}s".format(1000*wd.period)) | |
910 | tornado.ioloop.PeriodicCallback(wd.watchdog_ping, 1000*wd.period, loop).start() | |
911 | loop.start() |
0 | #!/usr/bin/env python3 | |
1 | # -*- indent-tabs-mode: nil -*- | |
2 | # coding=utf-8 | |
3 | # -*- encoding: utf-8 -*- | |
4 | ||
5 | """ | |
6 | Copyright (C) 2016 Kevin Brubeck Unhammer | |
7 | based on https://gist.github.com/Spindel/1d07533ef94a4589d348 / watchdogged.py | |
8 | Copyright (C) 2015 D.S. Ljungmark, Modio AB | |
9 | This program is free software: you can redistribute it and/or modify | |
10 | it under the terms of the GNU General Public License as published by | |
11 | the Free Software Foundation, either version 3 of the License, or | |
12 | (at your option) any later version. | |
13 | This program is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | You should have received a copy of the GNU General Public License | |
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | """ | |
20 | ||
21 | import logging | |
22 | import socket | |
23 | import os | |
24 | ||
25 | # All singletons are prefixed "the" | |
26 | theLog = logging.getLogger(__name__) | |
27 | ||
28 | ||
def watchdog_period():
    """Return the watchdog interval in seconds, or None when unset.

    systemd communicates the interval through the WATCHDOG_USEC
    environment variable (in microseconds); an absent or empty value
    yields None.
    """
    usec = os.environ.get("WATCHDOG_USEC")
    return int(usec) / 1000000 if usec else None
35 | ||
36 | ||
def notify_socket(clean_environment=True):
    """Return a tuple of (address, socket) for systemd notifications.

    Reads NOTIFY_SOCKET from the environment and returns (None, None)
    when it is unset or malformed.  When clean_environment is True the
    variable is removed from os.environ so child processes do not
    inherit it and try to notify systemd themselves.
    """
    _empty = None, None
    # Read the variable exactly once; the original code did a get()
    # followed by a pop(), the first read being redundant.
    if clean_environment:
        address = os.environ.pop("NOTIFY_SOCKET", None)
    else:
        address = os.environ.get("NOTIFY_SOCKET", None)

    # A valid address has at least two characters and starts with "@"
    # (abstract namespace socket) or "/" (filesystem socket).
    if not address or len(address) == 1 or address[0] not in ("@", "/"):
        return _empty

    if address[0] == "@":
        # systemd spells abstract sockets with a leading "@"; the real
        # AF_UNIX address uses a NUL byte instead.
        address = "\0" + address[1:]

    # SOCK_CLOEXEC was added in Python 3.2 and requires Linux >= 2.6.27.
    # It means "close this socket after fork/exec()".
    try:
        sock = socket.socket(socket.AF_UNIX,
                             socket.SOCK_DGRAM | socket.SOCK_CLOEXEC)
    except AttributeError:
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)

    return address, sock
68 | ||
69 | ||
class Watchdog(object):
    """Sends readiness/watchdog/stop notifications to systemd over a socket."""

    def __init__(self, period, address, sock):
        # "The daemon should then issue sd_notify("WATCHDOG=1") calls every half of that interval."
        self.period = float(period) / 2.0
        self.address = address
        self.sock = sock

    def __del__(self):
        # Best effort: tell systemd we are going away.
        self.systemd_stop()

    def sd_message(self, message):
        """Send `message` (bytes) to the systemd socket; True on success."""
        if not (self.address and self.sock and message):
            theLog.info("Couldn't message! {} {} {}".format(self.address, self.sock, message))
            return False
        assert isinstance(message, bytes)

        try:
            sent = self.sock.sendto(message, self.address)
        except socket.error:
            return False
        return sent > 0

    def watchdog_ping(self):
        """Ping the systemd watchdog (WATCHDOG=1)."""
        return self.sd_message(b"WATCHDOG=1")

    def systemd_ready(self):
        """Signal that the service is up (READY=1)."""
        return self.sd_message(b"READY=1")

    def systemd_stop(self):
        """Signal that the service is stopping (STOPPING=1)."""
        return self.sd_message(b"STOPPING=1")
106 | ||
107 | ||
def setup_watchdog():
    """Build a Watchdog from the systemd environment, or return None.

    Returns None when we were not launched under systemd (no
    notification socket) or when the unit file does not set a watchdog
    period (WatchdogSec).
    """
    notify = notify_socket()
    period = watchdog_period()
    if not notify[0]:
        theLog.info("No notification socket, not launched via systemd?")
        return None
    if not period:
        theLog.warning("Found systemd notification socket, but no watchdog period set in the unit file!")
        return None
    return Watchdog(period, *notify)
0 | The tests require some test data. | |
1 | ||
2 | To install the test data on Debian-based systems, first install core | |
tools as shown at http://wiki.apertium.org/wiki/Debian and then do
4 | ||
5 | sudo apt-get install apertium-sme-nob apertium-es-en | |
6 | mkdir ~/apy-testdata | |
7 | cd ~/apy-testdata | |
8 | svn co https://svn.code.sf.net/p/apertium/svn/languages/apertium-nno | |
9 | cd apertium-nno | |
10 | ./autogen.sh | |
11 | make -j4 | |
12 | ||
13 | Now go back to the apy directory, and do | |
14 | ||
15 | NONPAIRS=~/apy-testdata t/run-tests | |
16 | ||
17 | to run the tests. |
#!/bin/bash

### Put e.g. in
#
# #!/bin/sh
# NONPAIRS=/path/to/apertium-nno t/run-tests
#
### in your .git/hooks/pre-commit and chmod +x .git/hooks/pre-commit
###
### Optional first argument is a free port number to use.

### Tests (TODO: get these from a file instead):
### The arrays below are parallel: test i sends INPUTS[i], expects
### OUTPUTS[i], queries mode MODES[i] on endpoint TYPES[i], and, when
### EXTRACTS[i] is non-empty, extracts the response with that jq filter.
declare -ar INPUTS=( "government" "ja" "ikkje" "ja<ij>" "^ja<ij>$" "ignored" "ignored")
declare -ar OUTPUTS=( "Gobierno" "og" "ikkje/ikkje<adv>" "ja" "ja" "400" "400")
declare -ar MODES=( "eng|spa" "sme|nob" "nno" "nno" "nno" "typomode" "non|mod")
declare -ar TYPES=( "translate" "translate" "analyse" "generate" "generate" "translate?" "translate?")
declare -ar EXTRACTS=( "" "" "" "" "" ".code" ".code")

### Paths to apertium test data:
### The tests assume you have apertium-sme-nob and apertium-en-es
### installed, and apertium-nno checked out from SVN and compiled and
### available from INSTALLEDPAIRS and NONPAIRS directories
### respectively:
declare -r INSTALLEDPAIRS=${INSTALLEDPAIRS:-/usr/share/apertium}
declare -r NONPAIRS=${NONPAIRS:-/l/a/languages}
### You don't have to change these variables here, instead run the
### script like this:
# $ NONPAIRS=/path/to/apertium-nno t/run-tests
### to set the path to apertium-nno before running, or
# $ NONPAIRS=/path/to/apertium-nno INSTALLEDPAIRS=~/local t/run-tests
### to set both paths before running.


### Actual script follows:
set -e -u -o pipefail

# Port the test APY instance listens on (optional first argument).
PORT="${1:-2737}"
APYPID=
# Kill the background APY server (if we started one) on script exit.
cleanup () {
    [[ -n ${APYPID} ]] && kill "${APYPID}"
}
trap cleanup EXIT

# ANSI sequences: clear the line, return to column 1, then a green check
# mark or a red cross.
declare -r CHECK="[2K[999D[1;32m✓[00m"
declare -r CROSS="[2K[999D[1;31m❌[00m"
45 | ||
# Poll the APY root URL once a second until it responds, for at most
# max_secs seconds.  Returns 0 on success, 1 if APY never answered.
wait_for_startup () {
    local -i max_secs=10
    local -i i=0
    while [[ $i -lt ${max_secs} ]]; do
        echo -n "."
        sleep 1
        if curl -s "http://localhost:${PORT}" >/dev/null; then
            echo "${CHECK} APY seems to have started up"
            return 0
        fi
        # `|| true` guards against set -e: (( i++ )) exits non-zero
        # when the pre-increment value is 0.
        (( i++ )) || true
    done
    echo "${CROSS} Waited ${max_secs} secs without any response from APY"
    return 1
}
61 | ||
# Read an APY JSON response on stdin and print the value under test.
# $1 is the test index; when EXTRACTS[$1] is non-empty it is used as a
# jq filter, otherwise a default filter is chosen from the test type.
extract_response () {
    local type=${TYPES[$1]}
    local extract=${EXTRACTS[$1]}
    if [[ ${extract} = "" ]]; then
        case ${type} in
            translate)
                jq -r .responseData.translatedText
                ;;
            generate|analyse)
                # Responses are arrays of pairs; keep every odd field,
                # i.e. the analysis/generation outputs.
                jq -r .[][] | awk 'NR%2==1'
                ;;
            *)
                echo "Unknown test type ${type} and no method given in EXTRACTS" >&2
                exit 1
                ;;
        esac
    else
        jq -r "${extract}"
    fi
}
82 | ||
# Check that the mode needed by test $1 is available from the running
# APY by querying the relevant /list endpoint.  Unknown test types are
# not checked (we let them slide).
ensure_installed () {
    local type=${TYPES[$1]}
    local mode=${MODES[$1]}
    case ${type} in
        translate)
            curl -s "http://localhost:${PORT}/list?q=pairs" \
                | jq -e ".responseData|map(.sourceLanguage+\"|\"+.targetLanguage)|index(\"$mode\")" &>/dev/null
            ;;
        generate)
            curl -s "http://localhost:${PORT}/list?q=generators" \
                | jq -e "has(\"${mode}\")" &>/dev/null
            ;;
        analyse)
            curl -s "http://localhost:${PORT}/list?q=analysers" \
                | jq -e "has(\"${mode}\")" &>/dev/null
            ;;
        # Anything else we let slide
    esac
}
102 | ||
# Run test number $1: query APY, extract the response, and compare it
# with the expected output.  Returns 0 on success, 1 on failure.
run_test () {
    local -ri i=$1
    local -r in=${INPUTS[$i]}
    local -r mode=${MODES[$i]}
    local -r type=${TYPES[$i]}
    local url="http://localhost:${PORT}/${type}?lang=${mode}&q=${in}"
    if [[ ${type} = translate ]]; then
        # The /translate endpoint takes langpair= instead of lang=
        url="http://localhost:${PORT}/translate?langpair=${mode}&q=${in}"
    fi
    if ! ensure_installed "$i"; then
        cat <<EOF
[1;31m❌[00m TEST FAILED FOR ${mode} ${type}
It seems like ${mode} (${type}) is not installed; install ${mode} and
set NONPAIRS/INSTALLEDPAIRS variables before running this script.

EOF
        return 1
    fi
    local -r got=$(curl -s "${url}" | extract_response "$i")
    local -r want=${OUTPUTS[$i]}
    # Quote the right-hand side so ${want} is compared literally; an
    # unquoted RHS inside [[ != ]] is treated as a glob pattern.
    if [[ ${got} != "${want}" ]]; then
        cat <<EOF

[1;31m❌[00m TEST FAILED FOR ${mode} ${type}
WANTED: "${want}"
GOT: "${got}"

EOF
        return 1
    fi
    return 0
}
135 | ||
# Run every test in the INPUTS table, plus a final sanity check that a
# bogus language pair yields error code 400.  Returns the number of
# failed tests (0 on full success).
run_tests () {
    local -i failures=0

    for (( i=0; i<${#INPUTS[@]}; i++ )); do
        if ! run_test "$i"; then
            (( failures++ )) || true
        fi
    done

    local got=$(curl -s "http://localhost:${PORT}/translate?langpair=typo&q=whatever" | jq -r .code)
    # String comparison: with -ne, a non-numeric response (e.g. "null"
    # from jq) would make [[ ]] error out instead of counting a failure.
    if [[ "${got}" != 400 ]]; then
        (( failures++ )) || true
    fi

    if [[ ${failures} -eq 0 ]]; then
        cat <<EOF
${CHECK} All $i tests passed
EOF
    else
        cat <<EOF
${CROSS} Ran $i tests, ${failures} failures
EOF
        return "${failures}"
    fi
    return 0
}
162 | ||
163 | ||
# Sanity checks, server start-up and test execution.
if ! command -V jq &>/dev/null; then
    # jq is required by extract_response/ensure_installed.  Write the
    # hint to stderr (the original redirected echo's own stderr to
    # /dev/null) and bail out instead of failing confusingly later.
    echo "Please install jq, e.g. 'sudo apt-get install jq'" >&2
    exit 1
fi

if netstat -lnt|awk "\$4~/:${PORT}\$/"|grep -q .; then
    lsof -i :"${PORT}"
    echo >&2
    echo "Port ${PORT} seems taken, can't run tests" >&2
    echo "(you can pass an alternative port as first argument to this script)" >&2
    exit 10
fi

cd "$(dirname "$0")"
rm -f apertium-apy.log apertium-apy.err
../servlet.py -p "${PORT}" -d -j1 -i3 -u1 -n1 -m3 -s "${NONPAIRS}" -- "${INSTALLEDPAIRS}" & APYPID=$!
wait_for_startup
if run_tests; then
    exit $?
else
    result=$?
    # Show the server logs to help diagnose the failures.
    tail -n 999 apertium-apy.log apertium-apy.err
    exit "${result}"
fi

# TODO: concurrency tests
80 | 80 | <select name="langpair"> |
81 | 81 | </select> |
82 | 82 | <br> |
83 | <input type="checkbox" id="instant"> Instant translation | |
84 | <br> | |
83 | 85 | <label>Type text here.</label> |
84 | <textarea name='q' class='block'></textarea> | |
86 | <textarea name='q' id='q' class='block'></textarea> | |
85 | 87 | <br> |
86 | 88 | <input type="button" value="Submit" onClick="sendRequest()"> |
87 | 89 | </form> |
88 | 90 | <div id="responsediv"> |
89 | 91 | </div> |
92 | ||
93 | <script> | |
94 | ||
var timer, lastPunct = false, punct = [46, 33, 58, 63, 47, 45, 190, 171, 49], timeoutPunct = 500, timeoutOther = 1000;
var q = document.getElementById("q");
// Schedule an instant-translation request a short while after the user
// stops typing; punctuation keys use a shorter delay than other keys.
function timeout (event) {
    // Parenthesize the OR: without it, `&&` binds tighter and a plain
    // Enter (13) would bail out even when lastPunct is false.
    if(lastPunct && (event.keyCode === 32 || event.keyCode === 13)) {
        // Don't override the short timeout for simple space-after-punctuation
        return;
    }

    if(timer && document.getElementById("instant").checked) {
        clearTimeout(timer);
    }

    // Renamed from `timeout` to avoid shadowing this function's name.
    var delay;

    if (punct.indexOf(event.keyCode) !== -1) {
        delay = timeoutPunct;
        lastPunct = true;
    }
    else {
        delay = timeoutOther;
        lastPunct = false;
    }

    timer = setTimeout(function () {
        if(document.getElementById("instant").checked) {
            sendRequest();
        }
    }, delay);
}
q.addEventListener("keyup", timeout);
q.addEventListener("paste", timeout);
127 | ||
128 | ||
129 | ||
130 | ||
131 | ||
132 | </script> | |
90 | 133 | </body> |
91 | </html>⏎ | |
134 | </html> |
2720 | 2720 | INSERT INTO "languageNames" VALUES(2735,'en','nl','Dutch'); |
2721 | 2721 | INSERT INTO "languageNames" VALUES(2736,'en','nn','Norwegian Nynorsk'); |
2722 | 2722 | INSERT INTO "languageNames" VALUES(2737,'en','no','Norwegian'); |
2723 | INSERT INTO "languageNames" VALUES(2738,'en','nog','Nogai'); | |
2723 | INSERT INTO "languageNames" VALUES(2738,'en','nog','Nogay'); | |
2724 | 2724 | INSERT INTO "languageNames" VALUES(2739,'en','oc','Occitan'); |
2725 | 2725 | INSERT INTO "languageNames" VALUES(2740,'en','os','Ossetic'); |
2726 | 2726 | INSERT INTO "languageNames" VALUES(2741,'en','pa','Punjabi'); |
2755 | 2755 | INSERT INTO "languageNames" VALUES(2770,'en','tl','Tagalog'); |
2756 | 2756 | INSERT INTO "languageNames" VALUES(2771,'en','tr','Turkish'); |
2757 | 2757 | INSERT INTO "languageNames" VALUES(2772,'en','tt','Tatar'); |
2758 | INSERT INTO "languageNames" VALUES(2773,'en','tyv','Tuvinian'); | |
2758 | INSERT INTO "languageNames" VALUES(2773,'en','tyv','Tuvan'); | |
2759 | 2759 | INSERT INTO "languageNames" VALUES(2774,'en','udm','Udmurt'); |
2760 | 2760 | INSERT INTO "languageNames" VALUES(2775,'en','uk','Ukrainian'); |
2761 | 2761 | INSERT INTO "languageNames" VALUES(2776,'en','ur','Urdu'); |
8893 | 8893 | INSERT INTO "fixes" VALUES(NULL,'ky','tk','түркмөнчө'); |
8894 | 8894 | INSERT INTO "fixes" VALUES(NULL,'ky','tr','түркчө'); |
8895 | 8895 | INSERT INTO "fixes" VALUES(NULL,'ky','uz','өзбекче'); |
8896 | INSERT INTO "fixes" VALUES(NULL,'ky','kaa','каракалпакча'); | |
8897 | INSERT INTO "fixes" VALUES(NULL,'ky','tyv','тывача'); | |
8898 | INSERT INTO "fixes" VALUES(NULL,'ky','sah','сахача (якутча)'); | |
8899 | INSERT INTO "fixes" VALUES(NULL,'ky','nog','ногойчо'); | |
8896 | 8900 | INSERT INTO "fixes" VALUES(NULL,'kk','az','әзірбайжан тілі'); |
8897 | 8901 | INSERT INTO "fixes" VALUES(NULL,'kk','ba','башқортша'); |
8898 | 8902 | INSERT INTO "fixes" VALUES(NULL,'kk','en','ағылшынша'); |
9109 | 9113 | INSERT INTO "fixes" VALUES(NULL,'kaa','kk','qazaqsha'); |
9110 | 9114 | INSERT INTO "fixes" VALUES(NULL,'kaa','kaa','qaraqalpaqsha'); |
9111 | 9115 | INSERT INTO "fixes" VALUES(NULL,'kaa','uz','oʻzbekshe'); |
9116 | INSERT INTO "fixes" VALUES(NULL,'kaa','tt','tatarsha'); | |
9117 | INSERT INTO "fixes" VALUES(NULL,'kaa','ky','qırgʻızsha'); | |
9118 | INSERT INTO "fixes" VALUES(NULL,'uz','kaa','qoraqalpoqcha'); | |
9119 | INSERT INTO "fixes" VALUES(NULL,'uz','nog','noʻgʻaycha'); | |
9120 | INSERT INTO "fixes" VALUES(NULL,'uz','sah','saxacha (yoqutcha)'); | |
9121 | INSERT INTO "fixes" VALUES(NULL,'uz','chv','chuvashcha'); | |
9122 | INSERT INTO "fixes" VALUES(NULL,'uz','tyv','tuvacha'); | |
9123 | INSERT INTO "fixes" VALUES(NULL,'uz','kum','qoʻmiqcha'); | |
9124 | INSERT INTO "fixes" VALUES(NULL,'uz','bak','boshqircha'); | |
9125 | INSERT INTO "fixes" VALUES(NULL,'tyv','tyv','тыва дылда'); | |
9126 | INSERT INTO "fixes" VALUES(NULL,'nog','nog','ногъайша'); | |
9127 | INSERT INTO "fixes" VALUES(NULL,'sah','sah','сахалыы'); | |
9128 | INSERT INTO "fixes" VALUES(NULL,'en','crh','Crimean Tatar'); | |
9129 | INSERT INTO "fixes" VALUES(NULL,'uz','crh','qrimtatarcha'); | |
9130 | INSERT INTO "fixes" VALUES(NULL,'az','crh','krımtatarca'); | |
9131 | INSERT INTO "fixes" VALUES(NULL,'bak','crh','Ҡырымтатарса'); | |
9132 | INSERT INTO "fixes" VALUES(NULL,'chv','crh','крымтутарла'); | |
9133 | INSERT INTO "fixes" VALUES(NULL,'crh','crh','qırımtatarca'); | |
9134 | INSERT INTO "fixes" VALUES(NULL,'kaa','crh','qırımtatarsha'); | |
9135 | INSERT INTO "fixes" VALUES(NULL,'tt','crh','кырымтатарча'); | |
9136 | INSERT INTO "fixes" VALUES(NULL,'ky','crh','кырымтатарча'); | |
9137 | INSERT INTO "fixes" VALUES(NULL,'kk','crh','қырымтатарша'); | |
9138 | INSERT INTO "fixes" VALUES(NULL,'tr','crh','Kırımtatarca'); | |
9139 | INSERT INTO "fixes" VALUES(NULL,'uig','crh','قرىمتاتارچا'); | |
9140 | INSERT INTO "fixes" VALUES(NULL,'kk','sah','сахаша (якутша)'); | |
9141 | INSERT INTO "fixes" VALUES(NULL,'kk','kaa','қарақалпақша'); | |
9142 | INSERT INTO "fixes" VALUES(NULL,'kk','tyv','тываша'); | |
9143 | INSERT INTO "fixes" VALUES(NULL,'ca','crh','tàtar de Crimea'); | |
9144 | INSERT INTO "fixes" VALUES(NULL,'eu','crh','Krimeako tatarera'); | |
9145 | INSERT INTO "fixes" VALUES(NULL,'en','oct_aran','Occitan Aranese'); | |
9146 | INSERT INTO "fixes" VALUES(NULL,'de','oct_aran','Okzitanisch Aranesisch'); | |
9112 | 9147 | |
9113 | 9148 | INSERT INTO "languageNames" (lg, inLg, name) select lg, inLg, name from "fixes"; |
9114 | 9149 | COMMIT; |
15 | 15 | "urd-hin": ("تحریر ہیں", "लेख हैं"), |
16 | 16 | "afr-nld": ("ek", "ik"), |
17 | 17 | "ara-mlt": ("و", "u"), |
18 | "ara-mlt_translit": ("و", "u"), | |
19 | "arg-cat": ("e", "és"), | |
20 | "cat-arg": ("la", "a"), | |
18 | 21 | "arg-spa": ("e", "es"), |
19 | 22 | "spa-arg": ("la", "a"), |
20 | 23 | "ast-spa": ("nin", "ni"), |
53 | 56 | "mkd-eng": ("триесет", "thirty"), |
54 | 57 | "mlt-ara": ("u", "و"), |
55 | 58 | "nld-afr": ("ik", "ek"), |
59 | "nno-swe": ("kva", "vad"), | |
60 | "swe-nno": ("vad", "kva"), | |
61 | "swe-nob": ("vad", "hva"), | |
56 | 62 | "nno-dan": ("kva", "hvad"), |
63 | "dan-nno": ("hvad", "kva"), | |
64 | "dan-nob": ("hvad", "hva"), | |
57 | 65 | "nno_e-nno": ("korleis", "korleis"), |
58 | 66 | "nno-nob": ("korleis", "hvordan"), |
59 | 67 | "nno-nno_e": ("korleis", "korleis"), |
88 | 96 | "spa-por": ("tengo", "tenho"), |
89 | 97 | "spa-por_BR": ("tengo", "tenho"), |
90 | 98 | "swe-dan": ("vad", "hvad"), |
99 | "dan-swe": ("hvad", "vad"), | |
91 | 100 | "swe-isl": ("Av", "Af"), |
92 | 101 | "tat-kaz": ("ул", "ол"), |
93 | 102 | } |
11 | 11 | # By default, if it restarts >10 times within 5 secs, it marks it as failed and gives up: |
12 | 12 | Restart=always |
13 | 13 | |
14 | # Restart the service if it doesn't do a watchdog ping within 10 seconds: | |
15 | WatchdogSec=10s | |
16 | ||
14 | 17 | # No reason to have access to shared tmp files: |
15 | 18 | PrivateTmp=yes |
16 | 19 |
51 | 51 | INSERT INTO "fixes" VALUES(NULL,'uz','ug','uyg\'urcha'); |
52 | 52 | INSERT INTO "fixes" VALUES(NULL,'kk','ug','ұйғұрша'); |
53 | 53 | INSERT INTO "fixes" VALUES(NULL,'ky','ug','уйгурча'); |
54 | INSERT INTO "fixes" VALUES(NULL,'en','crh','Crimean Tatar'); | |
54 | 55 | CREATE TABLE languageNames (id integer primary key, lg text, inLg text, name text, unique(lg, inLg) on conflict replace); |
55 | 56 | INSERT INTO "languageNames" VALUES(NULL,'sah','af','Аппырыкааныстыы'); |
56 | 57 | INSERT INTO "languageNames" VALUES(NULL,'sah','ar','Араабтыы'); |
0 | # From https://github.com/ajdavis/toro/ | |
1 | ||
2 | # Toro Copyright (c) 2012 A. Jesse Jiryu Davis | |
3 | ||
4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may | |
5 | # not use this file except in compliance with the License. You may obtain | |
6 | # a copy of the License at | |
7 | ||
8 | # http://www.apache.org/licenses/LICENSE-2.0 | |
9 | ||
10 | # Unless required by applicable law or agreed to in writing, software | |
11 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | |
12 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | |
13 | # License for the specific language governing permissions and limitations | |
14 | # under the License. | |
15 | ||
16 | ||
17 | import contextlib | |
18 | import heapq | |
19 | import collections | |
20 | from functools import partial | |
21 | from queue import Full, Empty | |
22 | ||
23 | from tornado import ioloop | |
24 | from tornado import gen | |
25 | from tornado.concurrent import Future | |
26 | ||
27 | ||
28 | version_tuple = (0, 8, '+') | |
29 | ||
30 | version = '.'.join(map(str, version_tuple)) | |
31 | """Current version of Toro.""" | |
32 | ||
33 | ||
34 | __all__ = [ | |
35 | # Exceptions | |
36 | 'NotReady', 'AlreadySet', 'Full', 'Empty', 'Timeout', | |
37 | ||
38 | # Primitives | |
39 | 'AsyncResult', 'Event', 'Condition', 'Semaphore', 'BoundedSemaphore', | |
40 | 'Lock', | |
41 | ||
42 | # Queues | |
43 | 'Queue', 'PriorityQueue', 'LifoQueue', 'JoinableQueue' | |
44 | ] | |
45 | ||
46 | ||
class NotReady(Exception):
    """Raised when accessing an :class:`AsyncResult` that has no value yet."""
50 | ||
51 | ||
class AlreadySet(Exception):
    """Raised when setting a value on an :class:`AsyncResult` that already
    has one."""
56 | ||
57 | ||
class Timeout(Exception):
    """Raised when a deadline passes before a Future is ready."""

    def __str__(self):
        """Fixed textual representation; the deadline is not recorded."""
        return "Timeout"
63 | ||
64 | ||
class _TimeoutFuture(Future):
    """A tornado Future that fails with :exc:`Timeout` once its deadline passes."""

    def __init__(self, deadline, io_loop):
        """Create a Future with optional deadline.

        If deadline is not None, it may be a number denoting a unix timestamp
        (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` object
        for a deadline relative to the current time.

        set_exception(toro.Timeout()) is executed after a timeout.
        """

        super(_TimeoutFuture, self).__init__()
        self.io_loop = io_loop
        if deadline is not None:
            # Schedule the timeout; resolving the future cancels it below.
            callback = partial(self.set_exception, Timeout())
            self._timeout_handle = io_loop.add_timeout(deadline, callback)
        else:
            self._timeout_handle = None

    def set_result(self, result):
        # Cancel the pending timeout before delivering the result.
        self._cancel_timeout()
        super(_TimeoutFuture, self).set_result(result)

    def set_exception(self, exception):
        # Also called by the timeout callback itself; cancelling is then a no-op.
        self._cancel_timeout()
        super(_TimeoutFuture, self).set_exception(exception)

    def _cancel_timeout(self):
        # Remove the scheduled timeout callback, if any, so a completed
        # future is not later failed with Timeout.
        if self._timeout_handle:
            self.io_loop.remove_timeout(self._timeout_handle)
            self._timeout_handle = None
97 | ||
98 | ||
class _ContextManagerList(list):
    """A list of context managers that can itself be used in a `with` block.

    Entering/exiting the list enters/exits every member, in list order.
    """

    def __enter__(self, *args, **kwargs):
        for member in self:
            member.__enter__(*args, **kwargs)

    def __exit__(self, *args, **kwargs):
        for member in self:
            member.__exit__(*args, **kwargs)
107 | ||
108 | ||
class _ContextManagerFuture(Future):
    """A Future that can be used with the "with" statement.

    When a coroutine yields this Future, the return value is a context manager
    that can be used like:

    with (yield future):
        pass

    At the end of the block, the Future's exit callback is run. Used for
    Lock.acquire() and Semaphore.acquire().
    """
    def __init__(self, wrapped, exit_callback):
        super(_ContextManagerFuture, self).__init__()
        # Mirror the wrapped future's outcome onto this one.
        wrapped.add_done_callback(self._done_callback)
        self.exit_callback = exit_callback

    def _done_callback(self, wrapped):
        # Propagate either the exception or the result of the inner future.
        if wrapped.exception():
            self.set_exception(wrapped.exception())
        else:
            self.set_result(wrapped.result())

    def result(self):
        # Re-raise if the wrapped future failed; the caller never gets a
        # context manager in that case.
        if self.exception():
            raise self.exception()

        # Otherwise return a context manager that cleans up after the block.
        @contextlib.contextmanager
        def f():
            try:
                yield
            finally:
                # Runs when the caller's `with` block ends, however it ends.
                self.exit_callback()
        return f()
144 | ||
145 | ||
def _consume_expired_waiters(waiters):
    """Drop already-done (timed-out) futures from the head of the deque."""
    while waiters and waiters[0].done():
        waiters.popleft()
150 | ||
151 | ||
152 | _null_result = object() | |
153 | ||
154 | ||
class AsyncResult(object):
    """A one-time event that stores a value or an exception.

    The only distinction between AsyncResult and a simple Future is that
    AsyncResult lets coroutines wait with a deadline. The deadline can be
    configured separately for each waiter.

    An :class:`AsyncResult` instance cannot be reset.

    :Parameters:
      - `io_loop`: Optional custom IOLoop.
    """

    def __init__(self, io_loop=None):
        self.io_loop = io_loop or ioloop.IOLoop.current()
        self.value = _null_result  # sentinel: "no value set yet"
        self.waiters = []

    def __str__(self):
        state = 'value=%r' % self.value if self.ready() else 'unset'
        suffix = ' waiters[%s]' % len(self.waiters) if self.waiters else ''
        return '<%s %s%s>' % (self.__class__.__name__, state, suffix)

    def set(self, value):
        """Set a value and wake up all the waiters."""
        if self.ready():
            raise AlreadySet

        self.value = value
        pending, self.waiters = self.waiters, []
        for fut in pending:
            if not fut.done():  # Might have timed out
                fut.set_result(value)

    def ready(self):
        """Whether :meth:`set` has been called."""
        return self.value is not _null_result

    def get(self, deadline=None):
        """Get a value once :meth:`set` is called. Returns a Future.

        The Future's result will be the value. The Future raises
        :exc:`toro.Timeout` if no value is set before the deadline.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for
            a deadline relative to the current time.
        """
        future = _TimeoutFuture(deadline, self.io_loop)
        if self.ready():
            future.set_result(self.value)
            return future
        self.waiters.append(future)
        return future

    def get_nowait(self):
        """Get the value if ready, or raise :class:`NotReady`."""
        if not self.ready():
            raise NotReady
        return self.value
223 | ||
224 | ||
class Condition(object):
    """A condition allows one or more coroutines to wait until notified.

    Like a standard Condition_, but does not need an underlying lock that
    is acquired and released.

    .. _Condition: http://docs.python.org/library/threading.html#threading.Condition

    :Parameters:
      - `io_loop`: Optional custom IOLoop.
    """

    def __init__(self, io_loop=None):
        self.io_loop = io_loop or ioloop.IOLoop.current()
        # FIFO of _TimeoutFuture objects, one per blocked wait() call.
        self.waiters = collections.deque()

    def __str__(self):
        result = '<%s' % (self.__class__.__name__, )
        if self.waiters:
            result += ' waiters[%s]' % len(self.waiters)
        return result + '>'

    def wait(self, deadline=None):
        """Wait for :meth:`notify`. Returns a Future.

        :exc:`~toro.Timeout` is executed after a timeout.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        future = _TimeoutFuture(deadline, self.io_loop)
        self.waiters.append(future)
        return future

    def notify(self, n=1):
        """Wake up `n` waiters.

        :Parameters:
          - `n`: The number of waiters to awaken (default: 1)
        """
        # Collect up to n live waiters first, then resolve them, so a
        # callback fired by set_result cannot disturb the scan.
        awakened = []
        while self.waiters and n:
            candidate = self.waiters.popleft()
            if candidate.done():
                continue  # Already timed out; does not count against n
            awakened.append(candidate)
            n -= 1

        for future in awakened:
            future.set_result(None)

    def notify_all(self):
        """Wake up all waiters."""
        self.notify(len(self.waiters))
280 | ||
281 | ||
282 | # TODO: show correct examples that avoid thread / process issues w/ concurrent.futures.Future | |
class Event(object):
    """An event blocks coroutines until its internal flag is set to True.

    Similar to threading.Event_.

    .. _threading.Event: http://docs.python.org/library/threading.html#threading.Event

    .. seealso:: :doc:`examples/event_example`

    :Parameters:
      - `io_loop`: Optional custom IOLoop.
    """

    def __init__(self, io_loop=None):
        self.io_loop = io_loop or ioloop.IOLoop.current()
        # Waiters block on this condition until the flag becomes true.
        self.condition = Condition(io_loop=io_loop)
        self._flag = False

    def __str__(self):
        return '<%s %s>' % (
            self.__class__.__name__, 'set' if self._flag else 'clear')

    def is_set(self):
        """Return ``True`` if and only if the internal flag is true."""
        return self._flag

    def set(self):
        """Set the internal flag to ``True``. All waiters are awakened.
        Calling :meth:`wait` once the flag is true will not block.
        """
        self._flag = True
        self.condition.notify_all()

    def clear(self):
        """Reset the internal flag to ``False``. Calls to :meth:`wait`
        will block until :meth:`set` is called.
        """
        self._flag = False

    def wait(self, deadline=None):
        """Block until the internal flag is true. Returns a Future.

        The Future raises :exc:`~toro.Timeout` after a timeout.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        # Fast path: flag already set, so hand back an already-resolved
        # Future instead of registering with the condition.
        if self._flag:
            future = _TimeoutFuture(None, self.io_loop)
            future.set_result(None)
            return future
        else:
            return self.condition.wait(deadline)
339 | ||
340 | ||
class Queue(object):
    """Create a queue object with a given maximum size.

    If `maxsize` is 0 (the default) the queue size is unbounded.

    Unlike the `standard Queue`_, you can reliably know this Queue's size
    with :meth:`qsize`, since your single-threaded Tornado application won't
    be interrupted between calling :meth:`qsize` and doing an operation on the
    Queue.

    **Examples:**

    :doc:`examples/producer_consumer_example`

    :doc:`examples/web_spider_example`

    :Parameters:
      - `maxsize`: Optional size limit (no limit by default).
      - `io_loop`: Optional custom IOLoop.

    .. _`Gevent's Queue`: http://www.gevent.org/gevent.queue.html

    .. _`standard Queue`: http://docs.python.org/library/queue.html#Queue.Queue
    """
    def __init__(self, maxsize=0, io_loop=None):
        self.io_loop = io_loop or ioloop.IOLoop.current()
        # Reject None explicitly rather than letting the < 0 comparison
        # produce a confusing TypeError.
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize

        # _TimeoutFutures of coroutines blocked in get(), oldest first.
        self.getters = collections.deque([])
        # (item, _TimeoutFuture) pairs of coroutines blocked in put().
        self.putters = collections.deque([])
        self._init(maxsize)

    def _init(self, maxsize):
        # Container hook: subclasses override to use a different structure
        # (e.g. a heap in PriorityQueue); `maxsize` is unused here.
        self.queue = collections.deque()

    def _get(self):
        # Retrieval hook: FIFO by default; subclasses override.
        return self.queue.popleft()

    def _put(self, item):
        # Insertion hook: subclasses override (e.g. JoinableQueue counts
        # unfinished tasks here).
        self.queue.append(item)

    def __repr__(self):
        return '<%s at %s %s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._format())

    def _format(self):
        # Human-readable state summary shared by __repr__ and __str__.
        result = 'maxsize=%r' % (self.maxsize, )
        if getattr(self, 'queue', None):
            result += ' queue=%r' % self.queue
        if self.getters:
            result += ' getters[%s]' % len(self.getters)
        if self.putters:
            result += ' putters[%s]' % len(self.putters)
        return result

    def _consume_expired_putters(self):
        # Delete waiters at the head of the queue who've timed out
        while self.putters and self.putters[0][1].done():
            self.putters.popleft()

    def qsize(self):
        """Number of items in the queue"""
        return len(self.queue)

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def empty(self):
        """Return ``True`` if the queue is empty, ``False`` otherwise."""
        return not self.queue

    def full(self):
        """Return ``True`` if there are `maxsize` items in the queue.

        .. note:: if the Queue was initialized with `maxsize=0`
            (the default), then :meth:`full` is never ``True``.
        """
        if self.maxsize == 0:
            return False
        else:
            return self.maxsize <= self.qsize()

    def put(self, item, deadline=None):
        """Put an item into the queue. Returns a Future.

        The Future blocks until a free slot is available for `item`, or raises
        :exc:`toro.Timeout`.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        _consume_expired_waiters(self.getters)
        future = _TimeoutFuture(deadline, self.io_loop)
        if self.getters:
            # Hand the item straight through to the oldest waiting get().
            assert not self.queue, "queue non-empty, why are getters waiting?"
            getter = self.getters.popleft()

            # Use _put and _get instead of passing item straight to getter, in
            # case a subclass has logic that must run (e.g. JoinableQueue).
            self._put(item)
            getter.set_result(self._get())
            future.set_result(None)
        else:
            if self.maxsize and self.maxsize <= self.qsize():
                # Queue is full: park (item, future) until a get() frees a slot.
                self.putters.append((item, future))
            else:
                self._put(item)
                future.set_result(None)

        return future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise queue.Full.
        """
        _consume_expired_waiters(self.getters)
        if self.getters:
            # Same hand-off as put(), but with no future to resolve.
            assert not self.queue, "queue non-empty, why are getters waiting?"
            getter = self.getters.popleft()

            self._put(item)
            getter.set_result(self._get())
        elif self.maxsize and self.maxsize <= self.qsize():
            raise Full
        else:
            self._put(item)

    def get(self, deadline=None):
        """Remove and return an item from the queue. Returns a Future.

        The Future blocks until an item is available, or raises
        :exc:`toro.Timeout`.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        self._consume_expired_putters()
        future = _TimeoutFuture(deadline, self.io_loop)
        if self.putters:
            # A put() was parked waiting for space: accept its item now,
            # resolve its future, and return an item to this getter.
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self.putters.popleft()
            self._put(item)
            putter.set_result(None)
            future.set_result(self._get())
        elif self.qsize():
            future.set_result(self._get())
        else:
            self.getters.append(future)

        return future

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        :exc:`queue.Empty`.
        """
        self._consume_expired_putters()
        if self.putters:
            # Same parked-putter hand-off as get(), returning synchronously.
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self.putters.popleft()
            self._put(item)
            putter.set_result(None)
            return self._get()
        elif self.qsize():
            return self._get()
        else:
            raise Empty
527 | ||
528 | ||
class PriorityQueue(Queue):
    """A subclass of :class:`Queue` that retrieves entries in priority order
    (lowest first).

    Entries are typically tuples of the form: ``(priority number, data)``.

    :Parameters:
      - `maxsize`: Optional size limit (no limit by default).
      - `io_loop`: Optional custom IOLoop.
    """
    def _init(self, maxsize):
        # Use a plain list as a binary heap; `maxsize` is enforced by Queue.
        self.queue = []

    def _put(self, item, heappush=heapq.heappush):
        # heappush bound as a default arg: a fast local lookup per call.
        heappush(self.queue, item)

    def _get(self, heappop=heapq.heappop):
        return heappop(self.queue)
548 | ||
549 | ||
class LifoQueue(Queue):
    """A subclass of :class:`Queue` that retrieves most recently added entries
    first.

    :Parameters:
      - `maxsize`: Optional size limit (no limit by default).
      - `io_loop`: Optional custom IOLoop.
    """
    def _init(self, maxsize):
        # A list used as a stack; `maxsize` is enforced by Queue.
        self.queue = []

    def _put(self, item):
        self.queue.append(item)

    def _get(self):
        # Pop from the end: last in, first out.
        return self.queue.pop()
567 | ||
568 | ||
class JoinableQueue(Queue):
    """A subclass of :class:`Queue` that additionally has :meth:`task_done`
    and :meth:`join` methods.

    .. seealso:: :doc:`examples/web_spider_example`

    :Parameters:
      - `maxsize`: Optional size limit (no limit by default).
      - `io_loop`: Optional custom IOLoop.
    """
    def __init__(self, maxsize=0, io_loop=None):
        Queue.__init__(self, maxsize=maxsize, io_loop=io_loop)
        # Count of items put() but not yet acknowledged via task_done().
        self.unfinished_tasks = 0
        # Set whenever unfinished_tasks reaches zero; join() waits on it.
        self._finished = Event(io_loop)
        self._finished.set()

    def _format(self):
        result = Queue._format(self)
        if self.unfinished_tasks:
            result += ' tasks=%s' % self.unfinished_tasks
        return result

    def _put(self, item):
        # Every enqueued item adds an unfinished task and blocks join().
        self.unfinished_tasks += 1
        self._finished.clear()
        Queue._put(self, item)

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each :meth:`get <Queue.get>` used to
        fetch a task, a subsequent call to :meth:`task_done` tells the queue
        that the processing on the task is complete.

        If a :meth:`join` is currently blocking, it will resume when all
        items have been processed (meaning that a :meth:`task_done` call was
        received for every item that had been :meth:`put <Queue.put>` into the
        queue).

        Raises ``ValueError`` if called more times than there were items
        placed in the queue.
        """
        if self.unfinished_tasks <= 0:
            raise ValueError('task_done() called too many times')
        self.unfinished_tasks -= 1
        if self.unfinished_tasks == 0:
            self._finished.set()

    def join(self, deadline=None):
        """Block until all items in the queue are processed. Returns a Future.

        The count of unfinished tasks goes up whenever an item is added to
        the queue. The count goes down whenever a consumer calls
        :meth:`task_done` to indicate that all work on the item is complete.
        When the count of unfinished tasks drops to zero, :meth:`join`
        unblocks.

        The Future raises :exc:`toro.Timeout` if the count is not zero before
        the deadline.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        return self._finished.wait(deadline)
636 | ||
637 | ||
class Semaphore(object):
    """A lock that can be acquired a fixed number of times before blocking.

    A Semaphore manages a counter representing the number of release() calls
    minus the number of acquire() calls, plus an initial value. The acquire()
    method blocks if necessary until it can return without making the counter
    negative.

    If not given, value defaults to 1.

    :meth:`acquire` supports the context manager protocol:

    >>> from tornado import gen
    >>> import toro
    >>> semaphore = toro.Semaphore()
    >>>
    >>> @gen.coroutine
    ... def f():
    ...    with (yield semaphore.acquire()):
    ...        assert semaphore.locked()
    ...
    ...    assert not semaphore.locked()

    .. note:: Unlike the standard threading.Semaphore_, a :class:`Semaphore`
      can tell you the current value of its :attr:`counter`, because code in a
      single-threaded Tornado app can check these values and act upon them
      without fear of interruption from another thread.

    .. _threading.Semaphore: http://docs.python.org/library/threading.html#threading.Semaphore

    .. seealso:: :doc:`examples/web_spider_example`

    :Parameters:
      - `value`: An int, the initial value (default 1).
      - `io_loop`: Optional custom IOLoop.
    """
    def __init__(self, value=1, io_loop=None):
        if value < 0:
            raise ValueError('semaphore initial value must be >= 0')

        # Implemented as a Queue pre-loaded with `value` tokens: acquire()
        # takes a token off the queue, release() puts one back.
        self.q = Queue(io_loop=io_loop)
        for _ in range(value):
            self.q.put_nowait(None)

        self._unlocked = Event(io_loop=io_loop)
        if value:
            self._unlocked.set()

    def __repr__(self):
        return '<%s at %s%s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s%s>' % (
            self.__class__.__name__, self._format())

    def _format(self):
        # Shared state summary for __repr__ and __str__.
        return ' counter=%s' % self.counter

    @property
    def counter(self):
        """An integer, the current semaphore value"""
        return self.q.qsize()

    def locked(self):
        """True if :attr:`counter` is zero"""
        return self.q.empty()

    def release(self):
        """Increment :attr:`counter` and wake one waiter.
        """
        self.q.put(None)
        if not self.locked():
            # The token was not handed straight to a waiting acquire(), so
            # the counter is now positive and the semaphore is unlocked.
            self._unlocked.set()

    def wait(self, deadline=None):
        """Wait for :attr:`locked` to be False. Returns a Future.

        The Future raises :exc:`toro.Timeout` after the deadline.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        return self._unlocked.wait(deadline)

    def acquire(self, deadline=None):
        """Decrement :attr:`counter`. Returns a Future.

        Block if the counter is zero and wait for a :meth:`release`. The
        Future raises :exc:`toro.Timeout` after the deadline.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        token_future = self.q.get(deadline)
        if self.q.empty():
            self._unlocked.clear()
        return _ContextManagerFuture(token_future, self.release)

    def __enter__(self):
        raise RuntimeError(
            "Use Semaphore like 'with (yield semaphore)', not like"
            " 'with semaphore'")

    __exit__ = __enter__
750 | ||
751 | ||
class BoundedSemaphore(Semaphore):
    """A semaphore that prevents release() being called too often.

    A bounded semaphore verifies that its current value never climbs above
    the value it started with; if it would, ``ValueError`` is raised.
    Semaphores usually guard resources with limited capacity, so releasing
    one more often than it was acquired is a sign of a bug.

    If not given, *value* defaults to 1.

    .. seealso:: :doc:`examples/web_spider_example`
    """
    def __init__(self, value=1, io_loop=None):
        # Remember the ceiling before delegating construction upward.
        self._initial_value = value
        super(BoundedSemaphore, self).__init__(value=value, io_loop=io_loop)

    def release(self):
        # Refuse to push the counter beyond its starting value.
        if self.counter >= self._initial_value:
            raise ValueError("Semaphore released too many times")
        return super(BoundedSemaphore, self).release()
772 | ||
773 | ||
class Lock(object):
    """A lock for coroutines.

    It is created unlocked. When unlocked, :meth:`acquire` changes the state
    to locked. When the state is locked, yielding :meth:`acquire` waits until
    a call to :meth:`release`.

    The :meth:`release` method should only be called in the locked state;
    an attempt to release an unlocked lock raises RuntimeError.

    When more than one coroutine is waiting for the lock, the first one
    registered is awakened by :meth:`release`.

    :meth:`acquire` supports the context manager protocol:

    >>> from tornado import gen
    >>> import toro
    >>> lock = toro.Lock()
    >>>
    >>> @gen.coroutine
    ... def f():
    ...    with (yield lock.acquire()):
    ...        assert lock.locked()
    ...
    ...    assert not lock.locked()

    .. note:: Unlike with the standard threading.Lock_, code in a
      single-threaded Tornado application can check if a :class:`Lock`
      is :meth:`locked`, and act on that information without fear that another
      thread has grabbed the lock, provided you do not yield to the IOLoop
      between checking :meth:`locked` and using a protected resource.

    .. _threading.Lock: http://docs.python.org/2/library/threading.html#lock-objects

    .. seealso:: :doc:`examples/lock_example`

    :Parameters:
      - `io_loop`: Optional custom IOLoop.
    """
    def __init__(self, io_loop=None):
        # A Lock is just a binary bounded semaphore: a single holder, and
        # releasing it while unheld is an error.
        self._block = BoundedSemaphore(value=1, io_loop=io_loop)

    def __str__(self):
        return "<%s _block=%s>" % (
            self.__class__.__name__,
            self._block)

    def acquire(self, deadline=None):
        """Attempt to lock. Returns a Future.

        The Future raises :exc:`toro.Timeout` if the deadline passes.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for a
            deadline relative to the current time.
        """
        return self._block.acquire(deadline)

    def release(self):
        """Unlock.

        If any coroutines are waiting for :meth:`acquire`,
        the first in line is awakened.

        If not locked, raise a RuntimeError.
        """
        if self.locked():
            self._block.release()
        else:
            raise RuntimeError('release unlocked lock')

    def locked(self):
        """``True`` if the lock has been acquired"""
        return self._block.locked()

    def __enter__(self):
        raise RuntimeError(
            "Use Lock like 'with (yield lock)', not like"
            " 'with lock'")

    __exit__ = __enter__
855 | ||
856 | ||
class RWLock(object):
    """A reader-writer lock for coroutines.

    It is created unlocked. When unlocked, :meth:`acquire_write` always changes
    the state to locked. When unlocked, :meth:`acquire_read` can change the
    state to locked, if :meth:`acquire_read` was called max_readers times. When
    the state is locked, yielding :meth:`acquire_read`/:meth:`acquire_write`
    waits until a call to :meth:`release_write` in case of locking on write, or
    :meth:`release_read` in case of locking on read.

    The :meth:`release_read` method should only be called in the locked-on-read
    state; an attempt to release an unlocked lock raises RuntimeError.

    The :meth:`release_write` method should only be called in the locked on
    write state; an attempt to release an unlocked lock raises RuntimeError.

    When more than one coroutine is waiting for the lock, the first one
    registered is awakened by :meth:`release_read`/:meth:`release_write`.

    :meth:`acquire_read`/:meth:`acquire_write` support the context manager
    protocol:

    >>> from tornado import gen
    >>> import toro
    >>> lock = toro.RWLock(max_readers=10)
    >>>
    >>> @gen.coroutine
    ... def f():
    ...    with (yield lock.acquire_read()):
    ...        assert not lock.locked()
    ...
    ...    with (yield lock.acquire_write()):
    ...        assert lock.locked()
    ...
    ...    assert not lock.locked()

    .. note:: Unlike with the standard threading.Lock_, code in a
      single-threaded Tornado application can check if a :class:`RWLock`
      is :meth:`locked`, and act on that information without fear that another
      thread has grabbed the lock, provided you do not yield to the IOLoop
      between checking :meth:`locked` and using a protected resource.

    .. _threading.Lock: http://docs.python.org/2/library/threading.html#lock-objects

    :Parameters:
      - `max_readers`: Optional max readers value, default 1.
      - `io_loop`: Optional custom IOLoop.
    """
    def __init__(self, max_readers=1, io_loop=None):
        self._max_readers = max_readers
        # A write lock is modeled as holding every reader slot at once.
        self._block = BoundedSemaphore(value=max_readers, io_loop=io_loop)

    def __str__(self):
        return "<%s _block=%s>" % (
            self.__class__.__name__,
            self._block)

    def acquire_read(self, deadline=None):
        """Attempt to lock for read. Returns a Future.

        The Future raises :exc:`toro.Timeout` if the deadline passes.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for
            a deadline relative to the current time.
        """
        return self._block.acquire(deadline)

    @gen.coroutine
    def acquire_write(self, deadline=None):
        """Attempt to lock for write. Returns a Future.

        The Future raises :exc:`toro.Timeout` if the deadline passes.

        :Parameters:
          - `deadline`: Optional timeout, either an absolute timestamp
            (as returned by ``io_loop.time()``) or a ``datetime.timedelta`` for
            a deadline relative to the current time.
        """
        # Writing requires acquiring every reader slot. Use range, not
        # xrange: xrange was removed in Python 3 and raised NameError here.
        futures = [self._block.acquire(deadline) for _ in
                   range(self._max_readers)]
        try:
            managers = yield futures
        except Timeout:
            for f in futures:
                # Avoid traceback logging.
                f.exception()
            raise

        raise gen.Return(_ContextManagerList(managers))

    def release_read(self):
        """Releases one reader.

        If any coroutines are waiting for :meth:`acquire_read` (in case of full
        readers queue), the first in line is awakened.

        If not locked, raise a RuntimeError.
        """
        if not self.locked():
            raise RuntimeError('release unlocked lock')
        self._block.release()

    def release_write(self):
        """Releases after write.

        The first in queue will be awakened after release.

        If not locked, raise a RuntimeError.
        """
        if not self.locked():
            raise RuntimeError('release unlocked lock')
        # Return all reader slots taken by acquire_write (range, not xrange,
        # for Python 3).
        for i in range(self._max_readers):
            self._block.release()

    def locked(self):
        """``True`` if the lock has been acquired"""
        return self._block.locked()

    def __enter__(self):
        raise RuntimeError(
            "Use RWLock like 'with (yield lock)', not like"
            " 'with lock'")

    __exit__ = __enter__
0 | import re, os, tempfile | |
0 | import re, os | |
1 | 1 | from subprocess import Popen, PIPE |
2 | 2 | from tornado import gen |
3 | 3 | import tornado.process, tornado.iostream |
4 | try: # >=4.2 | |
5 | import tornado.locks as locks | |
6 | except ImportError: | |
7 | import toro as locks | |
4 | 8 | import logging |
5 | 9 | from select import PIPE_BUF |
10 | from contextlib import contextmanager | |
11 | from collections import namedtuple | |
12 | from time import time | |
13 | ||
14 | ||
class Pipeline(object):
    """Base class for a translation pipeline.

    Tracks concurrent usage and serializes writers; subclasses implement
    :meth:`translate`.
    """
    def __init__(self):
        # The lock is needed so we don't let two coroutines write
        # simultaneously to a pipeline; then the first call to read might
        # read translations of text put there by the second call …
        self.lock = locks.Lock()
        # The users count is how many requests have picked this
        # pipeline for translation. If this is 0, we can safely shut
        # down the pipeline.
        self.users = 0
        # Timestamp (time.time()) of the most recent use; 0 means never used.
        self.lastUsage = 0
        # Total number of completed uses, for logging/statistics.
        self.useCount = 0

    @contextmanager
    def use(self):
        # Mark this pipeline as in use for the duration of the block,
        # updating the usage bookkeeping on entry and on exit.
        self.lastUsage = time()
        self.users += 1
        try:
            yield
        finally:
            self.users -= 1
            self.lastUsage = time()
            self.useCount += 1

    def __lt__(self, other):
        # Order pipelines by current load so the least-busy one sorts first.
        return self.users < other.users

    @gen.coroutine
    def translate(self, toTranslate, nosplit):
        # Abstract: subclasses must override.
        raise Exception("Not implemented, subclass me!")
45 | ||
46 | ||
class FlushingPipeline(Pipeline):
    """Pipeline around a long-lived process chain that is flushed between
    requests instead of being restarted."""

    def __init__(self, commands, *args, **kwargs):
        self.inpipe, self.outpipe = startPipeline(commands)
        super().__init__(*args, **kwargs)

    def __del__(self):
        logging.debug("shutting down FlushingPipeline that was used %d times", self.useCount)
        self.inpipe.stdin.close()
        self.inpipe.stdout.close()
        # TODO: It seems the process immediately becomes <defunct>,
        # but only completely removed after a second request to the
        # server – why?

    @gen.coroutine
    def translate(self, toTranslate, nosplit=False):
        """Translate the text, splitting it into chunks unless nosplit."""
        with self.use():
            if not nosplit:
                chunks = splitForTranslation(toTranslate, n_users=self.users)
                translated = yield [translateNULFlush(chunk, self)
                                    for chunk in chunks]
                return "".join(translated)
            whole = yield translateNULFlush(toTranslate, self)
            return whole
70 | ||
class SimplePipeline(Pipeline):
    """Pipeline that runs a fresh process chain for every request."""

    def __init__(self, commands, *args, **kwargs):
        self.commands = list(commands)
        super().__init__(*args, **kwargs)

    @gen.coroutine
    def translate(self, toTranslate, nosplit="ignored"):
        """Translate the whole text in one go; `nosplit` is irrelevant here."""
        with self.use():
            with (yield self.lock.acquire()):
                translated = yield translateSimple(toTranslate, self.commands)
                return translated
82 | ||
83 | ||
84 | ParsedModes = namedtuple('ParsedModes', 'do_flush commands') | |
85 | ||
86 | def makePipeline(modes_parsed): | |
87 | if modes_parsed.do_flush: | |
88 | return FlushingPipeline(modes_parsed.commands) | |
89 | else: | |
90 | return SimplePipeline(modes_parsed.commands) | |
91 | ||
6 | 92 | |
7 | 93 | def startPipeline(commands): |
8 | 94 | procs = [] |
18 | 104 | procs.append(tornado.process.Subprocess(cmd, |
19 | 105 | stdin=in_from, |
20 | 106 | stdout=out_from)) |
21 | ||
22 | 107 | return procs[0], procs[-1] |
108 | ||
23 | 109 | |
24 | 110 | def parseModeFile(mode_path): |
25 | 111 | mode_str = open(mode_path, 'r').read().strip() |
26 | 112 | if mode_str: |
27 | if 'hfst-proc ' in mode_str or 'lrx-proc ' in mode_str: | |
113 | if 'ca-oc@aran' in mode_str: | |
28 | 114 | do_flush = False |
29 | 115 | modes_parentdir = os.path.dirname(os.path.dirname(mode_path)) |
30 | 116 | mode_name = os.path.splitext(os.path.basename(mode_path))[0] |
39 | 125 | do_flush = True |
40 | 126 | commands = [] |
41 | 127 | for cmd in mode_str.strip().split('|'): |
128 | # TODO: we should make language pairs install | |
129 | # modes.xml instead; this is brittle (what if a path | |
130 | # has | or " in it?) | |
42 | 131 | cmd = cmd.replace('$2', '').replace('$1', '-g') |
43 | cmd = re.sub('^(\S*)', '\g<1> -z', cmd) | |
44 | commands.append(cmd.split()) | |
45 | return do_flush, commands | |
46 | else: | |
47 | logging.error('Could not parse mode file %s' % mode_path) | |
48 | raise Exception('Could not parse mode file %s' % mode_path) | |
132 | cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) | |
133 | commands.append([c.strip("'") | |
134 | for c in cmd.split()]) | |
135 | return ParsedModes(do_flush, commands) | |
136 | else: | |
137 | logging.error('Could not parse mode file %s', mode_path) | |
138 | raise Exception('Could not parse mode file %s', mode_path) | |
49 | 139 | |
50 | 140 | |
51 | 141 | def upToBytes(string, max_bytes): |
55 | 145 | bytes of each char. |
56 | 146 | |
57 | 147 | """ |
58 | b = bytes(string,'utf-8') | |
148 | b = bytes(string, 'utf-8') | |
59 | 149 | l = max_bytes |
60 | 150 | while l: |
61 | 151 | try: |
65 | 155 | l -= 1 |
66 | 156 | return 0 |
67 | 157 | |
68 | def hardbreakFn(string, rush_hour): | |
158 | def hardbreakFn(string, n_users): | |
69 | 159 | """If others are queueing up to translate at the same time, we send |
70 | 160 | short requests, otherwise we try to minimise the number of |
71 | 161 | requests, but without letting buffers fill up. |
73 | 163 | These numbers could probably be tweaked a lot. |
74 | 164 | |
75 | 165 | """ |
76 | if rush_hour: | |
166 | if n_users > 2: | |
77 | 167 | return 1000 |
78 | 168 | else: |
79 | 169 | return upToBytes(string, PIPE_BUF) |
80 | 170 | |
81 | 171 | def preferPunctBreak(string, last, hardbreak): |
82 | 172 | """We would prefer to split on a period or space seen before the |
83 | hardbreak, if we can. | |
173 | hardbreak, if we can. If the remaining string is smaller or equal | |
174 | than the hardbreak, return end of the string | |
84 | 175 | |
85 | 176 | """ |
177 | ||
178 | if(len(string[last:]) <= hardbreak): | |
179 | return last+hardbreak+1 | |
180 | ||
86 | 181 | softbreak = int(hardbreak/2)+1 |
87 | 182 | softnext = last + softbreak |
88 | 183 | hardnext = last + hardbreak |
89 | 184 | dot = string.rfind(".", softnext, hardnext) |
90 | if dot>-1: | |
91 | return dot | |
185 | if dot > -1: | |
186 | return dot+1 | |
92 | 187 | else: |
93 | 188 | space = string.rfind(" ", softnext, hardnext) |
94 | if space>-1: | |
95 | return space | |
189 | if space > -1: | |
190 | return space+1 | |
96 | 191 | else: |
97 | 192 | return hardnext |
98 | 193 | |
99 | def splitForTranslation(toTranslate, rush_hour): | |
194 | ||
195 | def splitForTranslation(toTranslate, n_users): | |
100 | 196 | """Splitting it up a bit ensures we don't fill up FIFO buffers (leads |
101 | 197 | to processes hanging on read/write).""" |
102 | allSplit = [] # [].append and join faster than str += | |
103 | last=0 | |
104 | rounds=0 | |
105 | while last < len(toTranslate) and rounds<10: | |
106 | rounds+=1 | |
107 | hardbreak = hardbreakFn(toTranslate[last:], rush_hour) | |
198 | allSplit = [] # [].append and join faster than str += | |
199 | last = 0 | |
200 | rounds = 0 | |
201 | while last < len(toTranslate) and rounds < 10: | |
202 | rounds += 1 | |
203 | hardbreak = hardbreakFn(toTranslate[last:], n_users) | |
108 | 204 | next = preferPunctBreak(toTranslate, last, hardbreak) |
109 | 205 | allSplit.append(toTranslate[last:next]) |
206 | #logging.getLogger().setLevel(logging.DEBUG) | |
207 | logging.debug("splitForTranslation: last:%s hardbreak:%s next:%s appending:%s"%(last,hardbreak,next,toTranslate[last:next])) | |
110 | 208 | last = next |
111 | 209 | return allSplit |
112 | 210 | |
211 | ||
113 | 212 | @gen.coroutine |
114 | def translateNULFlush(toTranslate, lock, pipeline): | |
115 | with (yield lock.acquire()): | |
116 | proc_in, proc_out = pipeline | |
213 | def translateNULFlush(toTranslate, pipeline): | |
214 | with (yield pipeline.lock.acquire()): | |
215 | proc_in, proc_out = pipeline.inpipe, pipeline.outpipe | |
117 | 216 | |
118 | 217 | proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE) |
119 | 218 | proc_deformat.stdin.write(bytes(toTranslate, 'utf-8')) |
124 | 223 | # TODO: PipeIOStream has no flush, but seems to work anyway? |
125 | 224 | #proc_in.stdin.flush() |
126 | 225 | |
127 | output = yield proc_out.stdout.read_until(bytes('\0', 'utf-8')) | |
226 | output = yield gen.Task(proc_out.stdout.read_until, bytes('\0', 'utf-8')) | |
128 | 227 | |
129 | 228 | proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE) |
130 | 229 | proc_reformat.stdin.write(output) |
131 | 230 | return proc_reformat.communicate()[0].decode('utf-8') |
132 | 231 | |
133 | 232 | |
134 | def translateWithoutFlush(toTranslate, lock, pipeline): | |
233 | def translateWithoutFlush(toTranslate, proc_in, proc_out): | |
135 | 234 | proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE) |
136 | 235 | proc_deformat.stdin.write(bytes(toTranslate, 'utf-8')) |
137 | 236 | deformatted = proc_deformat.communicate()[0] |
150 | 249 | proc_reformat.stdin.write(b"".join(output)) |
151 | 250 | return proc_reformat.communicate()[0].decode('utf-8') |
152 | 251 | |
252 | ||
153 | 253 | @gen.coroutine |
154 | 254 | def translatePipeline(toTranslate, commands): |
155 | 255 | |
163 | 263 | output.append(toTranslate) |
164 | 264 | output.append(towrite.decode('utf-8')) |
165 | 265 | |
166 | pipeline = [] | |
167 | pipeline.append("apertium-deshtml") | |
266 | all_cmds = [] | |
267 | all_cmds.append("apertium-deshtml") | |
168 | 268 | |
169 | 269 | for cmd in commands: |
170 | 270 | proc = Popen(cmd, stdin=PIPE, stdout=PIPE) |
172 | 272 | towrite = proc.communicate()[0] |
173 | 273 | |
174 | 274 | output.append(towrite.decode('utf-8')) |
175 | pipeline.append(cmd) | |
275 | all_cmds.append(cmd) | |
176 | 276 | |
177 | 277 | proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE) |
178 | 278 | proc_reformat.stdin.write(towrite) |
179 | 279 | towrite = proc_reformat.communicate()[0].decode('utf-8') |
180 | 280 | |
181 | 281 | output.append(towrite) |
182 | pipeline.append("apertium-rehtml-noent") | |
183 | ||
184 | return output, pipeline | |
282 | all_cmds.append("apertium-rehtml-noent") | |
283 | ||
284 | return output, all_cmds | |
285 | ||
185 | 286 | |
186 | 287 | @gen.coroutine |
187 | 288 | def translateSimple(toTranslate, commands): |
188 | 289 | proc_in, proc_out = startPipeline(commands) |
189 | assert(proc_in==proc_out) | |
190 | yield proc_in.stdin.write(bytes(toTranslate, 'utf-8')) | |
290 | assert proc_in == proc_out | |
291 | yield gen.Task(proc_in.stdin.write, bytes(toTranslate, 'utf-8')) | |
191 | 292 | proc_in.stdin.close() |
192 | translated = yield proc_out.stdout.read_until_close() | |
293 | translated = yield gen.Task(proc_out.stdout.read_until_close) | |
193 | 294 | proc_in.stdout.close() |
194 | 295 | return translated.decode('utf-8') |
195 | 296 | |
196 | def translateDoc(fileToTranslate, format, modeFile): | |
197 | modesdir=os.path.dirname(os.path.dirname(modeFile)) | |
198 | mode=os.path.splitext(os.path.basename(modeFile))[0] | |
199 | return Popen(['apertium', '-f', format, '-d', modesdir, mode], | |
200 | stdin=fileToTranslate, stdout=PIPE).communicate()[0] | |
201 | ||
202 | @gen.coroutine | |
203 | def translate(toTranslate, lock, pipeline, commands): | |
204 | if pipeline: | |
205 | allSplit = splitForTranslation(toTranslate, rush_hour = lock.locked()) | |
206 | parts = yield [translateNULFlush(part, lock, pipeline) for part in allSplit] | |
207 | return "".join(parts) | |
208 | else: | |
209 | with (yield lock.acquire()): | |
210 | res = yield translateSimple(toTranslate, commands) | |
211 | return res | |
297 | ||
298 | def translateDoc(fileToTranslate, fmt, modeFile, unknownMarks = False): | |
299 | modesdir = os.path.dirname(os.path.dirname(modeFile)) | |
300 | mode = os.path.splitext(os.path.basename(modeFile))[0] | |
301 | if unknownMarks: | |
302 | return Popen(['apertium', '-f', fmt, '-d', modesdir, mode], | |
303 | stdin=fileToTranslate, stdout=PIPE).communicate()[0] | |
304 | else: | |
305 | return Popen(['apertium', '-f', fmt, '-u', '-d', modesdir, mode], | |
306 | stdin=fileToTranslate, stdout=PIPE).communicate()[0] |
0 | import re, os | |
1 | from subprocess import Popen, PIPE | |
2 | from tornado import gen | |
3 | import tornado.process, tornado.iostream | |
4 | try: # >=4.2 | |
5 | import tornado.locks as locks | |
6 | except ImportError: | |
7 | import toro as locks | |
8 | import logging | |
9 | from select import PIPE_BUF | |
10 | from contextlib import contextmanager | |
11 | from collections import namedtuple | |
12 | from time import time | |
13 | ||
14 | ||
class Pipeline(object):
    """Base class for a translation pipeline.

    Tracks usage statistics and serialises writes with a lock; concrete
    subclasses implement translate().
    """

    def __init__(self):
        # The lock is needed so we don't let two coroutines write
        # simultaneously to a pipeline; then the first call to read might
        # read translations of text put there by the second call …
        self.lock = locks.Lock()
        # The users count is how many requests have picked this
        # pipeline for translation. If this is 0, we can safely shut
        # down the pipeline.
        self.users = 0
        # Timestamp of the most recent use (0 = never used):
        self.lastUsage = 0
        # Total number of completed uses:
        self.useCount = 0

    @contextmanager
    def use(self):
        """Mark this pipeline as in-use for the duration of the block."""
        self.lastUsage = time()
        self.users += 1
        try:
            yield
        finally:
            self.users -= 1
            self.lastUsage = time()
            self.useCount += 1

    def __lt__(self, other):
        # Order pipelines by current load, so the least-busy sorts first.
        return self.users < other.users

    @gen.coroutine
    def translate(self, toTranslate, nosplit):
        # NotImplementedError is the idiomatic marker for an abstract
        # method; it subclasses RuntimeError/Exception, so existing
        # `except Exception` handlers still catch it.
        raise NotImplementedError("Not implemented, subclass me!")
45 | ||
46 | ||
class FlushingPipeline(Pipeline):
    """Pipeline for modes supporting NUL-flushing (-z): one long-lived
    process chain is shared and reused across requests."""

    def __init__(self, commands, *args, **kwargs):
        self.inpipe, self.outpipe = startPipeline(commands)
        super().__init__(*args, **kwargs)

    def __del__(self):
        logging.debug("shutting down FlushingPipeline that was used %d times", self.useCount)
        self.inpipe.stdin.close()
        self.inpipe.stdout.close()
        # TODO: It seems the process immediately becomes <defunct>,
        # but only completely removed after a second request to the
        # server – why?

    @gen.coroutine
    def translate(self, toTranslate, nosplit=False):
        with self.use():
            if nosplit:
                res = yield translateNULFlush(toTranslate, self)
                # Plain `return value` is the PEP 479-safe way to finish
                # a coroutine; `raise StopIteration(value)` becomes a
                # RuntimeError inside generators on Python 3.7+.
                return res
            else:
                all_split = splitForTranslation(toTranslate, n_users=self.users)
                parts = yield [translateNULFlush(part, self) for part in all_split]
                return "".join(parts)
70 | ||
class SimplePipeline(Pipeline):
    """Pipeline for modes without NUL-flushing: a fresh process chain is
    started for every request."""

    def __init__(self, commands, *args, **kwargs):
        self.commands = list(commands)
        super().__init__(*args, **kwargs)

    @gen.coroutine
    def translate(self, toTranslate, nosplit="ignored"):
        with self.use():
            with (yield self.lock.acquire()):
                res = yield translateSimple(toTranslate, self.commands)
                # PEP 479: use `return`, not `raise StopIteration(...)`,
                # to return a value from a coroutine (RuntimeError on 3.7+).
                return res
82 | ||
83 | ||
# Result of parsing a mode file: whether the pipeline supports
# NUL-flushing, plus the list of argv lists to run.
ParsedModes = namedtuple('ParsedModes', 'do_flush commands')


def makePipeline(modes_parsed):
    """Build the appropriate Pipeline subclass for a parsed mode file."""
    pipeline_cls = FlushingPipeline if modes_parsed.do_flush else SimplePipeline
    return pipeline_cls(modes_parsed.commands)
91 | ||
92 | ||
def startPipeline(commands):
    """Spawn one Subprocess per command, chaining each stdout into the
    next command's stdin.

    Only the first process's stdin and the last process's stdout are
    tornado STREAMs; intermediate links are plain OS pipes. Returns the
    (first, last) processes.
    """
    procs = []
    last_index = len(commands) - 1
    for index, cmd in enumerate(commands):
        stdin_src = (tornado.process.Subprocess.STREAM
                     if index == 0
                     else procs[-1].stdout)
        stdout_dst = (tornado.process.Subprocess.STREAM
                      if index == last_index
                      else PIPE)
        procs.append(tornado.process.Subprocess(cmd,
                                                stdin=stdin_src,
                                                stdout=stdout_dst))
    return procs[0], procs[-1]
108 | ||
109 | ||
def parseModeFile(mode_path):
    """Parse an apertium .mode file into a ParsedModes tuple.

    Returns ParsedModes(do_flush, commands), where commands is a list of
    argv lists. Raises Exception if the mode file is empty.
    """
    # Use a context manager so the file handle isn't leaked:
    with open(mode_path, 'r') as f:
        mode_str = f.read().strip()
    if not mode_str:
        logging.error('Could not parse mode file %s', mode_path)
        # Unlike logging, Exception() does no %-interpolation, so format
        # the path into the message before raising:
        raise Exception('Could not parse mode file %s' % mode_path)
    if 'ca-oc@aran' in mode_str:
        # This pair is known not to support NUL-flushing; run it through
        # plain `apertium` instead:
        do_flush = False
        modes_parentdir = os.path.dirname(os.path.dirname(mode_path))
        mode_name = os.path.splitext(os.path.basename(mode_path))[0]
        commands = [[
            'apertium',
            '-f', 'html-noent',
            # Get the _parent_ dir of the mode file:
            '-d', modes_parentdir,
            mode_name
        ]]
    else:
        do_flush = True
        commands = []
        for cmd in mode_str.strip().split('|'):
            # TODO: we should make language pairs install
            # modes.xml instead; this is brittle (what if a path
            # has | or " in it?)
            cmd = cmd.replace('$2', '').replace('$1', '-g')
            # Add -z (NUL-flush mode) right after each program name:
            cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd)
            commands.append([c.strip("'")
                             for c in cmd.split()])
    return ParsedModes(do_flush, commands)
139 | ||
140 | ||
def upToBytes(string, max_bytes):
    """Find the unicode string length of the first up-to-max_bytes bytes.

    At least it's much faster than going through the string adding
    bytes of each char.

    """
    encoded = bytes(string, 'utf-8')
    limit = max_bytes
    # Back off one byte at a time until the prefix is valid UTF-8:
    while limit:
        try:
            return len(encoded[:limit].decode('utf-8'))
        except UnicodeDecodeError:
            limit -= 1
    return 0
157 | ||
def hardbreakFn(string, n_users):
    """If others are queueing up to translate at the same time, we send
    short requests, otherwise we try to minimise the number of
    requests, but without letting buffers fill up.

    These numbers could probably be tweaked a lot.

    """
    # Busy (>2 concurrent users): short fixed chunks; otherwise fill up
    # to the pipe's atomic-write size.
    return 1000 if n_users > 2 else upToBytes(string, PIPE_BUF)
170 | ||
def preferPunctBreak(string, last, hardbreak):
    """We would prefer to split on a period or space seen before the
    hardbreak, if we can. If the remaining string is smaller or equal
    than the hardbreak, return end of the string

    """
    # Everything that's left fits in one chunk:
    if len(string) - last <= hardbreak:
        return last + hardbreak + 1

    # Look for a separator in the second half of the chunk window,
    # preferring a period over a space:
    window_start = last + int(hardbreak / 2) + 1
    window_end = last + hardbreak
    for separator in (".", " "):
        found = string.rfind(separator, window_start, window_end)
        if found > -1:
            return found + 1
    return window_end
193 | ||
194 | ||
def splitForTranslation(toTranslate, n_users):
    """Splitting it up a bit ensures we don't fill up FIFO buffers (leads
    to processes hanging on read/write).

    Returns a list of chunks; at most 10 chunks are produced, so very
    long input beyond the tenth chunk is dropped (pre-existing cap).
    """
    allSplit = []  # [].append and join faster than str +=
    last = 0
    rounds = 0
    while last < len(toTranslate) and rounds < 10:
        rounds += 1
        hardbreak = hardbreakFn(toTranslate[last:], n_users)
        # Renamed from `next` so we don't shadow the builtin:
        split_at = preferPunctBreak(toTranslate, last, hardbreak)
        allSplit.append(toTranslate[last:split_at])
        # Lazy %-args: the message is only formatted when DEBUG is enabled.
        logging.debug("splitForTranslation: last:%s hardbreak:%s next:%s appending:%s",
                      last, hardbreak, split_at, toTranslate[last:split_at])
        last = split_at
    return allSplit
210 | ||
211 | ||
@gen.coroutine
def translateNULFlush(toTranslate, pipeline):
    """Translate one chunk through the pipeline's long-lived process
    chain, using a NUL byte to delimit and flush this request's output.

    Holds the pipeline lock for the whole round-trip so concurrent
    requests can't interleave their writes/reads.
    """
    with (yield pipeline.lock.acquire()):
        proc_in, proc_out = pipeline.inpipe, pipeline.outpipe

        proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE)
        proc_deformat.stdin.write(bytes(toTranslate, 'utf-8'))
        deformatted = proc_deformat.communicate()[0]

        proc_in.stdin.write(deformatted)
        proc_in.stdin.write(bytes('\0', "utf-8"))
        # TODO: PipeIOStream has no flush, but seems to work anyway?
        #proc_in.stdin.flush()

        output = yield gen.Task(proc_out.stdout.read_until, bytes('\0', 'utf-8'))

        proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE)
        proc_reformat.stdin.write(output)
        # PEP 479: `return value`, not `raise StopIteration(value)` —
        # the latter becomes RuntimeError inside generators on 3.7+.
        return proc_reformat.communicate()[0].decode('utf-8')
231 | ||
232 | ||
def translateWithoutFlush(toTranslate, proc_in, proc_out):
    """Translate one chunk through an already-started process chain,
    reading byte-by-byte until the NUL terminator.

    This is a plain (blocking) function, not a coroutine.
    """
    proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE)
    proc_deformat.stdin.write(bytes(toTranslate, 'utf-8'))
    deformatted = proc_deformat.communicate()[0]

    proc_in.stdin.write(deformatted)
    proc_in.stdin.write(bytes('\0', "utf-8"))
    proc_in.stdin.flush()

    d = proc_out.stdout.read(1)
    output = []
    while d and d != b'\x00':
        output.append(d)
        d = proc_out.stdout.read(1)

    proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE)
    proc_reformat.stdin.write(b"".join(output))
    # Bug fix: this is not a generator, so `raise StopIteration(...)`
    # would propagate an exception to the caller instead of returning.
    return proc_reformat.communicate()[0].decode('utf-8')
251 | ||
252 | ||
@gen.coroutine
def translatePipeline(toTranslate, commands):
    """Run the translation chain step by step, collecting each
    intermediate output together with the command that produced it
    (useful for debugging a pipeline)."""

    proc_deformat = Popen("apertium-deshtml", stdin=PIPE, stdout=PIPE)
    proc_deformat.stdin.write(bytes(toTranslate, 'utf-8'))
    towrite = proc_deformat.communicate()[0]

    # Parallel lists: output[i] was produced by all_cmds[i-1]
    # (output[0] is the raw input text).
    output = [toTranslate, towrite.decode('utf-8')]
    all_cmds = ["apertium-deshtml"]

    for cmd in commands:
        proc = Popen(cmd, stdin=PIPE, stdout=PIPE)
        proc.stdin.write(towrite)
        towrite = proc.communicate()[0]
        output.append(towrite.decode('utf-8'))
        all_cmds.append(cmd)

    proc_reformat = Popen("apertium-rehtml-noent", stdin=PIPE, stdout=PIPE)
    proc_reformat.stdin.write(towrite)
    towrite = proc_reformat.communicate()[0].decode('utf-8')
    output.append(towrite)
    all_cmds.append("apertium-rehtml-noent")

    return output, all_cmds
285 | ||
286 | ||
@gen.coroutine
def translateSimple(toTranslate, commands):
    """Translate by spawning a fresh one-shot pipeline for this request
    (used for modes that don't support NUL-flushing)."""
    proc_in, proc_out = startPipeline(commands)
    # Non-flushing modes are a single `apertium` invocation, so the
    # chain's first and last process are the same object:
    assert proc_in == proc_out
    yield gen.Task(proc_in.stdin.write, bytes(toTranslate, 'utf-8'))
    proc_in.stdin.close()
    translated = yield gen.Task(proc_out.stdout.read_until_close)
    proc_in.stdout.close()
    # PEP 479: `return value`, not `raise StopIteration(value)` —
    # the latter becomes RuntimeError inside generators on 3.7+.
    return translated.decode('utf-8')
296 | ||
297 | ||
def translateDoc(fileToTranslate, fmt, modeFile, unknownMarks = False):
    """Translate a whole document with `apertium -f <fmt>`.

    When unknownMarks is False, -u is passed (which, per apertium's
    docs, suppresses the '*' marks on unknown words).
    """
    modesdir = os.path.dirname(os.path.dirname(modeFile))
    mode = os.path.splitext(os.path.basename(modeFile))[0]
    # Build the argv once instead of duplicating the Popen call:
    args = ['apertium', '-f', fmt]
    if not unknownMarks:
        args.append('-u')
    args.extend(['-d', modesdir, mode])
    return Popen(args, stdin=fileToTranslate, stdout=PIPE).communicate()[0]
3 | 3 | import sqlite3, re, os, logging |
4 | 4 | from subprocess import Popen, PIPE |
5 | 5 | from datetime import datetime |
6 | import threading | |
7 | from collections import defaultdict | |
8 | 6 | |
9 | 7 | iso639Codes = {"abk":"ab","aar":"aa","afr":"af","aka":"ak","sqi":"sq","amh":"am","ara":"ar","arg":"an","hye":"hy","asm":"as","ava":"av","ave":"ae","aym":"ay","aze":"az","bam":"bm","bak":"ba","eus":"eu","bel":"be","ben":"bn","bih":"bh","bis":"bi","bos":"bs","bre":"br","bul":"bg","mya":"my","cat":"ca","cha":"ch","che":"ce","nya":"ny","zho":"zh","chv":"cv","cor":"kw","cos":"co","cre":"cr","hrv":"hr","ces":"cs","dan":"da","div":"dv","nld":"nl","dzo":"dz","eng":"en","epo":"eo","est":"et","ewe":"ee","fao":"fo","fij":"fj","fin":"fi","fra":"fr","ful":"ff","glg":"gl","kat":"ka","deu":"de","ell":"el","grn":"gn","guj":"gu","hat":"ht","hau":"ha","heb":"he","her":"hz","hin":"hi","hmo":"ho","hun":"hu","ina":"ia","ind":"id","ile":"ie","gle":"ga","ibo":"ig","ipk":"ik","ido":"io","isl":"is","ita":"it","iku":"iu","jpn":"ja","jav":"jv","kal":"kl","kan":"kn","kau":"kr","kas":"ks","kaz":"kk","khm":"km","kik":"ki","kin":"rw","kir":"ky","kom":"kv","kon":"kg","kor":"ko","kur":"ku","kua":"kj","lat":"la","ltz":"lb","lug":"lg","lim":"li","lin":"ln","lao":"lo","lit":"lt","lub":"lu","lav":"lv","glv":"gv","mkd":"mk","mlg":"mg","msa":"ms","mal":"ml","mlt":"mt","mri":"mi","mar":"mr","mah":"mh","mon":"mn","nau":"na","nav":"nv","nob":"nb","nde":"nd","nep":"ne","ndo":"ng","nno":"nn","nor":"no","iii":"ii","nbl":"nr","oci":"oc","oji":"oj","chu":"cu","orm":"om","ori":"or","oss":"os","pan":"pa","pli":"pi","fas":"fa","pol":"pl","pus":"ps","por":"pt","que":"qu","roh":"rm","run":"rn","ron":"ro","rus":"ru","san":"sa","srd":"sc","snd":"sd","sme":"se","smo":"sm","sag":"sg","srp":"sr","gla":"gd","sna":"sn","sin":"si","slk":"sk","slv":"sl","som":"so","sot":"st","azb":"az","spa":"es","sun":"su","swa":"sw","ssw":"ss","swe":"sv","tam":"ta","tel":"te","tgk":"tg","tha":"th","tir":"ti","bod":"bo","tuk":"tk","tgl":"tl","tsn":"tn","ton":"to","tur":"tr","tso":"ts","tat":"tt","twi":"tw","tah":"ty","uig":"ug","ukr":"uk","urd":"ur","uzb":"uz","ven":"ve","vie":"vi","vol":"vo","wln":"wa","cym":"cy","wol":"wo","fry":"f
y","xho":"xh","yid":"yi","yor":"yo","zha":"za","zul":"zu", "hbs":"sh", "arg":"an", "pes":"fa"} |
10 | 8 | ''' |
14 | 12 | JSON.stringify(out); |
15 | 13 | ''' |
16 | 14 | |
15 | # TODO: does this need a lock? | |
17 | 16 | langNamesDBConn = None |
18 | missingFreqsDBConn = None | |
19 | 17 | |
20 | 18 | def toAlpha2Code(code): |
21 | 19 | if '_' in code: |
68 | 66 | output[languageResult[2]] = languageResult[3] |
69 | 67 | return output |
70 | 68 | |
71 | def noteUnknownToken(token, pair, dbPath): | |
72 | global missingFreqsDBConn | |
73 | if not missingFreqsDBConn: | |
74 | missingFreqsDBConn = sqlite3.connect(dbPath) | |
75 | c = missingFreqsDBConn.cursor() | |
76 | ||
77 | c.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))') | |
78 | c.execute('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + 1)', {'pair': pair, 'token': token}) | |
79 | missingFreqsDBConn.commit() | |
80 | ||
81 | ||
82 | unknownLock = threading.RLock() | |
83 | unknownWords = defaultdict(lambda: defaultdict(lambda: 0)) | |
84 | unknownCount = 0 | |
85 | ||
86 | def inMemoryUnknownToken(token, pair, dbPath, limit): | |
87 | global unknownLock | |
88 | global unknownCount | |
89 | global unknownWords | |
90 | ||
91 | try: | |
92 | unknownLock.acquire() | |
93 | unknownWords[pair][token] += 1 | |
94 | unknownCount += 1 | |
95 | ||
96 | if unknownCount > limit: | |
97 | flushUnknownWords(dbPath) | |
98 | unknownWords.clear() | |
99 | unknownCount = 0 | |
100 | finally: | |
101 | unknownLock.release() | |
102 | ||
103 | ||
104 | def flushUnknownWords(dbPath): | |
105 | global unknownWords | |
106 | global missingFreqsDBConn | |
107 | ||
108 | timeBefore = datetime.now() | |
109 | ||
110 | if not missingFreqsDBConn: | |
111 | missingFreqsDBConn = sqlite3.connect(dbPath) | |
112 | ||
113 | c = missingFreqsDBConn.cursor() | |
114 | c.execute("PRAGMA synchronous = NORMAL") | |
115 | ||
116 | c.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))') | |
117 | ||
118 | c.executemany('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + :amount)', | |
119 | ({'pair': pair, 'token': token, 'amount' : unknownWords[pair][token]} for pair in unknownWords for token in unknownWords[pair])) | |
120 | ||
121 | missingFreqsDBConn.commit() | |
122 | ||
123 | ms = timedeltaToMilliseconds(datetime.now() - timeBefore) | |
124 | logging.info("\tSaving %s unknown words to the DB (%s ms)", unknownCount, ms) | |
125 | ||
126 | def closeDb(): | |
127 | global missingFreqsDBConn | |
128 | if not missingFreqsDBConn: | |
129 | logging.warning('no connection') | |
130 | return | |
131 | logging.warning('closing connection') | |
132 | missingFreqsDBConn.close() | |
133 | missingFreqsDBConn = False | |
134 | 69 | |
135 | 70 | def apertium(input, dir, mode, formatting=None): |
136 | 71 | p1 = Popen(['echo', input], stdout=PIPE) |
72 | print(input, dir, mode, formatting) | |
137 | 73 | if formatting: |
138 | 74 | p2 = Popen(['apertium', '-d . -f %s' % formatting, mode], stdin=p1.stdout, stdout=PIPE, cwd=dir) |
139 | 75 | else: |
140 | p2 = Popen(['apertium', '-d .', mode], stdin=p1.stdout, stdout=PIPE, cwd=dir) | |
76 | p2 = Popen(['apertium', '-d {}'.format(dir), mode], stdin=p1.stdout, stdout=PIPE) | |
141 | 77 | p1.stdout.close() |
142 | 78 | output = p2.communicate()[0].decode('utf-8') |
143 | 79 | return output |
149 | 85 | output = p2.communicate()[0].decode('utf-8') |
150 | 86 | return output |
151 | 87 | |
152 | def removeLast(query, analyses): | |
88 | def removeDotFromDeformat(query, analyses): | |
89 | """When using the txt format, a dot is added at EOF (also, double line | |
90 | breaks) if the last part of the query isn't itself a dot""" | |
153 | 91 | if not query[-1] == '.': |
154 | 92 | return analyses[:-1] |
155 | 93 | else: |
169 | 107 | |
170 | 108 | def getCoverage(text, mode, modeDir, penalize=False): |
171 | 109 | analysis = apertium(text, mode, modeDir) |
172 | lexicalUnits = removeLast(text, re.findall(r'\^([^\$]*)\$([^\^]*)', analysis)) | |
110 | lexicalUnits = removeDotFromDeformat(text, re.findall(r'\^([^\$]*)\$([^\^]*)', analysis)) | |
173 | 111 | analyzedLexicalUnits = list(filter(lambda x: not x[0].split('/')[1][0] in '*&#', lexicalUnits)) |
174 | 112 | if len(lexicalUnits) and not penalize: |
175 | 113 | return len(analyzedLexicalUnits) / len(lexicalUnits) |
188 | 126 | if lang in analyzers: |
189 | 127 | modeInfo = analyzers[lang] |
190 | 128 | analysis = apertium(query, modeInfo[0], modeInfo[1]) |
191 | morph_lexicalUnits = removeLast(query, re.findall(lexicalUnitRE, analysis)) | |
129 | morph_lexicalUnits = removeDotFromDeformat(query, re.findall(lexicalUnitRE, analysis)) | |
192 | 130 | outputs['morph'] = [lexicalUnit.split('/')[1:] for lexicalUnit in morph_lexicalUnits] |
193 | 131 | outputs['morph_inputs'] = [stripTags(lexicalUnit.split('/')[0]) for lexicalUnit in morph_lexicalUnits] |
194 | 132 | else: |
198 | 136 | if lang in taggers: |
199 | 137 | modeInfo = taggers[lang] |
200 | 138 | analysis = apertium(query, modeInfo[0], modeInfo[1]) |
201 | tagger_lexicalUnits = removeLast(query, re.findall(lexicalUnitRE, analysis)) | |
139 | tagger_lexicalUnits = removeDotFromDeformat(query, re.findall(lexicalUnitRE, analysis)) | |
202 | 140 | outputs['tagger'] = [lexicalUnit.split('/')[1:] if '/' in lexicalUnit else lexicalUnit for lexicalUnit in tagger_lexicalUnits] |
203 | 141 | outputs['tagger_inputs'] = [stripTags(lexicalUnit.split('/')[0]) for lexicalUnit in tagger_lexicalUnits] |
204 | 142 | else: |
231 | 169 | return |
232 | 170 | |
233 | 171 | return (outputs, tagger_lexicalUnits, morph_lexicalUnits) |
234 | ||
172 | ||
235 | 173 | def getTimestamp(): |
236 | 174 | return datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] |
237 | 175 | |
238 | def timedeltaToMilliseconds(td): | |
239 | return td.days*86400000 + td.seconds*1000 + int(td.microseconds/1000) | |
240 | ||
241 | 176 | def scaleMtLog(status, time, tInfo, key, length): |
242 | logging.getLogger('scale-mt').error("%s %s %s html %s %s %s %s %s %s", | |
243 | getTimestamp(), | |
244 | timedeltaToMilliseconds(time), | |
245 | tInfo.langpair, | |
246 | key, | |
247 | tInfo.ip, | |
248 | tInfo.referer, | |
249 | status, | |
250 | length, | |
251 | 'null' | |
252 | ) | |
177 | logging.getLogger('scale-mt').error("%s %s %s html %s %s %s %s %s %s", | |
178 | getTimestamp(), | |
179 | timedeltaToMilliseconds(time), | |
180 | tInfo.langpair, | |
181 | key, | |
182 | tInfo.ip, | |
183 | tInfo.referer, | |
184 | status, | |
185 | length, | |
186 | 'null' | |
187 | ) | |
253 | 188 | |
254 | 189 | |
255 | 190 | class TranslationInfo: |