Codebase list mediawiki2latex / 93eea55
New upstream version 7.33 Georges Khaznadar 5 years ago
38 changed file(s) with 2831 addition(s) and 2195 deletion(s). Raw diff Collapse all Expand all
+58
-41
1.hs less more
0 import Network.HTTP
1 import Network.HTTP.Conduit
2 import qualified Data.ByteString.Lazy as LB
0 import UrlAnalyse
1 import ImperativeState
2 import UrlAnalyse
33 import qualified Data.ByteString as BStr
4 import Network.URL as URL
45 import qualified Data.ByteString.UTF8 as UTF8Str
6 import Data.List.Split (splitOn)
7 import Control.Monad
8 import Data.Maybe
9 import Control.Monad.State
10 import Data.List
11 import System.FilePath
512 import Codec.Binary.UTF8.String
6 import Data.ByteString.Lazy.Internal
7 import Network.HTTP.Conduit.Browser
8 import Control.Exception
9 import Control.Monad.Trans.Resource
10 import Control.Monad.IO.Class
11 import Data.CaseInsensitive (mk)
12 geturl3 :: String -> IO String
13 geturl3 u
14 = Control.Exception.catch
15 (do req0 <- parseUrl u
16 let req
17 = (urlEncodedBody
18 (map (\ (a, b) -> (UTF8Str.fromString a, UTF8Str.fromString b))
19 [("title","Spezial:Anmelden"),("action","submitlogin"),("type","login"),("returnto","Dirk Hünniger"),("wpName","Dirk Hünniger"),("wpPassword","rotationpeliped")]))
20 req0
21 man <- newManager def
22 req2b <- parseUrl "https://de.wikibooks.org/w/index.php"
23 let cj = createCookieJar []
24 let req2
25 = (urlEncodedBody
26 (map (\ (a, b) -> (UTF8Str.fromString a, UTF8Str.fromString b))
27 [("title","Spezial:Anmelden"),("returnto","Dirk Hünniger")]))
28 req2b
13 import Control.Concurrent.MVar
14 import Parallel
15 import Data.Map (member)
16 import MediaWikiParser
17 import MediaWikiParseTree
18 import WikiHelper
19 import Verben
20 import Nomen
21 import Adjektive
22 import Adverbien
23 import Gradpartikel
24 import Hilfsverben
25 deepGet2 :: [Char] -> [Anything a] -> [Anything a]
26 deepGet2 tag ll = concat $ map go ll
27 where
28 | t == tag =
29 [Environment Tag (TagAttr tag m) l] ++ (deepGet2 tag l)
30 go (Environment _ _ l) = (deepGet2 tag l)
31 go _ = []
2932
30 res <- runResourceT $ do browse man $ do setDefaultHeader (mk (UTF8Str.fromString "User-Agent")) $ Just (UTF8Str.fromString "mozialla 1.2.3 (compatible)")
31 --setCookieJar cj
32 makeRequestLbs req2
33 xx<-makeRequestLbs req
34 yy<-makeRequestLbs req
35 ccj <-getCookieJar
36 --liftIO $ print ccj
37 return yy
38 return ((unpackChars (responseBody res))))
39 fun
40 where
41 fun :: ErrorCall -> IO String
42 fun _ = return ""
33 deepGet3 :: [Char] -> [Char] -> [Anything Char] -> [Anything Char]
34 deepGet3 tag k ll = concat ( map go ll)
35 where go (Environment Tag (TagAttr t m) l)
36 | ((t == tag) && (member k m)) =
37 [Environment Tag (TagAttr tag m) l] ++ (deepGet3 tag k l)
38 go (Environment _ _ l) = (deepGet3 tag k l)
39 go _ = []
4340
44 main = geturl3 "https://de.wikibooks.org/w/index.php" >>= putStr
41 deepGo :: [Anything Char] -> [[Char]]
42 deepGo ((Environment Tag (TagAttr t m) l):xs) = (shallowFlatten l):(deepGo xs)
43 deepGo (_:xs) = deepGo xs
44 deepGo [] = []
45
46 -- yy <- geturl theUrl3
47 -- let gg = (deepGet "li" "id" "ca-history" (parseit minparsers yy))
48 gourl url = do x<-geturl url
49 return (reverse ( (reverse ( (deepGo (deepGet2 "a" (deepGet "div" "class" "mw-category-group" (deepGet "div" "id" "mw-pages"(parseit minparsers x)))))))))
50
51 gourl2 url = do x<-geturl url
52 return (reverse ((reverse ( (deepGo (deepGet2 "a" (deepGet "div" "class" "mw-category-group" (deepGet "div" "id" "mw-pages"(parseit minparsers x)))))))))
53
54 gourls x = do y<-gourl2 ("https://de.wiktionary.org/w/index.php?title=Kategorie:Adverb_(Deutsch)&pagefrom="++(decodeString (last x))++"#mw-pages")
55 if (length y)>0 then (print (last x))>>(print (last y))>> gourls (x++y) else return (x++y)
56
57 main = do x <- gourl ("https://de.wiktionary.org/wiki/Kategorie:Adverb_%28Deutsch%29")
58 y <- gourls x
59 print y
4560
4661
4762
4863
49 --main = simpleHttp "https://de.wikibooks.org/w/index.php?title=Spezial:Anmelden&returnto=Benutzer%3ADirk+Hünniger"
5064
5165
66
67
68
0 Version 7.31
0 Version 7.33
1 * src
2 amperand and "<" and ">" now work in sourcecode received via html
3 some more toplevel function are now documented
4 server now displayes running at wmflabs
5 server now warns about time limit of one hour
6 formulas inside image caption received of html work
7 man page update
8 source code pretty printed
9 Version 7.32
110 * src
211 tables now work with formulas inside them also
312 webserver now links to installation instructions
66 .ds version MEDIAWIKI2LATEXVERSION
77 .ds year 2013
88 .ds date \*[year]-12-25
9 .ds appauthors Dirk Hünniger <hunniger@cip.physik.uni-bonn.de>
9 .ds appauthors Dirk Hünniger <dirk.hunniger@googlemail.com>
1010 .ds manauthors Pierre Neidhardt <ambrevar@gmail.com>
1111 .
1212 .\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
5252 .BR -z ", " --zip
5353 Output LaTeX Source Archive.
5454 .TP
55 .BR -b ", " --epub
56 Output epub file.
57 .TP
58 .BR -d ", " --odt
59 Output odt file. Open Document Text (for wordprocessors)
60 .TP
5561 .BR -i ", " --internal
5662 Use internal template definitions.
5763 .TP
6066 .TP
6167 .BR -m ", " --mediawiki
6268 Use mediaWiki to expand the templates.
69 .TP
70 .BR -k ", " --bookmode
71 Use book-namespace mode for expansion. That means: Follow all links but not recursively.
6372 .TP
6473 .BR -o ", " --output = \fIFILE\fR
6574 Specify the PDF output file.
8291 .BR -u ", " --url = \fIURL\fR
8392 The input URI. It should point to a MediaWiki page.
8493 .TP
85 .BR -h ", " -? ", " -v ", " --version ", " --help
94 .BR -? ", " -v ", " --version ", " --help
8695 Show help options together with version number.
8796 .
8897 .\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
00 Name: mediawiki2latex
1 Version: 7.32
1 Version: 7.33
22 License: GPL
33 License-File: LICENSE
44 Author: Dirk Hünniger <dirk.hunniger@googlemail.com>
5858 y <- gourls x
5959 print y
6060
61
62
63
64
65
66
67
68
3535 import UrlAnalyse
3636 import Network.URL
3737 import System.FilePath
38 import Control.Concurrent.MVar()
38 import Control.Concurrent.MVar ()
3939 import Data.Hashable
4040 import Hex
4141 import Data.IORef
4444 import Data.ByteString.Lazy (toStrict)
4545 import System.Exit
4646 import System.IO.Error
47
47
4848 {-DHUN| takes a filename as input parameter and returns the so called normalized extension for it. This is and extension classifying the type of the file while used in this program. Its not necessarily the real extension of the filename. For example .jpeg images will be converted to jpg and so on. DHUN-}
49
49
5050 getExtension :: String -> String
5151 getExtension s
5252 = normalizeExtension2
5353 (map toLower (reverse . (takeWhile (/= '.')) . reverse $ s))
54
54
5555 {-DHUN| returns that name of image magick convert command on the current operating system. It takes the path from which mediawiki2latex was started as input parameter. On Windows convert.exe has to reside in a path relative to it for this to work. On linux it does not matter DHUN-}
56
56
5757 getConvert :: FilePath -> String
5858 getConvert p
5959 = if os == "linux" then convert else (getPathPrefix p) ++ convert
6060 where convert
6161 = if os == "linux" then "convert " else "convert.exe "
62
62
6363 {-DHUN| generates the information for the titlepage. It takes the result of the wiki text compilation (CompileResult) as first input parameter and the FullWikiUrl to the article as second parameter. It title information is found in the CompileResult it is used, otherise it is generated from the FullWikiUrl DHUN-}
64
64
6565 makeTitle :: CompileResult -> FullWikiUrl -> [Char]
6666 makeTitle result fu = theTitle
6767 where theTitle
7474 = "\\title{" ++
7575 (concat (map (chartrans) ((removePrintVersion (lemma fu))))) ++
7676 "}\n"
77
7778 makeTitle2 :: CompileResult -> FullWikiUrl -> [Char]
7879 makeTitle2 result fu = theTitle
7980 where theTitle
8081 = if (Compiler.title result) == "" then tit else
8182 (Compiler.title result)
82 tit
83 = (concat (map (chartrans) ((removePrintVersion (lemma fu)))))
84
83 tit = (concat (map (chartrans) ((removePrintVersion (lemma fu)))))
84
8585 {-DHUN| returns the prefix of the path where addional needed software resides depending on the operation system DHUN-}
86
86
8787 getPathPrefix :: FilePath -> String
8888 getPathPrefix p = if os == "linux" then "" else (p ++ "..\\lib\\")
89
89
9090 {-DHUN| applied the necessary image processing to a file so that it can be included in a latex document and does not take too much discspace. It takes the path form which mediawiki2latex was started as first input parameter. It takes filename with its exension stiped as second input parameter. It takes the normalized exension of the file as third parameter (see also function getExtension in this module). It takes the maximum resolution that images should have as fourth parameter. It takes the imagenumbers of the images residing in image galleries in the wiki source text as fifth parameter (those images are given a lower absolute with in cm in the pdf document and thus can be dithered to a loweder absolute with in pixels). It takes the image number of the current image as sixth parameter. DHUN-}
91
91
9292 runFileMods ::
9393 FilePath ->
9494 String ->
138138 = system
139139 ((getConvert p) ++ fn ++ " -background white -flatten " ++ fn)
140140 pngfilename = filenamebase ++ "." ++ "png"
141
141
142142 dither :: String -> IO ()
143143 dither fn
144144 = do _ <- system
172172 _ -> return ()
173173 _ -> return ()
174174 _ -> return ()
175
175
176176 runDither :: String -> Integer -> Integer -> IO ()
177177 runDither fn newSize oldSize
178178 = if newSize < oldSize then
183183 else return ()
184184 textWidth = 10.5
185185 galleryImageWidth = 5.0
186
186
187187 centimetersPerInch :: Double
188188 centimetersPerInch = 2.54
189
189
190190 galleryWidth :: Integer
191191 galleryWidth
192192 = round
193193 ((fromIntegral theResolution) * galleryImageWidth /
194194 centimetersPerInch)
195
195
196196 imageWidth :: Integer
197197 imageWidth
198198 = round
199199 ((fromIntegral theResolution) * textWidth / centimetersPerInch)
200
200
201201 {-DHUN| function to write the image files into the temporary directory and modify the files for use in a LaTeX document. It takes the pathname of temporary image download directory as first input parameter. Please not the the temporary image download directory is always different from the temporary directory. It takes the pathname of the directory in which mediawiki2latex was stated as second input parameter. It takes the name of the temporary directory as third input parameter. It takes the result of the image downloading process as fourth input parameter. This structure has a Maybe monad as outer type since the image download may fail. Inside is a 3 element tuple. The first one is the filename of the image on the wiki. The second one is the number under which the image was stored in the temporary image download directory. The third one is a list of possible urls where the image may be found on the wiki. It takes maximum resolution that images should have as fifth parameter. It takes the list of images number of images residing in galleries as sixth input parameter. DHUN-}
202
202
203203 writeFiles ::
204204 FilePath ->
205205 FilePath ->
206 String ->
207 [Maybe ImageInfo] ->
208 Integer -> [Integer] -> IO ()
206 String -> [Maybe ImageInfo] -> Integer -> [Integer] -> IO ()
209207 writeFiles dir p pathname theImages theResolution gals
210208 = mapM_ go (Prelude.zip ([1 ..] :: [Integer]) theImages)
211209 where go (i, Just x)
212210 = do let filenamebase = (pathname ++ (show i))
213 let filename = filenamebase ++ "." ++ (getExtension (wikiFilename x))
214 filecontent <- Data.ByteString.readFile (dir </> (show (imageNumber x)))
211 let filename
212 = filenamebase ++ "." ++ (getExtension (wikiFilename x))
213 filecontent <- Data.ByteString.readFile
214 (dir </> (show (imageNumber x)))
215215 Data.ByteString.writeFile filename filecontent
216 runFileMods p filenamebase (getExtension (wikiFilename x)) theResolution gals i
216 runFileMods p filenamebase (getExtension (wikiFilename x))
217 theResolution
218 gals
219 i
217220 pathname
218221 go _ = return ()
219
220
221
222 writeFiles2 :: String -> String -> [(String,Int)]-> IO ()
223 writeFiles2 tmpdir pathname forms
224 = mapM_ go forms
225 where go (x,_)
226 = do filecontent <- Data.ByteString.readFile (tmpdir</>x)
227 Data.ByteString.writeFile (pathname++(Prelude.last (splitOn "/" x))) filecontent
228
229 data LatexConfig = LatexConfig{figures ::
230 [Maybe ImageInfo],
222
223 writeFiles2 :: String -> String -> [(String, Int)] -> IO ()
224 writeFiles2 tmpdir pathname forms = mapM_ go forms
225 where go (x, _)
226 = do filecontent <- Data.ByteString.readFile (tmpdir </> x)
227 Data.ByteString.writeFile
228 (pathname ++ (Prelude.last (splitOn "/" x)))
229 filecontent
230
231 data LatexConfig = LatexConfig{figures :: [Maybe ImageInfo],
231232 title :: String, fullConfig :: FullConfig, content :: String,
232233 hostname :: String, theResult :: CompileResult, onlyTables :: Bool,
233 lang :: Maybe String, theTempDir :: String, formulas::[(String,Int)], figHTML:: String}
234
234 lang :: Maybe String, theTempDir :: String,
235 formulas :: [(String, Int)], figHTML :: String}
236
235237 runLaTeX :: LatexConfig -> ImperativeMonad ByteString
236238 runLaTeX config
237239 = liftIO
238240 (withSystemTempDirectory "MediaWiki2LaTeX"
239241 (runLaTeXCallback config))
240
242
241243 runLaTeXCallback :: LatexConfig -> FilePath -> IO ByteString
242244 runLaTeXCallback config pathname
243245 = do extract pathname
248250 d{anchor = pathname </> "document/headers"}
249251 return ()
250252 _ -> return ()
251 if os=="linux" then return () else
252 do d <- readDirectoryWith Data.ByteString.readFile ((mainPath (fullConfig config))++"../fonts/main/")
253 _ <- writeDirectoryWith Data.ByteString.writeFile d{anchor = pathname ++ "/document/"}
254 return ()
255 Tools.writeFile (pathname ++ "/document/main/main.tex") (content config)
256 Tools.writeFile (pathname ++ "/document/index.html") ((html (theResult config))++"<h2>List of Figures</h2>"++(figHTML config))
257
258
253 if os == "linux" then return () else
254 do d <- readDirectoryWith Data.ByteString.readFile
255 ((mainPath (fullConfig config)) ++ "../fonts/main/")
256 _ <- writeDirectoryWith Data.ByteString.writeFile
257 d{anchor = pathname ++ "/document/"}
258 return ()
259 Tools.writeFile (pathname ++ "/document/main/main.tex")
260 (content config)
261 Tools.writeFile (pathname ++ "/document/index.html")
262 ((html (theResult config)) ++
263 "<h2>List of Figures</h2>" ++ (figHTML config))
259264 Tools.writeFile (pathname ++ "/document/headers/svg.tex")
260265 (if vector (fullConfig config) then
261266 "\\newcommand{\\SVGExtension}{pdf}" else
273278 (figures config)
274279 (resolution (fullConfig config))
275280 (galleryNumbers (theResult config))
276 All.writeFiles2 (theTempDir config) (pathname ++ "/document/formulas/") (formulas config)
281 All.writeFiles2 (theTempDir config)
282 (pathname ++ "/document/formulas/")
283 (formulas config)
277284 cwd <- getCurrentDirectory
278285 setCurrentDirectory (pathname ++ "/document/main")
279286 case (ImperativeState.copy (fullConfig config)) of
283290 return ()
284291 _ -> return ()
285292 _ <- forM
286 ((if onlyTables config then [1] else if ((outputType (fullConfig config)) /= PlainPDF) then [] else [1, 2]) :: [Integer])
293 ((if onlyTables config then [1] else
294 if ((outputType (fullConfig config)) /= PlainPDF) then [] else
295 [1, 2])
296 :: [Integer])
287297 (\ r ->
288298 do if (onlyTables config) then return () else
289299 myprint (" generating PDF file. LaTeX run " ++ (show r) ++ " of 4")
299309 "..\\miktex\\miktex\\bin\\makeindex.exe")
300310 ++ " main")
301311 >> return ()))
302 _ <- forM ((if (onlyTables config)||((outputType (fullConfig config)) /= PlainPDF) then [] else [3, 4]) :: [Integer])
312 _ <- forM
313 ((if
314 (onlyTables config) ||
315 ((outputType (fullConfig config)) /= PlainPDF)
316 then [] else [3, 4])
317 :: [Integer])
303318 (\ r ->
304319 do if (onlyTables config) then return () else
305320 myprint (" generating PDF file. LaTeX run " ++ (show r) ++ " of 4")
318333 case splitOn "\n" te of
319334 (x : _) -> return (pack (encode (strip "pt\r" x)))
320335 _ -> return (pack (encode ""))
321 else case outputType (fullConfig config) of
322 PlainPDF -> Data.ByteString.readFile "main.pdf"
323 ZipArchive -> do setCurrentDirectory pathname
324 a<-addFilesToArchive [OptRecursive] emptyArchive ["document"]
325 Data.ByteString.writeFile "main.zip" (toStrict (fromArchive a))
326 Data.ByteString.readFile "main.zip"
327 OdtFile -> do setCurrentDirectory ".."
328 mysystem "libreoffice --headless --convert-to odt index.html"
329 mysystem "libreoffice --headless --convert-to docx index.odt"
330 mysystem "libreoffice --headless --convert-to odt index.docx"
331 Data.ByteString.readFile "index.odt"
332 EPubFile -> do mysystem "ebook-convert ../index.html .epub"
333 Data.ByteString.readFile "index.epub"
334
335
336 else
337 case outputType (fullConfig config) of
338 PlainPDF -> Data.ByteString.readFile "main.pdf"
339 ZipArchive -> do setCurrentDirectory pathname
340 a <- addFilesToArchive [OptRecursive] emptyArchive
341 ["document"]
342 Data.ByteString.writeFile "main.zip"
343 (toStrict (fromArchive a))
344 Data.ByteString.readFile "main.zip"
345 OdtFile -> do setCurrentDirectory ".."
346 mysystem "libreoffice --headless --convert-to odt index.html"
347 mysystem "libreoffice --headless --convert-to docx index.odt"
348 mysystem "libreoffice --headless --convert-to odt index.docx"
349 Data.ByteString.readFile "index.odt"
350 EPubFile -> do mysystem "ebook-convert ../index.html .epub"
351 Data.ByteString.readFile "index.epub"
336352 setCurrentDirectory cwd
337353 return result
338 ex :: ProcessHandle -> Handle -> Handle -> IO ()
354
355 ex :: ProcessHandle -> Handle -> Handle -> IO ()
339356 ex h o e
340 = do b<-hReady stdin
341 if b then do terminateProcess h
342 exitFailure
343 else do return ()
344 v<-tryIOError (hWaitForInput e 0)
345 let xo=case v of
357 = do b <- hReady stdin
358 if b then
359 do terminateProcess h
360 exitFailure
361 else do return ()
362 v <- tryIOError (hWaitForInput e 0)
363 let xo
364 = case v of
346365 Right j -> j
347366 _ -> False
348 _<- if xo then hGetChar e else return ' '
349 w<-tryIOError (hWaitForInput o 0)
350 let yo=case w of
367 _ <- if xo then hGetChar e else return ' '
368 w <- tryIOError (hWaitForInput o 0)
369 let yo
370 = case w of
351371 Right j -> j
352372 _ -> False
353 _<- if yo then hGetChar o else return ' '
354 y<-getProcessExitCode h
355 case y of
356 Just _ -> do hClose o
357 hClose e
358 return ()
359 _-> ex h o e
373 _ <- if yo then hGetChar o else return ' '
374 y <- getProcessExitCode h
375 case y of
376 Just _ -> do hClose o
377 hClose e
378 return ()
379 _ -> ex h o e
360380
361381 mysystem :: String -> IO ()
362 mysystem x =if os == "linux"
363 then
364 do (_,o,e,h)<-runInteractiveCommand x
365 ex h o e
366 return ()
367 else
368 do _<-system x
369 return ()
382 mysystem x
383 = if os == "linux" then
384 do (_, o, e, h) <- runInteractiveCommand x
385 ex h o e
386 return ()
387 else
388 do _ <- system x
389 return ()
370390
371391 getLang :: URL -> IO (Maybe String)
372392 getLang u
376396 ((Environment Tag (TagAttr _ m) _) : []) -> return $
377397 Data.Map.Strict.lookup "lang" m
378398 _ -> return $ Nothing
379
399
380400 catchFun :: IOException -> IO String
381401 catchFun _ = return ""
382
402
383403 strip :: (Eq a) => [a] -> [a] -> [a]
384404 strip l = reverse . (dropWhile isBad) . reverse . dropWhile isBad
385405 where isBad x = x `elem` l
386
406
387407 latexPostamble :: String
388408 latexPostamble = "\n\\end{longtable}\n\\pagebreak"
389
390
391 runCtrb ::String -> ImperativeMonad ()
392 runCtrb dir = do t<-liftIO $ Tools.readFile (dir </> "input")
393 cr<-imgContribback ((read t)::(Maybe ImageInfo))
394 liftIO $ Tools.writeFile (dir </> "output") (show cr)
395
396
409
410 runCtrb :: String -> ImperativeMonad ()
411 runCtrb dir
412 = do t <- liftIO $ Tools.readFile (dir </> "input")
413 cr <- imgContribback ((read t) :: (Maybe ImageInfo))
414 liftIO $ Tools.writeFile (dir </> "output") (show cr)
397415
398416 imgContrib ::
399417 (Maybe ImageInfo) ->
400418 ImperativeMonad ((Maybe (String, Maybe String)))
401 imgContrib x = do systempdir <- liftIO getTemporaryDirectory
402 tempdir <- liftIO $ createTempDirectory systempdir "MediaWiki2LaTeXImgContrib"
403 liftIO $ Tools.writeFile (tempdir </> "input") (show x)
404 _ <- liftIO $ system("mediawiki2latex -x "++( Hex.hex (show (fullconfigbase{imgctrb=Just tempdir}))))
405 t <-liftIO $ Tools.readFile (tempdir </> "output")
406 return (read t)
419 imgContrib x
420 = do systempdir <- liftIO getTemporaryDirectory
421 tempdir <- liftIO $
422 createTempDirectory systempdir "MediaWiki2LaTeXImgContrib"
423 liftIO $ Tools.writeFile (tempdir </> "input") (show x)
424 _ <- liftIO $
425 system
426 ("mediawiki2latex -x " ++
427 (Hex.hex (show (fullconfigbase{imgctrb = Just tempdir}))))
428 t <- liftIO $ Tools.readFile (tempdir </> "output")
429 return (read t)
407430
408431 imgContribback ::
409 (Maybe ImageInfo) ->
410 ImperativeMonad ((Maybe (String, Maybe String)))
432 (Maybe ImageInfo) ->
433 ImperativeMonad ((Maybe (String, Maybe String)))
411434 imgContribback z
412435 = do x <- return z
413436 xx <- imgContrib2 x
414437 liftIO (go xx)
415438 where go (Just xxx) = return (Just xxx)
416439 go _ = return (Just ("", Nothing))
417
440
418441 imgContrib2 ::
419 Maybe ImageInfo ->
420 ImperativeMonad ((Maybe (String, Maybe String)))
442 Maybe ImageInfo -> ImperativeMonad ((Maybe (String, Maybe String)))
421443 imgContrib2 (Just x)
422444 = do img <- getContributors (contributorUrls x)
423445 ffi <- liftIO $ (return . fst) img
425447 ssn <- liftIO $ (return . snd) img
426448 sns <- liftIO ((return . msum) ssn)
427449 liftIO ((fun fif sns))
428 where
429 ffun :: Map String Contributor -> String
450 where ffun :: Map String Contributor -> String
430451 ffun i = intercalate ", " (keys (i))
431
452
432453 fun :: String -> Maybe String -> IO (Maybe (String, Maybe String))
433454 fun fi sn = return (Just (fi, sn))
434455 imgContrib2 _ = do liftIO (return Nothing)
435
436 makeImgList ::
437 [(Maybe ImageInfo)] ->
438 ImperativeMonad String
456
457 makeImgList :: [(Maybe ImageInfo)] -> ImperativeMonad String
439458 makeImgList imgs2
440459 = do ccontrib <- mapM (imgContrib) imgs2
441460 cccontrib <- liftIO (mapM (return . id) ccontrib)
446465 return ((toString latexPreamble) ++ z ++ (latexPostamble))
447466 where go ((i, Just (con, lic)), Just info)
448467 = "\\href{" ++
449 (replace2 (replace2 (concat (map chartransforlink (exportURL (descriptionUrl info)))) "//" "/")
468 (replace2
469 (replace2
470 (concat (map chartransforlink (exportURL (descriptionUrl info))))
471 "//"
472 "/")
450473 "https:/"
451474 "https://")
452475 ++
454477 (show i) ++
455478 "}& " ++ con ++ "&" ++ (fromMaybe "" lic) ++ "\\\\ \\hline \n"
456479 go (((i, _), _)) = (show i) ++ "&&\\\\ \\hline \n"
457
458
459 makeImgListHTML ::
460 [(Maybe ImageInfo)] ->
461 ImperativeMonad String
480
481 makeImgListHTML :: [(Maybe ImageInfo)] -> ImperativeMonad String
462482 makeImgListHTML imgs2
463483 = do ccontrib <- mapM (imgContrib) imgs2
464484 cccontrib <- liftIO (mapM (return . id) ccontrib)
466486 imgs <- liftIO (return imgs2)
467487 let z = concat
468488 (map go (zip (zip ([1 ..] :: [Integer]) contrib) imgs))
469 return ("<table rules=\"all\"><tr><td>Number</td><td>Contributors</td><td>License</td></tr>" ++ z ++ "</table>")
489 return
490 ("<table rules=\"all\"><tr><td>Number</td><td>Contributors</td><td>License</td></tr>"
491 ++ z ++ "</table>")
470492 where go ((i, Just (con, lic)), Just info)
471493 = "<tr><td><a href=\"" ++
472494 (replace2 (replace2 (((exportURL (descriptionUrl info)))) "//" "/")
475497 ++
476498 "\">" ++
477499 (show i) ++
478 "</a></td><td>" ++ con ++ "</td><td>" ++ (fromMaybe "" lic) ++ "</td></tr>"
500 "</a></td><td>" ++
501 con ++ "</td><td>" ++ (fromMaybe "" lic) ++ "</td></tr>"
479502 go (((i, _), _)) = (show i) ++ "&&\\\\ \\hline \n"
480503
481
482 makeformulas:: String->String->[Anything Char]->ImperativeMonad [(String,Int)]
504 makeformulas ::
505 String ->
506 String -> [Anything Char] -> ImperativeMonad [(String, Int)]
483507 makeformulas p tempdir ll
484508 = do x <- allinfo
485509 return $ concat x
486 where
487 allinfo :: ImperativeMonad [[(String,Int)]]
510 where allinfo :: ImperativeMonad [[(String, Int)]]
488511 allinfo = mapM processNode ll
489 processNode:: Anything Char -> ImperativeMonad [(String,Int)]
490 processNode (Environment Math _ l) = do myname <- return (( (hex( show (hash(mathTransform l))))))
491 sz<-liftIO (do Tools.writeFile (tempdir </> ( myname++".tex")) ("\\documentclass{article}\n\\usepackage{color}\n\\input{./defaultcolors}\n\\usepackage{amsfonts}\n\\usepackage{amsmath}\n\\begin{document}\n\\thispagestyle{empty}\n$"++(mathTransform l)++"$\n\\end{document}")
492 Static.writeFiles tempdir headerFiles
493 mysystem ("latex2png " ++ (tempdir </> (myname++".tex")))
494 sz<-mysize (tempdir </> (myname++".png"))
495 mysystem ("latex2png -c -d 1200 " ++ (tempdir </> (((myname))++".tex")))
496 print (mathTransform l)
497 return sz)
498 return [(myname++".png",sz)]
499 processNode (Environment BigMath _ l) = do myname <- return (( (hex( show (hash(mathTransform l))))))
500 sz<-liftIO (do Tools.writeFile (tempdir </> ( myname++".tex")) ("\\documentclass{article}\n\\usepackage{color}\n\\input{./defaultcolors}\n\\usepackage{amsfonts}\n\\usepackage{amsmath}\n\\begin{document}\n\\thispagestyle{empty}\n$"++(mathTransform l)++"$\n\\end{document}")
501 Static.writeFiles tempdir headerFiles
502 mysystem ("latex2png " ++ (tempdir </> (myname++".tex")))
503 sz<-mysize (tempdir </> (myname++".png"))
504 mysystem ("latex2png -c -d 1200 " ++ (tempdir </> (((myname))++".tex")))
505 print (mathTransform l)
506 return sz)
507 return [(myname++".png",sz)]
512
513 processNode :: Anything Char -> ImperativeMonad [(String, Int)]
514 processNode (Environment Math _ l)
515 = do myname <- return (((hex (show (hash (mathTransform l))))))
516 sz <- liftIO
517 (do Tools.writeFile (tempdir </> (myname ++ ".tex"))
518 ("\\documentclass{article}\n\\usepackage{color}\n\\input{./defaultcolors}\n\\usepackage{amsfonts}\n\\usepackage{amsmath}\n\\begin{document}\n\\thispagestyle{empty}\n$"
519 ++ (mathTransform l) ++ "$\n\\end{document}")
520 Static.writeFiles tempdir headerFiles
521 mysystem ("latex2png " ++ (tempdir </> (myname ++ ".tex")))
522 sz <- mysize (tempdir </> (myname ++ ".png"))
523 mysystem
524 ("latex2png -c -d 1200 " ++ (tempdir </> (((myname)) ++ ".tex")))
525 print (mathTransform l)
526 return sz)
527 return [(myname ++ ".png", sz)]
528 processNode (Environment BigMath _ l)
529 = do myname <- return (((hex (show (hash (mathTransform l))))))
530 sz <- liftIO
531 (do Tools.writeFile (tempdir </> (myname ++ ".tex"))
532 ("\\documentclass{article}\n\\usepackage{color}\n\\input{./defaultcolors}\n\\usepackage{amsfonts}\n\\usepackage{amsmath}\n\\begin{document}\n\\thispagestyle{empty}\n$"
533 ++ (mathTransform l) ++ "$\n\\end{document}")
534 Static.writeFiles tempdir headerFiles
535 mysystem ("latex2png " ++ (tempdir </> (myname ++ ".tex")))
536 sz <- mysize (tempdir </> (myname ++ ".png"))
537 mysystem
538 ("latex2png -c -d 1200 " ++ (tempdir </> (((myname)) ++ ".tex")))
539 print (mathTransform l)
540 return sz)
541 return [(myname ++ ".png", sz)]
508542 processNode (Environment _ _ l) = (makeformulas p tempdir l)
509 processNode _ = return []
543 processNode _ = return []
544
510545 mysize :: String -> IO Int
511546 mysize fn
512547 = do _ <- system
525560 (x : _) -> case splitOn " " x of
526561 (_ : (_ : (y : _))) -> case splitOn "x" y of
527562 (z : _) -> case reads z of
528 [(ii,
529 _)] -> do return ii
563 [(ii, _)] -> do return
564 ii
530565 _ -> return 0
531566 _ -> return 0
532567 _ -> return 0
533568 _ -> return 0
534
535
536
537
569
538570 jjoin :: String -> String -> String
539571 jjoin theBody listOfFiguers
540572 = ((toString (latexHeader)) ++
541573 theBody ++ listOfFiguers ++ (toString latexFooter))
542
574
543575 all :: FullConfig -> ImperativeMonad ()
544576 all cfg
545577 = do liftIO $ myprint " processing started"
565597 liftIO $ myprint " downloading article and contributor information"
566598 text <- load (runMode cfg)
567599 liftIO $ myprint " parsing article text"
568
569 theFormulas <- if (outputType cfg) `elem` [EPubFile, OdtFile] then makeformulas (mainPath cfg) tempdir (printPrepareTree (parseit (if (runMode cfg)==HTML then minparsers else parsers) text)) else return []
600 theFormulas <- if (outputType cfg) `elem` [EPubFile, OdtFile] then
601 makeformulas (mainPath cfg) tempdir
602 (printPrepareTree
603 (parseit (if (runMode cfg) == HTML then minparsers else parsers)
604 text))
605 else return []
570606 liftIO
571607 (myprint
572608 (" number of bytes to be parsed: " ++
573609 (show (Data.List.length text))))
574 result <- Compiler.compile (runMode cfg) text templates [] "" Nothing (Map.fromList theFormulas) ((outputType cfg) `elem` [EPubFile, OdtFile])
610 result <- Compiler.compile (runMode cfg) text templates [] ""
611 Nothing
612 (Map.fromList theFormulas)
613 ((outputType cfg) `elem` [EPubFile, OdtFile])
575614 liftIO $
576615 myprint
577616 " forking threads to download of images and contributor information on them"
580619 (" number of images going to be downloaded: " ++
581620 (show (Data.List.length (images result)))))
582621 theImages <- getImages tempdir (images result) (wikiUrl purl)
583 --let theImages=[]
584622 let joined = jjoin (body result) ""
585623 let theConfig
586624 = LatexConfig{content = joined, figures = [],
587625 All.title = (makeTitle result purl), fullConfig = cfg,
588626 All.hostname = (UrlAnalyse.hostname purl), theResult = result,
589 onlyTables = True, lang = language, theTempDir = tempdir, formulas=theFormulas, figHTML=""}
627 onlyTables = True, lang = language, theTempDir = tempdir,
628 formulas = theFormulas, figHTML = ""}
590629 liftIO $ myprint " precompiling table columns"
591630 let cols = (sum (map Data.List.length (tablelist result)))
592631 ior <- liftIO (newIORef (0 :: Integer))
610649 (myprint
611650 (" number of bytes to be parsed: " ++
612651 (show (Data.List.length text))))
613 newResult <- Compiler.compile (runMode cfg) text templates tabs (makeTitle2 result purl) language (Map.fromList theFormulas) ((outputType cfg) `elem` [EPubFile, OdtFile])
614
652 newResult <- Compiler.compile (runMode cfg) text templates tabs
653 (makeTitle2 result purl)
654 language
655 (Map.fromList theFormulas)
656 ((outputType cfg) `elem` [EPubFile, OdtFile])
615657 liftIO $
616658 myprint
617659 " joining threads to download the images and contributor information on them"
621663 (show (Data.List.length (images result)))))
622664 pp <- makeImgList theImages
623665 pphtml <- makeImgListHTML theImages
624 (contrib,contribHTML) <- makeContributors (Just (UrlAnalyse.url purl))
666 (contrib, contribHTML) <- makeContributors
667 (Just (UrlAnalyse.url purl))
625668 let newContent = jjoin (body newResult) (contrib ++ pp)
626669 thetheImages <- liftIO $
627670 do ii <- return theImages
629672 liftIO $ myprint " preparing for PDF generation"
630673 pdf <- runLaTeX
631674 theConfig{onlyTables = False, theResult = newResult,
632 content = newContent, figures = thetheImages, figHTML=pphtml++contribHTML++"</body></html>"}
675 content = newContent, figures = thetheImages,
676 figHTML = pphtml ++ contribHTML ++ "</body></html>"}
633677 liftIO (Data.ByteString.writeFile (outputFilename cfg) pdf)
634678 liftIO $ removeDirectoryRecursive tempdir
635679 liftIO $ myprint " finished"
0
01 module Babel where
12 import Static
23 import Data.List.Split (splitOn)
56 import Codec.Binary.UTF8.String
67 import Data.ByteString
78 hiding (take, reverse, dropWhile, takeWhile, drop, map, concat,
8 elem, zip, intercalate, init, tails, isPrefixOf, any, length, null, hPutStr)
9
9 elem, zip, intercalate, init, tails, isPrefixOf, any, length, null,
10 hPutStr)
1011
1112 makeBabel :: Maybe String -> [Char] -> String
1213 makeBabel b x
2021 = case splitOn "." x of
2122 (z : _) -> z
2223 _ -> "en"
23
24
25
22 import Data.Char
33 import Data.Array
44 import Data.Tuple
5
5
66 {-DHUN| Basic Fontstyle, may be either normal of monospaced or small caps DHUN-}
7
7
88 data FontStyleBase = Normal
99 | Mono
1010 | Smallcaps
1111 deriving (Eq, Ord, Show)
12
12
1313 {-DHUN| Full discription of style of a font. Consists of Basic Fontstyle plus boolean for bold and/or italic DHUN-}
14
14
1515 data FontStyle = FontStyle{stylebase :: FontStyleBase,
1616 bold :: Bool, italic :: Bool}
1717 deriving (Eq, Ord, Show)
18
18
1919 {-DHUN| Font, a list of ttf font file currently used by mediawiki2latex DHUN-}
20
20
2121 data Font = GnuUnifont
2222 | WenQuanYiZenHei
2323 | FreeMono
3737 | ComputerModernRomanItalic
3838 | ComputerModernRomanBoldItalic
3939 deriving (Eq, Ord, Show, Ix)
40
40
4141 {-DHUN| list of Fonts. The first element in the list is the fonts that is preferred to be used. If it can not be used for some reason the next font in the list is used. Note that the list is denoted in reverse order and put into the right order by the reverse function in the way it is just written at this point of the source file DHUN-}
42
42
4343 fonts :: [Font]
4444 fonts
4545 = reverse
5050 ComputerModernTypeWriterBoldItalic, ComputerModernRoman,
5151 ComputerModernRomanBold, ComputerModernRomanItalic,
5252 ComputerModernRomanBoldItalic]
53
53
5454 {-DHUN| enumation of Fonts. To store the Fonts database efficiently on disc and to read it to memory quickly interger indices are used. The Integers are low in magnitude and thus stored as chars on disc. DHUN-}
55
55
5656 fontList :: [(Font, Int)]
5757 fontList = zip fonts [(ord 'A') ..]
58
58
5959 {-DHUN| converts font to char. See also fontList in this source file DHUN-}
60
60
6161 fromFontToChar :: Font -> Char
6262 fromFontToChar f
6363 = chr
6464 ((array (GnuUnifont, ComputerModernRomanBoldItalic) fontList) ! f)
65
65
6666 {-DHUN| converts from char to font. See also fontList in this source file DHUN-}
67
67
6868 fromCharToFont :: Char -> Font
6969 fromCharToFont c
7070 = (array (ord ('A'), ord ('A') + (length fontList) - 1)
7171 (map swap fontList))
7272 ! (ord c)
73
73
7474 {-DHUN| converts from Fonts to path of ttf file on disc. DHUN-}
75
75
7676 getttf :: Font -> [Char]
7777 getttf ComputerModernTypeWriter
7878 = "/usr/share/fonts/truetype/cmu/cmuntt.ttf"
108108 getttf GnuUnifont = "/usr/share/fonts/truetype/unifont/unifont.ttf"
109109 getttf WenQuanYiZenHei
110110 = "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc"
111
111
112112 {-DHUN| defines the FontStyle for each ttf font file. See also FontStyle in this source file. DHUN-}
113
113
114114 getstyle :: Font -> FontStyle
115115 getstyle GnuUnifont
116116 = FontStyle{stylebase = Normal, bold = False, italic = False}
1515 import Data.ByteString
1616 hiding (take, reverse, dropWhile, takeWhile, drop, map, concat,
1717 elem, length, zip, head, filter, minimum, isInfixOf)
18
1918
20
21 runCompile ::String -> ImperativeMonad ()
22 runCompile dir = do t<-liftIO $ Tools.readFile (dir </> "input")
23 cr<-return (printPrepareTree (parseit minparsers t))
24 liftIO $ Tools.writeFile (dir </> "output") (show cr)
25
26
19 runCompile :: String -> ImperativeMonad ()
20 runCompile dir
21 = do t <- liftIO $ Tools.readFile (dir </> "input")
22 cr <- return (printPrepareTree (parseit minparsers t))
23 liftIO $ Tools.writeFile (dir </> "output") (show cr)
2724
2825 {-DHUN| main function to compile mediawiki pages |DHUN-}
29
26
3027 compile ::
3128 RunMode ->
32 String -> String -> [[ByteString]] -> String -> Maybe String -> Map.Map String Int -> Bool ->ImperativeMonad CompileResult
33 compile theRunMode text templates tabs mytitle mylanguage formulas b
29 String ->
30 String ->
31 [[ByteString]] ->
32 String ->
33 Maybe String ->
34 Map.Map String Int -> Bool -> ImperativeMonad CompileResult
35 compile theRunMode text templates tabs mytitle mylanguage formulas
36 b
3437 = do st <- get
3538 case theRunMode of
3639 StandardTemplates -> return
37 (run b mylanguage mytitle (parseit parsers text) (parseit parsers text) (hostname . fullUrl $ st) templates
38 tabs formulas)
40 (run b mylanguage mytitle (parseit parsers text)
41 (parseit parsers text)
42 (hostname . fullUrl $ st)
43 templates
44 tabs
45 formulas)
3946 UserTemplateFile _ -> return
40 (run b mylanguage mytitle (parseit parsers text) (parseit parsers text) (hostname . fullUrl $ st) templates
41 tabs formulas)
47 (run b mylanguage mytitle (parseit parsers text)
48 (parseit parsers text)
49 (hostname . fullUrl $ st)
50 templates
51 tabs
52 formulas)
4253 HTML -> return
43 (run b mylanguage mytitle (printPrepareTree (parseit minparsers text)) (printPrepareTree (parseit minparsers text))
54 (run b mylanguage mytitle
55 (printPrepareTree (parseit minparsers text))
56 (printPrepareTree (parseit minparsers text))
4457 (hostname . fullUrl $ st)
4558 templates
46 tabs formulas)
47 Book -> do
48 return
49 (run b mylanguage mytitle (loadacu st) (loadacu st)
50 (hostname . fullUrl $ st)
51 templates
52 tabs formulas)
53 ExpandedTemplates -> do return
54 (run b mylanguage mytitle (parseit parsers text) (parseit parsers text) (hostname . fullUrl $ st) templates
55 tabs formulas)
56
59 tabs
60 formulas)
61 Book -> do return
62 (run b mylanguage mytitle (loadacu st) (loadacu st)
63 (hostname . fullUrl $ st)
64 templates
65 tabs
66 formulas)
67 ExpandedTemplates -> do return
68 (run b mylanguage mytitle (parseit parsers text)
69 (parseit parsers text)
70 (hostname . fullUrl $ st)
71 templates
72 tabs
73 formulas)
74
5775 {-DHUN| pathname of the temporary directory of the compiler |DHUN-}
58
76
5977 dirpref :: [Char]
6078 dirpref = "../tmp/compiler/"
61
79
6280 {-DHUN| converts a wiki source document received from mediawiki when requesting it for Special:Export to a parse tree to be converted to LaTeX be treeToLaTeX3. It also signals to compiler.py that the source code was read using the temporary compiler directory |DHUN-}
63
81
6482 shortparse :: String -> IO [Anything Char]
6583 shortparse x
6684 = do Tools.writeFile (dirpref ++ "done") ""
6785 return (parseit parsers x)
68
69
70
71
86
7287 {-DHUN| return a parse tree of a source file. The first argument is the source file. The second argument is a list of command line parameter. If it contains the keyword print. The source file is understood to be the HTML code returned by mediawiki when being requested for the print version of a wiki page, otherwise it is understood to be the wiki source code of a wiki page, that is what you get when issuing a Special:Export request to mediawiki. This function return a parse tree ready to be turned into a latex document by treeToLaTeX3 |DHUN-}
73
88
7489 getparse :: String -> [String] -> IO [Anything Char]
7590 getparse x args
7691 = if ("print" `elem` args) then printparse x else shortparse x
77
92
7893 {-DHUN| prepares a HTML document received from mediawiki when requesting it for the print version of a wiki page to a parse tree to be converted to LaTeX be treeToLaTeX3. It also signals to compiler.py that the source code was read using the temporary compiler directory |DHUN-}
79
94
8095 printparse :: String -> IO [Anything Char]
8196 printparse x
8297 = do Tools.writeFile (dirpref ++ "done") ""
8398 return (printPrepareTree (parseit minparsers x))
84
85
86
99
87100 {-DHUN| the pathname of the temporary directory |DHUN-}
88
101
89102 tmppath :: [Char]
90103 tmppath = "../tmp/"
91
104
92105 {-DHUN| the function takes a list of the following format as first input [([table1,column1],width_x),([table1,column2],width_y),...,([table2,column1],width_z),...] parameter. Thats is a list containing the maximum width for each column of each table of the document. The maximum width of the column is the width that the column could have if it was printed on paper of infinite size. So the size without line breaks. The second parameter is the accumulator which should be the empty map when calling this function from outside. The function returns a map mapping tablenumber to a map mapping columnnumbers to maximum columns width. This data structure contains all information needed to make the decisions on the final width of the columns in the document |DHUN-}
93
106
94107 maketabmap ::
95108 [([Int], Double)] ->
96109 Map.Map Int (Map.Map Int Double) ->
104117 Nothing -> Just (Map.singleton s1 b1)
105118 Just m1 -> Just (Map.insert s1 b1 m1)
106119 maketabmap _ m = m
107
120
108121 {-DHUN| prepare the result of maketabmap for further procession. Some indices are offset corrected. Space for the rules of the table is added to the width of the columns DHUN-}
109
122
110123 postproctabmap ::
111124 (Fractional a, Num k1, Ord k1, Ord a) =>
112125 Map.Map k (Map.Map k1 a) -> Map.Map k (Map.Map k1 a)
115128 = Map.delete 0
116129 (Map.mapKeys (\ k -> k - 1)
117130 (Map.map (\ x -> (x + 12.333748 - (minimum (Map.elems m1)))) m1))
118
131
119132 {-DHUN| datatype for the results of a compulation process. The field images contains the strings enlclose in double square brackes in the wiki used for image inclusion. The field body contains the body of the latex document compiled form the wiki documents. The field tablelist contains a list of lists of bodies of latex document containing the latex source for a single column. Those can be compiled with the propper headers and footers on arbitrary huge paper to determine the maximum reasonable width for a each column in each table of the document which is need for automatic calculation of column widths for the document. The field gallery numbers contain the image numbers of images include in the wiki source inside of galleries. These got a smaller dimension in cm in the final pdf document and can thus be dithers to a small width in pixels. The field title contains the title of the document if a template defining the title was part of the parse wiki source |DHUN-}
120
133
121134 data CompileResult = CompileResult{images :: [String],
122135 body :: String, tablelist :: [[String]],
123 galleryNumbers :: [Integer], title :: String, html::String}
124 deriving Show
136 galleryNumbers :: [Integer], title :: String, html :: String}
137 deriving Show
138
125139 {-DHUN| the first parameter is the parse tree created by get parse of the document currently being processed. the second parameter is the URL under which the document was downloaded. the third parameter is the netloc describing the wiki this page belongs to. The fourth parameter is a mapping file defined by the user for the mapping of mediawiki templates to latex commands. the fifth parameter is a possible parse tree created by precious run the the should be added before the begging of the newly created parse tree. This function writes out all results to temporary files that will be further processed by compiler.py DHUN-}
126140
127141 run ::
128 Bool -> Maybe String -> String -> [Anything Char] -> [Anything Char] ->
129 String -> String -> [[ByteString]] -> Map.Map String Int-> CompileResult
130 run bb mylanguage mytitle parsetree parsetree2 netloc tmpl someTables formulas
142 Bool ->
143 Maybe String ->
144 String ->
145 [Anything Char] ->
146 [Anything Char] ->
147 String ->
148 String -> [[ByteString]] -> Map.Map String Int -> CompileResult
149 run bb mylanguage mytitle parsetree parsetree2 netloc tmpl
150 someTables formulas
131151 = CompileResult{images = img, body = bdy, tablelist = theTables,
132 galleryNumbers = gals, title = tit, html=if bb then trda3 else []}
152 galleryNumbers = gals, title = tit,
153 html = if bb then trda3 else []}
133154 where alldata2 g u
134155 = (treeToLaTeX3 ((snd . newtree $ g))
135156 initialState{urld = analyseNetloc netloc}{tabmap = u,
161182 bdy = doUnicode trda
162183 gals = getGalleryNumbers trst
163184 tit = getTitle trst
164
185
165186 fun :: ByteString -> Double
166187 fun x
167188 = case reads (toString x) of
66 import Data.Map.Strict hiding ((!))
77 import Data.Maybe
88 import System.Info
9
9
1010 {-DHUN| a map taking a FontStyle as key. (see FontStyle in the BaseFont module). The Values of a maps is and array. The array has got an integer index covering 16 bit. The elements of the arrows are chars. These chars can be converted to the paths of ttf files on disc. (see functions fromCharToFont and getttf in the module BaseFont on how to converts the char to a path to a ttf file on disc). The idea is that you got a 16 Bit unicode charter with certain font style properties. It put in this information in the this map and the array you get from it you get the idea ttf file to print that character in that fontstyle in a latex document DHUN-}
11
11
1212 megafont2 :: Map FontStyle (Array Int Char)
1313 megafont2
1414 = Data.Map.Strict.fromList
1616 array ((0, (2 :: Int) ^ (16 :: Int) - 1) :: (Int, Int))
1717 (zip ([0 .. (2 :: Int) ^ (16 :: Int) - 1] :: [Int]) f))
1818 | (s, f) <- megafont]
19
19
2020 {-DHUN| takes a fontstyle and a 16 bis unicode charater and give you the ideal font to print this character in a LaTeX docuemnt. See also megafont2 in this module DHUN-}
21
21
2222 getFont :: FontStyle -> Char -> Font
2323 getFont fontStyle c
2424 = fromMaybe GnuUnifont
2525 ((Data.Map.Strict.lookup fontStyle megafont2) >>=
26 return . fromCharToFont . (! (if (ord c) >65535 then ord (' ') else (ord c))))
27
26 return .
27 fromCharToFont .
28 (! (if (ord c) > 65535 then ord (' ') else (ord c))))
29
2830 {-DHUN| Takes a font and returns the LaTeX Command to switch to this particular font in xelatex DHUN-}
29
31
3032 fontsetter :: Font -> [Char]
3133 fontsetter f
32 = "\\setmainfont" ++ inner ++ "\\setmonofont" ++ innermono
34 = "\\setmainfont" ++ inner ++ "\\setmonofont" ++ innermono
3335 where filename
3436 = reverse ((takeWhile (/= '/')) (reverse (getttf f)))
3537 pathname = reverse ((dropWhile (/= '/')) (reverse (getttf f)))
36 inner = "{" ++ filename ++ "}"++"[" ++ (if os=="linux" then "Path=" ++ pathname else "") ++ (mid f) ++ "]"
38 inner
39 = "{" ++
40 filename ++
41 "}" ++
42 "[" ++
43 (if os == "linux" then "Path=" ++ pathname else "") ++
44 (mid f) ++ "]"
3745 innermono
38 = "{" ++ filename ++ "}"++"[" ++(if os=="linux" then "Path=" ++ pathname else "")++ (midmono f) ++ "]"
46 = "{" ++
47 filename ++
48 "}" ++
49 "[" ++
50 (if os == "linux" then "Path=" ++ pathname else "") ++
51 (midmono f) ++ "]"
3952 mid i
4053 | i `elem`
4154 [ComputerModernRoman, ComputerModernRomanBold,
7083 ",UprightFont=FreeMono,BoldFont=FreeMonoBold," ++
7184 "ItalicFont=FreeMonoOblique,BoldItalicFont=FreeMonoBoldOblique"
7285 midmono _ = ""
73
86
7487 {-DHUN| Takes a FontStyle and returns the LaTeX Command to switch to that font. DHUN-}
75
88
7689 fontstyler :: FontStyle -> [Char]
7790 fontstyler s
7891 = (if (stylebase s) == Mono then "\\ttfamily " else "") ++
1111 import Data.List
1212 import Tools
1313 import System.FilePath
14
14
1515 modpath2 :: String -> URL -> URL
1616 modpath2 s u
1717 = u{url_path =
2020 p = case reverse pp of
2121 ('/' : xs) -> (reverse xs)
2222 xs -> (reverse xs)
23
23
2424 conv :: URL -> String -> String
2525 conv u s
2626 = if take 5 s == "http:" then s else
3434 _ -> s})
3535 "%25"
3636 "%"
37
37
3838 getImageUrl2 :: (String, URL) -> Maybe String
3939 getImageUrl2 (s, u)
4040 = (getImageUrl "fullImageLink" u s) `mplus`
4141 (getImageUrl "fullMedia" u s)
42
42
4343 getImageUrl3 :: String -> Maybe String
4444 getImageUrl3 s = return s
45
45
4646 getImageUrl :: String -> URL -> String -> Maybe String
4747 getImageUrl fi u ss
4848 = if isInfixOf fil s then
6161 theHref = BStr.unpack (UTF8Str.fromString "href=\"")
6262 q = BStr.unpack (UTF8Str.fromString "\"")
6363
64 {-DHUN| downloads a single image form the wiki. It takes the temporary image download directory as first parameter. It takes the WikiUrl of the wiki website currently being processed as second parameter. It takes a tuple as third input parameter. The first element of the tuple is the image number so just an integer that can be used to identify the image uniquely) . The second element of the tupele is image include string of the image from the wiki source, that is the text in between the square brackets as second input parameter. It returns a tuple. The first element of the tuple is a list of urls under which the image may be found on the wiki. The second element of the tuple is the image number as described above. The third element of the tuple is the Url where the description page of the image on the wiki is located DHUN-}
6465
65 {-DHUN| downloads a single image form the wiki. It takes the temporary image download directory as first parameter. It takes the WikiUrl of the wiki websitze currently being processed as second parameter. It takes a tuple as third input parameter. The first element of the tuple is the image number so just an integer that can be used to identify the image uniquely) . The second element of the tupele is image include string of the image from the wiki source, that is the text in between the square brackets as second input parameter. It returns a tuple. The first element of the tuple is a list of urls under which the image may be found on the wiki. The second element of the tuple is the image number as described above. The third element of the tuple is the Url where the description page of the image on the wiki is located DHUN-}
66
6766 getImagePage ::
6867 String ->
6968 WikiUrl -> (Integer, String) -> IO (Maybe ([String], Integer, URL))
7978 Just (du, x) -> do img <- (geturl2 x) :: (IO BStr.ByteString)
8079 BStr.writeFile (dir </> (show i)) img
8180 return
82 (Just (map (unify . exportURL . (modpath2 ss)) (parses u), i, modpath2 ss du))
81 (Just
82 (map (unify . exportURL . (modpath2 ss)) (parses u), i,
83 modpath2 ss du))
8384 _ -> return Nothing
84 where
85 go :: (URL, Maybe String) -> [(URL, String)]
85 where go :: (URL, Maybe String) -> [(URL, String)]
8686 go (uu, Just x) = [(uu, x)]
8787 go _ = []
88
8988
9089 {-DHUN| downloads a single image form the wiki. It takes the temporary image download directory as first parameter. It takes a tuple as second input parameter. The first element of the tuple is the image number so just an integer that can be used to identify the image uniquely) . The second element of the tupele is image include string of the image from the wiki source, that is the text in between the square brackets as second input parameter. It takes the WikiUrl of the wiki websitze currently being processed as thrird parameter. See function getImages in this module for documentation on the returned data type DHUN-}
9190
9291 doImage ::
93 String ->
94 WikiUrl ->
95 (Integer, String) -> IO (Maybe ImageInfo)
92 String -> WikiUrl -> (Integer, String) -> IO (Maybe ImageInfo)
9693 doImage dir theWikiUrl img
9794 = do myprint (show img)
9895 p <- getImagePage dir theWikiUrl (fst img, theName)
9996 case p of
100 Just (u, pp, du) -> return (Just ImageInfo{wikiFilename=theName,imageNumber=pp,contributorUrls=u,descriptionUrl=du})
97 Just (u, pp, du) -> return
98 (Just
99 ImageInfo{wikiFilename = theName, imageNumber = pp,
100 contributorUrls = u, descriptionUrl = du})
101101 _ -> return Nothing
102102 where theName
103103 = case dropWhile (/= ':') (takeWhile (/= '|') (snd img)) of
104104 (_ : xs) -> replace2 xs "%" "%25"
105105 _ -> []
106
106
107107 {-DHUN| main function to download images. It takes the temporary image download directory as first parameter. It takes image include strings of the images from the wiki source, that is the text in between the square brackets as second input parameter. It takes the WikiUrl of the wiki websitze currently being processed as thrird parameter. This function runs as a background process. So it returns a list of empty MVars immediately when being called. They are later on filled with information on the downloaded images including their location in the temporary image download directory. The returend MVars contain ImageInfo values. See description in the module ImperativeState for a detailed description. DHUN-}
108
108
109109 getImages ::
110 String ->
111 [String] ->
112 WikiUrl ->
113 ImperativeMonad [Maybe ImageInfo]
110 String -> [String] -> WikiUrl -> ImperativeMonad [Maybe ImageInfo]
114111 getImages dir images theWikiUrl
115112 = do liftIO $
116113 do let ddir = dir
0
01 module Grammatik where
1 data Plingular = Singular | Plural
2 deriving Show
3 data Casus = Nominativ | Dativ | Genitiv | Akkusativ
4 deriving Show
5 data Person = Ich | Du | ErSieEs
6 deriving Show
7 data Verbform = VerbformPerson Person | Paeteritum | Partizip2 | Lonjunktiv | Imperativ Plingular | Grundform
8 deriving Show
9 data Adjektivfrom = Positiv | Koperativ | Superlativ
10 deriving Show
11 data Genus = Maskulin | Feminin | Neutrum
12 deriving Show
13 data GenusArticelis = MaskulinA | FemininA | NeutrumA | PluralA
14 deriving Show
15 data Bestimmung = Demonstativ | Definit | Unbestimmt
16 deriving Show
17 data Wort = Nomen String Genus Casus Plingular | Verb String Verbform | Adjektiv Adjektivfrom | Artikel Bestimmung GenusArticelis Casus
18 deriving Show
192
20 data SatzTeil = Unbekannt [Wort] | Subjekt Wort | VerbS Wort | Objekt Wort
21 deriving Show
3 data Plingular = Singular
4 | Plural
5 deriving Show
6
7 data Casus = Nominativ
8 | Dativ
9 | Genitiv
10 | Akkusativ
11 deriving Show
12
13 data Person = Ich
14 | Du
15 | ErSieEs
16 deriving Show
17
18 data Verbform = VerbformPerson Person
19 | Paeteritum
20 | Partizip2
21 | Lonjunktiv
22 | Imperativ Plingular
23 | Grundform
24 deriving Show
25
26 data Adjektivfrom = Positiv
27 | Koperativ
28 | Superlativ
29 deriving Show
30
31 data Genus = Maskulin
32 | Feminin
33 | Neutrum
34 deriving Show
35
36 data GenusArticelis = MaskulinA
37 | FemininA
38 | NeutrumA
39 | PluralA
40 deriving Show
41
42 data Bestimmung = Demonstativ
43 | Definit
44 | Unbestimmt
45 deriving Show
46
47 data Wort = Nomen String Genus Casus Plingular
48 | Verb String Verbform
49 | Adjektiv Adjektivfrom
50 | Artikel Bestimmung GenusArticelis Casus
51 deriving Show
52
53 data SatzTeil = Unbekannt [Wort]
54 | Subjekt Wort
55 | VerbS Wort
56 | Objekt Wort
57 deriving Show
22 import Data.Char
33 import Data.Map.Strict hiding (map)
44 import Data.Maybe
5
5
66 {-DHUN| list of integer in the range from 0 to 15. So one hex digit DHUN-}
7
7
88 nums :: [Int]
99 nums = [0 .. 15]
10
10
1111 {-DHUN| list of single digit hex numbers in ascending order DHUN-}
12
12
1313 chars :: [Char]
1414 chars = (['0' .. '9'] ++ ['A' .. 'F'])
15
15
1616 {-DHUN| map from integer to hex digit DHUN-}
17
17
1818 fromm :: Map Int Char
1919 fromm = fromList $ zip nums chars
20
20
2121 {-DHUN| map from hex digit to integer DHUN-}
22
22
2323 tom :: Map Char Int
2424 tom = fromList $ zip chars nums
25
25
2626 {-DHUN| function to convert a single unicode character (Char) to a hex encodes string DHUN-}
27
27
2828 hexChar :: Char -> String
2929 hexChar c
3030 = concat
3737 fromm)
3838 >>= (\ x -> return [x])))
3939 (reverse [0 .. 7] :: [Int]))
40
40
4141 {-DHUN| function to convert a string of unicode characters to a hex encoded version of it DHUN-}
42
42
4343 hex :: String -> String
4444 hex s = concat (map hexChar s)
45
45
4646 {-DHUN| function to decode a hex encoded unicode string DHUN-}
47
47
4848 unhex :: String -> String
4949 unhex (a : (b : (c : (d : (e : (f : (g : (h : xs))))))))
5050 = (chr
5151 (sum
5252 (map
53 (\ (i, cc) -> (16 ^ i) * (fromMaybe 0 (Data.Map.Strict.lookup cc tom)))
53 (\ (i, cc) ->
54 (16 ^ i) * (fromMaybe 0 (Data.Map.Strict.lookup cc tom)))
5455 (zip (reverse [0 .. 7] :: [Int]) [a, b, c, d, e, f, g, h]))))
5556 : (unhex xs)
5657 unhex _ = []
0
01 module HtmlRenderer where
12 import MediaWikiParseTree
23 import MyState
34 import qualified Data.Map.Strict as Map
45 import Data.Map.Strict (Map)
5 import Control.Monad.Trans.State
6 (State, state, runState, put, get)
6 import Control.Monad.Trans.State (State, state, runState, put, get)
77 import LatexRenderer
88 import WikiHelper
99 import Tools
1818 import Data.Tuple
1919 import Data.Hashable
2020 import Hex
21
2122 type HtmlRenderer = State MyState
2223
2324 templateToHtml :: [Anything Char] -> String -> Renderer String
2627 \ st -> swap $ templateHtmlProcessor st (prepateTemplate l s)
2728
2829 templateHtmlProcessor ::
29 MyState ->
30 (String, Map String [Anything Char]) -> (MyState, String)
31 templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Warnung", ll)
30 MyState ->
31 (String, Map String [Anything Char]) -> (MyState, String)
32 templateHtmlProcessor st
33 ("Mathe f\252r Nicht-Freaks: Vorlage:Warnung", ll)
3234 = (st,
3335 "<b>Warnung</b><br/>" ++
34 (treeToHtml (Map.findWithDefault [] "1" ll) st)++"")
35 templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Hinweis", ll)
36 (treeToHtml (Map.findWithDefault [] "1" ll) st) ++ "")
37 templateHtmlProcessor st
38 ("Mathe f\252r Nicht-Freaks: Vorlage:Hinweis", ll)
3639 = (st,
3740 "<b>Hinweis</b><br/>" ++
38 (treeToHtml (Map.findWithDefault [] "1" ll) st)++"")
39
40 templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Beispiel", ll)
41 (treeToHtml (Map.findWithDefault [] "1" ll) st) ++ "")
42 templateHtmlProcessor st
43 ("Mathe f\252r Nicht-Freaks: Vorlage:Beispiel", ll)
4144 = (st,
4245 "<b>Beispiel</b><br/>" ++
43 (treeToHtml (Map.findWithDefault [] "beispiel" ll) st)++"")
44
45 templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Satz", ll)
46 (treeToHtml (Map.findWithDefault [] "beispiel" ll) st) ++ "")
47 templateHtmlProcessor st
48 ("Mathe f\252r Nicht-Freaks: Vorlage:Satz", ll)
4649 = (st,
4750 "<b>Satz</b><br/>" ++
48 (treeToHtml (Map.findWithDefault [] "satz" ll) st)++"")
49
50 templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Lösungsweg", ll)
51 (treeToHtml (Map.findWithDefault [] "satz" ll) st) ++ "")
52 templateHtmlProcessor st
53 ("Mathe f\252r Nicht-Freaks: Vorlage:L\246sungsweg", ll)
5154 = (st,
5255 "<b>Wie kommt man auf den Beweis?</b>" ++
53 (treeToHtml (Map.findWithDefault [] "lösungsweg" ll) st)++"")
54
55 templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Beweis", ll)
56 (treeToHtml (Map.findWithDefault [] "l\246sungsweg" ll) st) ++ "")
57 templateHtmlProcessor st
58 ("Mathe f\252r Nicht-Freaks: Vorlage:Beweis", ll)
5659 = (st,
5760 "<b>Beweis: </b><br/>" ++
58 (treeToHtml (Map.findWithDefault [] "beweis" ll) st)++"")
59
60 templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Definition", ll)
61 (treeToHtml (Map.findWithDefault [] "beweis" ll) st) ++ "")
62 templateHtmlProcessor st
63 ("Mathe f\252r Nicht-Freaks: Vorlage:Definition", ll)
6164 = (st,
6265 "<b>Definition: </b><i>(" ++
63 (treeToHtml (Map.findWithDefault [] "titel" ll) st)++")</i><br/>"
64 ++ (treeToHtml (Map.findWithDefault [] "definition" ll) st))
65 templateHtmlProcessor st ("mathe für Nicht-Freaks: Vorlage:Definition", ll)
66 (treeToHtml (Map.findWithDefault [] "titel" ll) st) ++
67 ")</i><br/>" ++
68 (treeToHtml (Map.findWithDefault [] "definition" ll) st))
69 templateHtmlProcessor st
70 ("mathe f\252r Nicht-Freaks: Vorlage:Definition", ll)
6671 = (st,
6772 "<b>Definition: </b><i>(" ++
68 (treeToHtml (Map.findWithDefault [] "titel" ll) st)++")</i><br/>"
69 ++ (treeToHtml (Map.findWithDefault [] "definition" ll) st))
70
71
72
73 (treeToHtml (Map.findWithDefault [] "titel" ll) st) ++
74 ")</i><br/>" ++
75 (treeToHtml (Map.findWithDefault [] "definition" ll) st))
7376 templateHtmlProcessor st ("-", ll)
7477 = (tempProcAdapter $ mnfindent ll) st
75
76 templateHtmlProcessor st ("Mathe f\252r Nicht-Freaks: Vorlage:Klapptext", ll)
78 templateHtmlProcessor st
79 ("Mathe f\252r Nicht-Freaks: Vorlage:Klapptext", ll)
7780 = (tempProcAdapter $ mnfklapptext ll) st
78 templateHtmlProcessor st ("Aufgabensammlung: Vorlage:Klapptext", ll)
81 templateHtmlProcessor st
82 ("Aufgabensammlung: Vorlage:Klapptext", ll)
7983 = (tempProcAdapter $ mnfklapptext ll) st
80 templateHtmlProcessor st ("Aufgabensammlung: Vorlage:Vollst\228ndige Induktion", ll)
84 templateHtmlProcessor st
85 ("Aufgabensammlung: Vorlage:Vollst\228ndige Induktion", ll)
8186 = (tempProcAdapter $ mnfinduktion ll) st
82
83
8487 templateHtmlProcessor st ("Formel", ll)
8588 = (st,
8689 "<dl><dd>" ++
87 (treeToHtml (Map.findWithDefault [] "1" ll) st)++"</dd></dl>")
88 templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Frage", ll)
90 (treeToHtml (Map.findWithDefault [] "1" ll) st) ++ "</dd></dl>")
91 templateHtmlProcessor st
92 ("Mathe f\252r Nicht-Freaks: Vorlage:Frage", ll)
8993 = (tempProcAdapter $ mnffrage ll) st
90
91 templateHtmlProcessor st ("Anker", _)
92 = (st, "")
94 templateHtmlProcessor st ("Anker", _) = (st, "")
9395 templateHtmlProcessor st ("Symbol", ll)
9496 = (st, (treeToHtml (Map.findWithDefault [] "1" ll) st))
95
96 templateHtmlProcessor st ("#invoke:Mathe für Nicht-Freaks/Seite", _)
97 = (st, "")
98
97 templateHtmlProcessor st
98 ("#invoke:Mathe f\252r Nicht-Freaks/Seite", _) = (st, "")
9999 templateHtmlProcessor st ("Aufgabensammlung: Vorlage:Infobox", _)
100100 = (st, "")
101101 templateHtmlProcessor st ("Aufgabensammlung: Vorlage:Symbol", _)
102102 = (st, "")
103
104 templateHtmlProcessor st ("Nicht l\246schen", _)
105 = (st, "")
106
107
108 templateHtmlProcessor st ("#ifeq:{{{include", _)
109 = (st, "")
110
111
112
113 templateHtmlProcessor st ("Druckversion Titelseite", _)
114 = (st, "")
115 templateHtmlProcessor st ("PDF-Version Gliederung", _)
116 = (st, "")
117 templateHtmlProcessor st ("#invoke:Liste", _)
118 = (st, "")
119 templateHtmlProcessor st ("Smiley", _)
120 = (st, "☺")
121 templateHtmlProcessor st ("", _)
122 = (st, "")
123
124
125
126
127
128
129 templateHtmlProcessor st (x, _) = (st,"UNKNOWN TEMPLATE "++x++" ")
103 templateHtmlProcessor st ("Nicht l\246schen", _) = (st, "")
104 templateHtmlProcessor st ("#ifeq:{{{include", _) = (st, "")
105 templateHtmlProcessor st ("Druckversion Titelseite", _) = (st, "")
106 templateHtmlProcessor st ("PDF-Version Gliederung", _) = (st, "")
107 templateHtmlProcessor st ("#invoke:Liste", _) = (st, "")
108 templateHtmlProcessor st ("Smiley", _) = (st, "\9786")
109 templateHtmlProcessor st ("", _) = (st, "")
110 templateHtmlProcessor st (x, _)
111 = (st, "UNKNOWN TEMPLATE " ++ x ++ " ")
130112
131113 wikiLinkCaptionHtml :: [Anything Char] -> MyState -> String
132114 wikiLinkCaptionHtml l st = if isCaption x then rebuild x else ""
133115 where x = (treeToHtml (last (splitOn [C '|'] l)) st)
134116 rebuild (':' : xs) = xs
135117 rebuild b = b
118
136119 wikiImageToHtml :: [Anything Char] -> Renderer String
137120 wikiImageToHtml l
138121 = do st <- get
139 mystr <- return ((if not (micro st) then "<p>"else "")++"<"++(if ext=="webm" then "video controls" else "img")++" src=\"./images/" ++ (n st) ++ "." ++ ext ++ "\" style=\"width: "++ (if (tb st) then "100.0" else (mysize st)) ++"%;\">"++ (if (not (micro st)) then "<br/> "++(getfig st)++" "++(n st) ++ " " ++ (s st) ++ "</p>"else ""))
122 mystr <- return
123 ((if not (micro st) then "<p>" else "") ++
124 "<" ++
125 (if ext == "webm" then "video controls" else "img") ++
126 " src=\"./images/" ++
127 (n st) ++
128 "." ++
129 ext ++
130 "\" style=\"width: " ++
131 (if (tb st) then "100.0" else (mysize st)) ++
132 "%;\">" ++
133 (if (not (micro st)) then
134 "<br/> " ++
135 (getfig st) ++ " " ++ (n st) ++ " " ++ (s st) ++ "</p>"
136 else ""))
140137 put
141138 st{getImages = (getImages st) ++ [shallowFlatten l],
142139 getJ = ((getJ st) + 1)}
155152 s1 st
156153 = if '|' `elem` (shallowFlatten l) then (s2 st) else
157154 (treeToHtml [] st{getJ = ((getJ st) + 1)})
158 mysize st = printf "%0.5f" ((mysizefloat2 st)*100.0)
155 mysize st = printf "%0.5f" ((mysizefloat2 st) * 100.0)
159156 mysizefloat st = (min (getF st) (imageSize l))
160157 mysizefloat2 st = if (msb st) then 1.0 else (mysizefloat st)
161158 msb st = (mysizefloat st) == (getF st)
162159 micro st = ((mysizefloat st) < 0.17) || ((getInTab st) > 1)
163160 n st = show (getJ st)
164161 tb st = ((getInTab st) > 0)
165 getfig st = head (splitOn "}" (last (splitOn "\\newcommand{\\myfigurebabel}{" (makeBabel (langu st) "en"))))
166
167
168
162 getfig st
163 = head
164 (splitOn "}"
165 (last
166 (splitOn "\\newcommand{\\myfigurebabel}{"
167 (makeBabel (langu st) "en"))))
169168
170169 galleryContentToHtml :: [[Anything Char]] -> Renderer String
171170 galleryContentToHtml (x : xs)
173172 ss <- galleryContentToHtml xs
174173 return $ s ++ "</tr><tr>" ++ ss
175174 galleryContentToHtml [] = return []
176
175
177176 {-DHUN| converts a part of a gallery (image gallery, gallery tag) from parse tree to latex. A part are as many elements as fit into a single row in the resulting latex table DHUN-}
178
177
179178 galleryRowToHtml :: [Anything Char] -> Renderer String
180179 galleryRowToHtml [] = return []
181180 galleryRowToHtml (x : []) = treeToHtml2 [x]
183182 = do s <- treeToHtml2 [x]
184183 g <- galleryRowToHtml xs
185184 return $ s ++ "</td><td>" ++ g
186
185
187186 {-DHUN| Converts are gallery (image gallery, gallery tag) from parse tree to latex. Also writes table header and footer. This is the function you should use for converting galleries to latex DHUN-}
188
187
189188 galleryToHtml :: [Anything Char] -> Renderer String
190189 galleryToHtml x
191190 = do st <- get
195194 trim (treeToHtml z st) /= ""])
196195 st2 <- get
197196 put st2{getF = (getF st)}
197 return ("<table><tr>" ++ s ++ "</tr></table>")
198
199 mnffrage :: Map String [Anything Char] -> Renderer String
200 mnffrage ll
201 = do typ <- treeToHtml2 (Map.findWithDefault [] "typ" ll)
202 frage <- treeToHtml2 (Map.findWithDefault [] "frage" ll)
203 antwort <- treeToHtml2 (Map.findWithDefault [] "antwort" ll)
198204 return
199 ("<table><tr>" ++ s ++ "</tr></table>")
200
201 mnffrage :: Map String [Anything Char] -> Renderer String
202 mnffrage ll = do typ<-treeToHtml2 (Map.findWithDefault [] "typ" ll)
203 frage<-treeToHtml2 (Map.findWithDefault [] "frage" ll)
204 antwort<-treeToHtml2 (Map.findWithDefault [] "antwort" ll)
205 return ("<dl><dd><b>" ++ typ++":</b> "++ frage++"</dd><dd>"++antwort ++"</dd></dl>")
205 ("<dl><dd><b>" ++
206 typ ++ ":</b> " ++ frage ++ "</dd><dd>" ++ antwort ++ "</dd></dl>")
206207
207208 mnfindent :: Map String [Anything Char] -> Renderer String
208 mnfindent ll = do one<-treeToHtml2 (Map.findWithDefault [] "1" ll)
209 return ("<dl><dd>" ++ one ++"</dd></dl>")
210
209 mnfindent ll
210 = do one <- treeToHtml2 (Map.findWithDefault [] "1" ll)
211 return ("<dl><dd>" ++ one ++ "</dd></dl>")
211212
212213 mnfklapptext :: Map String [Anything Char] -> Renderer String
213 mnfklapptext ll = do kopf<-treeToHtml2 (Map.findWithDefault [] "kopf" ll)
214 inhalt<-treeToHtml2 (Map.findWithDefault [] "inhalt" ll)
215 return ("<b>" ++ kopf ++"</b><br/>"++inhalt)
216
214 mnfklapptext ll
215 = do kopf <- treeToHtml2 (Map.findWithDefault [] "kopf" ll)
216 inhalt <- treeToHtml2 (Map.findWithDefault [] "inhalt" ll)
217 return ("<b>" ++ kopf ++ "</b><br/>" ++ inhalt)
217218
218219 mnfinduktion :: Map String [Anything Char] -> Renderer String
219 mnfinduktion ll = do erf<-treeToHtml2 (Map.findWithDefault [] "erfuellungsmenge" ll)
220 aus<-treeToHtml2 (Map.findWithDefault [] "aussageform" ll)
221 anf<-treeToHtml2 (Map.findWithDefault [] "induktionsanfang" ll)
222 vor<-treeToHtml2 (Map.findWithDefault [] "induktionsvoraussetzung" ll)
223 beh<-treeToHtml2 (Map.findWithDefault [] "induktionsbehauptung" ll)
224 sch<-treeToHtml2 (Map.findWithDefault [] "beweis_induktionsschritt" ll)
225 return ("<b>Aussageform, deren Allgemeingültigkeit für "++erf++" bewiesen werden soll:</b><br/>"++aus++"<br/><b>1. Induktionsanfang</b><br/>"++anf++"<br/><b>2. Induktionsschritt</b><br><b>2a. Induktionsvoraussetzung</b><br/>"++vor++"<br/><b>2b. Induktionsbehauptung</b><br/>"++beh++"<br/><b>2c. Beweis des Induktionsschritts</b><br/>"++sch++"<br/>")
226
227
228
229 writedict::[(String,String)]->String
220 mnfinduktion ll
221 = do erf <- treeToHtml2
222 (Map.findWithDefault [] "erfuellungsmenge" ll)
223 aus <- treeToHtml2 (Map.findWithDefault [] "aussageform" ll)
224 anf <- treeToHtml2 (Map.findWithDefault [] "induktionsanfang" ll)
225 vor <- treeToHtml2
226 (Map.findWithDefault [] "induktionsvoraussetzung" ll)
227 beh <- treeToHtml2
228 (Map.findWithDefault [] "induktionsbehauptung" ll)
229 sch <- treeToHtml2
230 (Map.findWithDefault [] "beweis_induktionsschritt" ll)
231 return
232 ("<b>Aussageform, deren Allgemeing\252ltigkeit f\252r " ++
233 erf ++
234 " bewiesen werden soll:</b><br/>" ++
235 aus ++
236 "<br/><b>1. Induktionsanfang</b><br/>" ++
237 anf ++
238 "<br/><b>2. Induktionsschritt</b><br><b>2a. Induktionsvoraussetzung</b><br/>"
239 ++
240 vor ++
241 "<br/><b>2b. Induktionsbehauptung</b><br/>" ++
242 beh ++
243 "<br/><b>2c. Beweis des Induktionsschritts</b><br/>" ++
244 sch ++ "<br/>")
245
246 writedict :: [(String, String)] -> String
230247 writedict [] = []
231 writedict ((k,v):xs) = k++"=\""++v++"\" " ++ (writedict xs)
232
233 treeToHtml3 :: Map String Int->Maybe String -> String -> [Anything Char] -> MyState -> (String, MyState)
234 treeToHtml3 formulas mylanguage title l st = let (a,b)=runState (treeToHtml2 l) st{langu=mylanguage, forms=formulas} in ("<html><head><meta charset=\"utf-8\"><title>"++title++"</title><style>table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { margin: 0; padding: 0; vertical-align: baseline; border: none; }\ntable.sourceCode { width: 100%; line-height: 100%; }\ntd.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; }\ntd.sourceCode { padding-left: 5px; }\ncode > span.kw { font-weight: bold; }\ncode > span.dt { text-decoration: underline; }\ncode > span.co { font-style: italic; }\ncode > span.al { font-weight: bold; }\ncode > span.er { font-weight: bold; }\n</style></head><body>"++a,b)
248 writedict ((k, v) : xs)
249 = k ++ "=\"" ++ v ++ "\" " ++ (writedict xs)
250
251 treeToHtml3 ::
252 Map String Int ->
253 Maybe String ->
254 String -> [Anything Char] -> MyState -> (String, MyState)
255 treeToHtml3 formulas mylanguage title l st
256 = let (a, b)
257 = runState (treeToHtml2 l) st{langu = mylanguage, forms = formulas}
258 in
259 ("<html><head><meta charset=\"utf-8\"><title>" ++
260 title ++
261 "</title><style>table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { margin: 0; padding: 0; vertical-align: baseline; border: none; }\ntable.sourceCode { width: 100%; line-height: 100%; }\ntd.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; }\ntd.sourceCode { padding-left: 5px; }\ncode > span.kw { font-weight: bold; }\ncode > span.dt { text-decoration: underline; }\ncode > span.co { font-style: italic; }\ncode > span.al { font-weight: bold; }\ncode > span.er { font-weight: bold; }\n</style></head><body>"
262 ++ a,
263 b)
264
235265 treeToHtml :: [Anything Char] -> MyState -> String
236266 treeToHtml l states = (fst $ runState (treeToHtml2 l) states)
267
237268 treeToHtmlBak :: [Anything Char] -> MyState -> String
238269 treeToHtmlBak _ _ = ""
270
239271 treeToHtml2Bak :: [Anything Char] -> HtmlRenderer String
240272 treeToHtml2Bak _ = return ""
241273
243275 treeToHtml2 ll
244276 = do x <- allinfo
245277 return $ concat x
246 where
247 allinfo :: HtmlRenderer [String]
278 where allinfo :: HtmlRenderer [String]
248279 allinfo = mapM nodeToHtml ll
249
280
250281 walk :: String -> [Anything Char] -> String -> HtmlRenderer String
251282 walk prefix l postfix
252283 = do d <- treeToHtml2 l
253284 return $ prefix ++ d ++ postfix
285
254286 nodeToHtml :: Anything Char -> HtmlRenderer String
255287 nodeToHtml (C c)
256 = do st<-get
257 x<-if (c=='\n') && ((lastChar st)==c) then return "</p><p>" else return [c]
258 put st{lastChar=c}
288 = do st <- get
289 x <- if (c == '\n') && ((lastChar st) == c) then return "</p><p>"
290 else return [c]
291 put st{lastChar = c}
259292 return x
260293 nodeToHtml (Environment Wikilink _ l)
261294 = do st <- get
278311 guard (not ((getInTab st) > 0))
279312 return aa
280313 of
281 Just j -> (renderHtml ((formatHtmlBlock defaultFormatOpts) (highlightAs j f)))
314 Just j -> (renderHtml
315 ((formatHtmlBlock defaultFormatOpts) (highlightAs j f)))
282316 Nothing -> (rtrim d)
283317 nodeToHtml (Environment Template (Str s) l) = templateToHtml l s
284 nodeToHtml (Environment Wikitable _ l) = walk "<table><tr>" l "</table></tr>"
318 nodeToHtml (Environment Wikitable _ l)
319 = walk "<table><tr>" l "</table></tr>"
285320 nodeToHtml (Environment TableRowSep _ _) = return "</tr><tr>"
286321 nodeToHtml (Environment TableColSep _ _) = return "</td><td>"
287322 nodeToHtml (Environment TableHeadColSep _ _) = return "</th><th>"
288 nodeToHtml (Environment TableCap _ l) = walk "<caption>" l "</caption>"
289 nodeToHtml (Environment Wikiheading (Str x) l) = let y=(show (length x)) in walk ("<h"++y++">") l ("</h"++y++">")
323 nodeToHtml (Environment TableCap _ l)
324 = walk "<caption>" l "</caption>"
325 nodeToHtml (Environment Wikiheading (Str x) l)
326 = let y = (show (length x)) in
327 walk ("<h" ++ y ++ ">") l ("</h" ++ y ++ ">")
290328 nodeToHtml (Environment ItemEnv (Str _) [Item _]) = return []
291 nodeToHtml (Environment ItemEnv (Str s) l) = do tag<-return (case s of
292 "*"-> "ul"
293 _ -> "ol")
294 walk ("<"++tag++">") l ("</li></"++tag++">")
329 nodeToHtml (Environment ItemEnv (Str s) l)
330 = do tag <- return
331 (case s of
332 "*" -> "ul"
333 _ -> "ol")
334 walk ("<" ++ tag ++ ">") l ("</li></" ++ tag ++ ">")
295335 nodeToHtml (Item _) = return "</li><li>"
296
297336 nodeToHtml (Environment Tag (TagAttr "noscript" _) _) = return []
298337 nodeToHtml (Environment Tag (TagAttr "head" _) _) = return []
299338 nodeToHtml (Environment Tag (TagAttr "a" _) l) = walk "" l ""
307346 || ((Map.findWithDefault [] "id" a) `elem` ["coordinates"])
308347 then return "" else walk "" l ""
309348 else walk "" l ""
310 nodeToHtml (Environment Tag (TagAttr "img" m) _) | (Map.lookup "class" m) == (Just "mwe-math-fallback-image-inline") = return []
349 nodeToHtml (Environment Tag (TagAttr "img" m) _)
350 | (Map.lookup "class" m) == (Just "mwe-math-fallback-image-inline")
351 = return []
311352 nodeToHtml (Environment Comment _ _) = return []
312 nodeToHtml (Environment Preformat (TagAttr "pre" _) l) = walk "<pre>" l "</pre>"
313 nodeToHtml (Environment Math (TagAttr "math" _) l) = do st<-get
314 return ("<img src=\"./formulas/"++(hex(show (hash (mathTransform l))))++".png\" style=\" width:"++(show ((((forms st) Map.! ((hex(show (hash (mathTransform l))))++".png"))*2) `div` 3) )++"px;\" />")
315
316
317 nodeToHtml (Environment Math _ l) = do st<-get
318 --return (show (forms st))
319 return ("<img src=\"./formulas/"++(hex(show (hash (mathTransform l))))++".png\" style=\" width:"++(show ((((forms st) Map.! ((hex(show (hash (mathTransform l))))++".png"))*2) `div` 3) )++"px;\" />")
320
321 nodeToHtml (Environment Tag (TagAttr "table" m) l)
353 nodeToHtml (Environment Preformat (TagAttr "pre" _) l)
354 = walk "<pre>" l "</pre>"
355 nodeToHtml (Environment Math (TagAttr "math" _) l)
356 = do st <- get
357 return
358 ("<img src=\"./formulas/" ++
359 (hex (show (hash (mathTransform l)))) ++
360 ".png\" style=\" width:" ++
361 (show
362 ((((forms st) Map.!
363 ((hex (show (hash (mathTransform l)))) ++ ".png"))
364 * 2)
365 `div` 3))
366 ++ "px;\" />")
367 nodeToHtml (Environment Math _ l)
368 = do st <- get
369 return
370 ("<img src=\"./formulas/" ++
371 (hex (show (hash (mathTransform l)))) ++
372 ".png\" style=\" width:" ++
373 (show
374 ((((forms st) Map.!
375 ((hex (show (hash (mathTransform l)))) ++ ".png"))
376 * 2)
377 `div` 3))
378 ++ "px;\" />")
379 nodeToHtml (Environment Tag (TagAttr "table" m) l)
322380 = do st <- get
323381 put $ st{getInTab = (getInTab st) + 1}
324 d <- walk ("<table "++(writedict (Map.toList m))++">") l ("</table>")
382 d <- walk ("<table " ++ (writedict (Map.toList m)) ++ ">") l
383 ("</table>")
325384 st2 <- get
326385 put $ st2{getInTab = (getInTab st)}
327386 return d
337396 newst i
338397 = (midst i){getGalleryNumbers =
339398 (getGalleryNumbers (midst i)) ++ (map toInteger (gins i))}
340 nodeToHtml (Environment Tag (TagAttr "span" _) l) = walk "" l ""
341 nodeToHtml (Environment Tag (TagAttr x m) l) = walk ("<"++x++" "++(writedict (Map.toList m))++">") l ("</"++x++">")
399 nodeToHtml (Environment Tag (TagAttr "span" _) l) = walk "" l ""
400 nodeToHtml (Environment Tag (TagAttr x m) l)
401 = walk ("<" ++ x ++ " " ++ (writedict (Map.toList m)) ++ ">") l
402 ("</" ++ x ++ ">")
342403 nodeToHtml (Environment _ _ l) = walk "" l ""
343404 nodeToHtml _ = return []
344
77 import Control.Concurrent.MVar
88 import Data.List
99 import Network.URL
10
11 {-DHUN| A type to for errors that might be thrown during the imperative calculation DHUN-}
12
1013 data MyError = DownloadError String String
1114 | OtherError String
1215 | WikiUrlParseError String
1821 | ToManyOptionsError
1922 | ToManyOutputOptionsError
2023 | PaperError
21
22 type MyErrorMonad = Either MyError
24
25 {-DHUN| A monad for dealing with errors DHUN-}
26
27 type MyErrorMonad = Either MyError
28
29 {-DHUN| printable error messages DHUN-}
2330
2431 instance Show MyError where
2532 show (DownloadError theLemma theUrl)
3037 show NotImplementedError
3138 = "Error: The requested feature is not implemented yet"
3239 show (NotIntegerPairError msg)
33 = "Error: The option --" ++ msg ++ "could not be parsed to a pair of integers (like -f 23:42)"
40 = "Error: The option --" ++
41 msg ++ "could not be parsed to a pair of integers (like -f 23:42)"
3442 show PaperError
3543 = "Error: The option paper may only be one of A4,A5,B5,letter,legal,executive"
3644 show ToManyOptionsError
4755 = "Error: The option --" ++
4856 msg ++ " could not be parsed as an integer."
4957 show (OtherError msg) = msg
50
58
59
60 {-DHUN| A type to capture a contributor form the list of contributors needed for license reasons. The element name is username of the author, the element edits is the number of edits done by the user, href is a link to the users homepage on the wiki DHUN-}
61
5162 data Contributor = Contributor{name :: String, edits :: Integer,
5263 href :: String}
5364 deriving (Eq, Ord, Show, Read)
54
65
66 {-DHUN| The sum of two contribors is defined as the new contribor with the same name and homepage and the edits summed up. Of course this makes only sense when summing up edits for the same contributor, which is not checked but has to be ensured by hand DHUN-}
67
5568 myplus :: Contributor -> Contributor -> Contributor
5669 myplus x y = x{edits = (edits x) + (edits y)}
57
70
71 {-DHUN| Build a map of contributors summing up the edits per contributor. The map takes the name of the contributor as key and the contributor records given above as value. The first parameter is a list of such maps the results is also such a map representing the sum of the maps in the list DHUN-}
72
5873 contribsum :: [Map String Contributor] -> Map String Contributor
59 contribsum x = Data.List.foldl (unionWith myplus) Data.Map.Strict.empty x
60
74 contribsum x
75 = Data.List.foldl (unionWith myplus) Data.Map.Strict.empty x
76
77 {-DHUN| a defaults version of the record Imperative State DHUN-}
78
79
6180 imperativeStateZero :: IO ImperativeState
6281 imperativeStateZero
63 = do v<-newMVar (0::Int)
64 return ImperativeState{audict = [], fullUrl = fullWikiUrlZero,
65 tmpPath = "" ,counter=v, loadacu=[]}
66
82 = do v <- newMVar (0 :: Int)
83 return
84 ImperativeState{audict = [], fullUrl = fullWikiUrlZero,
85 tmpPath = "", counter = v, loadacu = []}
86
6787 data ImperativeState = ImperativeState{audict ::
6888 [(Map String Contributor)],
69 fullUrl :: FullWikiUrl, tmpPath :: String, counter::MVar Int, loadacu::[Anything Char]}
89 fullUrl :: FullWikiUrl, tmpPath :: String,
90 counter :: MVar Int, loadacu :: [Anything Char]}
7091
92 data ImageInfo = ImageInfo{wikiFilename :: String,
93 imageNumber :: Integer, contributorUrls :: [String],
94 descriptionUrl :: URL}
95 deriving (Show, Read)
7196
72 data ImageInfo = ImageInfo{wikiFilename :: String,imageNumber::Integer,contributorUrls::[String],descriptionUrl::URL}
73 deriving (Show,Read)
74
7597 type ImperativeMonad = ExceptT MyError (StateT ImperativeState IO)
76
98
7799 data RunMode = HTML
78100 | ExpandedTemplates
79101 | StandardTemplates
80102 | Book
81103 | UserTemplateFile String
82104 deriving (Show, Read, Eq)
83
105
84106 data SourceMode = Included
85107 | Excluded
86108 deriving (Show, Read)
87
88 data OutputType = PlainPDF | ZipArchive | EPubFile | OdtFile
89 deriving (Show, Read,Eq)
109
110 data OutputType = PlainPDF
111 | ZipArchive
112 | EPubFile
113 | OdtFile
114 deriving (Show, Read, Eq)
115
90116 data FullConfig = FullConfig{headers :: Maybe String,
91117 resolution :: Integer, outputFilename :: String,
92118 inputUrl :: String, runMode :: RunMode, paper :: String,
93119 vector :: Bool, copy :: Maybe String, mainPath :: String,
94 server :: Maybe Int, outputType::OutputType, selfTest ::Maybe (Integer,Integer), compile:: Maybe String, imgctrb:: Maybe String}
120 server :: Maybe Int, outputType :: OutputType,
121 selfTest :: Maybe (Integer, Integer), compile :: Maybe String,
122 imgctrb :: Maybe String}
95123 deriving (Show, Read)
96124
97 fullconfigbase::FullConfig
98 fullconfigbase= FullConfig{headers =Nothing,
99 resolution =0, outputFilename ="",
100 inputUrl ="", runMode =HTML, paper ="A4",
101 vector = False, copy = Nothing, mainPath = "",
102 server = Nothing, outputType=PlainPDF, selfTest =Nothing, compile= Nothing,imgctrb=Nothing}
125 fullconfigbase :: FullConfig
126 fullconfigbase
127 = FullConfig{headers = Nothing, resolution = 0,
128 outputFilename = "", inputUrl = "", runMode = HTML, paper = "A4",
129 vector = False, copy = Nothing, mainPath = "", server = Nothing,
130 outputType = PlainPDF, selfTest = Nothing, compile = Nothing,
131 imgctrb = Nothing}
44 getGalleryNumbers, getTitle, initialState, getJ, urld,
55 analyseNetloc, templateMap, getUserTemplateMap, urls, mUrlState,
66 initialUrlState, makeLables, templateRegistry, baseUrl,
7 deepFlatten, wikiLinkCaption, imageSize, isCaption, linewidth,
8 generateGalleryImageNumbers , splitToTuples, galleryTableScale, tempProcAdapter)
7 deepFlatten, wikiLinkCaption, imageSize, isCaption, linewidth,
8 generateGalleryImageNumbers, splitToTuples, galleryTableScale,
9 tempProcAdapter)
910 where
1011 import Data.String.HT (trim)
1112 import MyState
3233 import Data.Maybe
3334 import Data.Tuple (swap)
3435 import MediaWikiParser hiding (prep)
35
36
3637 {-DHUN| the maximum width of lines for preformat and source code DHUN-}
37
38
3839 linewidth :: Int
3940 linewidth = 80
40
41
4142 {-DHUN| The user can provide her own translation table for mediawiki templates to latex commands. this is done in the templates.user files. This function takes this file in list representation and converts it to the map representation to be able to look up the names of templates DHUN-}
42
43
4344 getUserTemplateMap :: [[String]] -> Map String [String]
4445 getUserTemplateMap input
4546 = Map.fromList (map (\ (x : xs) -> (x, xs)) input)
46
47
4748 {-DHUN| table may omit tailing columns in a row, but in latex they need to be written out, this function does so DHUN-}
48
49
4950 rowaddsym :: TableState -> [Char]
5051 rowaddsym st
5152 = if (currentColumn st) < ((numberOfColumnsInTable st) + 1) then
5556 replicate (((numberOfColumnsInTable st) + 1) - (currentColumn st))
5657 '&')
5758 else []
58
59
5960 {-DHUN| This function renders the inner parts of a table to latex it does so by calling tableContentToLaTeX and additionally removes superfluous newlines which might cause compilation problems in latex when used inside tables DHUN-}
60
61
6162 tableContentToLaTeX2 ::
6263 [Anything Char] -> (StateT TableState (State MyState) String)
6364 tableContentToLaTeX2 l
6465 = do r <- tableContentToLaTeX l
6566 return (killnl2 r)
66
67
6768 varwidthbegin :: TableState -> [Char]
68 varwidthbegin st = if isJust (activeColumn st) then "\\begin{varwidth}{\\linewidth}" else ""
69 varwidthbegin st
70 = if isJust (activeColumn st) then "\\begin{varwidth}{\\linewidth}"
71 else ""
6972
7073 varwidthend :: TableState -> [Char]
71 varwidthend st = if isJust (activeColumn st) then "\\end{varwidth}" else ""
74 varwidthend st
75 = if isJust (activeColumn st) then "\\end{varwidth}" else ""
7276
7377 {-DHUN| This function renders the inner parts of a table to latex, please always use tableContentToLaTeX2 since this also removes superfluous newlines DHUN-}
74
78
7579 tableContentToLaTeX ::
7680 [Anything Char] -> (StateT TableState (State MyState) String)
7781 tableContentToLaTeX ((Environment TableRowSep _ _) : [])
7882 = do st <- get
7983 let cc = (currentColumn st)
8084 let c = cc + (multiRowCount cc (multiRowMap st))
81 return $ (varwidthend st)++
82 (headendsym (lastCellWasHeaderCell st)) ++
83 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
84 (rowaddsym st{currentColumn = c})
85 return $
86 (varwidthend st) ++
87 (headendsym (lastCellWasHeaderCell st)) ++
88 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
89 (rowaddsym st{currentColumn = c})
8590 tableContentToLaTeX ((Environment TableRowSep _ l) : xs)
8691 = do sst <- lift get
8792 st <- get
112117 if stillInTableHeader st then not mycond else False}
113118 xx <- tableContentToLaTeX xs
114119 return $
115 if (not (isFirstRow st)) then (varwidthend st) ++
116 (headendsym (lastCellWasHeaderCell st)) ++
117 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
118 (multiRowEndSymbol (lastCellWasMultiRow st)) ++
119 (multiRowSymbolForRowSep (currentColumn st) (multiRowMap st)
120 (seperatingLinesRequestedForTable st))
121 ++
122 (rowaddsym st{currentColumn = c}) ++
123 (rowendsymb ((getInTab sst) <= 1)
124 ((rowCounter st) == (inputLastRowOfHeader st) - 2))
125 ++
126 (innerHorizontalLine (seperatingLinesRequestedForTable st)
127 (multiRowMap st3)
128 (numberOfColumnsInTable st))
129 ++ " \n" ++ (varwidthbegin st)++xx
120 if (not (isFirstRow st)) then
121 (varwidthend st) ++
122 (headendsym (lastCellWasHeaderCell st)) ++
123 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
124 (multiRowEndSymbol (lastCellWasMultiRow st)) ++
125 (multiRowSymbolForRowSep (currentColumn st) (multiRowMap st)
126 (seperatingLinesRequestedForTable st))
127 ++
128 (rowaddsym st{currentColumn = c}) ++
129 (rowendsymb ((getInTab sst) <= 1)
130 ((rowCounter st) == (inputLastRowOfHeader st) - 2))
131 ++
132 (innerHorizontalLine (seperatingLinesRequestedForTable st)
133 (multiRowMap st3)
134 (numberOfColumnsInTable st))
135 ++ " \n" ++ (varwidthbegin st) ++ xx
130136 else xx
131137 tableContentToLaTeX ((Environment TableColSep _ l) : xs)
132138 = do st <- get
146152 (multiRowStartSymbol l (activeColumn st)) /= "",
147153 isFirstRow = False, lastCellWasHeaderCell = False}
148154 xx <- tableContentToLaTeX xxs
149 return $ (varwidthend st)++
150 (headendsym (lastCellWasHeaderCell st)) ++
151 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
152 (multiRowEndSymbol (lastCellWasMultiRow st)) ++
153 (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++
154 (multiRowSymbol (currentColumn st) (multiRowMap st)
155 (seperatingLinesRequestedForTable st))
156 ++
157 (multiColumnStartSymbol l (columnsWidthList st) c
158 (seperatingLinesRequestedForTable st)
159 st)
155 return $
156 (varwidthend st) ++
157 (headendsym (lastCellWasHeaderCell st)) ++
158 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
159 (multiRowEndSymbol (lastCellWasMultiRow st)) ++
160 (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++
161 (multiRowSymbol (currentColumn st) (multiRowMap st)
162 (seperatingLinesRequestedForTable st))
160163 ++
161 (multiRowStartSymbol l (activeColumn st)) ++
162 (if rig then "\\RaggedLeft{}" else "") ++
163 (tablecolorsym l) ++ hypennothing ++(varwidthbegin st)++xx
164 (multiColumnStartSymbol l (columnsWidthList st) c
165 (seperatingLinesRequestedForTable st)
166 st)
167 ++
168 (multiRowStartSymbol l (activeColumn st)) ++
169 (if rig then "\\RaggedLeft{}" else "") ++
170 (tablecolorsym l) ++ hypennothing ++ (varwidthbegin st) ++ xx
164171 where rig
165172 = isInfixOf2
166 [Environment Attribute (Attr ("style", "text-align:right")) []]
173 [Environment Attribute (Attr ("style", "text-align:right")) []]
167174 l
168175 xxs
169176 = if rig then (reverse . removesp . reverse . removesp) xs else xs
188195 isFirstRow = False, lastCellWasHeaderCell = True,
189196 currentRowIsHeaderRow = True}
190197 xx <- tableContentToLaTeX xs
191 return $ (varwidthend st)++
192 (headendsym (lastCellWasHeaderCell st)) ++
193 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
194 (multiRowEndSymbol (lastCellWasMultiRow st)) ++
195 (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++
196 (multiRowSymbol (currentColumn st) (multiRowMap st)
197 (seperatingLinesRequestedForTable st))
198 ++
199 (multiColumnStartSymbol l (columnsWidthList st) c
200 (seperatingLinesRequestedForTable st)
201 st)
198 return $
199 (varwidthend st) ++
200 (headendsym (lastCellWasHeaderCell st)) ++
201 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
202 (multiRowEndSymbol (lastCellWasMultiRow st)) ++
203 (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++
204 (multiRowSymbol (currentColumn st) (multiRowMap st)
205 (seperatingLinesRequestedForTable st))
202206 ++
203 (multiRowStartSymbol l (activeColumn st)) ++
204 headstartsym ++ (tablecolorsym l) ++ hypennothing ++ (varwidthbegin st)++ xx
207 (multiColumnStartSymbol l (columnsWidthList st) c
208 (seperatingLinesRequestedForTable st)
209 st)
210 ++
211 (multiRowStartSymbol l (activeColumn st)) ++
212 headstartsym ++
213 (tablecolorsym l) ++ hypennothing ++ (varwidthbegin st) ++ xx
205214 tableContentToLaTeX (x : xs)
206215 = do st <- get
207216 ele <- case (activeColumn st) of
215224 = do st <- get
216225 let cc = currentColumn st
217226 let c = cc + (multiRowCount cc (multiRowMap st))
218 return $ (varwidthend st)++
219 (headendsym (lastCellWasHeaderCell st)) ++
220 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
221 (multiRowSymbolForTableEnd (currentColumn st) (multiRowMap st)
222 (seperatingLinesRequestedForTable st))
223 ++ (multiRowEndSymbol (lastCellWasMultiRow st)) ++ (rowaddsym st{currentColumn = (c + (columnMultiplicityForCounting []))})
224
227 return $
228 (varwidthend st) ++
229 (headendsym (lastCellWasHeaderCell st)) ++
230 (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++
231 (multiRowSymbolForTableEnd (currentColumn st) (multiRowMap st)
232 (seperatingLinesRequestedForTable st))
233 ++
234 (multiRowEndSymbol (lastCellWasMultiRow st)) ++
235 (rowaddsym
236 st{currentColumn = (c + (columnMultiplicityForCounting []))})
237
225238 {-DHUN| This string has to be added to each new cell in a latex table in order to allow for hyphenation of the first word in this cell DHUN-}
226
239
227240 hypennothing :: [Char]
228241 hypennothing = "\\hspace*{0pt}\\ignorespaces{}\\hspace*{0pt}"
229
242
230243 {-DHUN| color cell in latex if HTML attribute bgcolor is present in the parse tree for the cell DHUN-}
231
244
232245 tablecolorsym :: [Anything Char] -> [Char]
233246 tablecolorsym ll
234247 = case genLookup "bgcolor" ll of
238251 "\\cellcolor{" ++ colname ++ "}"
239252 _ -> "\\cellcolor{" ++ x ++ "}"
240253 Nothing -> ""
241
254
242255 {-DHUN| the caption of a table is given in |+ or <th> elements, it needs to be reformatted in the parse in oder to be rendered in latex as a multicolumn cell spanning the whole width of the table DHUN-}
243
256
244257 reformatTableCaption ::
245258 Int -> [Anything Char] -> MyState -> [Anything Char]
246259 reformatTableCaption n
266279 reformatTableCaption n (x : xs) st
267280 = x : reformatTableCaption n xs st
268281 reformatTableCaption _ [] _ = []
269
282
270283 {-DHUN| strips column separators out of a parse tree or part of which DHUN-}
271
284
272285 stripColSep :: [Anything Char] -> [Anything Char]
273286 stripColSep = filter go
274287 where go (Environment TableHeadColSep _ _) = False
275288 go (Environment TableColSep _ _) = False
276289 go _ = True
277
290
278291 {-DHUN| predicate to test if an element in the parse tree is a row separator DHUN-}
279
292
280293 isRowSep :: Anything Char -> Bool
281294 isRowSep (Environment TableRowSep _ _) = True
282295 isRowSep _ = False
283
296
284297 {-DHUN| strip empty rows out of the parse tree DHUN-}
285
298
286299 stripempty :: [Anything Char] -> MyState -> [Anything Char]
287300 stripempty [] _ = []
288301 stripempty ((Environment TableRowSep a b) : xs) s
303316 pre = takeWhile (not . isRowSep) l
304317 post = dropWhile (not . isRowSep) l
305318 inside = (treeToLaTeX (stripColSep pre)) s
306
319
307320 {-DHUN| In order to determine the maximum width of columns, each table is precompiled with latex several times, with only one column included each time. this function creates the list of the latex sources of these tables, for one table in the parse tree DHUN-}
308
321
309322 maketablist ::
310323 [Anything Char] -> TableState -> Int -> MyState -> [[Char]]
311324 maketablist l tst nc mst = map tablo [1 .. (nc + 1)]
312325 where tablo n
313326 = "\\begin{tabular}{|" ++
314327 (replicate nc 'l') ++
315 "|}" ++ "\\begin{varwidth}{\\linewidth}"++
316 (fst
317 (fst
318 (runState
319 ((runStateT (tableContentToLaTeX2 l))
320 tst{inputLastRowOfHeader = -2, activeColumn = Just n})
321 mst)))
322 ++ "\\end{tabular}"
323
328 "|}" ++
329 "\\begin{varwidth}{\\linewidth}" ++
330 (fst
331 (fst
332 (runState
333 ((runStateT (tableContentToLaTeX2 l))
334 tst{inputLastRowOfHeader = -2, activeColumn = Just n})
335 mst)))
336 ++ "\\end{tabular}"
337
324338 {-DHUN| Takes a map from int to double finds the biggest double and removes the corresponding key value pair from the map. This way wide columns are set to smaller sizes in order to fit the whole table onto the page width DHUN-}
325
339
326340 removehighest :: Map Int Double -> Map Int Double
327341 removehighest m
328342 | m /= Map.empty = Map.fromList (hlp (Map.toList m))
331345 hlp (x : xs) = x : (hlp xs)
332346 hlp [] = []
333347 removehighest _ = Map.empty
334
348
335349 {-DHUN| Returns a list of floats which represents the width of the columns of a table in units of the line width with the proper corrections for use in the a latex documents. If the boolean input parameter is true the table is understood to be written in landscape mode. It also take a map of Int to Double. This is the list of the maximum width of columns determined by previous runs of latex on the table with only one column included per run DHUN-}
336
337 wdth3 :: Bool -> Map Int Double -> ([Float],Double)
350
351 wdth3 :: Bool -> Map Int Double -> ([Float], Double)
338352 wdth3 ls m
339 | m /= Map.empty = (
340 (map
341 ((* (1.0 - (scalefactor (fromIntegral n)))) .
342 double2Float . (/ (linew2 ls)))
343 (Map.elems mm)),if d==1.0 then 1.0 else d*0.90)
353 | m /= Map.empty =
354 ((map
355 ((* (1.0 - (scalefactor (fromIntegral n)))) .
356 double2Float . (/ (linew2 ls)))
357 (Map.elems mm)),
358 if d == 1.0 then 1.0 else d * 0.9)
344359 where n = (maximum (Map.keys m))
345 (mm,d) = wdth ls n m
346 wdth3 _ _ = ([],1.0)
347
360 (mm, d) = wdth ls n m
361 wdth3 _ _ = ([], 1.0)
362
348363 {-DHUN| Returns a table header which represents the width of the columns of a table in units of the line width with the proper corrections for use in the a latex documents. If the first boolean input parameter is true the table is understood to be written in landscape mode. It also take a map of Int to Double. This is the list of the maximum width of columns determined by previous runs of latex on the table with only one column included per run. If second boolean parameter is true it is understood the the rule should be printed with the table, otherwise the table should be printed without rules DHUN-}
349
364
350365 wdth2 :: Bool -> Map Int Double -> Bool -> String
351366 wdth2 ls m b
352367 | m /= Map.empty =
356371 double2Float . (/ (linew2 ls)))
357372 (Map.elems mm))
358373 where n = (maximum (Map.keys m))
359 (mm,_) = wdth ls n m
374 (mm, _) = wdth ls n m
360375 wdth2 _ _ _ = []
361
376
362377 {-DHUN| takes the list of maximum column widths created by previous runs of the latex compiler with only one columns included per run as map from Int to Double. Take the total number of columns of the table as Int. The table is understood to be printed in landscape mode if the boolean parameter is true. It returns a map from int to double representing the width of columns of the table to be used in the latex documents. So it takes raw widths. Which are just the width of the column if the width of the paper was infinite and return the width that fit on the finite width of the real paper DHUN-}
363
364 wdth :: Bool -> Int -> Map Int Double -> (Map Int Double,Double)
378
379 wdth :: Bool -> Int -> Map Int Double -> (Map Int Double, Double)
365380 wdth ls n mm
366381 = case
367382 (Control.Monad.msum
373388 mm))
374389 [0 .. (length (Map.keys mm))])))
375390 of
376 Just (x,Nothing) -> if (sum (Map.elems x)) < (linew2 ls) then
377 (Map.map (\ y -> y * ((linew2 ls) / (sum (Map.elems x)))) x,1.0) else (x,1.0)
378 Just (x,Just ddd) -> (Map.map (\ y -> y * ((linew2 ls) / (sum (Map.elems x)))) x, ddd)
379 Nothing -> (myfill ((linew2 ls) / (fromIntegral n)) Map.empty,1.0)
380 where
381 hlp :: (Map Int Double, Int) -> Maybe ((Map Int Double),Maybe Double)
382 hlp (m, i) | ((sum (Map.elems m)) :: Double)+((linew2 ls) / (fromIntegral n)) * (fromIntegral i)<(linew2 ls) = Just (myfill (((linew2 ls) - (sum (Map.elems m))) / ((fromIntegral i) :: Double)) m,Nothing)
383
391 Just (x, Nothing) -> if (sum (Map.elems x)) < (linew2 ls) then
392 (Map.map (\ y -> y * ((linew2 ls) / (sum (Map.elems x)))) x, 1.0)
393 else (x, 1.0)
394 Just (x, Just ddd) -> (Map.map
395 (\ y -> y * ((linew2 ls) / (sum (Map.elems x))))
396 x,
397 ddd)
398 Nothing -> (myfill ((linew2 ls) / (fromIntegral n)) Map.empty, 1.0)
399 where hlp ::
400 (Map Int Double, Int) -> Maybe ((Map Int Double), Maybe Double)
401 hlp (m, i)
402 | ((sum (Map.elems m)) :: Double) +
403 ((linew2 ls) / (fromIntegral n)) * (fromIntegral i)
404 < (linew2 ls)
405 =
406 Just
407 (myfill
408 (((linew2 ls) - (sum (Map.elems m))) /
409 ((fromIntegral i) :: Double))
410 m,
411 Nothing)
384412 hlp (_, i)
385 | ((i == 3)||(i==n) ) = let (mmm,dd)=(wdth ls n (Map.map ((\x->x*0.95)) mm)) in Just (mmm,Just ((0.965)*dd))
413 | ((i == 3) || (i == n)) =
414 let (mmm, dd) = (wdth ls n (Map.map ((\ x -> x * 0.95)) mm)) in
415 Just (mmm, Just ((0.965) * dd))
386416 hlp (m, i)
387 | ((sum (Map.elems m)) :: Double)+((linew2 ls) / (fromIntegral n)) * (fromIntegral i)>=(linew2 ls) = Nothing
388 hlp _ = Just (myfill ((linew2 ls) / (fromIntegral n)) Map.empty, Nothing)
389
417 | ((sum (Map.elems m)) :: Double) +
418 ((linew2 ls) / (fromIntegral n)) * (fromIntegral i)
419 >= (linew2 ls)
420 = Nothing
421 hlp _
422 = Just (myfill ((linew2 ls) / (fromIntegral n)) Map.empty, Nothing)
423
390424 myfill :: Double -> Map Int Double -> Map Int Double
391425 myfill x m = Map.union m (Map.fromList (zip [1 .. n] (repeat x)))
392
426
393427 {-DHUN| In landscape mode everything has to be multiplied by a factor of two. If the boolean parameter is true it is understood that the table should be printed in landscape mode. This function return the width of the line in latex using the units of latex DHUN-}
394
428
395429 linew2 :: Bool -> Double
396430 linew2 ls = if ls then linew * 1.414 else linew
397
431
398432 {-DHUN| The width of the line in A4 paper with DIV margin factor of 13 in latex own units DHUN-}
399
433
400434 linew :: Double
401435 linew = 455.45742
402
436
403437 hasTable :: [Anything a] -> Bool
404438 hasTable ll = or $ map go ll
405 where go::Anything a->Bool
406 go (Environment Wikitable _ _) =True
439 where go :: Anything a -> Bool
440 go (Environment Wikitable _ _) = True
407441 go (Environment _ _ l) = (hasTable l)
408442 go _ = False
409443
410
411444 {-DHUN| convert a table form the parse tree to latex. The [Anything Char] parameter it the contend of the table represented as a parse tree. The String parameter contains the HTML attributes of the table element, or in wiki notation the HTML parameters of the line beginning with {| . This is evaluated in order to find out whether rules should be printed in the table. The return type is Renderer String. Which means that it returns a string but also take a state as additional monadic input parameter and returns a possible changed version of it as additional return parameter monadically DHUN-}
412
445
413446 tableToLaTeX :: [Anything Char] -> String -> Renderer String
414447 tableToLaTeX l1 s
415448 = do st <- get
435468 Just t -> wdth2 lsc t sep
436469 sep = seperatingLinesRequested s
437470 hline = horizontalLine sep
438 (widths,fontscalefactor)
471 (widths, fontscalefactor)
439472 = case Map.lookup tbno (tabmap st) of
440 Nothing -> (columnWidths l,1.0)
473 Nothing -> (columnWidths l, 1.0)
441474 Just t -> wdth3 lsc t
442475 env = tableEnvironment (getF st)
443 scriptsize = (isInfixOf2 "latexfontsize=\"scriptsize\"" s)||((numberOfColumns l) > 5)
476 scriptsize
477 = (isInfixOf2 "latexfontsize=\"scriptsize\"" s) ||
478 ((numberOfColumns l) > 5)
444479 sb = if scriptsize then "{\\scriptsize{}" else ""
445480 se = if scriptsize then "}" else ""
446 lsc = (env == "longtable") && (((numberOfColumns l) > 100)|| (hasTable l1))
481 lsc
482 = (env == "longtable") &&
483 (((numberOfColumns l) > 100) || (hasTable l1))
447484 lsb = if lsc then "\\begin{landscape}\n" else ""
448485 lse = if lsc then "\n\\end{landscape}" else ""
449486 tbno = (length (tablist st)) + 1
464501 lsb ++
465502 sb ++
466503 (if (env /= "tabular") then "\n" else "\\scalebox{0.85}{") ++
467 (if fontscalefactor==1.0 then "" else "{\\scalefont{"++(printf "%0.5f" fontscalefactor)++"}")++"\\begin{" ++
468 env ++
469 "}{" ++
470 spec ++
471 "}" ++
472 hline ++
473 " \n" ++
474 t1 ++
475 (rowDelimiter sep) ++ " \n\\end{" ++ env ++ "}\n" ++ (if fontscalefactor==1.0 then "" else "}")++ (if (env /= "tabular") then "" else "}")++se ++ lse
504 (if fontscalefactor == 1.0 then "" else
505 "{\\scalefont{" ++ (printf "%0.5f" fontscalefactor) ++ "}")
506 ++
507 "\\begin{" ++
508 env ++
509 "}{" ++
510 spec ++
511 "}" ++
512 hline ++
513 " \n" ++
514 t1 ++
515 (rowDelimiter sep) ++
516 " \n\\end{" ++
517 env ++
518 "}\n" ++
519 (if fontscalefactor == 1.0 then "" else "}") ++
520 (if (env /= "tabular") then "" else "}") ++
521 se ++ lse
476522 return r
477
523
478524 {-DHUN| Converts an image from the parse tree to latex. The actual images is only referenced in the wiki source, as well as the parse tree, as well as the latex source. It takes a parse tree representation of the image as only input parameter. The return type is Renderer String. Which means that it returns a string but also take a state as additional monadic input parameter and returns a possible changed version of it as additional return parameter monadically DHUN-}
479
525
480526 wikiImageToLaTeX :: [Anything Char] -> Renderer String
481527 wikiImageToLaTeX l
482528 = do st <- get
543589 addit st
544590 = if (getInTab st) > 0 then "" else
545591 (if not (micro st) then "\\vspace{0.75cm}" else "")
546
592
547593 {-DHUN| Returns the caption of a wikilink. Takes a parse tree representation of the wikilink and the current state of the renderer. Return the caption in LaTeX representation as string. A Wikilink is represented as [[FooBar]] in Wiki notation. DHUN-}
548
594
549595 wikiLinkCaption :: [Anything Char] -> MyState -> String
550596 wikiLinkCaption l st = if isCaption x then rebuild x else ""
551597 where x = (treeToLaTeX (last (splitOn [C '|'] l)) st)
552598 rebuild (':' : xs) = xs
553599 rebuild b = b
554
600
555601 {-DHUN| Returns the LaTeX representation of a wikilink. Takes a parse tree representation of the wikilink and the current state of the render. A Wikilink is represented as [[FooBar]] in Wiki notation. DHUN-}
556
602
557603 wikiLinkToLaTeX :: [Anything Char] -> MyState -> String
558604 wikiLinkToLaTeX l st
559605 = case
596642 killnl ('\n' : ('\n' : xs)) = killnl ('\n' : xs)
597643 killnl (x : xs) = x : (killnl xs)
598644 killnl [] = []
599
645
600646 {-DHUN| If repeated newlines appear in a string directly after each other. Each series of newlines is reduced to exactly one newline DHUN-}
601
647
602648 killnl2 :: String -> String
603649 killnl2 ('\n' : ('\n' : xs)) = killnl2 ('\n' : xs)
604650 killnl2 ('\n' : xs)
607653 post = (dropWhile (/= '\n') xs)
608654 killnl2 (x : xs) = x : (killnl2 xs)
609655 killnl2 [] = []
610
656
611657 {-DHUN| returns the caption of a link. A link is represented as [foobar.com mycaption] in wiki notation. It takes the parse tree representation of the link as first input parameter. The second input parameter is the current state of the renderer. The third parameter is the Uri scheme as string (See 'URI scheme' in the English wikipeda) usually this is 'http'. It returns the latex representation of the caption of the link as string DHUN-}
612
658
613659 linkCaption ::
614660 [Anything Char] -> MyState -> String -> Bool -> String
615661 linkCaption l st s b
616662 = case spl of
617663 (_ : (gg : gs)) -> (treeToLaTeX
618 (concat (gg:(map (\ x -> (C ' ') : x) gs)))
664 (concat (gg : (map (\ x -> (C ' ') : x) gs)))
619665 st)
620666 _ -> if b then "" else s ++ (escapelink (linkLocation l))
621667 where spl = splitOn [C ' '] l
622
668
623669 {-DHUN| returns the latex representation of a link. A link is represented as [foobar.com mycaption] in wiki notation. It takes the parse tree representation of the link as first input parameter. The second input parameter is the current state of the renderer. The third parameter is the Uri scheme as string (See 'URI scheme' in the English wikipeda) usually this is 'http'. It returns the latex representation of the link as string DHUN-}
624
670
625671 linkToLaTeX :: [Anything Char] -> MyState -> String -> String
626672 linkToLaTeX l st s
627673 = if
634680 where addit = if b then "fn" else ""
635681 b = getInFootnote st
636682 cap = (linkCaption l st s b)
637
683
638684 {-DHUN| takes a list and splits it into sublist of equal length, allowing a possible smaller length for the last list in case the devision does not create an integer result. DHUN-}
639
685
640686 splitToTuples :: [a] -> [[a]]
641687 splitToTuples x
642688 = map (take galleryNumberOfColumns) .
643689 takeWhile (not . null) . iterate (drop galleryNumberOfColumns)
644690 $ x
645
691
646692 {-DHUN| the number of column to be used in latex documents for mediawikis gallery (image gallery) (gallery tags) DHUN-}
647
693
648694 galleryNumberOfColumns :: Int
649695 galleryNumberOfColumns = 1
650
696
651697 {-DHUN| the width of a column for the table of the latex version of mediawikis gallery (image gallery, gallery tags) DHUN-}
652
698
653699 galleryTableScale :: Float
654700 galleryTableScale
655701 = (1.0 / (fromIntegral galleryNumberOfColumns)) - (scalefactor 1.0)
656
702
657703 {-DHUN| the latex string for a single column in table header in the latex version of mediawikis gallery (image gallery, gallery tag) DHUN-}
658
704
659705 galleryTableSpecifier :: String
660706 galleryTableSpecifier
661707 = concat $
662708 replicate galleryNumberOfColumns
663709 ">{\\RaggedRight}p{0.5\\linewidth}"
664
710
665711 {-DHUN| converts the inner parts gallery (image gallery, gallery tag) from parse tree notation to latex, does not write the latex table header and footer. This is only a helper function. Always use galleryToLatex if you want to convert a gallery to latex DHUN-}
666
712
667713 galleryContentToLatex :: [[Anything Char]] -> Renderer String
668714 galleryContentToLatex (x : xs)
669715 = do s <- galleryRowToLaTex x
670716 ss <- galleryContentToLatex xs
671717 return $ s ++ "\\\\ \n" ++ ss
672718 galleryContentToLatex [] = return []
673
719
674720 {-DHUN| converts a part of a gallery (image gallery, gallery tag) from parse tree to latex. A part are as many elements as fit into a single row in the resulting latex table DHUN-}
675
721
676722 galleryRowToLaTex :: [Anything Char] -> Renderer String
677723 galleryRowToLaTex [] = return []
678724 galleryRowToLaTex (x : []) = treeToLaTeX2 [x]
680726 = do s <- treeToLaTeX2 [x]
681727 g <- galleryRowToLaTex xs
682728 return $ s ++ "&" ++ g
683
729
684730 {-DHUN| Converts are gallery (image gallery, gallery tag) from parse tree to latex. Also writes table header and footer. This is the function you should use for converting galleries to latex DHUN-}
685
731
686732 galleryToLatex :: [Anything Char] -> Renderer String
687733 galleryToLatex x
688734 = do st <- get
695741 return
696742 ("\\begin{longtable}{" ++
697743 galleryTableSpecifier ++ "} \n" ++ s ++ "\\end{longtable}")
698
744
699745 {-DHUN| A function to drop all unnecessary elements of an HTML image map, so that it can be converted to latex by calling treeToLaTeX2 DHUN-}
700
746
701747 imageMapClean :: [Anything Char] -> [Anything Char]
702748 imageMapClean ((Environment Wikilink s l) : xs)
703749 = (Environment Wikilink s l) : imageMapClean xs
704750 imageMapClean (_ : xs) = imageMapClean xs
705751 imageMapClean [] = []
706
752
707753 {-DHUN| Takes the parse tree representation of an image, and returns the its size in unit of the width of a line in latex. Images 400px or wider in wiki notation are understood to use the full width of the line. Smaller one are considered fractionally. That means 100px means 0.25 the width of the line and 200px means 0.5 width of the line DHUN-}
708
754
709755 imageSize :: [Anything Char] -> Float
710756 imageSize l = if [] == x then 1.0 else (minimum x)
711757 where x = map readImageSize (imageSizeStrings (shallowFlatten l))
712
758
713759 {-DHUN| takes a candidate string for the width of an image in the wikis px notation. Returns 1.0 if the candidate could not be parsed, returns the width of the image in units of the width of a line in latex otherwise. Images 400px or wider in wiki notation are understood to use the full width of the line. Smaller one are considered fractionally. That means 100px means 0.25 the width of the line and 200px means 0.5 width of the line DHUN-}
714
760
715761 readImageSize :: String -> Float
716762 readImageSize y
717763 = case (reads x) of
720766 where x = removex y
721767 removex ('x' : zs) = zs
722768 removex z = z
723
769
724770 {-DHUN| takes a flattend version of a parse tree represendtation of an image and retruns a list of substrings which are candidates for representing the width of the image in the wikis px notation DHUN-}
725
771
726772 imageSizeStrings :: String -> [String]
727773 imageSizeStrings s
728774 = [take (length (x) - 2) (x) |
729775 x <- ((splitOn ['|'] s) :: [String]), isSuffixOf "px" x]
730
776
731777 {-DHUN| converts a mathematical fomula from the wiki to latex notation DHUN-}
732
778
733779 mathToLatex :: [Anything Char] -> String
734780 mathToLatex l
735781 = if isInfixOf2 "\\begin{alignat}" (shallowFlatten l) then
736782 mathTransform l else "{$" ++ (mathTransform l) ++ "$}"
737
783
738784 {-DHUN| a predicate that returns true if and only if the input is a parse tree that contains only spaces but no other structures DHUN-}
739
785
740786 onlySpaces :: [Anything Char] -> Bool
741787 onlySpaces ((C ' ') : xs) = onlySpaces xs
742788 onlySpaces [] = True
743789 onlySpaces _ = False
744
790
745791 {-DHUN| in the wiki notation pipe (|)inside temples are escaped as {!} and double pipes as {!!}}. this function undoes this escaping in a parse tree DHUN-}
746
792
747793 prepateParameter :: [Anything Char] -> [Anything Char]
748794 prepateParameter ((Environment Template _ [C '!']) : xs)
749795 = (C '|') : prepateParameter xs
751797 = (C '|') : (C '|') : prepateParameter xs
752798 prepateParameter (x : xs) = x : prepateParameter xs
753799 prepateParameter [] = []
754
800
755801 {-DHUN| this function prepares a template as parse by the parser in the parse tree notation into a other notation that can be further processes by the latex renderer and the function templateToLatex in particular. The first input parameter is the parse tree notation of the template. The second is the name of the template as string. It returns a tuple, the first element of this tuple is the name of the template (stripped of heading an tailing white space) and the second element of the tuple is a map from strings to parse trees. The strings are the names of the parameters of the template. These might be just numbers represented as string but also any other strings DHUN-}
756
802
757803 prepateTemplate ::
758804 [Anything Char] -> String -> (String, Map String [Anything Char])
759805 prepateTemplate ll x = (trim x, enum ll 1 (Map.fromList []))
760 where
761 enum ::
806 where enum ::
762807 [Anything Char] ->
763808 Integer -> Map String [Anything Char] -> Map String [Anything Char]
764809 enum ((Environment TemplateInside (Str "") l) : zs) i d
767812 = enum zs i (Map.insert (trim z) (prepateParameter l) d)
768813 enum [] _ d = d
769814 enum (_ : zs) i d = enum zs i d
770
815
771816 {-DHUN| converts a template from the wiki to latex. The first parameter is the parse tree representation of the template as generated by the parse the second is the name of the template. It returns a Renderer String. That is it returns the latex representation of the template, but also takes a state as an additional monadic input parameter and returns a possible changed version of it as additional return parameter monadically DHUN-}
772
817
773818 templateToLatex :: [Anything Char] -> String -> Renderer String
774819 templateToLatex l s
775820 = state $
785830 st)
786831 ((C 'B') : ((C '|') : xs)) -> (wikiLinkToLaTeX xs st, st)
787832 _ -> swap $ templateProcessor st (prepateTemplate l s)
788
789
790
791833
792834 mnfindentlatex :: Map String [Anything Char] -> Renderer String
793 mnfindentlatex ll = do one<-treeToLaTeX2 (Map.findWithDefault [] "1" ll)
794 st<-get
795 return ("\n\\begin{" ++ (itemEnvironmentName ":" (getF st)) ++ "}" ++ (itemEnvironmentParameters ":" (getF st)) ++ "\n\\item{}" ++ one ++ "\n\\end{" ++ (itemEnvironmentName ":" (getF st)) ++ "}\n")
796
835 mnfindentlatex ll
836 = do one <- treeToLaTeX2 (Map.findWithDefault [] "1" ll)
837 st <- get
838 return
839 ("\n\\begin{" ++
840 (itemEnvironmentName ":" (getF st)) ++
841 "}" ++
842 (itemEnvironmentParameters ":" (getF st)) ++
843 "\n\\item{}" ++
844 one ++ "\n\\end{" ++ (itemEnvironmentName ":" (getF st)) ++ "}\n")
797845
798846 {-DHUN| function to converts wikipedias citearticle template to latex DHUN-}
799
847
800848 citearticle :: Map String [Anything Char] -> Renderer String
801849 citearticle ll
802850 = state $
843891 (treeToLaTeX (Map.findWithDefault [] "month" ll) st) else
844892 "")
845893 ++
846 (treeToLaTeX (Map.findWithDefault [] "year" ll) st) ++
894 (treeToLaTeX (Map.findWithDefault [] "year" ll) st) ++
847895 (if Map.member "url" ll then
848 (treeToLaTeX (Map.findWithDefault [] "url" ll) st) else
849 "")++"\n", st)
850
896 (treeToLaTeX (Map.findWithDefault [] "url" ll) st)
897 else "")
898 ++ "\n",
899 st)
900
851901 {-DHUN| removes source structures from a parse tree, keeping the source inside the source structure in the parse tree, so flattens out the source structure. You need this if some parse tree contains source code but you don't know whether or not it is inside a source tag DHUN-}
852
902
853903 flattensource :: [Anything Char] -> [Anything Char]
854904 flattensource ((Environment Source (TagAttr _ _) l) : xs)
855905 = l ++ (flattensource xs)
856906 flattensource (x : xs) = x : (flattensource xs)
857907 flattensource [] = []
858
908
859909 {-DHUN| prepare code for printing in latex. takes the current state of the renderer as first parameter. takes the map version of the template containing the code as second parameter. returns the latex representation as string DHUN-}
860
910
861911 trilex :: MyState -> Map String [Anything Char] -> String
862912 trilex st ll = trilexgen st ll "code"
863
913
864914 {-DHUN| prepare code for printing in latex. Takes the map version of the template containing the code as first parameter. returns the latex representation as string. It returns a Render String so that it can access the current state of the renderer as additional monadic input parameter DHUN-}
865
915
866916 trilex2 :: Map String [Anything Char] -> Renderer String
867917 trilex2 ll
868918 = do st <- get
869919 return $ trilexgen st ll "code"
870
920
871921 {-DHUN| prepare code for printing in latex. takes the current state of the renderer as first parameter. takes the map version of the template containing the code as second parameter. takes the name of the parameter containing the actual source in the map as third parameter. returns the latex representation as string DHUN-}
872
922
873923 trilexgen ::
874924 MyState -> Map String [Anything Char] -> String -> String
875925 trilexgen st ll code
879929 (killnewline (flattensource (Map.findWithDefault [] code ll))))
880930 st)
881931 else ""
882 where
883 killnewline :: [Anything Char] -> [Anything Char]
932 where killnewline :: [Anything Char] -> [Anything Char]
884933 killnewline ((C '\n') : xs) = killnewline xs
885934 killnewline x = x
886
935
887936 {-DHUN| analyzes a color in HTML notation. It returns a triple. The first element is a boolean. If it is true the color has got rgb hex notation and the third parameter contains the rgb latex notation. If it is false, the color is not rgb and hopefully a HTML color name, which is returned a second element of the tuple DHUN-}
888
937
889938 colinfo :: String -> (Bool, String, String)
890939 colinfo colcode
891940 = case col of
907956 of
908957 Just x -> x
909958 Nothing -> colcode
910
959
911960 ss :: String -> [Integer]
912961 ss (a : (b : xs)) = (maybeToList . unhex $ [a, b]) ++ (ss xs)
913962 ss _ = []
914
963
915964 ss3 :: String -> [Integer]
916965 ss3 (a : xs) = (maybeToList . unhex $ [a, a]) ++ (ss3 xs)
917966 ss3 _ = []
918
967
919968 ss2 :: [Integer] -> [Float]
920969 ss2 (x : xs) = ((fromInteger x) / 255.0) : ss2 xs
921970 ss2 [] = []
922971 ss4 x = if (((length . ss) x) == 3) then ss x else ss3 x
923
972
924973 prettyp2 :: [String] -> String
925974 prettyp2 (x : []) = x
926975 prettyp2 (x : xs) = x ++ "," ++ (prettyp2 xs)
927976 prettyp2 [] = []
928
977
929978 prettyp :: [String] -> String
930979 prettyp x = "{" ++ (prettyp2 x) ++ "}"
931
980
932981 makecol :: Maybe String -> Maybe String
933982 makecol x
934983 = do xx <- x
936985 return $
937986 prettyp ((map (printf "%0.5f") ((ss2 . ss4) xx)) :: [String])
938987 mypred x = (((length . ss) x) == 3) || (((length . ss3) x) == 3)
939
988
940989 {-DHUN| and adapter to convert between the monadic and non monadic version of render. A function returning renderer string means that it returns a string but takes a state as additional monadic input parameter and returns the a possibly modified version of it as an additional monadic output parameter. This function takes a monadic return value. That is renderer String and returns its non monadic version. DHUN-}
941
990
942991 tempProcAdapter ::
943992 Renderer String -> (MyState -> (MyState, String))
944993 tempProcAdapter = (swap .) . runState
945
994
946995 {-DHUN| function for key strokes templates in the blender wikibook DHUN-}
947
996
948997 key :: [Char] -> [Char]
949998 key "AKEY" = "A"
950999 key "BKEY" = "B"
9761025 key "SEMICOLON" = ";"
9771026 key "NUM-" = "NUM-{}"
9781027 key x = x
979
1028
9801029 {-DHUN| converts a template to latex. Takes the current state of the render as first input parameter. The second input parameter is a tuple. Its first element is the name of the template as string. Its second element is a map, mapping the names of the parameters of the template to their parse tree representations, it returns a tuple. The First element is the possible change state of the renderer the second one is the latex representation of the template DHUN-}
981
1030
9821031 templateProcessor ::
9831032 MyState ->
9841033 (String, Map String [Anything Char]) -> (MyState, String)
9861035 = (st,
9871036 "Main Page: " ++
9881037 (wikiLinkToLaTeX (Map.findWithDefault [] "1" ll) st))
989
990
991 templateProcessor st ("#invoke:Mathe für Nicht-Freaks/Seite", _)
1038 templateProcessor st ("#invoke:Mathe f\252r Nicht-Freaks/Seite", _)
9921039 = (st, "")
993 templateProcessor st ("#invoke:Liste", _)
994 = (st, "")
995
1040 templateProcessor st ("#invoke:Liste", _) = (st, "")
9961041 templateProcessor st ("!", _) = (st, "|")
9971042 templateProcessor st ("!!", _) = (st, "||")
9981043 templateProcessor st
11221167 where go (C x) = [x]
11231168 go _ = []
11241169 templateProcessor st ("Komplexe Zahlen/ Vorlage:Formel", ll)
1125 = (st, (treeToLaTeX (shallowEnlargeMath (Map.findWithDefault [] "1" ll)) st) )
1170 = (st,
1171 (treeToLaTeX (shallowEnlargeMath (Map.findWithDefault [] "1" ll))
1172 st))
11261173 templateProcessor st ("Ada/95/RM", ll)
11271174 = (st,
11281175 (linkToLaTeX
12511298 (treeToLaTeX (Map.findWithDefault [] "Aufgabe" ll) st) ++
12521299 "\n {\\bfseries Musterl\246sung} \n" ++
12531300 (treeToLaTeX (Map.findWithDefault [] "L\246sung" ll) st) ++ " \n"))
1254 templateProcessor st ("-", ll) = (tempProcAdapter $ mnfindentlatex ll) st
1301 templateProcessor st ("-", ll)
1302 = (tempProcAdapter $ mnfindentlatex ll) st
12551303 templateProcessor st ("Haskell speaker 2", ll) = (st, param "1")
12561304 where param n = (treeToLaTeX (Map.findWithDefault [] n ll) st)
12571305 templateProcessor st ("Vorlage:LaTeX Mehrspaltig Anfang", ll)
12701318 templateProcessor st ("Referenzbox Internet", ll)
12711319 = (st,
12721320 "{\\bfseries Themenbezogene} " ++
1273 (treeToLaTeX [Environment Wikilink (Str "") ((Map.findWithDefault [] "1" ll)++(map C "|Webangebote"))] st))
1321 (treeToLaTeX
1322 [Environment Wikilink (Str "")
1323 ((Map.findWithDefault [] "1" ll) ++ (map C "|Webangebote"))]
1324 st))
12741325 templateProcessor st ("Vorlage:Referenzbox Internet", ll)
12751326 = (st,
12761327 "{\\bfseries Themenbezogene} " ++
1277 (treeToLaTeX [Environment Wikilink (Str "") ((Map.findWithDefault [] "1" ll)++(map C "|Webangebote"))] st))
1328 (treeToLaTeX
1329 [Environment Wikilink (Str "")
1330 ((Map.findWithDefault [] "1" ll) ++ (map C "|Webangebote"))]
1331 st))
12781332 templateProcessor st ("Referenzbox IntraReihe", ll)
12791333 = (st,
12801334 "{\\bfseries Zum anderen Band der " ++
1281 (treeToLaTeX [Environment Wikilink (Str "") ((Map.findWithDefault [] "1" ll)++[C '|']++(Map.findWithDefault [] "2" ll))] st)++"}"++(treeToLaTeX (Map.findWithDefault [] "3" ll) st) )
1282
1335 (treeToLaTeX
1336 [Environment Wikilink (Str "")
1337 ((Map.findWithDefault [] "1" ll) ++
1338 [C '|'] ++ (Map.findWithDefault [] "2" ll))]
1339 st)
1340 ++ "}" ++ (treeToLaTeX (Map.findWithDefault [] "3" ll) st))
12831341 templateProcessor st ("Vorlage:Referenzbox IntraReihe", ll)
12841342 = (st,
12851343 "{\\bfseries Zum anderen Band der " ++
1286 (treeToLaTeX [Environment Wikilink (Str "") ((Map.findWithDefault [] "1" ll)++[C '|']++(Map.findWithDefault [] "2" ll))] st)++"}"++(treeToLaTeX (Map.findWithDefault [] "3" ll) st) )
1344 (treeToLaTeX
1345 [Environment Wikilink (Str "")
1346 ((Map.findWithDefault [] "1" ll) ++
1347 [C '|'] ++ (Map.findWithDefault [] "2" ll))]
1348 st)
1349 ++ "}" ++ (treeToLaTeX (Map.findWithDefault [] "3" ll) st))
12871350 templateProcessor st ("unicode", ll)
12881351 = (st, (treeToLaTeX (Map.findWithDefault [] "1" ll) st))
12891352 templateProcessor st
13971460 where heading
13981461 = if Map.member "info" ll then
13991462 treeToLaTeX (Map.findWithDefault [] "info" ll) st else "Hinweis"
1400 templateProcessor st ("Wie_mein_Buch_auf_die_Welt_kommt/_Vorlage:Zitat", ll) = templateProcessor st ("Zitat", ll)
1463 templateProcessor st
1464 ("Wie_mein_Buch_auf_die_Welt_kommt/_Vorlage:Zitat", ll)
1465 = templateProcessor st ("Zitat", ll)
14011466 templateProcessor st ("Zitat", ll)
14021467 = (st,
14031468 "\\begin{longtable}{|p{\\linewidth}|}\\hline\n" ++
14071472 else "")
14081473 ++
14091474 " \\uline{" ++
1410 (treeToLaTeX ((Map.findWithDefault (Map.findWithDefault [] "Autor" ll) "autor" ll)) st) ++
1475 (treeToLaTeX
1476 ((Map.findWithDefault (Map.findWithDefault [] "Autor" ll) "autor"
1477 ll))
1478 st)
1479 ++
14111480 "}\\\\\\textit{\n" ++
1412 (treeToLaTeX ((Map.findWithDefault (Map.findWithDefault [] "Zitat" ll) "zitat" ll)) st) ++
1481 (treeToLaTeX
1482 ((Map.findWithDefault (Map.findWithDefault [] "Zitat" ll) "zitat"
1483 ll))
1484 st)
1485 ++
14131486 "}\\scriptsize \\\\ \\RaggedLeft \\scriptsize \n" ++
1414 (treeToLaTeX ((Map.findWithDefault (Map.findWithDefault [] "Quelle" ll) "quelle" ll)) st) ++
1415 "\\\\ \\hline \n\\end{longtable}\n")
1487 (treeToLaTeX
1488 ((Map.findWithDefault (Map.findWithDefault [] "Quelle" ll) "quelle"
1489 ll))
1490 st)
1491 ++ "\\\\ \\hline \n\\end{longtable}\n")
14161492 templateProcessor st ("java web api", ll)
14171493 = (st,
14181494 "\\myhref{http://java.sun.com/javase/6/docs/api/" ++
14191495 loc ++ "}{" ++ cap ++ "}")
1420 where
1421 loc :: String
1496 where loc :: String
14221497 loc = (shallowFlatten (Map.findWithDefault [] "1" ll))
1423
1498
14241499 cap :: String
14251500 cap
14261501 = "Java API: " ++ (treeToLaTeX (Map.findWithDefault [] "2" ll) st)
14681543 st)
14691544 ++ "}}\n")
14701545 templateProcessor st ("BigJava", ll)
1471 = tempProcAdapter (do x<-return ((replace2 (replace2 ("{\\ttfamily { " ++ ((formatLaTeXBlock defaultFormatOpts) (highlightAs "java" (shallowFlatten (map renormalize (breakLines3 96 (Map.findWithDefault [] "code" ll))))))++ "}}\n" )"\\begin{Shaded}" "") "\\end{Shaded}" ""))
1472 lll<-mapM doFonts x
1473 return (concat lll) ) st
1546 = tempProcAdapter
1547 (do x <- return
1548 ((replace2
1549 (replace2
1550 ("{\\ttfamily { " ++
1551 ((formatLaTeXBlock defaultFormatOpts)
1552 (highlightAs "java"
1553 (shallowFlatten
1554 (map renormalize
1555 (breakLines3 96 (Map.findWithDefault [] "code" ll))))))
1556 ++ "}}\n")
1557 "\\begin{Shaded}"
1558 "")
1559 "\\end{Shaded}"
1560 ""))
1561 lll <- mapM doFonts x
1562 return (concat lll))
1563 st
14741564 templateProcessor st
14751565 ("C++-Programmierung/ Vorlage:Kapitelanhang", ll)
14761566 = (st,
14921582 templateProcessor st ("code:Output", ll)
14931583 = (st,
14941584 ("{{" ++
1495
1496 (if (Map.member "1" ll) then
1497 "{ {" ++
1498 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "1" ll))
1499 st)
1585 (if (Map.member "1" ll) then
1586 "{ {" ++
1587 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "1" ll)) st)
1588 ++ "}}"
1589 else "")
1590 ++
1591 "}}\n$\\text{ }$\\newline{}\n{\\bfseries Code}\\newline{}" ++
1592 (if (Map.member "2" ll) then
1593 "{\\ttfamily {\\scriptsize" ++
1594 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "2" ll)) st)
15001595 ++ "}}"
15011596 else "")
1597 ++
1598 (if (Map.member "3" ll) then
1599 "\n{\\bfseries Output}\\newline{}{\\ttfamily {\\scriptsize" ++
1600 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "3" ll)) st)
1601 ++ "}}"
1602 else "")))
1603 templateProcessor st ("bcode:Example", ll)
1604 = (st,
1605 ("{\\bfseries Code}\\newline{}{\\ttfamily {\\scriptsize" ++
1606 (if (Map.member "1" ll) then
1607 "{\\ttfamily {\\scriptsize" ++
1608 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "1" ll)) st)
1609 ++ "}}"
1610 else "")
15021611 ++
1503
1504 "}}\n$\\text{ }$\\newline{}\n{\\bfseries Code}\\newline{}" ++
1612 "}}\n" ++
15051613 (if (Map.member "2" ll) then
15061614 "{\\ttfamily {\\scriptsize" ++
1507 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "2" ll))
1508 st)
1615 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "2" ll)) st)
15091616 ++ "}}"
15101617 else "")
1511 ++
1512 (if (Map.member "3" ll) then
1513 "\n{\\bfseries Output}\\newline{}{\\ttfamily {\\scriptsize" ++
1514 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "3" ll))
1515 st)
1516 ++ "}}"
1517 else "")))
1518
1519 templateProcessor st ("bcode:Example", ll)
1520 = (st,
1521 ("{\\bfseries Code}\\newline{}{\\ttfamily {\\scriptsize" ++
1522
1523 (if (Map.member "1" ll) then
1524 "{\\ttfamily {\\scriptsize" ++
1525 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "1" ll))
1526 st)
1527 ++ "}}"
1528 else "")
1529 ++
1530
1531 "}}\n" ++
1532 (if (Map.member "2" ll) then
1533 "{\\ttfamily {\\scriptsize" ++
1534 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "2" ll))
1535 st)
1536 ++ "}}"
1537 else "")
1538 ++
1539 (if (Map.member "3" ll) then
1540 "{\\ttfamily {\\scriptsize" ++
1541 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "3" ll))
1542 st)
1543 ++ "}}"
1544 else "")))
1545
1618 ++
1619 (if (Map.member "3" ll) then
1620 "{\\ttfamily {\\scriptsize" ++
1621 (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "3" ll)) st)
1622 ++ "}}"
1623 else "")))
15461624 templateProcessor st ("C++-Programmierung/ Vorlage:Code", ll)
15471625 = (st,
15481626 ("{\\ttfamily {\\scriptsize" ++
15591637 st)
15601638 ++ "}}"
15611639 else "")))
1562
15631640 templateProcessor st ("cite web", ll) = (st, mainer)
15641641 where mainer
15651642 = "\\myhref{" ++
15721649 accessdate
15731650 = treeToLaTeX (Map.findWithDefault [] "accessdate" ll) st
15741651 templateProcessor st ("code", ll) = (st, mainer)
1575 where
1576 mainer :: String
1652 where mainer :: String
15771653 mainer
15781654 = "\\TemplateCode{" ++
15791655 header ++
15801656 "}{" ++ "}{" ++ "}{" ++ "}{" ++ lang ++ "}{" ++ code ++ "}{}{}{}"
1581
1657
15821658 header :: String
15831659 header
15841660 = if Map.member "header" ll then
15851661 (treeToLaTeX (Map.findWithDefault [] "header" ll)
15861662 st{getInCode = True})
15871663 else ""
1588
1664
15891665 lang :: String
15901666 lang
15911667 = if Map.member "lang" ll then
15931669 st{getInCode = True})
15941670 ++ " Source"
15951671 else ""
1596
1672
15971673 code :: String
15981674 code = trilexgen st{getInCode = True} ll "source"
15991675 templateProcessor st ("Java_Code_File", ll) = (st, mainer)
1600 where
1601 mainer :: String
1676 where mainer :: String
16021677 mainer
16031678 = "\\TemplateCode{" ++
16041679 header ++
16051680 "}{" ++ "}{" ++ "}{" ++ "}{" ++ lang ++ "}{" ++ code ++ "}{}{}{}"
1606
1681
16071682 header :: String
16081683 header
16091684 = if Map.member "header" ll then
16101685 (treeToLaTeX (Map.findWithDefault [] "header" ll)
16111686 st{getInCode = True})
16121687 else ""
1613
1688
16141689 lang :: String
16151690 lang = if Map.member "lang" ll then "Java Source" else ""
1616
1691
16171692 code :: String
16181693 code = trilexgen st{getInCode = True} ll "source"
16191694 templateProcessor st ("Syntax", ll) = (st, mainer)
1620 where
1621 mainer :: String
1695 where mainer :: String
16221696 mainer
16231697 = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n"
16241698 ++ code ++ "\n\\end{TemplateCodeInside}\n"
1625
1699
16261700 code :: String
16271701 code = trilex st{getInCode = True} ll
16281702 templateProcessor st ("syntax", ll) = (st, mainer)
1629 where
1630 mainer :: String
1703 where mainer :: String
16311704 mainer
16321705 = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n"
16331706 ++ code ++ "\n\\end{TemplateCodeInside}\n"
1634
1707
16351708 code :: String
16361709 code = trilex st{getInCode = True} ll
16371710 templateProcessor st ("HaskellGHCi", ll) = (st, mainer)
1638 where
1639 mainer :: String
1711 where mainer :: String
16401712 mainer
16411713 = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n"
16421714 ++ code ++ "\n\\end{TemplateCodeInside}\n"
1643
1715
16441716 code :: String
16451717 code = trilexgen st{getInCode = True} ll "1"
16461718 templateProcessor st ("Java://", ll) = (st, mainer)
1647 where
1648 mainer :: String
1719 where mainer :: String
16491720 mainer
16501721 = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n"
16511722 ++ code ++ "\n\\end{TemplateCodeInside}\n"
1652
1723
16531724 code :: String
16541725 code = trilexgen st{getInCode = True} ll "1"
16551726 templateProcessor st ("LaTeX/Usage", ll)
16731744 (x, st2)
16741745 = runState (treeToLaTeX2 (Map.findWithDefault [] "render" ll)) st
16751746 templateProcessor st ("java://", ll) = (st, mainer)
1676 where
1677 mainer :: String
1747 where mainer :: String
16781748 mainer
16791749 = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n"
16801750 ++ code ++ "\n\\end{TemplateCodeInside}\n"
1681
1751
16821752 code :: String
16831753 code = trilexgen st{getInCode = True} ll "1"
16841754 templateProcessor st ("java", ll) = (st, mainer)
1685 where
1686 mainer :: String
1755 where mainer :: String
16871756 mainer
16881757 = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n"
16891758 ++ code ++ "\n\\end{TemplateCodeInside}\n"
1690
1759
16911760 code :: String
16921761 code = trilex st{getInCode = True} ll
16931762 templateProcessor st ("DOI", ll)
18131882 = (tempProcAdapter $ citearticle ll) st
18141883 templateProcessor st ("cite news", ll)
18151884 = (tempProcAdapter $ citearticle ll) st
1816
18171885 templateProcessor st ("Druckversionsnotiz", _) = (st, "")
1818
18191886 templateProcessor st ("meta", ll)
18201887 = (st,
18211888 wikiLinkToLaTeX
18521919 (((C 's') : (C ':') : (Map.findWithDefault [] "1" ll)) ++
18531920 [C '|'] ++ (Map.findWithDefault [] "2" ll))
18541921 st)
1855
18561922 templateProcessor st ("wiktionary", ll)
18571923 = (st,
18581924 wikiLinkToLaTeX
1859 ((C 'w') :(C 'i'): (C 'k'): (C 't') : (C ':') : (Map.findWithDefault [] "1" ll))
1925 ((C 'w') :
1926 (C 'i') :
1927 (C 'k') : (C 't') : (C ':') : (Map.findWithDefault [] "1" ll))
18601928 st)
1861 templateProcessor st ("indent", ll)
1862 = (st, go)
1863 where
1864 go = case reads (shallowFlatten (Map.findWithDefault [] "1" ll)) :: [(Integer,String)] of
1865 [(n, _)] -> "\\newline{}"++(concat (genericReplicate n "{$\\text{ }$}"))
1929 templateProcessor st ("indent", ll) = (st, go)
1930 where go
1931 = case
1932 reads (shallowFlatten (Map.findWithDefault [] "1" ll)) ::
1933 [(Integer, String)]
1934 of
1935 [(n, _)] -> "\\newline{}" ++
1936 (concat (genericReplicate n "{$\\text{ }$}"))
18661937 _ -> "\\newline{}"
18671938 templateProcessor st ("wikipedia", ll)
18681939 = (st,
19021973 templateProcessor st ("Wiktionary", ll)
19031974 = (st,
19041975 wikiLinkToLaTeX
1905 ((map C "wiktionary") ++ ((C ':') : (Map.findWithDefault [] "1" ll)))
1976 ((map C "wiktionary") ++
1977 ((C ':') : (Map.findWithDefault [] "1" ll)))
19061978 st)
1907
19081979 templateProcessor st ("B3D:N2P/VTT1", ll) = (st2, r)
19091980 where (r, st2)
19101981 = runState
19602031 ll))
19612032 st)
19622033 templateProcessor st ("Kasten", ll)
1963 = (st, "\\LaTeXZeroBoxTemplate{"++(
1964 treeToLaTeX
1965 (Map.findWithDefault (Map.findWithDefault [] "inhalt" ll) "1" ll)
1966 st)++"}")
1967
2034 = (st,
2035 "\\LaTeXZeroBoxTemplate{" ++
2036 (treeToLaTeX
2037 (Map.findWithDefault (Map.findWithDefault [] "inhalt" ll) "1" ll)
2038 st)
2039 ++ "}")
19682040 templateProcessor st ("Druckversion Titeleintrag", ll)
19692041 = (st,
19702042 "\\pagebreak{}\\begin{longtable}{|p{0.3\\linewidth}|p{0.7\\linewidth}|}\\hline\n"
20682140 = (tempProcAdapter $ javakeyword xs ll "Java:") st
20692141 templateProcessor st ("Haskell lib", ll)
20702142 = (st, linkToLaTeX link st "")
2071 where
2072 param :: String -> Maybe [Anything Char]
2143 where param :: String -> Maybe [Anything Char]
20732144 param name = Map.lookup name ll
20742145 package
20752146 = fromMaybe (map (C) "base") $ param "p" `mplus` param "package"
20762147 version
20772148 = fromMaybe (map (C) "4.1.0.0") $ param "v" `mplus` param "version"
2078
2149
20792150 unnamed :: Integer -> [Anything Char]
20802151 unnamed i = fromMaybe [] $ param (show i)
20812152 unnPars = takeWhile (not . null) $ map unnamed [1 ..]
21082179 "\\myhref{http://commons.wikimedia.org/wiki/" ++
21092180 loc ++ "}{" ++ cap ++ "}")
21102181 where loc = (shallowFlatten (Map.findWithDefault [] "1" ll))
2111
2182
21122183 cap :: String
21132184 cap
21142185 = if (Map.member "2" ll) then
21192190 "\\myhref{http://commons.wikimedia.org/wiki/" ++
21202191 loc ++ "}{" ++ cap ++ "}")
21212192 where loc = (shallowFlatten (Map.findWithDefault [] "1" ll))
2122
2193
21232194 cap :: String
21242195 cap
21252196 = if (Map.member "2" ll) then
21302201 "\\myhref{http://commons.wikimedia.org/wiki/Category:" ++
21312202 loc ++ "}{" ++ cap ++ "}")
21322203 where loc = (shallowFlatten (Map.findWithDefault [] "1" ll))
2133
2204
21342205 cap :: String
21352206 cap
21362207 = if (Map.member "2" ll) then
21412212 "\\myhref{http://commons.wikimedia.org/wiki/" ++
21422213 loc ++ "}{" ++ cap ++ "}")
21432214 where loc = (shallowFlatten (Map.findWithDefault [] "1" ll))
2144
2215
21452216 cap :: String
21462217 cap
21472218 = if (Map.member "2" ll) then
21642235 st)
21652236 templateProcessor st ("Reaktion", ll)
21662237 = (st, edukte ++ " $\\rightarrow$ " ++ produkte)
2167 where
2168 reput :: [String] -> [[Anything Char]] -> [[Anything Char]]
2238 where reput :: [String] -> [[Anything Char]] -> [[Anything Char]]
21692239 reput (k : ks) out
21702240 = if Map.member k ll then
21712241 reput ks ((Map.findWithDefault [] k ll) : out) else reput ks out
21722242 reput [] out = out
2173
2243
21742244 myjoin :: [[Anything Char]] -> String -> String
21752245 myjoin (x : xs) acu
21762246 = if xs == [] then
21892259 = templateProcessor st ("C++-Programmierung/ Vorlage:Code", ll)
21902260 templateProcessor st ("Regal:Programmierung: Vorlage:Code", ll)
21912261 = (st, mainer)
2192 where
2193 mainer :: String
2262 where mainer :: String
21942263 mainer
21952264 = "\\TemplateCode{" ++
21962265 header ++
21992268 "}{" ++
22002269 marker ++
22012270 "}{}{" ++ output ++ "}{" ++ lang ++ "}{" ++ code ++ "}{}{}{}"
2202
2271
22032272 marker :: String
22042273 marker
22052274 = if Map.member "error" ll then "e" else
22062275 if Map.member "valid" ll then "valid" else ""
2207
2276
22082277 header :: String
22092278 header
22102279 = if Map.member "kopf" ll then
22112280 (treeToLaTeX (Map.findWithDefault [] "kopf" ll)
22122281 st{getInCode = True})
22132282 else ""
2214
2283
22152284 lang :: String
22162285 lang
22172286 = if Map.member "lang" ll then
22192288 st{getInCode = True})
22202289 ++ " Quelltext"
22212290 else ""
2222
2291
22232292 code :: String
22242293 code = trilex st{getInCode = True} ll
2225
2294
22262295 output :: String
22272296 output
22282297 = if Map.member "output" ll then
22292298 (treeToLaTeX (killnbsp (Map.findWithDefault [] "output" ll))
22302299 st{getInCode = True})
22312300 else ""
2232
2301
22332302 footer :: String
22342303 footer
22352304 = if Map.member "fuss" ll then
22402309 killnbsp x = x
22412310 templateProcessor st (x, ll)
22422311 = (tempProcAdapter $ unknownTemplate x ll) st
2243
2312
22442313 {-DHUN| This function is currently nearly unused, it is essentially the same as templateProcessor. But with this way of writing it down you can find out for which templates handler functions are registered. DHUN-}
2245
2314
22462315 templateRegistry ::
22472316 [(String, Map String [Anything Char] -> Renderer String)]
22482317 templateRegistry
22492318 = [("Regal:Programmierung: Vorlage:Code",
22502319 \ ll ->
2251 let
2252 marker :: String
2320 let marker :: String
22532321 marker
22542322 = if Map.member "error" ll then "e" else
22552323 if Map.member "valid" ll then "valid" else ""
22772345 "}{" ++
22782346 marker ++
22792347 "}{" ++ output ++ "}{" ++ lang ++ "}{" ++ code ++ "}{}{}{}")]
2280
2348
22812349 {-DHUN| processing of Java keywords for the English wikibook on Java, each Java keyword got its own template there DHUN-}
2282
2350
22832351 javakeyword ::
22842352 [Char] -> Map String [Anything Char] -> [Char] -> Renderer String
22852353 javakeyword xs ll j
22942362 "abstract", "false", "else", "switch", "continue", "import",
22952363 "final", "break", "implements", "finally", "while", "string",
22962364 "float", "do", "for", "case", "default", "package", "this"])
2297
2365
22982366 {-DHUN| Handler for the unknown template. That is the ones that no handler was registered for DHUN-}
2299
2367
23002368 unknownTemplate ::
23012369 String -> Map String [Anything Char] -> Renderer String
23022370 unknownTemplate xx ll2
23242392 (drop 1 . nullinit $ show x) ++
23252393 "\n\n" ++ "{" ++ (intercalate "}{" uparams) ++ "}" ++ "\n\n"
23262394 in fromMaybe unknown_sf maybe_known_sf
2327
2395
23282396 {-DHUN| helper function to generate image numbers for image in image galleries (gallery tag in mediawiki) the fist input parameter is the start number, the second the end number. A list of all numbers between start and end is returned DHUN-}
2329
2397
23302398 generateGINsHelper :: Int -> Int -> [Int]
23312399 generateGINsHelper b e
23322400 = if b == e then [] else b : (generateGINsHelper (b + 1) e)
2333
2401
23342402 {-DHUN| function to generate image numbers for image in image galleries (gallery tag in mediawiki). The renderer start before the start of the gallery is given as first parameter. The state of the renderer after the end of the gallery is given as second parameter. A list containing the numbers of the images in the gallery is returned DHUN-}
2335
2403
23362404 generateGalleryImageNumbers :: MyState -> MyState -> [Int]
23372405 generateGalleryImageNumbers oldst newst
23382406 = generateGINsHelper (getJ oldst) (getJ newst)
2339
2407
23402408 {-DHUN| strips center tags of the parse tree keeping the data inside the center tags in the parse tree, so just flattens out the center tags DHUN-}
2341
2409
23422410 uncenter :: [Anything t] -> [Anything t]
23432411 uncenter ((Environment Tag (TagAttr "center" _) l) : xs)
23442412 = l ++ (uncenter xs)
23462414 = (Environment e s (uncenter l)) : (uncenter xs)
23472415 uncenter (x : xs) = x : (uncenter xs)
23482416 uncenter [] = []
2349
2350
23512417
23522418 doFonts :: Char -> Renderer String
23532419 doFonts c
2354 = do st <- get
2355 case (fontStack st) of
2356 (x : _) -> if (getFont x c) == (font st) then return [c]
2357 else
2358 do put st{font = (getFont x c)}
2359 return
2360 ((fontsetter (getFont x c)) ++ (fontstyler x) ++ [c])
2361 _ -> return [c]
2420 = do st <- get
2421 case (fontStack st) of
2422 (x : _) -> if (getFont x c) == (font st) then return [c] else
2423 do put st{font = (getFont x c)}
2424 return ((fontsetter (getFont x c)) ++ (fontstyler x) ++ [c])
2425 _ -> return [c]
23622426
23632427 {-DHUN| converts a parse tree to latex. Takes the parse tree as first parameter. Takes the current state of the renderer as second input parameter. Returns the latex representation of the tree as return value. This function should only be used internally in latex renderer since it does not generate the table of names references for the ref tags. DHUN-}
2364
2428
23652429 treeToLaTeX :: [Anything Char] -> MyState -> String
23662430 treeToLaTeX l states = fst $ runState (treeToLaTeX2 l) states
2367
2431
23682432 {-DHUN| converts a parse tree to latex. Takes the parse tree as first parameter. Takes the current state of the renderer as second input parameter. Returns a tuple. the first element is the latex representation of the tree. The second is the new state of the renders. Does one run before the actual run, in order to generate a table of names references for the ref tags in mediawiki. This function should be called by the main program after the parser. DHUN-}
2369
2433
23702434 treeToLaTeX3 :: [Anything Char] -> MyState -> (String, MyState)
23712435 treeToLaTeX3 l st = runState ttl2twice st
23722436 where ttl2twice
23742438 b <- get
23752439 put st{fndict = fndict b}
23762440 treeToLaTeX2 l
2377
2441
23782442 {-DHUN| converts a parse tree to latex. Takes the parse tree as first parameter. Takes the current state of the renderer as second input parameter. Returns the latex representation of the tree as Renderer String. So it actually takes the current state of the renderer as additional monadic input parameter and returns a possible modified version of it as additional monadic return parameter. This function should only be used internally in latex renderer since it does not generate the table of names references for the ref tags. DHUN-}
2379
2443
23802444 treeToLaTeX2 :: [Anything Char] -> Renderer String
23812445 treeToLaTeX2 ll
23822446 = do x <- allinfo
23832447 return $ concat x
2384 where
2385 allinfo :: Renderer [String]
2448 where allinfo :: Renderer [String]
23862449 allinfo = mapM nodeToLaTeX (removeBr ll)
2387
2450
23882451 walk :: String -> [Anything Char] -> String -> Renderer String
23892452 walk prefix l postfix
23902453 = do d <- treeToLaTeX2 l
23912454 return $ prefix ++ d ++ postfix
2392
2455
23932456 walktrim :: String -> [Anything Char] -> String -> Renderer String
23942457 walktrim prefix l postfix
23952458 = do st <- get
23982461 st2 <- get
23992462 put $ st2{getInHeading = getInHeading st}
24002463 return $ prefix ++ (trim d) ++ postfix
2401
2464
24022465 walkbf :: [Anything Char] -> Renderer String
24032466 walkbf l
24042467 = do st <- get
24052468 put $
2406 st{lastFontChanged=True, fontStack =
2469 st{lastFontChanged = True,
2470 fontStack =
24072471 ((fromMaybe
24082472 FontStyle{stylebase = Normal, bold = True, italic = False}
24092473 (maybeHead (fontStack st))){bold = True}
24122476 st2 <- get
24132477 put $ st2{fontStack = drop 1 (fontStack st2)}
24142478 return $ "{\\bfseries " ++ (trim d) ++ "}"
2415
2479
24162480 walkit :: [Anything Char] -> Renderer String
24172481 walkit l
24182482 = do st <- get
24192483 put $
2420 st{lastFontChanged=True,fontStack =
2484 st{lastFontChanged = True,
2485 fontStack =
24212486 ((fromMaybe
24222487 FontStyle{stylebase = Normal, bold = False, italic = True}
24232488 (maybeHead (fontStack st))){italic = True}
24262491 st2 <- get
24272492 put $ st2{fontStack = drop 1 (fontStack st2)}
24282493 return $ "{\\itshape " ++ (trim d) ++ "}"
2429
2494
24302495 walktt :: [Anything Char] -> Renderer String
24312496 walktt l
24322497 = do st <- get
24332498 put $
2434 st{lastFontChanged=True,fontStack =
2499 st{lastFontChanged = True,
2500 fontStack =
24352501 ((fromMaybe
24362502 FontStyle{stylebase = Mono, bold = False, italic = False}
24372503 (maybeHead (fontStack st))){stylebase = Mono}
24402506 st2 <- get
24412507 put $ st2{fontStack = drop 1 (fontStack st2)}
24422508 return $ "{\\ttfamily " ++ (trim d) ++ "}"
2443
2509
24442510 walkfn :: String -> [Anything Char] -> String -> Renderer String
24452511 walkfn prefix l postfix
24462512 = do st <- get
24492515 st2 <- get
24502516 put $ st2{getInFootnote = (getInFootnote st)}
24512517 return $ prefix ++ d ++ postfix
2452
2518
24532519 nodeToLaTeX :: Anything Char -> Renderer String
24542520 nodeToLaTeX (C c)
24552521 = do st <- get
24562522 case (fontStack st) of
24572523 (x : _) -> if (getFont x c) == (font st) then return (chartrans c)
24582524 else
2459 do put st{font = (getFont x c),lastFontChanged=(lastFontChanged st)&&( not (c==' ')) }
2525 do put
2526 st{font = (getFont x c),
2527 lastFontChanged = (lastFontChanged st) && (not (c == ' '))}
24602528 return
2461 ((if ((lastFontChanged st)&& (c==' ')) then "{$\\text{ }$}" else "")++(fontsetter (getFont x c)) ++ (fontstyler x) ++ (chartrans c))
2529 ((if ((lastFontChanged st) && (c == ' ')) then "{$\\text{ }$}"
2530 else "")
2531 ++
2532 (fontsetter (getFont x c)) ++
2533 (fontstyler x) ++ (chartrans c))
24622534 _ -> return (chartrans c)
24632535 nodeToLaTeX (Environment ForbiddenTag (Str s) _)
24642536 = return $ s >>= chartrans
25292601 = do prep <- treeToLaTeX2 . prepart $ v
25302602 post <- treeToLaTeX2 . postpart $ v
25312603 return (prep, post)
2532
2604
25332605 fulllist :: Renderer [(String, String)]
25342606 fulllist = mapM texit vv
2535
2607
25362608 vv :: [[Anything Char]]
25372609 vv = [x | x <- splitOn [Item ';'] l, x /= []]
25382610 prepart v = takeWhile ((/=) (C ':')) v
25402612 = case dropWhile ((/=) (C ':')) v of
25412613 (_ : xs) -> xs
25422614 x -> x
2543
2615
25442616 prolist :: [(String, String)] -> MyState -> String
25452617 prolist lis st
25462618 = do (prd, pod) <- lis
25762648 nodeToLaTeX (Item c) = return $ "\n" ++ (itemSeperator c) ++ " "
25772649 nodeToLaTeX (Environment Itemgroup _ l) = walk "" l ""
25782650 nodeToLaTeX (Environment Wikiheading (Str s) l)
2579 = do st<-get
2580 if (getInTab st) > 0 then walktrim ("{\\Large ") (uncenter l)
2581 ("}\n") else walktrim ("\\" ++ (getsec s) ++ "{") (uncenter l) ("}\n" ++ (getsecpost s))
2651 = do st <- get
2652 if (getInTab st) > 0 then
2653 walktrim ("{\\Large ") (uncenter l) ("}\n") else
2654 walktrim ("\\" ++ (getsec s) ++ "{") (uncenter l)
2655 ("}\n" ++ (getsecpost s))
25822656 nodeToLaTeX (Environment Tag (TagAttr ('h' : (x : [])) _) l)
25832657 = if x `elem` "123456" then
25842658 case reads [x] of
26052679 linkToLaTeX
26062680 ((map (C)
26072681 (case g of
2608 '/' : '/' : gx -> "http://" ++ gx
2682 '/' : ('/' : gx) -> "http://" ++ gx
26092683 '/' : _ -> wikiUrlDataToString (urld st) g
26102684 _ -> g))
26112685 ++ [C ' '] ++ l)
27702844 = do let g = case reverse l of
27712845 [] -> []
27722846 (x : xs) -> if x == (C '\n') then reverse xs else l
2773 let xg = case g of
2774 (C '\n'): xs -> xs
2775 _ -> g
2847 let xg
2848 = case g of
2849 (C '\n') : xs -> xs
2850 _ -> g
27762851 let f = shallowFlatten (map renormalize (breakLines3 linewidth xg))
27772852 let glines = (Map.lookup "line" a) /= Nothing
2778 let spg= splitOn "\n" f
2853 let spg = splitOn "\n" f
27792854 let spgl = length (show (length spg))
27802855 let lino = linenumbers spgl 1 (length spg)
2781 let newlines = if glines then intercalate "\n" (map (\(k,v)->k++" "++v) (zip lino spg)) else f
2782 d <- treeToLaTeX2 (breakLines3 linewidth (if glines then map C newlines else xg))
2856 let newlines
2857 = if glines then
2858 intercalate "\n" (map (\ (k, v) -> k ++ " " ++ v) (zip lino spg))
2859 else f
2860 d <- treeToLaTeX2
2861 (breakLines3 linewidth (if glines then map C newlines else xg))
27832862 st <- get
27842863 case
2785 do aa <- Map.lookup "lang" a
2786 guard (not (getInFootnote st))
2787 guard (not ((getInTab st) > 0))
2788 return aa
2789 of
2790 Just j -> do gg<-return $ '\n' :
2791 replace2 (replace2 (replace2
2792 (replace2
2793 (replace2
2794 (replace2
2864 do aa <- Map.lookup "lang" a
2865 guard (not (getInFootnote st))
2866 guard (not ((getInTab st) > 0))
2867 return aa
2868 of
2869 Just j -> do gg <- return $
2870 '\n' :
2871 replace2
27952872 (replace2
27962873 (replace2
27972874 (replace2
27982875 (replace2
27992876 (replace2
28002877 (replace2
2801 ((formatLaTeXBlock defaultFormatOpts)
2802 (highlightAs j newlines))
2803 "'"
2804 "\\textquotesingle{}")
2805 "\n"
2806 "\\newline\n")
2807 "{Shaded}\\newline\n"
2808 "{Shaded}\n")
2809 "{Highlighting}[]\\newline\n\\newline\n"
2878 (replace2
2879 (replace2
2880 (replace2
2881 (replace2
2882 (replace2
2883 ((formatLaTeXBlock
2884 defaultFormatOpts)
2885 (highlightAs j
2886 newlines))
2887 "'"
2888 "\\textquotesingle{}")
2889 "\n"
2890 "\\newline\n")
2891 "{Shaded}\\newline\n"
2892 "{Shaded}\n")
2893 "{Highlighting}[]\\newline\n\\newline\n"
2894 "{Highlighting}[]\n")
2895 "{Highlighting}\\newline\n"
2896 "{Highlighting}\n")
2897 " "
2898 "\\ensuremath{\\text{ }}")
2899 "%"
2900 "\\%")
2901 "$"
2902 "\\$")
2903 "{Highlighting}[]\\newline\n"
28102904 "{Highlighting}[]\n")
2811 "{Highlighting}\\newline\n"
2812 "{Highlighting}\n")
2813 " "
2814 "\\ensuremath{\\text{ }}")
2815 "%"
2816 "\\%")
2817 "$"
2818 "\\$")
2819 "{Highlighting}[]\\newline\n"
2820 "{Highlighting}[]\n")
2821 "&"
2822 "\\&") "_" "\\_") "^" "\\^{}"
2823 lll<-mapM doFonts gg
2824 return (concat lll)
2825 Nothing -> return ("\\TemplateSource{" ++ (rtrim d) ++ "}\n")
2905 "&"
2906 "\\&")
2907 "_"
2908 "\\_")
2909 "^"
2910 "\\^{}"
2911 lll <- mapM doFonts gg
2912 return (concat lll)
2913 Nothing -> return ("\\TemplateSource{" ++ (rtrim d) ++ "}\n")
28262914 nodeToLaTeX (Environment Tag (TagAttr "font" a) l)
28272915 = if Map.member "style" a then
28282916 if
28972985 nodeToLaTeX (Environment Label (Str s) _)
28982986 = return $ "\\label{" ++ s ++ "}"
28992987 nodeToLaTeX _ = return []
2900
2988
29012989 {-DHUN| Unicode escaping for latex strings DHUN-}
2902
2990
29032991 doUnicode :: String -> String
29042992 doUnicode ('\206' : ('\178' : xs))
29052993 = "\\ensuremath{\\beta}" ++ doUnicode xs
00 {-DHUN| module storing information on image licensing on wikipedia wikimedia commons and so on. DHUN-}
11 module Licenses where
2
2
33 {-DHUN| a map (written as list) to map an url to a license to an abbriviated text of the license. In contrast to the map minlicenses in this module the presence of a leading http: is optional DHUN-}
4
4
55 licenses :: [(String, String)]
66 licenses = minlicenses ++ (map go minlicenses)
7 where
8 go :: (String, String) -> (String, String)
7 where go :: (String, String) -> (String, String)
98 go (x, y) = ((drop 5) x, y)
10
9
1110 {-DHUN| a map (written as list) to map an url to a license to an abbriviated text of the license DHUN-}
12
11
1312 minlicenses :: [(String, String)]
1413 minlicenses
1514 = [("http://en.wikipedia.org/wiki/public_domain", "PD"),
1615 ("https://en.wikipedia.org/wiki/Public_domain", "PD"),
17 ("https://de.wikibooks.org/wiki/GNU_Freie_Dokumentationslizenz", "GFDL"),
18 ("https://de.wikipedia.org/wiki/GNU-Lizenz_f%C3%BCr_freie_Dokumentation", "GFDL"),
19 ("https://de.wikipedia.org/wiki/GNU-Lizenz_für_freie_Dokumentation", "GFDL"),
20
16 ("https://de.wikibooks.org/wiki/GNU_Freie_Dokumentationslizenz",
17 "GFDL"),
18 ("https://de.wikipedia.org/wiki/GNU-Lizenz_f%C3%BCr_freie_Dokumentation",
19 "GFDL"),
20 ("https://de.wikipedia.org/wiki/GNU-Lizenz_f\252r_freie_Dokumentation",
21 "GFDL"),
2122 ("http://en.wikipedia.org/wiki/en:GNU_Free_Documentation_License",
2223 "GFDL"),
2324 ("http://en.wikipedia.org/wiki/en:GNU_General_Public_License",
2425 "GPL"),
25 ("http://de.wikipedia.org/wiki/GNU_General_Public_License",
26 "GPL"),
26 ("http://de.wikipedia.org/wiki/GNU_General_Public_License", "GPL"),
2727 ("http://de.wikipedia.org/wiki/Gemeinfreiheit", "PD"),
2828 ("http://en.wikipedia.org/wiki/de:Gemeinfreiheit", "PD"),
2929 ("http://de.wikipedia.org/wiki/Gemeinfreiheit", "PD"),
2424 import Network.URL
2525 import Control.Monad.Except
2626 import System.Process
27
27
2828 notendyet ::
2929 (String -> ImperativeMonad String) ->
3030 ParsecT String () ImperativeMonad String ->
4343 <|>
4444 do a <- anyChar
4545 notendyet action sstart eend (aku ++ [a])
46
46
4747 beginning ::
4848 (String -> ImperativeMonad String) ->
4949 ParsecT String () ImperativeMonad String ->
5757 do _ <- sstart
5858 ne <- notendyet action sstart eend []
5959 return (ne)
60
60
6161 startToEnd ::
6262 (String -> ImperativeMonad String) ->
6363 ParsecT String () ImperativeMonad String ->
7272 do a <- anyChar
7373 s <- startToEnd action sstart eend
7474 return (a : s)
75
75
7676 zeroAction :: (Monad m) => t -> t1 -> m [Char]
7777 zeroAction _ _ = return ""
78
78
7979 runAction ::
8080 String ->
8181 String ->
9090 case x of
9191 Left _ -> return ""
9292 Right xs -> return xs
93
93
9494 chapterAction :: WikiUrl -> String -> ImperativeMonad String
9595 chapterAction wurl text
9696 = do pp <- liftIO (getpage d (wurl))
9999 noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p)
100100 _ -> return ""
101101 where d = (trim (takeWhile (/= '|') text))
102
102
103103 chapterAction2 :: FullWikiUrl -> String -> ImperativeMonad String
104104 chapterAction2 fu text
105105 = do pp <- liftIO (getpage d (wurl))
111111 where e = (trim (takeWhile (/= '|') text))
112112 d = (removePrintVersion (lemma fu)) ++ "/" ++ e
113113 wurl = wikiUrl fu
114
115114
116115 chapterAction3 :: FullWikiUrl -> String -> ImperativeMonad String
117116 chapterAction3 fu text
127126
128127 includeAction :: WikiUrl -> String -> ImperativeMonad String
129128 includeAction = qIncludeAction
130
129
131130 qIncludeAction :: WikiUrl -> String -> ImperativeMonad String
132131 qIncludeAction wurl text
133 = if isInfixOf "Vorlage" text then return ("{{"++text++"}}")
134 else
135 do pp <- (liftIO (print d)) >>liftIO (getpage d (wurl))
132 = if isInfixOf "Vorlage" text then return ("{{" ++ text ++ "}}")
133 else
134 do pp <- (liftIO (print d)) >> liftIO (getpage d (wurl))
136135 case pp of
137 Just p -> do _ <- addContributors d Nothing
138 noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p)
139 _ -> return ""
136 Just p -> do _ <- addContributors d Nothing
137 noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p)
138 _ -> return ""
140139 where d = (trim (takeWhile (/= '|') text))
141140
142141 qBookIncludeAction :: WikiUrl -> String -> ImperativeMonad String
143142 qBookIncludeAction wurl text
144 = if isInfixOf "Vorlage" text then return ("{{"++text++"}}")
145 else
146 do pp <- (liftIO (print d)) >>liftIO (getBookpage d (wurl))
143 = if isInfixOf "Vorlage" text then return ("{{" ++ text ++ "}}")
144 else
145 do pp <- (liftIO (print d)) >> liftIO (getBookpage d (wurl))
147146 case pp of
148 Just p -> do _ <- addContributors d Nothing
149 x <- noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p)
150 st<- get
151 systempdir <- liftIO getTemporaryDirectory
152 tempdir <- liftIO $ createTempDirectory systempdir "MediaWiki2LaTeXParser"
153 liftIO $ Tools.writeFile (tempdir </> "input") x
154 _ <- liftIO $ system("mediawiki2latex -x "++( Hex.hex (show (fullconfigbase{compile=Just tempdir}))))
155 t<-liftIO $ Tools.readFile (tempdir </> "output")
156 put st{loadacu=((read t)++(loadacu st)::[Anything Char])}
157 return x
158 _ -> return ""
147 Just p -> do _ <- addContributors d Nothing
148 x <- noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p)
149 st <- get
150 systempdir <- liftIO getTemporaryDirectory
151 tempdir <- liftIO $
152 createTempDirectory systempdir "MediaWiki2LaTeXParser"
153 liftIO $ Tools.writeFile (tempdir </> "input") x
154 _ <- liftIO $
155 system
156 ("mediawiki2latex -x " ++
157 (Hex.hex (show (fullconfigbase{compile = Just tempdir}))))
158 t <- liftIO $ Tools.readFile (tempdir </> "output")
159 put st{loadacu = ((read t) ++ (loadacu st) :: [Anything Char])}
160 return x
161 _ -> return ""
159162 where d = (trim (takeWhile (/= '|') text))
160163
161
162164 makeUrl :: String -> String -> String -> [Char]
163165 makeUrl lang theFam thePage
164166 = (unify . exportURL)
182184 Absolute
183185 (Host{protocol = HTTP True, host = "toolserver.org",
184186 port = Nothing})}))
185
187
186188 langau :: Map String String
187189 langau
188190 = fromList
203205 ("ce", "\1071\1079\1076\1072\1088\1093\1086"), ("nl", "Auteurs"),
204206 ("es", "Autores"), ("eu", "Egile"), ("fr", "Auteurs"),
205207 ("cs", "Autor"), ("br", "Aozer")]
206
208
207209 makeHeader :: FullWikiUrl -> Maybe String -> [Char]
208210 makeHeader fullurl m
209211 = let mmm = m >>= (\ yy -> Map.lookup yy langau) in
222224 "\\label{Contributors}\n" ++
223225 "\\begin{longtable}{rp{0.6\\linewidth}}\n" ++
224226 "\\textbf{Edits}&\\textbf{User}\\\\\n"
225
227
226228 makeHeaderHTML :: FullWikiUrl -> Maybe String -> [Char]
227229 makeHeaderHTML fullurl m
228230 = let mmm = m >>= (\ yy -> Map.lookup yy langau) in
238240 _ -> "Contributors")
239241 ++
240242 "</h2>\n" ++
241 "<table rules=\"all\">" ++
242 "<tr><td>Edits</td><td>User</td></tr>"
243 "<table rules=\"all\">" ++ "<tr><td>Edits</td><td>User</td></tr>"
243244
244245 makeBody :: (Ord t) => Map t Contributor -> URL -> [Char]
245246 makeBody m u = concat (map go (sort (toList m)))
251252 (concat
252253 (map chartransforlink (exportURL (u{url_path = (fun (href v))}))))
253254 ++ "}{" ++ (concat (map chartrans (name v))) ++ "}\\\\\n"
254
255
255256 makeBodyHTML :: (Ord t) => Map t Contributor -> URL -> [Char]
256257 makeBodyHTML m u = concat (map go (sort (toList m)))
257258 where fun ('/' : xs) = xs
258259 fun xs = xs
259260 go (_, v)
260 = "<tr><td>"++(show (edits v)) ++
261 "</td><td><a href=\"" ++
262 (((exportURL (u{url_path = (fun (href v))}))))
263 ++ "\">" ++ (concat (map chartrans (name v))) ++ "</a></td></tr>"
264
261 = "<tr><td>" ++
262 (show (edits v)) ++
263 "</td><td><a href=\"" ++
264 (((exportURL (u{url_path = (fun (href v))})))) ++
265 "\">" ++ (concat (map chartrans (name v))) ++ "</a></td></tr>"
265266
266267 makeContributors :: Maybe URL -> ImperativeMonad (String, String)
267268 makeContributors uu
277278 = case (deepGet2 "html" (parseit minparsers yy)) of
278279 ((Environment Tag (TagAttr _ m) _) : []) -> Map.lookup "lang" m
279280 _ -> Nothing
280 return (
281 ((makeHeader (fullUrl st) lang) ++
282 (makeBody (myaudict) (url (fullUrl st))) ++
283 "\\end{longtable}\n" ++ "\\pagebreak\n"),(makeHeaderHTML (fullUrl st) lang) ++
284 (makeBodyHTML (myaudict) (url (fullUrl st))) ++"</table>")
285
281 return
282 (((makeHeader (fullUrl st) lang) ++
283 (makeBody (myaudict) (url (fullUrl st))) ++
284 "\\end{longtable}\n" ++ "\\pagebreak\n"),
285 (makeHeaderHTML (fullUrl st) lang) ++
286 (makeBodyHTML (myaudict) (url (fullUrl st))) ++ "</table>")
287
286288 parseUrl :: String -> ImperativeMonad FullWikiUrl
287289 parseUrl u
288290 = case analyseFull u of
289291 Just x -> return x
290292 _ -> throwError (WikiUrlParseError u)
291
293
292294 getContributors ::
293295 [String] ->
294 ImperativeMonad
295 (([(Map String Contributor)], [(Maybe String)]))
296 ImperativeMonad (([(Map String Contributor)], [(Maybe String)]))
296297 getContributors u
297298 = do st <- get
298 stz<- liftIO imperativeStateZero
299 put stz {counter=counter st}
299 stz <- liftIO imperativeStateZero
300 put stz{counter = counter st}
300301 au <- mapM go u
301302 newState <- get
302303 put st
307308 sst <- get
308309 put sst{fullUrl = purl}
309310 addContributors (lemma purl) (Just (UrlAnalyse.url purl))
310
311
311312 addContributors ::
312313 [Char] -> Maybe URL -> ImperativeMonad ((Maybe String))
313314 addContributors theLemma uu
314315 = do sst <- get
315316 let st = fullUrl sst
316317 thetheLemma <- liftIO $ return theLemma
317 thetheHostname <- liftIO $ return (hostname st)
318 thetheHostname <- liftIO $ return (hostname st)
318319 thetheUU <- liftIO $ return uu
319320 au <- (liftIO ((((fun sst)) thetheLemma thetheHostname thetheUU)))
320321 :: ImperativeMonad ((Map String Contributor, Maybe String))
322323 lic <- liftIO (((return . snd)) au)
323324 put sst{audict = auau : (audict sst)}
324325 return lic
325 where fun ssst lem ho uuu
326 where fun ssst lem ho uuu
326327 = do xx <- simpleContributors lem ho uuu ssst
327328 return (Data.List.foldl runGo2 Map.empty xx, myvalue xx)
328329 runGo2 mymap (author, theHref, theEdits, _)
332333 = case yy of
333334 [(_, __, _, Just lic)] -> (Just lic)
334335 _ -> Nothing
335
336
336337 infun ::
337338 String ->
338339 String -> Integer -> Maybe Contributor -> Maybe Contributor
340341 = case xx of
341342 Nothing -> Just Contributor{name = a, href = h, edits = e}
342343 Just old -> Just old{edits = (edits old) + e}
343
344
344345 noinclude :: t -> String -> ImperativeMonad [Char]
345346 noinclude wurl
346347 = runAction "<noinclude>" "</noinclude>" (zeroAction wurl)
347
348
348349 runActions :: FullWikiUrl -> String -> ImperativeMonad String
349350 runActions fu text
350351 = do x <- noinclude wurl text
351 y <- runAction "{{Druckversion Kapitel|" "}}" (chapterAction3 fu)
352 x
352 y <- runAction "{{Druckversion Kapitel|" "}}" (chapterAction3 fu) x
353353 v <- runAction "{{Print entry|" "}}" (chapterAction2 fu) y
354354 z <- runAction "{{print entry|" "}}" (chapterAction2 fu) v
355355 a <- runAction "{{Print entry|" "}}" (chapterAction wurl) z
365365 _ <- runAction "{{:" "}}" (qIncludeAction wurl) i
366366 runAction "{{:" "}}" (qIncludeAction wurl) j
367367 where wurl = wikiUrl fu
368
368
369369 runBookActions :: FullWikiUrl -> String -> ImperativeMonad String
370370 runBookActions fu text
371371 = do x <- noinclude wurl text
372372 runAction "[[" "]]" (qBookIncludeAction wurl) x
373373 where wurl = wikiUrl fu
374
375374
376375 replacements :: String -> String
377376 replacements x
386385 "\n|")
387386 "{{Fortran:Vorlage: Table}}"
388387 "prettytable"
389
388
390389 loadPlain :: ImperativeState -> Maybe URL -> ImperativeMonad [Char]
391390 loadPlain st uu
392391 = let fu = fullUrl st in
405404 runBookActions fu p
406405 _ -> throwError (DownloadError (lemma fu) (exportURL (url fu)))
407406
408
409407 loadHTML :: ImperativeState -> ImperativeMonad String
410408 loadHTML st
411409 = let fu = fullUrl st in
412 do midst<-get
413 (res,newst)<-liftIO (runStateT (runExceptT (loadPlain st (Just (url fu)))) midst)
414 case res of
415 Right _->put newst
416 _ -> return ()
410 do midst <- get
411 (res, newst) <- liftIO
412 (runStateT (runExceptT (loadPlain st (Just (url fu)))) midst)
413 case res of
414 Right _ -> put newst
415 _ -> return ()
417416 x <- liftIO (geturl2 (exportURL (url fu)))
418417 return . decode . unpack $ x
419
420418
421419 loadBookHTML :: ImperativeState -> ImperativeMonad String
422420 loadBookHTML st
423421 = let fu = fullUrl st in
424 do midst<-get
425 (res,newst)<-liftIO (runStateT (runExceptT (loadPlain st (Just (url fu)))) midst)
426 case res of
427 Right _->put newst
428 _ -> return ()
422 do midst <- get
423 (res, newst) <- liftIO
424 (runStateT (runExceptT (loadPlain st (Just (url fu)))) midst)
425 case res of
426 Right _ -> put newst
427 _ -> return ()
429428 x <- liftIO (geturl2 (exportURL (url fu)))
430429 return . decode . unpack $ x
431
432430
433431 loadMediaWiki :: ImperativeState -> ImperativeMonad [Char]
434432 loadMediaWiki st
457455 throwError
458456 (DownloadError (lemma fu) (exportURL (url fu)))
459457 _ -> throwError (DownloadError (lemma fu) (exportURL (url fu)))
460
458
461459 {-DHUN| main function to download images form the wiki. It takes the RunMode as only parameter. In case of HTML the html from the website is loaded. In all other cases the wiki source text is downloaded. In case of ExpandedTemplates the templates are also expanded by mediawiki running on the wiki website DHUN-}
462
460
463461 load :: RunMode -> ImperativeMonad String
464462 load theRunMode
465463 = do st <- get
00 {-DHUN| module for logging currently unused DHUN-}
11 module Logger where
22 import ImperativeState
3
3
44 {-DHUN| initilaize logging facility DHUN-}
5
5
66 minInit :: ImperativeMonad ()
77 minInit = return ()
33 import Network.URI
44 import Data.List.Split
55 import qualified Data.Map as Map
6
6
77 {-DHUN| Wikimedia project prefixes so 'de' from de.wikipedia.org DHUN-}
8
8
99 foreignPrefixes :: [String]
1010 foreignPrefixes
1111 = ["af", "als", "an", "roa-rup", "ast", "gn", "av", "ay", "az",
3131 "kn", "ks", "ml", "mr", "ne", "new", "or", "pa", "sa", "si", "ta",
3232 "te", "km", "lo", "th", "am", "ti", "iu", "chr", "ko", "ja", "zh",
3333 "wuu", "lzh", "yue"]
34
34
3535 {-DHUN| Wikimedia projects for interwiki links [[w:Foobar]] means en.wikipedia.org/wiki/Foobar DHUN-}
36
36
3737 multilangwikis :: [(String, String)]
3838 multilangwikis
3939 = ([("w", "wikipedia"), ("wikipedia", "wikipedia"),
4747 ++
4848 [("wikispecies", "wikispecies"), ("v", "wikiversity"),
4949 ("wikiversity", "wikiversity")])
50
50
5151 {-DHUN| Wikimedia projects for interwiki links to wikis which only have got a single language version DHUN-}
52
52
5353 singlelangwikis :: [(String, String)]
5454 singlelangwikis
5555 = [("wikimedia", "wikimediafoundation"),
5656 ("foundation", "wikimediafoundation"),
5757 ("wmf", "wikimediafoundation"), ("mw", "mediawiki")]
58
58
5959 {-DHUN| Wikimedia projects for interwiki links to wikis which only have got a single language version DHUN-}
60
60
6161 wikimediasingellangwikis :: [(String, String)]
6262 wikimediasingellangwikis
6363 = [("commons", "commons"), ("metawikipedia", "meta"),
6464 ("meta", "meta"), ("m", "meta"), ("incubator", "incubator"),
6565 ("strategy", "strategy")]
66
66
6767 {-DHUN| All Wikis DHUN-}
68
68
6969 allwikis :: [(String, String)]
7070 allwikis
7171 = multilangwikis ++ singlelangwikis ++ wikimediasingellangwikis
72
72
7373 {-DHUN| Prefixes for including images in wikis DHUN-}
74
74
7575 imgtags :: [[Char]]
7676 imgtags
7777 = [map toLower x |
9595 "\1057\1083\1080\1082\1072", "Bild", "\3652\3615\3621\3660",
9696 "Talaksan", "Dosya", "\1496\1506\1511\1506", "\22294\20687",
9797 "\22294\20687"]]
98
98
9999 {-DHUN| lower Greek letter for HTML entity to latex so &delta; to \\delta DHUN-}
100
100
101101 lowergreek :: [[Char]]
102102 lowergreek
103103 = ["alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta",
104104 "theta", "iota", "kappa", "lambda", "mu", "nu", "xi", "pi", "rho",
105105 "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega"]
106
106
107107 {-DHUN| not Greek but to be processed like the Greeks above DHUN-}
108
108
109109 notsogreek :: [[Char]]
110110 notsogreek = ["cap", "cup", "sim"]
111
111
112112 {-DHUN| Full list of characters with Greek like processing explained above DHUN-}
113
113
114114 greek :: [[Char]]
115115 greek
116116 = concat
119119 (x : xs) -> return [(toUpper x) : xs]
120120 [] -> return [])
121121 ++ lowergreek ++ notsogreek
122
122
123123 {-DHUN| HTML entities to latex DHUN-}
124
124
125125 htmlchars :: [([Char], [Char])]
126126 htmlchars
127127 = [("thetasym", "{\\mbox{$\\vartheta$}}"),
504504 ("zwnj", "{}"), ("zwj", ""), ("lrm", ""), ("rlm", ""),
505505 ("gt", "{\\mbox{$>$}}"), ("Mu", "{\\mbox{$\\Mu$}}"),
506506 ("#151", "{--}"), ("Nu", "{\\mbox{$\\Nu$}}"), ("frasl", "\8260")]
507
507
508508 {-DHUN| get latex representation of HTML entity like &amp; DHUN-}
509
509
510510 getHtmlChar :: String -> String
511511 getHtmlChar x = Map.findWithDefault [] x (Map.fromList htmlchars)
512
512
513513 {-DHUN| a function to remove the taling print version string from urls (usually on wikibooks). This function is needed so the name of the book without the tailing print version string will be printed on the titlepage of the book DHUN-}
514
514
515515 removePrintVersion :: [Char] -> [Char]
516516 removePrintVersion lem
517517 = fun
518 ["/Druckversion","/ Druckversion","/Print version", "/Complete Wikibook", "/All Chapters",
519 "/Print Version", "/print version", "/Printable version",
520 "/The Whole Book", "/print", ": Druckversion"]
518 ["/Druckversion", "/ Druckversion", "/Print version",
519 "/Complete Wikibook", "/All Chapters", "/Print Version",
520 "/print version", "/Printable version", "/The Whole Book",
521 "/print", ": Druckversion"]
521522 lem
522523 where fun (y : ys) x
523524 = case splitOn y x of
524525 (z : _) -> fun ys z
525526 _ -> []
526527 fun [] x = x
527
528
528529 {-DHUN| Nearly all HTML tags DHUN-}
529
530
530531 goodtags1 :: [[Char]]
531532 goodtags1
532533 = ["includeonly", "references", "blockquote", "noinclude",
536537 "strike", "object", "input", "center", "legend", "iframe", "small",
537538 "video", "audio", "style", "input", "label", "tbody", "thead",
538539 "title", "frame", "param", "base", "area", "font", "code", "span",
539 "abbr", "body", "link", "menu", "math", "meta", "samp", "cite", "head",
540 "html", "poem", "form", "cite", "ref", "div", "pre", "sub", "sup",
541 "big", "del", "map", "bdo", "var", "dfn", "kbd", "col", "ins", "bdi",
542 "dir", "img", "h1", "h2", "h3", "h4", "h5", "h6", "li", "ul", "ol",
543 "tt", "dd", "dl", "dt", "hr", "em", "b", "i", "s", "u", "p", "q",
544 "a"]
545
540 "abbr", "body", "link", "menu", "math", "meta", "samp", "cite",
541 "head", "html", "poem", "form", "cite", "ref", "div", "pre", "sub",
542 "sup", "big", "del", "map", "bdo", "var", "dfn", "kbd", "col",
543 "ins", "bdi", "dir", "img", "h1", "h2", "h3", "h4", "h5", "h6",
544 "li", "ul", "ol", "tt", "dd", "dl", "dt", "hr", "em", "b", "i",
545 "s", "u", "p", "q", "a"]
546
546547 {-DHUN| HTML tags for tables rows in tables and so on, only lower case DHUN-}
547
548
548549 tabletags :: [[Char]]
549550 tabletags = ["table", "td", "th", "tr"]
550
551
551552 {-DHUN| HTML tags for tables rows in tables and so on, lower case as well as upper case DHUN-}
552
553
553554 listOfTableTags :: [[Char]]
554555 listOfTableTags = tabletags ++ (map (map toUpper) tabletags)
555
556
556557 {-DHUN| All HTML tags DHUN-}
557
558
558559 listOfTags :: [[Char]]
559560 listOfTags = goodtags1 ++ (map (map toUpper) goodtags1)
560
561
561562 {-DHUN| Character escaping from Unicode to latex DHUN-}
562
563
563564 chartrans :: Char -> String
564565 chartrans '\'' = "\\textquotesingle{}"
565566 chartrans '[' = "{$\\text{[}$}"
578579 chartrans '<' = "<{}"
579580 chartrans '>' = ">{}"
580581 chartrans '-' = "-{}"
581 chartrans ' ' = "\\,"
582 chartrans '\8239' = "\\,"
582583 chartrans c = c : []
583
584
584585 {-DHUN| Character escaping from Unicode to web links inside latex with the URL package DHUN-}
585
586
586587 chartransforlink :: Char -> String
587588 chartransforlink '&' = "\\&"
588589 chartransforlink '%' = "\\%"
00 {-DHUN| A module providing all necessary types of a parse tree for the representation of source written in the MediaWiki markup language DHUN-}
11 module MediaWikiParseTree where
22 import Data.Map.Strict (Map)
3
3
44 {-DHUN| Lists the different environment possible in the mediawiki markup language and example of an environment is an HTML tag with everything included between the its opening and closing tags. Her is is called Tag DHUN-}
5
5
66 data EnvType = Wikilink
77 | IncludeOnly
88 | ImageMap
5050 | Parameter
5151 | NumHtml
5252 deriving (Show, Eq, Read)
53
53
5454 {-DHUN| A type representing a node in a the parse tree. Open and Close represent opening and closing bracket. They will be replace by environments (look at 'Environment' in this data structure) before the parse tree is processed further. The C represents a single character. S stands for a String. Tab is a special elements used like the tabulator character for line breaking purposes. Quad is similar to that. The Item... data construction are for processing itemization enumerations and so on and well be replace be environments before further processing DHUN-}
55
55
5656 data Anything a = Environment EnvType StartData [Anything a]
5757 | Open Int EnvType StartData Int
5858 | Close Int EnvType
6363 | Quad
6464 | Tab
6565 deriving (Show, Eq, Read)
66
66
6767 {-DHUN| represents the result of a parser for the begin of an environment. A parser for an opening HTML tag is an example. TagAttr means tag with attributes. And is thus a string for the element and a map from string to string for it attributes. Str is a String. And Attr is key value pair and used for attribute in tables. DHUN-}
68
68
6969 data StartData = Str [Char]
7070 | TagAttr String (Map String String)
7171 | Attr (String, String)
1313 import Data.Maybe
1414 import Network.URI
1515 import WikiHelper
16
1617 {-DHUN| flattens out the HTML 'a' tags. That is it replaces each 'a' element with its content. That is everything that is between its opening and its closing tag. The only parameter of this function is the parse tree to be processed. This function returns the parse tree with the 'a' HTML elements flattened |DHUN-}
17
18
1819 reducea :: [Anything Char] -> [Anything Char]
1920 reducea ll = concat (map go ll)
20 where
21 go :: Anything Char -> [Anything Char]
21 where go :: Anything Char -> [Anything Char]
2222 go (Environment Tag (TagAttr "a" _) l) = l
2323 go (Environment x y l) = [Environment x y (reducea l)]
2424 go x = [x]
25
25
2626 {-DHUN| flattens out the HTML 'div' tags, which have got a 'class' attributes present with the value 'noresize'. That is it replaces each 'div' element with the properties mentioned above by its content. That is everything that is between its opening and its closing tag. The only parameter of this function is the parse tree to be processed. This function returns the parse tree with the 'div' HTML elements with the properties given above flattened |DHUN-}
27
27
2828 reducediv :: [Anything Char] -> [Anything Char]
2929 reducediv ll = concat (map go ll)
30 where
31 go :: Anything Char -> [Anything Char]
30 where go :: Anything Char -> [Anything Char]
3231 go (Environment Tag (TagAttr "div" m) l)
3332 | (Map.lookup "class" m) == (Just "noresize") = l
3433 go (Environment x y l) = [Environment x y (reducea l)]
3534 go x = [x]
3635
3736 {-DHUN| a function to get HTML elements out of a parse tree. The first parameter is name of the tag to be looked for. The second parameter is a key in the attributes of that element that has to be present for the element to be considered. The third parameter is a value that has to be found under the given key in the attributes of the element, in order for the element to be part of the returned output list. The fourth parameter is the parse tree. |DHUN-}
38
37
3938 deepGet ::
4039 [Char] -> String -> [Char] -> [Anything a] -> [Anything a]
4140 deepGet t k v ll = concat $ map go ll
4645 go _ = []
4746
4847 {-DHUN| flattens a part of the parse tree, that is takes the characters found in the tree and turns them into a string dropping all other information in the tree DHUN-}
49
48
5049 deepFlatten :: [Anything t] -> [Anything t]
5150 deepFlatten ll = concat $ map go ll
52 where go (Environment _ _ l) = (deepFlatten l)
51 where go (Environment HtmlChar s l) = [Environment HtmlChar s l]
52 go (Environment _ _ l) = (deepFlatten l)
5353 go x = [x]
5454
5555 {-DHUN| converts a wiki source document to a parse tree to be converted to LaTeX be treeToLaTeX3. The first parameter is that list of parsers. That is the list of environments to be recognized by the parser. This is usually either only plain HTML, or HTML mixed with mediawiki markup. The second parameter is the source code to be parsed. This function returns a parse tree |DHUN-}
56
56
5757 parseit :: [MyParser Char] -> String -> [Anything Char]
5858 parseit pp x
5959 = (parseit2
6666 []))
6767 ('\n' : x))
6868
69
7069 {-DHUN| helper function of parseit, not to be called directly. This function takes the parser for the grammar, in the sense of a parser of the parsec library, (so that is the final combined parser) as first argument. It takes the source code to be parsed (usually HTML of mediawiki markup mixed with HTML) as second and runs the parser on the source code. It returns the resulting parse tree. |DHUN-}
71
70
7271 parseit2 :: Parser [Anything Char] -> String -> [Anything Char]
7372 parseit2 p input
7473 = case (parse p "" input) of
7675 Right x -> x
7776
7877 {-DHUN| A parser for one particular element of the mediawiki grammar DHUN-}
79
78
8079 data MyParser tok = MyParser{bad ::
8180 [Anything tok] -> GenParser tok () (),
8281 start :: MyStack tok -> GenParser tok () StartData,
8483 self :: EnvType,
8584 modify :: StartData -> [Anything tok] -> [Anything tok],
8685 reenv :: EnvType -> EnvType}
87
86
8887 {-DHUN| the stack of the parser. See documentation on MyStackFrame in this module for details. DHUN-}
89
88
9089 type MyStack tok = [MyStackFrame tok]
91
90
9291 {-DHUN| A stack frame on the parsers stack. A stack frame represents an environment that was opened. So something like an opening HTML tag. The value endparser. Is a parser that should match exactly the closing bracket of the environment. The value startdata is the return value of the start parser of the enviroment. This stack frame is created immediately after the startparser of an environment has matches and is given the return value of that startparser as startdata. The value environment is the environment this stack frame belongs to in case of an HTML tag this would be Tag. See the type EnvType in the module MediaWikiParseTree for a full list of possible environments. In the parse tree that is finally generated each node with arbitrarily nested children has got an EnvType associated with it. The badparser is a parser that is repeatedly tries while processing the current environment, if it matches the current environment is considered to be invalid. Backtracking occurs an the characters currently under consideration are parser (possible very) different manner. So with badparser you can signal that an environment is invalid if a creating parser (the badparser) matches within the environment. The parserenumber is just a number that uniquely identifies each parsers in the list of parsers active for the whole parsing process. These numbers are usually generated by the remake function. The nestingdepth is a bit of a misnomer. It is a unique number for each stack frame. So each stack frame that is newly created gets a different number. DHUN-}
93
92
9493 data MyStackFrame tok = MyStackFrame{endparser ::
9594 GenParser tok () (),
9695 startdata :: StartData, environment :: EnvType,
9796 badparser :: [Anything tok] -> GenParser tok () (),
9897 parsernumber :: Int, nestingdepth :: Int}
99
98
10099 {-DHUN| takes a result returned by parseAnything3 and converts it into a parse tree for further processing. The only purpose of this function is to convert the notation of bracket. The bracket a denoted by the Open and Close parse tree elements of the type Anything be the function parseAnything3. The need to be converted to environments, that is node with children in the parse tree. The environments will be denoted by the 'Environment' data constructors of the type Anything|DHUN-}
101
100
102101 decon2 ::
103102 (Monad m) =>
104103 [(a1, MyParser a)] -> m (t, [Anything a]) -> m [Anything a]
105104 decon2 l x
106105 = do (_, s) <- x
107106 return (findMatchingBrackets l (reverse s))
108
107
109108 {-DHUN| Usually bracket can be close in an order different from the reverse one in which they were opened. But for certain environments this is not allowed, and the order has to be strictly followed. This value is the list of those environments. DHUN-}
110
109
111110 preserving :: [EnvType]
112111 preserving
113112 = [Math, Source, Comment, Gallery, NoWiki, NoInclude, BigMath,
114113 Preformat, TableCap, TableRowSep, TableColSep, TableHeadColSep,
115114 TemplateInside, Wikitable, TableTag]
116
115
117116 {-DHUN| Helper function for parseAnyClosingBracket. Should not be called directly. The only parameter is the current parser stack. Returns the depth on the stack of the stack frame whose closing bracket matched. DHUN-}
118
117
119118 parseAnyClosingBracket2 ::
120119 (Show tok, Eq tok, Read tok) =>
121120 MyStack tok -> GenParser tok () Integer
122121 parseAnyClosingBracket2
123122 = (parseAnyClosingBracket3 0) . (List.map (\ x -> endparser x))
124
123
125124 {-DHUN| Helper function for parseAnyClosingBracket. Should not be called directly. The this will take stack frame by stack frame of the stack. The first parameter is an integer and indicates how many stack frames have allready been take of the stack. Returns the depth on the stack of the stack frame whose closing bracket matched. DHUN-}
126
125
127126 parseAnyClosingBracket3 ::
128127 Integer -> [GenParser tok () ()] -> GenParser tok () Integer
129128 parseAnyClosingBracket3 i (x : xs)
132131 return i)
133132 <|> (parseAnyClosingBracket3 (i + 1) xs)
134133 parseAnyClosingBracket3 _ [] = pzero
135
134
136135 {-DHUN| Remove the n'th elements from a list. n is an integer an given is first parameter. The list to be processed is given as second parameter DHUN-}
137
136
138137 myremove :: Integer -> [a] -> [a]
139138 myremove _ [] = []
140139 myremove 0 (_ : xs) = myremove (-1) xs
141140 myremove i (x : xs) = x : (myremove (i - 1) xs)
142
141
143142 {-DHUN| Enumerates a list of parsers. needed to prepare a list of parsers for use with parseAnything2 DHUN-}
144
143
145144 remake :: [a] -> [(Int, a)]
146145 remake x = zip (iterate (+ 1) 0) x
147
146
148147 {-DHUN| predicate to test whether the current stack-frame-index is in a stack. The first parameter is the stack-frame-index the second parameter is the stack. Returns true if it could be found DHUN-}
149
148
150149 isin ::
151150 (Show tok, Eq tok, Read tok) => Int -> (MyStack tok) -> Bool
152151 isin i s = i `elem` (List.map nestingdepth s)
153
152
154153 {-DHUN| tries to parse exactly one specific opening bracket. The parameters are identical to the ones of parseAnyOpeningBracket, which the exception of the second parameter. The second parameter is that parser for the bracket currently under consideration. This function 'catch' the BBad 'exception' 'thrown' by parseAnything. In this case it returns pzero, causing the parser to backtrack. DHUN-}
155
154
156155 parseSpecificOpeningBracket ::
157156 (Show tok, Eq tok, Read tok) =>
158157 Int ->
173172 case r of
174173 BBad (ss, y) -> if isin v ss then pzero else return (BBad (ss, y))
175174 _ -> return r
176
175
177176 {-DHUN| tried to parse any of the opening brackets given by the parsers passed as the third parameter. The first parameter is the stack number (see documentation of the parseAnything function for more details on that). The second parameter is the current parser stack (see documentation of the parseAnything2 function for more details on that). The forth parameter is the list of parsers to b taken into account by the general parsing process. In contrast the third parameter contains only a list of parsers that are allowed to match in the current step of the parsing process. The fifth parameter is the current parser output stream. That is the information returned by the parser up to the current step. It is kind of an accumulator for parser results. DHUN-}
178
177
179178 parseAnyOpeningBracket ::
180179 (Show tok, Eq tok, Read tok) =>
181180 Int ->
188187 parseAnyOpeningBracket v s (x : xs) l i
189188 = try (parseSpecificOpeningBracket v x s l i) <|>
190189 parseAnyOpeningBracket v s xs l i
191
190
192191 {-DHUN| insert a list of closing brackets into the parser output stream. Later on matching opening and closing brackets will be found and parse tree will be generated this way. The first parameter is an integer it is the number of brackets which should be close. The second parameter is the parser stack. It says which kind of brackets should be closed. It returns a parser output stream just containing the opening brackets DHUN-}
193
192
194193 generateClosingBrackets ::
195194 (Num a, Eq a, Show tok, Eq tok, Read tok) =>
196195 a -> MyStack tok -> [Anything tok]
200199 = (Close (length xs) (environment s)) :
201200 (generateClosingBrackets (mi - 1) xs)
202201 generateClosingBrackets _ _ = []
203
202
204203 {-DHUN| insert a list of opening brackets into the parser output stream. Later on matching opening and closing brackets will be found and parse tree will be generated this way. The first parameter is an integer it is the number of brackets which should be opened. The second parameter is the parser stack. It says which kind of brackets should be opened. It returns a parser output stream just containing the opening brackets DHUN-}
205
204
206205 generateOpeningBrackets ::
207206 (Num a, Eq a, Show tok, Eq tok, Read tok) =>
208207 a -> MyStack tok -> [Anything tok]
211210 = (Open (length xs) (environment s) (startdata s) (parsernumber s))
212211 : (generateOpeningBrackets (mi - 1) xs)
213212 generateOpeningBrackets _ _ = []
214
213
215214 {-DHUN| a version of either with the difference that the left and right types are the same. RRight stands for sucessful parse of a token. BBad stands for parse failure in which the next possiblity is tried. DHUN-}
216
215
217216 data Either2 b = RRight b
218217 | BBad b
219
218
220219 {-DHUN| tries to match any of the currently possible closing brackets. Brackets closed in a order different from the reverse to the one in which they were opened are usually possible. And exception are the so called preserving elements, they can only be closed in the correct order. In the general case of this kind of crossbracketing it is necessary to add some opening and closing brackets to the output stream and to take the right stack frame of the stack keeping all others on it in the right order. DHUN-}
221
220
222221 parseAnyClosingBracket ::
223222 (Show tok, Eq tok, Read tok) =>
224223 Int ->
245244 ((generateClosingBrackets mi s) ++
246245 (reverse (generateOpeningBrackets mi ss))))
247246 ++ i)
248
247
249248 {-DHUN| this function tries to match the bad parser of the current environment. If it matches it returns BBAD, otherwise it returns RRight. See also comment of the parserAnything function. DHUN-}
250
249
251250 trybadparser ::
252251 (Show tok, Eq tok, Read tok) =>
253252 MyStack tok ->
260259 [] -> return False
261260 if x == True then return (BBad (s, [])) else
262261 return (RRight (s, []))
263
262
264263 {-DHUN| this is the main function of the parser which calls itself recursively. To run the parser you should not call this function directly but rather use parseAnything2. The parameter are the same as the parameters to the parameters to the function parseAnything2. So look at the documentation for their meaning. But there is one additional parameter namely the first one. This is the stack frame number, it is increase for every stack frame and never decreased this way each stack frame has got a unique identifier this way. An other difference is the return type this function returns the always same type as the function parseAnything2, but wrapped in the Either2 monad. The Either2 monad has an additional bit to signal whether the parse was good or bad. The bad bit signals so called bad parser of the current environment has matched signaling that the environment is to be considered invalid, and we have to backtrack. But what we do here is just stop paring and return a successful parse, but return the bad flag as set in the return type. This will propagate through to the parser that was trying to open the environment that caused the current problem. If that recognizes the problem it can flag the environment as failed by returning pzero. So again here we just return BBad. So we kind of throw an exception. And in parseSpecificOpeningBracket we will catch BBad and signal the actual problem by returning pzero and that way kick of backtracking. DHUN-}
265
264
266265 parseAnything ::
267266 (Show tok, Eq tok, Read tok) =>
268267 Int ->
296295 i)
297296 <|> return (BBad (s, i))
298297 [] -> pzero
299
298
300299 {-DHUN| This is the main entry point of the parse. So the function you need to call when you want to convert the source into the parse tree. The first parameter is the stack. I usually should contain only and exactly the root stack frame. The second parameter is an enumerated list of parsers. You usually take a list like the list parsers from this module and enumerate it by running remake on it. So thats the list of environments the parser is able to recognize. The third parameter is the parse results that have been created so far. Since we are just starting the parse this has to be the empty list. The function returns a parser. See the documentation of the parse module for more details on the type GenParser. Roughly is means that this parser takes an input list whose items are of type tok and that the parsers does not have state (hence the void type '()') and return a tuple. The first elements of that tuple is a stack. Where a new stack frame is added to the stack for each new environment that is found to open by the parser, like an opening HTML tag. And the second elements of the tuple is a parse tree, that is a list of parse tree elements, where each parse tree element may contain sublists of parse tree element. This way it is a real tree. DHUN-}
301
300
302301 parseAnything2 ::
303302 (Show tok, Eq tok, Read tok) =>
304303 MyStack tok ->
309308 case x of
310309 BBad (_, b) -> return (s, b)
311310 RRight b -> return b
312
311
313312 {-DHUN| this find the matching closing bracket for an opening bracket. It returns a tuple. Its first element is the environment created form the given opening bracket together with its closing bracket and the content between opening and closing bracket. Its second elements is the remaining list of parsed elements, after the closing bracket. This list does still contain the Open and Close parser tree elements for opening and closing bracket, and those are not yet converted to environments. This function takes the list of parse tree elements after the opening bracket as first input parameter. It takes the index of the parser that created the opening bracket as second input parameter. That is the index created by the remake function in this module. It takes the size of the stack at the time when the opening bracket was found as third input parameter. It takes the EnvType of the environment of the opening bracket as fourth input parameter. It takes the StartData parse result associated with the opening bracket as fifth parameter. The sixth parameter is the accumulator an should be the empty list when calling this function externally. The seventh parameter is the remaining parse tree after the opening bracket without the opening and closing brackets converted to environments DHUN-}
314
313
315314 findMatchingClosingBracket ::
316315 [(a1, MyParser a)] ->
317316 Int ->
331330 = (Environment ((reenv (snd (l !! n))) e) s
332331 ((modify (snd (l !! n))) s (findMatchingBrackets l (reverse b))),
333332 [])
334
333
335334 {-DHUN| run findMatchingBrackets on the inner part environment given as second parameter. This function takes the enumerated list of parsers created by remake as first input parameter DHUN-}
336
335
337336 findMatchingBrackets2 ::
338337 [(a1, MyParser a)] -> Anything a -> Anything a
339338 findMatchingBrackets2 l (Environment e s b)
340339 = Environment e s (findMatchingBrackets l b)
341340 findMatchingBrackets2 l xs
342341 = Environment Root (Str "") (findMatchingBrackets l [xs])
343
342
344343 {-DHUN| the parser (Anything3) creates a list of parser elements, which is not a tree. The environments which will form the nodes with children in the final tree are denoted as opening and closing brackets in this list. This function takes that list as second input parameter, finds matching pairs of opening and closing brackets and converts the to environments. The opening an closing brackets are already balanced because of the way Anything3 works, that means there is exactly one matching closing bracket for each opening one and they open and close in to proper order. This function takes the enumerated list of parsers as first input parameter, that is the same list also given to the function Anything. DHUN-}
345
344
346345 findMatchingBrackets ::
347346 [(a1, MyParser a)] -> [Anything a] -> [Anything a]
348347 findMatchingBrackets l ((Open i e s n) : xs)
350349 (findMatchingBrackets2 l t) : (findMatchingBrackets l xxs)
351350 findMatchingBrackets l (x : xs) = x : (findMatchingBrackets l xs)
352351 findMatchingBrackets _ [] = []
353
352
354353 {-DHUN| a list of environments. Most parsers use this list as their 'allowed' variable. Meaning that the parser is only allowed to match within the environments given in the 'allowed' list DHUN-}
355
354
356355 everywhere :: [EnvType]
357356 everywhere = [Wikitable] ++ everywheretbl
358
357
359358 {-DHUN| list containing the Italic and Bold environments, see documentation on the list 'everywhere' in this module DHUN-}
360
359
361360 bi :: [EnvType]
362361 bi = [Italic, Bold]
363
362
364363 {-DHUN| list containing the same environments as the list 'everywhere' except the Wikitable environment, see documentation on the list 'everywhere' in this module DHUN-}
365
364
366365 everywheretbl :: [EnvType]
367366 everywheretbl = bi ++ everywherebi
368
367
369368 {-DHUN| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold and Italic, see documentation on the list 'everywhere' in this module DHUN-}
370
369
371370 everywherebi :: [EnvType]
372371 everywherebi = basicwhere ++ [Wikilink]
373
372
374373 {-DHUN| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold, Italic and Wikilink see documentation on the list 'everywhere' in this module DHUN-}
375
374
376375 basicwhere :: [EnvType]
377376 basicwhere = [Link] ++ verybasicwhere
378
377
379378 {-DHUN| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold, Italic, Wikilink and Link see documentation on the list 'everywhere' in this module DHUN-}
380
379
381380 verybasicwhere :: [EnvType]
382381 verybasicwhere
383382 = [Itemgroup, Root, Wikiheading, TableCap, Chapter, Tag, TableTag,
384383 TemplateInside, IncludeOnly]
385
384
386385 {-DHUN| list containing the environments where the parser linkp is allowed to match. Currently this seems to be everywhere. So this possibly can go away DHUN-}
387
386
388387 everywherel :: [EnvType]
389388 everywherel = basicwhere ++ bi ++ [Wikitable, Wikilink]
390
389
391390 {-DHUN| list containing the same environments as the list 'everywhere' except the Link environment see documentation on the list 'everywhere' in this module DHUN-}
392
391
393392 everywherel2 :: [EnvType]
394393 everywherel2 = verybasicwhere ++ bi ++ [Wikitable, Wikilink]
395
394
396395 {-DHUN| list containing the TableColSep and TableHeadColSep environments, see documentation on the list 'everywhere' in this module. the environments mean table header column separator and table column separator DHUN-}
397
396
398397 wikilinkwhere :: [EnvType]
399398 wikilinkwhere = [TableColSep, TableHeadColSep]
400
399
401400 {-DHUN| the list of parsers needed for processing the HTML output created by MediaWiki DHUN-}
402
401
403402 minparsers :: [MyParser Char]
404403 minparsers
405404 = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser,
406405 pagebreakp, htmlcharp, p302p, attrp, greekp, brparser, mytablep,
407 mytrsepp, mytcolsepp, mytcapp, mythcolsepp, annop, tagparser, tagparserp,
408 tagparser2, tagparser2p, tagparsert, tagparsert, tagparser2t,
409 tagparsers, stagparser, commentp, numhtmlp, rtagparser]
410
406 mytrsepp, mytcolsepp, mytcapp, mythcolsepp, annop, tagparser,
407 tagparserp, tagparser2, tagparser2p, tagparsert, tagparsert,
408 tagparser2t, tagparsers, stagparser, commentp, numhtmlp,
409 rtagparser]
410
411411 {-DHUN| the list of parsers for parsing contributor information for images on MediaWiki websites DHUN-}
412
412
413413 htmlminparsers :: [MyParser Char]
414414 htmlminparsers
415415 = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser,
418418 tagparserp, tagparser2, tagparser2p, tagparsert, tagparsert,
419419 tagparser2t, tagparsers, stagparser, commentp, numhtmlp,
420420 rtagparser]
421
421
422422 {-DHUN| the list of parsers needed for processing the image title description so that is the content a html attibutes DHUN-}
423
423
424424 imgparsers :: [MyParser Char]
425425 imgparsers = [supp, subp, htmlcharp, p302p, greekp, numhtmlp]
426
426
427427 {-DHUN| the list of parsers needed for parsing source code in the MediaWiki markup language DHUN-}
428
428
429429 parsers :: [MyParser Char]
430430 parsers
431431 = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser,
439439 tagparser, tagparser2, tagparsert, tagparser2t, tagparsers,
440440 stagparser, commentp, reservedp, templatewikilinkp, wikiparamp,
441441 wikitemplatep, templateinsideverbatimp, templateinsidep,
442 gallerywlp, imagemapwlp, hdevlinep, linkp, linkp2, presectionp, presectionpt,
443 numhtmlp, rtagparser]
444
442 gallerywlp, imagemapwlp, hdevlinep, linkp, linkp2, presectionp,
443 presectionpt, numhtmlp, rtagparser]
444
445445 {-DHUN| the parser record, with some fields initialized with default values DHUN-}
446
446
447447 baseParser :: MyParser tok
448448 baseParser
449449 = MyParser{bad = \ _ -> pzero, start = undefined,
450450 end = \ _ -> return (), allowed = everywhere, self = undefined,
451451 modify = \ _ x -> x, reenv = id}
452
452
453453 {-DHUN| this function takes a string and returns a parser that matches any of the given strings DHUN-}
454
454
455455 oneOfTheStrings :: [String] -> Parser String
456456 oneOfTheStrings (x : xs) = try (string x) <|> (oneOfTheStrings xs)
457457 oneOfTheStrings [] = pzero
458
458
459459 {-DHUN| parses a HTML entity, that is a character escaped with the ampersand notation DHUN-}
460
460
461461 htmlcharp :: MyParser Char
462462 htmlcharp
463463 = baseParser{start =
468468 return (Str (s)),
469469 allowed = Preformat : SpaceIndent : NoWiki : everywhere,
470470 self = HtmlChar}
471
471
472472 {-DHUN| parses a HTML entity, escaped with numeric ampersand notation DHUN-}
473
473
474474 numhtmlp :: MyParser Char
475475 numhtmlp
476476 = baseParser{start =
484484 return (Str (s)),
485485 allowed = Preformat : SpaceIndent : NoWiki : everywhere,
486486 self = NumHtml}
487
487
488488 {-DHUN| parses a HTML #302 character. Special parser needed since it acts on the receding character DHUN-}
489
489
490490 p302p :: MyParser Char
491491 p302p
492492 = baseParser{start =
496496 return (Str (c : [])),
497497 self = P302}
498498
499
500
501499 {-DHUN| parses a HTML &sub entity. DHUN-}
502
500
503501 subp :: MyParser Char
504502 subp
505503 = baseParser{start =
509507 _ <- string ";"
510508 return (Str (c : [])),
511509 self = Sub}
512
510
513511 {-DHUN| parses a HTML &sup entity. DHUN-}
514
512
515513 supp :: MyParser Char
516514 supp
517515 = baseParser{start =
521519 _ <- string ";"
522520 return (Str (c : [])),
523521 self = Sup}
524
522
525523 {-DHUN| parses the start of a new URL. That is the place where a page begin that was downloaded from an URL different from the previous one DHUN-}
526
524
527525 dhunurlp :: MyParser Char
528526 dhunurlp
529527 = baseParser{start =
532530 return (Str ""),
533531 end = \ _ -> string "\n" >> return (), self = DhunUrl,
534532 allowed = [Root, Tag]}
535
533
536534 {-DHUN| parses a Greek HTML entity. So a Greek letter or something similar DHUN-}
537
535
538536 greekp :: MyParser Char
539537 greekp
540538 = baseParser{start =
544542 _ <- char ';'
545543 return (Str (s)),
546544 self = Greek}
547
545
548546 {-DHUN| parses the mediawiki math tag. That is a latex formula in the wiki DHUN-}
549
547
550548 mathp :: MyParser Char
551549 mathp
552550 = (maketagparser ["math"]){allowed =
553551 SpaceIndent : everywhere ++ wikilinkwhere,
554552 self = Math}
555
553
556554 annop :: MyParser Char
557555 annop
558556 = (maketagparser ["annotation"]){allowed =
559 SpaceIndent : everywhere ++ wikilinkwhere,
560 self = Math, reenv = const Tag}
557 SpaceIndent : everywhere ++ wikilinkwhere,
558 self = Math, reenv = const Tag}
561559
562560 {-DHUN| parses a new chapter heading DHUN-}
563
561
564562 chapterp :: MyParser Char
565563 chapterp
566564 = baseParser{start =
571569 string "dhunincludechaper" >> return (Str ""),
572570 end = \ _ -> string "/dhunincludechaper" >> return (),
573571 self = Chapter}
574
572
575573 {-DHUN| parses a horizontal dividing line DHUN-}
576
574
577575 hdevlinep :: MyParser Char
578576 hdevlinep
579577 = baseParser{start =
582580 skipMany (string "-")
583581 return (Str ""),
584582 allowed = [Root], self = HDevLine}
585
583
586584 {-DHUN| parses the mediawiki 'nowiki' tag DHUN-}
587
585
588586 nowikip :: MyParser Char
589587 nowikip
590588 = baseParser{start = \ _ -> string "<nowiki>" >> return (Str ""),
591589 end = \ _ -> string "</nowiki>" >> return (),
592590 allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent],
593591 self = NoWiki}
594
592
595593 {-DHUN| parses the mediawiki 'noinclude' tag DHUN-}
596
594
597595 noincludep :: MyParser Char
598596 noincludep
599597 = baseParser{start =
603601 try (string "</noinclude>" >> return ()) <|>
604602 lookAhead (eof >> return ()),
605603 self = NoInclude}
606
604
607605 {-DHUN| parses the mediawiki 'includeonly' tag DHUN-}
608
606
609607 includep :: MyParser Char
610608 includep
611609 = baseParser{start =
612610 \ _ -> string "<includeonly>" >> return (Str ""),
613611 end = \ _ -> string "</includeonly>" >> return (),
614612 self = IncludeOnly}
615
613
616614 {-DHUN| parses the mediawiki 'onlyinclude' tag DHUN-}
617
615
618616 includep2 :: MyParser Char
619617 includep2
620618 = baseParser{start =
621619 \ _ -> string "<onlyinclude>" >> return (Str ""),
622620 end = \ _ -> string "</onlyinclude>" >> return (),
623621 self = IncludeOnly}
624
622
625623 {-DHUN| parses the mediawiki 'gallery' tag DHUN-}
626
624
627625 galleryp :: MyParser Char
628626 galleryp
629627 = baseParser{start =
634632 _ <- char '>'
635633 return (Str ""),
636634 end = \ _ -> string "</gallery>" >> return (), self = Gallery}
637
635
638636 {-DHUN| parses a wikilink inside a gallery DHUN-}
639
637
640638 gallerywlp :: MyParser Char
641639 gallerywlp
642640 = baseParser{bad =
650648 return (),
651649 modify = \ _ x -> dropWhile (== (C ' ')) x, allowed = [Gallery],
652650 self = Wikilink}
653
651
654652 {-DHUN| parses the mediawiki 'imagemap' tag DHUN-}
655
653
656654 imagemapp :: MyParser Char
657655 imagemapp
658656 = baseParser{start =
663661 _ <- char '>'
664662 return (Str ""),
665663 end = \ _ -> string "</imagemap>" >> return (), self = ImageMap}
666
664
667665 {-DHUN| parses a wikilink inside and imagemap DHUN-}
668
666
669667 imagemapwlp :: MyParser Char
670668 imagemapwlp
671669 = baseParser{bad =
684682 do _ <- lookAhead (string "\n")
685683 return (),
686684 allowed = [ImageMap], self = Wikilink}
687
685
688686 {-DHUN| matches a sequence of arbitrary characters up to the character (an excluding it) where one of the strings given as first parameter matches DHUN-}
689
687
690688 myany :: [String] -> Parser String
691689 myany x
692690 = do b <- (try (lookAhead (oneOfTheStrings x) >> return False)) <|>
696694 cs <- (myany x)
697695 return (c : cs)
698696 else return ""
699
697
700698 {-DHUN| parses the mediawiki template DHUN-}
701
699
702700 wikitemplatep :: MyParser Char
703701 wikitemplatep
704702 = baseParser{start =
711709 everywhere ++
712710 wikilinkwhere ++ [TemplateInsideVerbatim, SpaceIndent],
713711 self = Template}
714
712
715713 {-DHUN| a special stack frame for parsing the inside of a template DHUN-}
716
714
717715 madframe :: MyStackFrame Char
718716 madframe
719717 = MyStackFrame{endparser =
720718 (try (lookAhead (oneOfTheStrings ["}}", "|", "="]))) >> return (),
721719 startdata = Str "", environment = TemplateInside,
722720 badparser = \ _ -> pzero, parsernumber = 0, nestingdepth = 0}
723
721
724722 {-DHUN| parses the inside of a mediawiki template DHUN-}
725
723
726724 templateinsidep :: MyParser Char
727725 templateinsidep
728726 = baseParser{start =
746744 else return (Str ""),
747745 end = \ _ -> lookAhead (oneOfTheStrings ["|", "}}"]) >> return (),
748746 allowed = [Template], self = TemplateInside}
749
747
750748 {-DHUN| parses the inside of a mediawiki template, it is parser verbatim that means inner structures are parsed, but returned as plain characters in the parse tree. Needed for templates use for source codes DHUN-}
751
749
752750 templateinsideverbatimp :: MyParser Char
753751 templateinsideverbatimp
754752 = baseParser{start =
759757 (trim gg) `elem`
760758 ["HaskellGHCiExample",
761759 "\"HaskellGHCi\",Visual Basic .NET: Vorlage:Code",
762 "C++-Programmierung/ Vorlage:Syntax","syntax","Syntax",
763 "C++-Programmierung/ Vorlage:Code","BigJava",
764 "LaTeX/Usage", "LaTeX/LaTeX", "Latex Index"]
760 "C++-Programmierung/ Vorlage:Syntax", "syntax",
761 "Syntax", "C++-Programmierung/ Vorlage:Code",
762 "BigJava", "LaTeX/Usage", "LaTeX/LaTeX",
763 "Latex Index"]
765764 then
766765 do _ <- string "|"
767766 try
776775 end = \ _ -> lookAhead (oneOfTheStrings ["|", "}}"]) >> return (),
777776 allowed = [Template], self = TemplateInsideVerbatim,
778777 reenv = const TemplateInside}
779
778
780779 {-DHUN| parses the inside of a template parameter DHUN-}
781
780
782781 wikiparamp :: MyParser Char
783782 wikiparamp
784783 = baseParser{start = \ _ -> string "{{{" >> return (Str ""),
785784 end = \ _ -> string "}}}" >> return (),
786785 allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent],
787786 self = Parameter}
788
787
789788 {-DHUN| parses a wikilink DHUN-}
790
789
791790 wikilinkp :: MyParser Char
792791 wikilinkp
793792 = baseParser{start = \ _ -> string "[[" >> return (Str ""),
812811 else pzero,
813812 allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent],
814813 self = Wikilink}
815
814
816815 {-DHUN| parses a wikilink template for wikipedia links DHUN-}
817
816
818817 templatewikilinkp :: MyParser Char
819818 templatewikilinkp
820819 = baseParser{start = \ _ -> string "{{w|" >> return (Str ""),
821820 end = \ _ -> string "}}" >> return (),
822821 allowed = everywhere ++ wikilinkwhere,
823822 modify = \ _ x -> (C 'w') : (C ':') : x, self = Wikilink}
824
823
825824 {-DHUN| parses a link DHUN-}
826
825
827826 linkp :: MyParser Char
828827 linkp
829828 = baseParser{bad =
838837 return (Str (if s == "//" then "http://" else s)),
839838 end = \ _ -> string "]" >> return (), allowed = everywherel,
840839 self = Link}
841
840
842841 {-DHUN| parses a link, in contrast to linkp this does not match inside links and does not require the square bracket notation DHUN-}
843
842
844843 linkp2 :: MyParser Char
845844 linkp2
846845 = baseParser{start =
849848 return (Str s),
850849 end = \ _ -> lookAhead (oneOf " \n\r\t<>|\"") >> return (),
851850 allowed = everywherel2, self = Link2, reenv = const Link}
852
851
853852 {-DHUN| parses a wikitable DHUN-}
854
853
855854 wikitablep :: MyParser Char
856855 wikitablep
857856 = baseParser{start =
858857 \ _ ->
859 do _ <- try (char '\n') <|> return '\n'
858 do _ <- try (char '\n') <|> return '\n'
860859 skipMany (char ' ')
861860 _ <- try (string "{|") <|> try (string "{{(!}}")
862861 s <- many (noneOf "\n")
868867 _ <- try (string "|}") <|> try (string "{{!)}}")
869868 return (),
870869 self = Wikitable}
871
870
872871 {-DHUN| parses a heading, can be a chapter heading as well as a section heading and so on DHUN-}
873
872
874873 wikiheadingp :: MyParser Char
875874 wikiheadingp
876875 = baseParser{bad =
895894 _ <- notFollowedBy (char '=')
896895 return (),
897896 self = Wikiheading, allowed = everywherel2}
898
897
899898 {-DHUN| parses an italic text DHUN-}
900
899
901900 italicp :: MyParser Char
902901 italicp
903902 = baseParser{start =
913912 return (),
914913 allowed = SpaceIndent : Wikitable : Bold : everywherebi,
915914 self = Italic}
916
915
917916 {-DHUN| parses a bold text DHUN-}
918
917
919918 boldp :: MyParser Char
920919 boldp
921920 = baseParser{start =
930929 return (),
931930 allowed = SpaceIndent : Wikitable : Italic : everywherebi,
932931 self = Bold}
933
932
934933 {-DHUN| parses a table caption DHUN-}
935
934
936935 tablecapp :: MyParser Char
937936 tablecapp
938937 = baseParser{bad =
964963 notFollowedBy . char $ '}'
965964 return ()),
966965 allowed = [Wikitable], self = TableCap}
967
966
968967 {-DHUN| parses a table caption with additional parameter given to the beginning of the caption element in the wiki source DHUN-}
969
968
970969 tablecapp2 :: MyParser Char
971970 tablecapp2
972971 = baseParser{start =
989988 <|>
990989 (try ((string "||" >> return ())) <|> (string "!!" >> return ()))),
991990 allowed = [Wikitable], self = TableCap}
992
991
993992 {-DHUN| parses a table caption DHUN-}
994
993
995994 tablecapp3 :: MyParser Char
996995 tablecapp3
997996 = baseParser{start =
10021001 _ <- notFollowedBy (oneOf "-}")
10031002 return (Str "2"),
10041003 allowed = [Wikitable], self = TableCap}
1005
1004
10061005 {-DHUN| parses a table row separator DHUN-}
1007
1006
10081007 rowsepp :: MyParser Char
10091008 rowsepp
10101009 = baseParser{start =
10151014 s <- many (noneOf "\n")
10161015 return (Str s),
10171016 allowed = [Wikitable], self = TableRowSep}
1018
1017
10191018 {-DHUN| parses a table column separator, with additional parameters parser to the beginning of the environment. This parser actually parses only the beginning elements and whats inside it See DHUN-}
1020
1019
10211020 colsepp :: MyParser Char
10221021 colsepp
10231022 = baseParser{bad =
10521051 _ <- notFollowedBy (oneOf "}|")
10531052 return ()),
10541053 allowed = [Wikitable], self = TableColSep}
1055
1054
10561055 {-DHUN| parses a column separator without anything inside it DHUN-}
1057
1056
10581057 colsepp2 :: MyParser Char
10591058 colsepp2
10601059 = baseParser{start =
10691068 do _ <- try (string "||") <|> try (string "{{!!}}")
10701069 return (Str "2"),
10711070 allowed = [Wikitable], self = TableColSep}
1072
1071
10731072 {-DHUN| parses a table header column separator, with additional parameters parser to the beginning of the environment. This parser actually parses only the beginning elements and whats inside it See DHUN-}
1074
1073
10751074 headcolsepp :: MyParser Char
10761075 headcolsepp
10771076 = baseParser{bad =
11041103 notFollowedBy (oneOf "-}|")
11051104 return (),
11061105 allowed = [Wikitable], self = TableHeadColSep}
1107
1106
11081107 {-DHUN| parses a header column separator without anything inside it DHUN-}
1109
1108
11101109 headcolsepp2 :: MyParser Char
11111110 headcolsepp2
11121111 = baseParser{start =
11261125 attrinside x = try (string "&amp;" >> return '&') <|> (noneOf x)
11271126
11281127 {-DHUN| matches a key value pair . So an attribute of an HTML element DHUN-}
1129
1128
11301129 attr :: GenParser Char () ([Char], [Char])
11311130 attr
11321131 = do skipMany1 (oneOf " \n")
11331132 k <- many1 (try (alphaNum) <|> oneOf ":-")
11341133 v <- try
11351134 (do skipMany (oneOf " \n")
1136 _ <- char '='
1135 _ <- char '='
11371136 skipMany (oneOf " \n")
11381137 vv <- try
11391138 (do _ <- try (char '"')
11541153 return vv)
11551154 <|> return ""
11561155 return (k, v)
1157
1156
11581157 {-DHUN| Matches a key value pair. So an attribute of an HTML element DHUN-}
1159
1158
11601159 attrns :: GenParser Char u ([Char], [Char])
11611160 attrns
11621161 = do k <- many1 (try (alphaNum) <|> oneOf ":-")
11761175 return vv
11771176 _ <- try (many (oneOf " \n")) <|> return []
11781177 return (k, v)
1179
1178
11801179 {-DHUN| Matches a list of key value pairs . So all attributes of an HTML element DHUN-}
1181
1180
11821181 attrp :: MyParser Char
11831182 attrp
11841183 = baseParser{start =
11871186 return (Attr atr),
11881187 allowed = [TableHeadColSep, TableColSep, TableCap],
11891188 self = Attribute}
1190
1189
11911190 {-DHUN| Parses the HTML 'pre' tag DHUN-}
1192
1191
11931192 prep :: MyParser Char
11941193 prep
11951194 = baseParser{start =
12101209 _ <- char '>'
12111210 return (),
12121211 allowed = everywhere, self = Preformat}
1213
1212
12141213 {-DHUN| Parses the HTML 'br' tag DHUN-}
1215
1214
12161215 brparser :: MyParser Char
12171216 brparser
12181217 = baseParser{start =
12271226 _ <- char '>'
12281227 return (TagAttr "br" Map.empty),
12291228 allowed = SpaceIndent : everywhere, self = Tag}
1230
1229
12311230 {-DHUN| Returns a Parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser does not match inside tables. Use makettagparser for that DHUN-}
1232
1231
12331232 maketagparser :: [String] -> MyParser Char
12341233 maketagparser tags
12351234 = baseParser{start =
12501249 _ <- char '>'
12511250 return (),
12521251 allowed = SpaceIndent : everywheretbl, self = Tag}
1253
1252
12541253 {-DHUN| Parser for the 'meta' tag of HTML DHUN-}
1255
1254
12561255 metatagparser :: MyParser Char
12571256 metatagparser
12581257 = baseParser{start =
12651264 _ <- try (char '>') <|> (return '>')
12661265 return (TagAttr (t) (Map.fromList atr)),
12671266 allowed = SpaceIndent : everywhere, self = Tag}
1268
1267
12691268 {-DHUN| Parser for the !DOCTYPE tag of HTML DHUN-}
1270
1269
12711270 doctagparser :: MyParser Char
12721271 doctagparser
12731272 = baseParser{start =
12781277 _ <- char '>'
12791278 return (TagAttr (t) (Map.fromList [])),
12801279 allowed = SpaceIndent : everywhere, self = Tag}
1281
1280
12821281 {-DHUN| Parser for closing HTML tags that have not matching opening tag. DHUN-}
1283
1282
12841283 ctagparser :: [String] -> GenParser Char () ()
12851284 ctagparser tags
12861285 = do _ <- string "</"
12881287 _ <- try (many (oneOf " \n")) <|> return []
12891288 _ <- char '>'
12901289 return ()
1291
1290
12921291 {-DHUN| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may also self closing like <br/> because of the tailing /. DHUN-}
1293
1292
12941293 maketagparser2 :: [String] -> MyParser Char
12951294 maketagparser2 tags
12961295 = baseParser{start =
13031302 _ <- char '>'
13041303 return (TagAttr (t) (Map.fromList atr)),
13051304 allowed = Wikitable : SpaceIndent : everywheretbl, self = Tag}
1306
1305
13071306 {-DHUN| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may not be self closing so not like <br/> because of the tailing /. DHUN-}
1308
1307
13091308 maketagparser3 :: [String] -> MyParser Char
13101309 maketagparser3 tags
13111310 = baseParser{start =
13171316 _ <- char '>'
13181317 return (TagAttr (t) (Map.fromList atr)),
13191318 allowed = SpaceIndent : everywheretbl, self = Tag}
1320
1319
13211320 {-DHUN| tags that can not be nested and are thus allowed to closed by an opening tag instead of a closing one DHUN-}
1322
1321
13231322 nonNestTags :: [String]
13241323 nonNestTags
13251324 = ["tt", "pre", "TT", "PRE", "b", "B", "i", "I", "sc", "SC",
13261325 "code", "CODE"]
1327
1326
13281327 {-DHUN| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only inside tables. Use maketagparser for that DHUN-}
1329
1328
13301329 makettagparser :: [String] -> MyParser Char
13311330 makettagparser tags
13321331 = baseParser{bad =
13511350 _ <- char '>'
13521351 return (),
13531352 allowed = [Wikitable], self = Tag}
1354
1353
13551354 {-DHUN| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may also self closing like <br/> because of the tailing /. Matches only inside tables DHUN-}
1356
1355
13571356 makettagparser2 :: [String] -> MyParser Char
13581357 makettagparser2 tags
13591358 = baseParser{bad =
13721371 _ <- char '>'
13731372 return (TagAttr (t) (Map.fromList atr)),
13741373 allowed = [Wikitable], self = Tag}
1375
1374
13761375 {-DHUN| maketagparser for all HTML elements see documentation on function maketagparser DHUN-}
1377
1376
13781377 tagparser :: MyParser Char
13791378 tagparser = maketagparser listOfTags
1380
1379
13811380 {-DHUN| maketagparser for the 'pre' HTML tag see documentation on function maketagparser DHUN-}
1382
1381
13831382 tagparserp :: MyParser Char
13841383 tagparserp = maketagparser ["pre"]
1385
1384
13861385 {-DHUN| maketagparser for the HTML tags for HTML tables see documentation on function maketagparser DHUN-}
1387
1386
13881387 tagparsert :: MyParser Char
13891388 tagparsert
13901389 = (maketagparser listOfTableTags){self = TableTag,
13911390 reenv = const Tag}
1392
1391
13931392 {-DHUN| makettagparser for all HTML elements see documentation on function makettagparser DHUN-}
1394
1393
13951394 ttagparser :: MyParser Char
13961395 ttagparser = makettagparser listOfTags
1397
1396
13981397 {-DHUN| makettagparser for the 'pre' HTML tag see documentation on function makettagparser DHUN-}
1399
1398
14001399 ttagparserp :: MyParser Char
14011400 ttagparserp = makettagparser ["pre"]
1402
1401
14031402 {-DHUN| makettagparser for the HTML tags for HTML tables see documentation on function makettagparser DHUN-}
1404
1403
14051404 ttagparsert :: MyParser Char
14061405 ttagparsert
14071406 = (makettagparser listOfTableTags){self = TableTag,
14081407 reenv = const Tag}
1409
1408
14101409 {-DHUN| maketagparser2 for all HTML elements see documentation on function maketagparser2 DHUN-}
1411
1410
14121411 tagparser2 :: MyParser Char
14131412 tagparser2 = maketagparser2 listOfTags
1414
1413
14151414 {-DHUN| maketagparser2 for the 'pre' HTML tag see documentation on function maketagparser2 DHUN-}
1416
1415
14171416 tagparser2p :: MyParser Char
14181417 tagparser2p = maketagparser2 ["pre"]
1419
1418
14201419 {-DHUN| maketagparser2 for the HTML tags for HTML tables see documentation on function maketagparser2 DHUN-}
1421
1420
14221421 tagparser2t :: MyParser Char
14231422 tagparser2t
14241423 = (maketagparser2 listOfTableTags){self = TableTag,
14251424 reenv = const Tag}
1426
1425
14271426 {-DHUN| makettagparser2 for all HTML elements see documentation on function makettagparser2 DHUN-}
1428
1427
14291428 ttagparser2 :: MyParser Char
14301429 ttagparser2 = makettagparser2 listOfTags
1431
1430
14321431 {-DHUN| makettagparser2 for the 'pre' HTML tag see documentation on function makettagparser2 DHUN-}
1433
1432
14341433 ttagparser2p :: MyParser Char
14351434 ttagparser2p = makettagparser2 ["pre"]
1436
1435
14371436 {-DHUN| makettagparser2 for the HTML tags for HTML tables see documentation on function makettagparser2 DHUN-}
1438
1437
14391438 ttagparser2t :: MyParser Char
14401439 ttagparser2t
14411440 = (makettagparser2 listOfTableTags){self = TableTag,
14421441 reenv = const Tag}
1443
1442
14441443 {-DHUN| a parser for mediawiki source extension tags DHUN-}
1445
1444
14461445 tagparsers :: MyParser Char
14471446 tagparsers
14481447 = (maketagparser ["source", "syntaxhighlight"]){self = Source}
1449
1448
14501449 {-DHUN| a parser for mediawiki source extension tags inside tables DHUN-}
1451
1450
14521451 ttagparsers :: MyParser Char
14531452 ttagparsers
14541453 = (maketagparser ["source", "syntaxhighlight"]){self = Source,
14551454 allowed = [Wikitable]}
1456
1455
14571456 {-DHUN| a parser for HTML tables DHUN-}
1458
1457
14591458 mytablep :: MyParser Char
14601459 mytablep
14611460 = (maketagparser ["table"]){self = TableTag,
14621461 reenv = const Wikitable,
14631462 allowed = Wikitable : SpaceIndent : everywheretbl}
1464
1463
14651464 {-DHUN| a parser for HTML table rows DHUN-}
1466
1465
14671466 mytrsepp :: MyParser Char
14681467 mytrsepp
14691468 = (maketagparser3 ["tr"]){reenv = const TableRowSep,
14701469 allowed = everywhere}
1471
1470
14721471 {-DHUN| a parser for normal HTML table cells DHUN-}
1473
1472
14741473 mytcolsepp :: MyParser Char
14751474 mytcolsepp
14761475 = (maketagparser3 ["td"]){reenv = const TableColSep,
14771476 allowed = everywhere}
1478
1477
14791478 {-DHUN| a parser for HTML table captions cells DHUN-}
1480
1479
14811480 mytcapp :: MyParser Char
14821481 mytcapp
14831482 = (maketagparser ["caption"]){reenv = const TableCap,
14841483 allowed = everywhere}
1485
1484
14861485 {-DHUN| a parser for HTML table header cells, so th tags DHUN-}
1487
1486
14881487 mythcolsepp :: MyParser Char
14891488 mythcolsepp
14901489 = (maketagparser3 ["th"]){reenv = const TableHeadColSep,
14911490 allowed = everywhere}
1492
1491
14931492 {-DHUN| a parser for HTML tables for html parse mode only DHUN-}
1494
1493
14951494 htmytablep :: MyParser Char
14961495 htmytablep = (maketagparser ["table"])
1497
1496
14981497 {-DHUN| a parser for HTML table rows for html parse mode only DHUN-}
1499
1498
15001499 htmytrsepp :: MyParser Char
15011500 htmytrsepp = (maketagparser ["tr"])
1502
1501
15031502 {-DHUN| a parser for normal HTML table cells for html parse mode only DHUN-}
1504
1503
15051504 htmytcolsepp :: MyParser Char
15061505 htmytcolsepp = (maketagparser ["td"])
1507
1506
15081507 {-DHUN| a parser for HTML table captions cells for html parse mode only DHUN-}
1509
1508
15101509 htmytcapp :: MyParser Char
15111510 htmytcapp = (maketagparser ["caption"])
1512
1511
15131512 {-DHUN| a parser for HTML table header cells, so th tags for html parse mode only DHUN-}
1514
1513
15151514 htmythcolsepp :: MyParser Char
15161515 htmythcolsepp = (maketagparser ["th"])
1517
1516
15181517 {-DHUN| a parser for closing HTML tags which don't have an opening partner. This parser is only allowed to match within itemization enumerations etc. DHUN-}
1519
1518
15201519 itagparser :: MyParser Char
15211520 itagparser
15221521 = baseParser{start =
15291528 _ <- char '>'
15301529 return (TagAttr (t) (Map.fromList atr)),
15311530 allowed = [Itemgroup], self = Tag}
1532
1531
15331532 {-DHUN| a parser for closing HTML tags which don't have an opening partner DHUN-}
1534
1533
15351534 rtagparser :: MyParser Char
15361535 rtagparser
15371536 = baseParser{start =
15421541 atr <- many (try (attr))
15431542 _ <- try (many (oneOf " \n")) <|> return []
15441543 _ <- char '>'
1545 return (TagAttr ("bad"++s) (Map.fromList atr)),
1544 return (TagAttr ("bad" ++ s) (Map.fromList atr)),
15461545 allowed = everywhere, self = Tag}
1547
1546
15481547 {-DHUN| a parser for HTML opening tags which might be self closing but never have a matching closing partner DHUN-}
1549
1548
15501549 stagparser :: MyParser Char
15511550 stagparser
15521551 = baseParser{start =
15591558 _ <- char '>'
15601559 return (TagAttr t (Map.fromList atr)),
15611560 self = Tag, allowed = []}
1562
1561
15631562 {-DHUN| a parser for HTML page breaks DHUN-}
1564
1563
15651564 pagebreakp :: MyParser Char
15661565 pagebreakp
15671566 = baseParser{start =
15691568 do _ <- string "<div style=\"page-break-before:always\"/>"
15701569 return (Str ""),
15711570 end = \ _ -> return (), allowed = everywhere, self = PageBreak}
1572
1571
15731572 {-DHUN| a parser for HTML comments DHUN-}
1574
1573
15751574 commentp :: MyParser Char
15761575 commentp
15771576 = baseParser{start =
15841583 do _ <- string "-->"
15851584 return (),
15861585 self = Comment}
1587
1586
15881587 {-DHUN| a parser for mediawiki reserved words DHUN-}
1589
1588
15901589 reservedp :: MyParser Char
15911590 reservedp
15921591 = baseParser{start =
16021601 <|> string "&nbsp;"
16031602 return (Str s),
16041603 self = Reserved, allowed = SpaceIndent : everywhere}
1605
1604
16061605 {-DHUN| See documentation on evaluateItemgroup and itemParserLevelTwoDHUN-}
1607
1606
16081607 itemStartString :: Anything Char -> String
16091608 itemStartString (ItemStart x) = x : []
16101609 itemStartString _ = ""
1611
1610
16121611 {-DHUN| See documentation on evaluateItemgroup and itemParserLevelTwo DHUN-}
1613
1612
16141613 itemStopString :: Anything Char -> String
16151614 itemStopString (ItemStop x) = x : []
16161615 itemStopString _ = ""
1617
1616
16181617 {-DHUN| see documentation on evaluateItemgroup. Parser to parse the ItemStart and ItemStop elements which are essentially bracket into environments which are essentially nodes with children in the parse tree DHUN-}
1619
1618
16201619 itemParserLevelTwo :: MyParser (Anything Char)
16211620 itemParserLevelTwo
16221621 = MyParser{bad = \ _ -> pzero,
16341633 return (),
16351634 allowed = [Root, ItemEnv], self = ItemEnv, modify = \ _ x -> x,
16361635 reenv = id}
1637
1636
16381637 {-DHUN| see documentation on evaluateItemgroup. helper function to generate a list of only opening or only closing brackets, that is ItemStart or ItemStop values. The string given as first parameter states which kinds of bracket shall be opened or closed. For Openning brackets you have to call this function ItemStart as second and True as third parameter. For closing brackets you have to use ItemStop and False DHUN-}
1639
1638
16401639 generateEnvironmentTagsHelper ::
16411640 String -> (Char -> Anything Char) -> Bool -> [Anything Char]
16421641 generateEnvironmentTagsHelper (c : cs) t b
16441643 (t c) : ([Item c]) ++ (generateEnvironmentTagsHelper cs t b) else
16451644 (generateEnvironmentTagsHelper cs t b) ++ [(t c)]
16461645 generateEnvironmentTagsHelper [] _ _ = []
1647
1646
16481647 {-DHUN| see documentation on evaluateItemgroup and insertEnvironmentTags. The first parameter is a string consisting of the characters #:* that is the begging of the last Itemline. The second is the same for the current ItemLine. This function return a list of ItemStart and ItemStop values for the difference between the first and the second parameter DHUN-}
1649
1648
16501649 generateEnvironmentTags :: String -> String -> [Anything Char]
16511650 generateEnvironmentTags (o : os) (n : ns)
16521651 = if (o == n) then generateEnvironmentTags os ns else
16571656 generateEnvironmentTags [] (n : ns)
16581657 = generateEnvironmentTagsHelper (n : ns) ItemStart True
16591658 generateEnvironmentTags [] [] = []
1660
1659
16611660 {-DHUN| see documentation on evaluateItemgroup. The first parameter is a string consisting of the characters #:* that is the begging of the last Itemline. The second parameter is the content of an Itemgroup. This function returns the parse tree with the ItemStart and ItemStop values inserted DHUN-}
1662
1661
16631662 insertEnvironmentTags ::
16641663 String -> [Anything Char] -> [Anything Char]
16651664 insertEnvironmentTags s ((Environment ItemLine (Str x) _) : xs)
16771676 ++ insertEnvironmentTags x xs)
16781677 insertEnvironmentTags s (x : xs) = x : insertEnvironmentTags s xs
16791678 insertEnvironmentTags _ [] = []
1680
1679
16811680 {-DHUN| see documentation on evaluateItemgroup. Inserts the ItemStart and ItemStop values as preparation for running the second level parse on the content of an ItemGroup. The first pararmenter is a string consisting of the characters #:* that is the begging of the first Itemline. The second parameter is the content of an Itemgroup. This function returns the parse tree with the ItemStart and ItemStop values inserted DHUN-}
1682
1681
16831682 toEnvironmentTags :: String -> [Anything Char] -> [Anything Char]
16841683 toEnvironmentTags s l
16851684 = insertEnvironmentTags ""
16861685 ((Environment ItemLine (Str s) []) :
16871686 l ++ [(Environment ItemLine (Str "") [])])
1688
1687
16891688 {-DHUN| see documentation on evaluateItemgroup. Get the second level parse result, back to the first level parse result DHUN-}
1690
1689
16911690 convertFromParsingLevelTwoToLevelOne ::
16921691 [Anything (Anything Char)] -> [Anything Char]
16931692 convertFromParsingLevelTwoToLevelOne ((C x) : xs)
17041703 convertFromParsingLevelTwoToLevelOne (_ : xs)
17051704 = (convertFromParsingLevelTwoToLevelOne xs)
17061705 convertFromParsingLevelTwoToLevelOne [] = []
1707
1706
17081707 {-DHUN| see documentation on evaluateItemgroup. Runs the parser itemParserLevelTwo on the inner part of an ItemGroup DHUN-}
1709
1708
17101709 runItemGroupPraserLevelTwo ::
17111710 GenParser (Anything Char) () [Anything (Anything Char)] ->
17121711 [Anything Char] -> [Anything (Anything Char)]
17141713 = case (parse p "" input) of
17151714 Left _ -> []
17161715 Right x -> x
1717
1716
17181717 {-DHUN| this is for parsing itemization. It is implemented as two step process in the first step Itemgroup is created with ItemLine s in it. And ItemLine is a line starting with any combination of *#: . And Itemgroup is a sequence of lines of that kind. After this is done this function gets called with the content of an itemgroup. In the second step ItemStart and ItemStop elements are created, those are essentially the bracketed of the opening an closing part of the enumerations itemizations etc. . The function toEnvironmentTags does the insertion of the ItemStop and ItemStart values and remove the ItemLine values. parseAnything2 is run on it using the itemParserLevelTwo. This is turned into the parse tree by being passed to decon2 and put out of the parser monad by runItemGroupPraserLevelTwo. The resulting parse tree is one of type [Anything (Anything Char)] this needs to be but one level down to [Anything Char] this is done be convertFromParsingLevelTwoToLevelOne. The first parameter is a string consisting of the characters #:* that is the begging of the first Itemline. The second parameter is the content of an Itemgroup DHUN-}
1719
1718
17201719 evaluateItemgroup :: String -> [Anything Char] -> [Anything Char]
17211720 evaluateItemgroup s l
17221721 = convertFromParsingLevelTwoToLevelOne
17291728 (remake [itemParserLevelTwo])
17301729 []))
17311730 (toEnvironmentTags s l))
1732
1731
17331732 {-DHUN| a parser for a group of lines starting with one of *:;# representing in enumeration itemization etc. . This particular parser is allowed to match nearly everywhere DHUN-}
1734
1733
17351734 itempgroupp :: MyParser Char
17361735 itempgroupp
17371736 = MyParser{bad = \ _ -> pzero,
17481747 notFollowedBy ((oneOf ['*', ':', ';', '#'])))),
17491748 allowed = [Root, Wikitable, TemplateInside, Tag], self = Itemgroup,
17501749 modify = \ (Str x) -> evaluateItemgroup x, reenv = id}
1751
1750
17521751 {-DHUN| a parser for a group of lines starting with one of *:;# representing in enumeration itemization etc. . This particular parser is only allowed to match within templates DHUN-}
1753
1752
17541753 itempgrouppt :: MyParser Char
17551754 itempgrouppt
17561755 = MyParser{bad = \ _ -> pzero,
17611760 return (Str a),
17621761 end =
17631762 \ _ ->
1764 (try (lookAhead (string "}}") <|> (lookAhead (string "\n">>(many (char ' ')) >>string"|"))) >>
1765 return ()),
1763 (try
1764 (lookAhead (string "}}") <|>
1765 (lookAhead (string "\n" >> (many (char ' ')) >> string "|")))
1766 >> return ()),
17661767 allowed = [TemplateInside], self = Itemgroup,
17671768 modify = \ (Str x) -> evaluateItemgroup x, reenv = id}
1768
1769
17691770 {-DHUN| a parser for a preformat created by indenting with space DHUN-}
1770
1771
17711772 presectionp :: MyParser Char
17721773 presectionp
17731774 = MyParser{bad = \ _ -> try (string "}}") >> (return ()),
17851786 modify = \ _ x -> (C ' ') : x, reenv = id}
17861787
17871788 {-DHUN| a parser for a preformat created by indenting with space withing templates DHUN-}
1788
1789
17891790 presectionpt :: MyParser Char
17901791 presectionpt
17911792 = MyParser{bad = \ _ -> try (string "}}") >> (return ()),
17921793 start =
17931794 \ _ ->
17941795 do _ <- string "\n"
1795 _ <- notFollowedBy ((many1 (char ' '))>>(char '|'))
1796 _ <- notFollowedBy ((many1 (char ' ')) >> (char '|'))
17961797 _ <- char ' '
17971798 return (Str ""),
17981799 end =
17991800 \ _ ->
18001801 lookAhead
18011802 (do _ <- string "\n"
1802 notFollowedBy (char ' ')) ,
1803 notFollowedBy (char ' ')),
18031804 allowed = [TemplateInside], self = SpaceIndent,
18041805 modify = \ _ x -> (C ' ') : x, reenv = id}
18051806
1806
18071807 {-DHUN| a parser for a line starting with one of *:;# representing in enumeration itemization etc. DHUN-}
1808
1808
18091809 itemlinep :: MyParser Char
18101810 itemlinep
18111811 = MyParser{bad = \ _ -> pzero,
18171817 end = \ _ -> (return ()), allowed = [Itemgroup], self = ItemLine,
18181818 modify = \ _ x -> x, reenv = id}
18191819
1820
18211820 {-DHUN| takes a parse tree that was created form the HTML returned by MediaWiki when being requested for the print version of a wiki page. And returns a modified version of that parse tree ready for being converted to LaTeX with treeToLaTeX3 |DHUN-}
1822
1821
18231822 printPrepareTree :: [Anything Char] -> [Anything Char]
18241823 printPrepareTree ll = concat (map printPrepareNode ll)
1825 where
1826 printPrepareNode :: Anything Char -> [Anything Char]
1824 where printPrepareNode :: Anything Char -> [Anything Char]
18271825 printPrepareNode (Environment Tag (TagAttr "div" mm) l)
18281826 | (Map.lookup "class" mm) == (Just "thumbinner") =
18291827 case
18401838 . (filter magnpred)
18411839 $ tt
18421840 _ -> mzero
1843 return $ imgfun m llll (Just tt)
1841 return $ imgfun m (printPrepareTree llll) (Just (printPrepareTree tt))
18441842 of
18451843 Just x -> x
18461844 _ -> printPrepareTree l
18471845 printPrepareNode (Environment Wikitable (TagAttr "table" m) _)
18481846 | (Map.lookup "class" m) == (Just "toc") = []
1849 printPrepareNode (Environment Tag (TagAttr "tbody" _) x) = printPrepareTree x
1847 printPrepareNode (Environment Tag (TagAttr "tbody" _) x)
1848 = printPrepareTree x
18501849 printPrepareNode (Environment Tag (TagAttr "div" m) _)
18511850 | ((Map.lookup "class" m) == (Just "toc") ||
18521851 (Map.lookup "id" m) == (Just "toc"))
18911890 te <- case deepGet "div" "class" "gallerytext" l of
18921891 [Environment Tag (TagAttr "div" _) te] -> return te
18931892 _ -> mzero
1894 return $ imgfun mmm lll (Just te))
1893 return $ imgfun mmm (printPrepareTree lll) (Just te))
18951894 of
18961895 Just x -> x
18971896 _ -> printPrepareTree l
19151914 | (Map.lookup "class" m) == (Just "mw-editsection") = []
19161915 printPrepareNode (Environment Tag (TagAttr "a" m) l)
19171916 = case (Map.lookup "class" m) of
1918 (Just "image") -> imgfun m l Nothing
1917 (Just "image") -> imgfun m (printPrepareTree l) Nothing
19191918 _ -> case (Map.lookup "class" m) of
19201919 (Just "external free") -> [Environment Tag (TagAttr "a" m) []]
19211920 _ -> [Environment Tag (TagAttr "a" m) l]
19221921 printPrepareNode (Environment Tag (TagAttr "div" m) _)
19231922 | (Map.lookup "class" m) == (Just "bodyContent") = []
1924
19251923 printPrepareNode (Environment Tag (TagAttr "math" m) l)
1926 = case deepGet "annotation" "encoding" "application/x-tex" l of
1927 [Environment Tag (TagAttr "annotation" _) x] -> [Environment Math (TagAttr "math" m)
1928 (map C
1929 (replace2 (replace2 (replace2 (shallowFlatten x) "&amp;" "&") "&lt;" "<") "gt;"
1930 ">"))]
1931 _ -> []
1924 = case deepGet "annotation" "encoding" "application/x-tex" l of
1925 [Environment Tag (TagAttr "annotation" _) x] -> [Environment Math
1926 (TagAttr "math" m)
1927 (map C
1928 (replace2
1929 (replace2
1930 (replace2
1931 (shallowFlatten x)
1932 "&amp;"
1933 "&")
1934 "&lt;"
1935 "<")
1936 "&gt;"
1937 ">"))]
1938 _ -> []
19321939 printPrepareNode (Environment Tag (TagAttr "img" m) l)
1933 | (Map.lookup "class" m) `elem` (map Just ["tex","mwe-math-fallback-png-inline tex","mwe-math-fallback-image-inline tex"]) =
1940 | (Map.lookup "class" m) `elem`
1941 (map Just
1942 ["tex", "mwe-math-fallback-png-inline tex",
1943 "mwe-math-fallback-image-inline tex"])
1944 =
19341945 case Map.lookup "alt" m of
19351946 Just x -> [Environment Math (TagAttr "math" m)
19361947 (map C
19411952 printPrepareNode (Environment Tag (TagAttr "div" m) l)
19421953 = case
19431954 do c <- Map.lookup "class" m
1944 guard $ ((isInfixOf "source" c)||(isInfixOf "highlight" c))
1955 guard $ ((isInfixOf "source" c) || (isInfixOf "highlight" c))
19451956 return $
19461957 Environment Source
19471958 (TagAttr "source"
19531964 printPrepareNode (Environment x y l)
19541965 = [Environment x y (printPrepareTree l)]
19551966 printPrepareNode x = [x]
1956
1967
19571968 mypred :: String -> Anything Char -> Bool
19581969 mypred x y
19591970 = case y of
19601971 (Environment Tag (TagAttr z _) _) | z == x -> True
19611972 _ -> False
1962
1973
19631974 magnpred :: Anything Char -> Bool
19641975 magnpred y
19651976 = case y of
20202031 Just x -> return $ [C '|'] ++ (map C (x ++ "px"))
20212032 Nothing -> return []
20222033 return (Environment Wikilink (Str "") ((map C h) ++ w ++ t))
2023
2024
00 {-DHUN| module storing information on which ttf font should be used for which character and fontstyle DHUN-}
11 module MegaFont where
22 import BaseFont
3
3
44 {-DHUN| map (written as list) storing information on which ttf font should be used for which character and fontstyle DHUN-}
5
5
66 megafont :: [(FontStyle, [Char])]
77 megafont
88 = [(FontStyle{stylebase = Normal, bold = False, italic = False},
44 import Control.Monad.Trans.State (State)
55 import MediaWikiParseTree
66 import BaseFont
7
7
88 {-DHUN| a type used as mutable state while processing a table. See documentation of the TableHelper module DHUN-}
9
9
1010 data TableState = TableState{rowCounter :: Int,
1111 inputLastRowOfHeader :: Int, outputLastRowOfHeader :: Int,
1212 outputTableHasHeaderRows :: Bool,
1818 currentRowIsHeaderRow :: Bool,
1919 lastCellWasNotFirstCellOfRow :: Bool, columnsWidthList :: [Float],
2020 lastCellWasMultiColumn :: Bool, activeColumn :: Maybe Int}
21
21
2222 {-DHUN| see documentation of the makeLables function in WikiHelper module DHUN-}
23
23
2424 data UrlState = UrlState{iUrlState :: Int, sUrlState :: String,
2525 mUrlState :: Map String String}
2626 deriving (Show, Eq, Read)
27
27
2828 {-DHUN| see initial value of the type UrlState DHUN-}
29
29
3030 initialUrlState :: UrlState
3131 initialUrlState
3232 = UrlState{iUrlState = 0, sUrlState = "", mUrlState = Map.empty}
33
33
3434 {-DHUN| a type used as mutable state during the course of the LaTeXRederer DHUN-}
35
35
3636 data MyState = MyState{getImages :: [String], getJ :: Int,
3737 getF :: Float, getC :: Int, getInTab :: Int, getInGallery :: Bool,
3838 getInFootnote :: Bool, getInHeading :: Bool, getInCenter :: Bool,
4141 urld :: WikiUrlData, getGalleryNumbers :: [Integer],
4242 currentUrl :: String, fndict :: Map String [Anything Char],
4343 tablist :: [[String]], tabmap :: Map Int (Map Int Double),
44 fontStack :: [FontStyle], font :: Font, langu:: Maybe String, forms::Map String Int, lastChar::Char, lastFontChanged::Bool}
44 fontStack :: [FontStyle], font :: Font, langu :: Maybe String,
45 forms :: Map String Int, lastChar :: Char, lastFontChanged :: Bool}
4546 deriving (Show, Eq)
46
47
4748 {-DHUN| Renderer is the State monad using MyState as mutable state DHUN-}
48
49
4950 type Renderer = State MyState
50
51
5152 {-DHUN| the initial value for MyState DHUN-}
52
53
5354 initialState :: MyState
5455 initialState
5556 = MyState{getImages = [], getJ = 1, getF = 1, getC = 1,
6162 tabmap = Map.empty,
6263 fontStack =
6364 [FontStyle{stylebase = Normal, bold = False, italic = False}],
64 font = ComputerModernRoman, langu=Nothing, forms=Map.empty , lastChar=' ', lastFontChanged=False}
65
65 font = ComputerModernRoman, langu = Nothing, forms = Map.empty,
66 lastChar = ' ', lastFontChanged = False}
67
6668 {-DHUN| represents an URL to a wiki (not to a page thereof), which is not a sister project of wikipedia, so not wikibooks wikisource, etc. DHUN-}
67
69
6870 data WikiBaseUrl = WikiBaseUrl{baseUrl :: String}
6971 deriving (Show, Eq)
70
72
7173 {-DHUN| represents an URL to a wiki (not to a page thereof), which is a sister project of wikipedia, so wikibooks wikisource, etc. DHUN-}
72
74
7375 data WikiUrlInfo = WikiUrlInfo{language :: String,
7476 wikitype :: String}
7577 deriving (Show, Eq)
76
78
7779 {-DHUN| represents an URL to a wiki (not to a page thereof), which is either a sister project of wikipedia, so wikibooks wikisource, etc. or isn't a sister project of wikipedia DHUN-}
78
80
7981 data WikiUrlData = BaseUrl WikiBaseUrl
8082 | UrlInfo WikiUrlInfo
8183 deriving (Show, Eq)
82
84
8385 {-DHUN| represents an URL to a page on a wiki DHUN-}
84
86
8587 data WikiLinkInfo = WikiLinkInfo{urldata :: WikiUrlData,
8688 page :: String}
8789 deriving (Show, Eq)
00 {-DHUN| module for parallel computations. The paralellism is reached by forking threads with liftIO. When starting a thread and empty MVar is returned immediately. The MVar is filled on completing of the thread. Normal function can be lifted to threaded function operation on MVar of the parameters instead of the parameters themself. Thus it is possible to define dependencies of function on the return values of other functions and let the order of excecution evolove automatically, so a function is called as soon as all its parameters have been calculated. All functions considered in this module are understood to have an IO return type. This module is mainly used for paraller downloading from the web. DHUN-}
11 module Parallel where
22 import Control.Concurrent.MVar
3
3
44 {-DHUN| takes a function of return type IO which takes on parameter and an MVar containing at the same type as the parameter of the function and returns and IO action containing an MVar containing the same type as the tyoe contained in the IO Action returned by the function. This function retuns immediately and passed an empty MVar as return type. As soon as the MVar containing the same type as the parameter of the function is ready for reading the the function is executed in a new thread. As soon as the function return a value, the it is written into the returned MVar. DHUN-}
5
5
66 (<$$>) :: MVar Int -> (a -> IO b) -> MVar a -> IO (MVar b)
77 (<$$>) vv f x
88 = do var <- newEmptyMVar
1212 = do xx <- readMVar x
1313 result <- f xx
1414 putMVar v result
15
15
1616 {-DHUN| takes a value and returns an IO action that contain a filled MVar conatining the value DHUN-}
17
17
1818 base :: MVar Int -> a -> IO (MVar a)
19 base v x = ((ppure v). return) x
20
19 base v x = ((ppure v) . return) x
20
2121 {-DHUN| takes an IO action and return an MVar containing the and IO action the returns an MVar containing the same type as contained by the IO action passed to this function. The IO action passed as first parameter to this function is executed in a new thread. As soon as it finishes its result is written into the returned MVar DHUN-}
22
23 ppure :: MVar Int -> IO a -> IO (MVar a)
22
23 ppure :: MVar Int -> IO a -> IO (MVar a)
2424 ppure vv x
2525 = do var <- newEmptyMVar
2626 _ <- myFork vv (go var)
3131
3232 {-DHUN| alias to switch between forkOS and forkIO for testing DHUN-}
3333
34 myFork :: MVar Int -> IO () -> IO ()
34 myFork :: MVar Int -> IO () -> IO ()
3535 myFork _ x = x
3636
37 {-DHUN| takes a function which takes a list and return an IO action as first parameter. It takes a list of MVar containing the same type as the type contained in the list as second parameter. It returnes an empty MVar immediately. As soon as all MVar in the list given as second parameter could be read, the function is started in a new thread. As soon as it finishes, the returend MVar is filled with the value returend by the function DHUN-}
3738
38
39 {-
40 do i<-takeMVar v
41 if i<1
42 then do putMVar v $!! (i+1)
43 forkIO $ do x
44 ii<-takeMVar v
45 putMVar v $!! (ii-1)
46 else do putMVar v $!! i
47 threadDelay 1000
48 myFork v x
49
50 -}
51 {-DHUN| takes a function which takes a list and return an IO action as first parameter. It takes a list of MVar containing the same type as the type contained in the list as second parameter. It returnes an empty MVar immediately. As soon as all MVar in the list given as second parameter could be read, the function is started in a new thread. As soon as it finishes, the returend MVar is filled with the value returend by the function DHUN-}
52
5339 liftList :: MVar Int -> ([a] -> IO b) -> [MVar a] -> IO (MVar b)
5440 liftList vv f x
5541 = do var <- newEmptyMVar
5945 = do xx <- mapM readMVar x
6046 result <- f xx
6147 putMVar v result
62
48
6349 {-DHUN| the same as liftList. Only difference is that the seconds parameter is not a list of MVars containing values but an MVar containing a list of MVars containing values. So an addinal step is made to read the out MVar DHUN-}
64
65 liftList2 :: MVar Int -> ([a] -> IO b) -> MVar [MVar a] -> IO (MVar b)
50
51 liftList2 ::
52 MVar Int -> ([a] -> IO b) -> MVar [MVar a] -> IO (MVar b)
6653 liftList2 vv f x
6754 = do var <- newEmptyMVar
6855 _ <- myFork vv (go var)
7259 xx <- mapM readMVar xxx
7360 result <- f xx
7461 putMVar v result
75
62
7663 {-DHUN| prefix version of the angle bracked dollar operator described above DHUN-}
77
78 liftA :: MVar Int -> (a -> IO b) -> MVar a -> IO (MVar b)
64
65 liftA :: MVar Int -> (a -> IO b) -> MVar a -> IO (MVar b)
7966 liftA v f x = (<$$>) v f x
80
67
8168 {-DHUN| same as liftA. Just function must have exactly two parameter instead of only one DHUN-}
82
83 liftA2 :: MVar Int -> (a -> b -> IO c) -> MVar a -> MVar b -> IO (MVar c)
69
70 liftA2 ::
71 MVar Int -> (a -> b -> IO c) -> MVar a -> MVar b -> IO (MVar c)
8472 liftA2 vv f x y
8573 = do var <- newEmptyMVar
8674 _ <- myFork vv (go var)
9078 yy <- readMVar y
9179 result <- f xx yy
9280 putMVar v result
93
81
9482 {-DHUN| same as liftA. Just function must have exactly three parameter instead of only one DHUN-}
95
83
9684 liftA3 ::
97 MVar Int -> (a -> b -> c -> IO d) -> MVar a -> MVar b -> MVar c -> IO (MVar d)
85 MVar Int ->
86 (a -> b -> c -> IO d) -> MVar a -> MVar b -> MVar c -> IO (MVar d)
9887 liftA3 vv f x y z
9988 = do var <- newEmptyMVar
10089 _ <- myFork vv (go var)
0
01 module SelfTest where
1 runSelfTest::Integer->Integer->IO ()
2
3 runSelfTest :: Integer -> Integer -> IO ()
24 runSelfTest _ _ = return ()
88 import System.IO
99 import Control.Monad.Trans
1010 import Data.ByteString.UTF8
11 import Data.ByteString.Lazy hiding (pack, reverse, takeWhile, dropWhile)
11 import Data.ByteString.Lazy
12 hiding (pack, reverse, takeWhile, dropWhile)
1213 import Control.Concurrent
1314 import Control.Monad.State
1415 import ImperativeState hiding (name)
1516 import Hex
1617 import Data.Map.Strict
17 import Data.Maybe
18 import Data.Maybe
1819 import System.Process hiding (cwd)
1920 import Data.List
2021 import Text.Blaze.Internal
2324 import Control.DeepSeq
2425 import System.Exit
2526
26 mytext ::String-> H.Html
27 mytext :: String -> H.Html
2728 mytext = text . pack
2829
29 pageFrame :: H.Html-> H.Html
30 pageFrame input = H.table H.! A.style "min-width:100%;border:0px;padding:0;border-spacing:0px 0px;" $
31 do H.tr H.! A.style "min-width:100%;border:0px;padding:0" $
32 do H.td H.! A.style "background-color:#444444" H.! A.colspan "3" $
33 do H.div H.! A.style "color:white;font-size:60;border:20px;font-family:Arial,Helvetica,sans-serif;" $ H.b $
34 do mytext "MediaWiki2LaTeX"
35 H.tr H.! A.style "min-width:100%;border:0px;padding:0" $
36 do H.td H.! A.style "background-color:#222222" H.! A.colspan "3" $ H.div H.! A.style "padding: 5px;padding-left: 10px" $ H.div H.! A.style "padding:3px;display:inline;background-color:#595959;border-radius:3px" $ H.div H.! A.style "font-family: times, serif;display:inline;font-size:20;color:#cccccc" $ "Home"
37 H.tr $
38 do H.td $ H.table H.! A.style "padding:20px" $
39 do H.tr $ infoBox "MediaWiki to LaTeX" "MediaWiki to LaTeX converts Wiki pages to LaTeX and PDF. It works with any website running MediaWiki, especially Wikipedia and Wikibooks. MediaWiki to LaTeX is purely written in the purely functional language Haskell. It was mainly devolved by Dirk Hünniger. The source code is freely available under the terms of the GNU General Public License. Binary releases for the most common operating systems are available for download. The Debian package is maintained by Georges Khaznadar."
40 H.tr $ infoBox "Contact" $
41 do mytext "Dirk Hünniger"
42 H.br
43 mytext "Emil Schweitzer Str. S 10"
44 H.br
45 mytext "D-47506 Neukirchen Vluyn"
46 H.br
47 mytext "Germany"
48 H.br
49 mytext "Telephone ++49-2845-3799993"
50 H.br
51 H.a H.! A.href "mailto:dirk.hunniger@googlemail.com?Subject=MediaWiki2LaTeX" $ "dirk.hunniger@googlemail.com"
52 input
53 H.td $ H.table H.! A.style "padding:20px" $
54 do H.tr $ infoBox "Documentation and Links" $
55 do H.ul $
56 do H.li $ H.a H.! A.href "http://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf/manual" $ "Users Manual"
57 H.li $ H.a H.! A.href "http://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf" $ "Project Wiki Page"
58 H.li $ H.div H.! A.style "font-size:x-large" $ do
59 H.b $ H.a H.! A.href "https://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf" $ "Download Full Version for Free"
60
61 H.li $ H.a H.! A.href "http://sourceforge.net/p/wb2pdf/git/ci/master/tree/" $ "Git Sourcecode Repository"
62 H.li $ H.a H.! A.href "http://de.wikibooks.org/wiki/Benutzer_Diskussion:Dirk_Huenniger/wb2pdf/Requests" $ "Bug Tracker"
63 H.tr $ infoBox "Web Interface" $
64 do mytext "The web interface was artistically designed by Thomas Blume using "
65 H.a H.! A.href "http://www.yaml.org/" $ "YAML"
66 mytext ". It was technically implemented by Dirk Hünniger using the Blaze Html Framework in Haskell. Important ideas on the design of the user interface were contributed by Georges Khaznadar and Martin Wermers. The server this interface is running on is run by a private person, who has to cover the costs, so "
67 H.b "please consider downloading the full version for free"
68 mytext " and running at on your own hardware instead of using this server."
69
70
71
72 infoBox :: String->H.Html->H.Html
73 infoBox heading content = H.td $ H.div H.! A.style "padding:10px" $H.div H.! A.style "text-align:jusitfy;text-justify:inter-word;padding:20px;border-color:#aaaaaa;background-color:#f4f4f4;border-radius:5px;border-width:1px;border-style:solid;font-family:Arial,Helvetica,sans-serif" $
74 do H.div H.! A.style "font-size:30" $ H.b ( mytext heading)
75 H.br
76 H.div H.! A.style "text-align:jusitfy" $content
77
78
79
80 data ProgressInfo = ProgressInfo { progress :: Double, filename:: Maybe String , startTime::Double, barValue::Double, lastStepTime::Double, lastRuntime::Double, extension::String, failed::Bool}
81
82
83 instance NFData ProgressInfo
84 where
85 rnf a = a `seq` ()
30 pageFrame :: H.Html -> H.Html
31 pageFrame input
32 = H.table H.!
33 A.style
34 "min-width:100%;border:0px;padding:0;border-spacing:0px 0px;"
35 $
36 do H.tr H.! A.style "min-width:100%;border:0px;padding:0" $
37 do H.td H.! A.style "background-color:#444444" H.! A.colspan "3" $
38 do H.div H.!
39 A.style
40 "color:white;font-size:60;border:20px;font-family:Arial,Helvetica,sans-serif;"
41 $ H.b $ do mytext "MediaWiki2LaTeX"
42 H.tr H.! A.style "min-width:100%;border:0px;padding:0" $
43 do H.td H.! A.style "background-color:#222222" H.! A.colspan "3" $
44 H.div H.! A.style "padding: 5px;padding-left: 10px" $
45 H.div H.!
46 A.style
47 "padding:3px;display:inline;background-color:#595959;border-radius:3px"
48 $
49 H.div H.!
50 A.style
51 "font-family: times, serif;display:inline;font-size:20;color:#cccccc"
52 $ "Home"
53 H.tr $
54 do H.td $
55 H.table H.! A.style "padding:20px" $
56 do H.tr $
57 infoBox "MediaWiki to LaTeX"
58 "MediaWiki to LaTeX converts Wiki pages to LaTeX and PDF. It works with any website running MediaWiki, especially Wikipedia and Wikibooks. MediaWiki to LaTeX is purely written in the purely functional language Haskell. It was mainly devolved by Dirk H\252nniger. The source code is freely available under the terms of the GNU General Public License. Binary releases for the most common operating systems are available for download. The Debian package is maintained by Georges Khaznadar."
59 H.tr $
60 infoBox "Contact" $
61 do mytext "Dirk H\252nniger"
62 H.br
63 mytext "Emil Schweitzer Str. S 10"
64 H.br
65 mytext "D-47506 Neukirchen Vluyn"
66 H.br
67 mytext "Germany"
68 H.br
69 mytext "Telephone ++49-2845-3799993"
70 H.br
71 H.a H.!
72 A.href
73 "mailto:dirk.hunniger@googlemail.com?Subject=MediaWiki2LaTeX"
74 $ "dirk.hunniger@googlemail.com"
75 input
76 H.td $
77 H.table H.! A.style "padding:20px" $
78 do H.tr $
79 infoBox "Documentation and Links" $
80 do H.ul $
81 do H.li $
82 H.a H.!
83 A.href
84 "http://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf/manual"
85 $ "Users Manual"
86 H.li $
87 H.a H.!
88 A.href
89 "http://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf"
90 $ "Project Wiki Page"
91 H.li $
92 H.div H.! A.style "font-size:x-large" $
93 do H.b $
94 H.a H.!
95 A.href
96 "https://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf"
97 $ "Download Full Version for Free"
98 H.li $
99 H.a H.!
100 A.href "http://sourceforge.net/p/wb2pdf/git/ci/master/tree/"
101 $ "Git Sourcecode Repository"
102 H.li $
103 H.a H.!
104 A.href
105 "http://de.wikibooks.org/wiki/Benutzer_Diskussion:Dirk_Huenniger/wb2pdf/Requests"
106 $ "Bug Tracker"
107 H.tr $
108 infoBox "Web Interface" $
109 do mytext
110 "The web interface was artistically designed by Thomas Blume using "
111 H.a H.! A.href "http://www.yaml.org/" $ "YAML"
112 mytext
113 ". It was technically implemented by Dirk H\252nniger using the Blaze Html Framework in Haskell. Important ideas on the design of the user interface were contributed by Georges Khaznadar and Martin Wermers. The server this interface is running on is run by a WMF, who have to cover the costs, so "
114 H.b "please consider downloading the full version for free"
115 mytext
116 " and running at on your own hardware instead of using this server."
117
118 infoBox :: String -> H.Html -> H.Html
119 infoBox heading content
120 = H.td $
121 H.div H.! A.style "padding:10px" $
122 H.div H.!
123 A.style
124 "text-align:jusitfy;text-justify:inter-word;padding:20px;border-color:#aaaaaa;background-color:#f4f4f4;border-radius:5px;border-width:1px;border-style:solid;font-family:Arial,Helvetica,sans-serif"
125 $
126 do H.div H.! A.style "font-size:30" $ H.b (mytext heading)
127 H.br
128 H.div H.! A.style "text-align:jusitfy" $ content
129
130 data ProgressInfo = ProgressInfo{progress :: Double,
131 filename :: Maybe String, startTime :: Double, barValue :: Double,
132 lastStepTime :: Double, lastRuntime :: Double, extension :: String,
133 failed :: Bool}
134
135 instance NFData ProgressInfo where
136 rnf a = a `seq` ()
86137
87138 {-DHUN| IO action to run the server DHUN-}
88
139
89140 serve :: Int -> IO ()
90 serve p = do a<-newMVar Data.Map.Strict.empty
91 simpleHTTP nullConf{port = p, timeout = 100000} $ msum [dirs "progress" $ path $ \subject -> progressBar a subject, dirs "file" $ path $ \subject -> fileFrame a subject, dirs "fill" $ path $ \subject -> formPage a subject, formPage a ""]
92
93
94
141 serve p
142 = do a <- newMVar Data.Map.Strict.empty
143 simpleHTTP nullConf{port = p, timeout = 100000} $
144 msum
145 [dirs "progress" $ path $ \ subject -> progressBar a subject,
146 dirs "file" $ path $ \ subject -> fileFrame a subject,
147 dirs "fill" $ path $ \ subject -> formPage a subject,
148 formPage a ""]
95149
96150 {-DHUN| template for the start page of the server DHUN-}
97
151
98152 template :: Text -> H.Html -> Response
99153 template title body
100 = toResponse $ H.docTypeHtml $
101 do H.head $ do H.meta H.! A.charset "utf-8"
102 H.title (H.toHtml title)
154 = toResponse $
155 H.docTypeHtml $
156 do H.head $
157 do H.meta H.! A.charset "utf-8"
158 H.title (H.toHtml title)
103159 H.body $ do body
104
160
105161 {-DHUN| takes an url to a wiki article and a filename for the temporary file to be created and return a shell command to run mediawiki2latex to compile a pdf document from the given url and write it to the given temporary filename DHUN-}
106
162
107163 mainAction :: FullConfig -> IO String
108164 mainAction oldcfg
109165 = do cwd <- getCurrentDirectory
110 let cfg = oldcfg {mainPath = cwd}
166 let cfg = oldcfg{mainPath = cwd}
111167 return (hex (show cfg))
112
168
113169 {-DHUN| main webpage of the server containing a from with an entry for an url to a wiki article with returns the pdf on submission of the form DHUN-}
114
170
115171 gogo :: Eq b => [(a, b)] -> b -> b
116 gogo ((_,v):xs) vv = if vv==v then
117 case xs of
118 (h:_)->(snd h)
119 _->v
120 else
121 gogo xs vv
172 gogo ((_, v) : xs) vv
173 = if vv == v then
174 case xs of
175 (h : _) -> (snd h)
176 _ -> v
177 else gogo xs vv
122178 gogo [] vv = vv
123179
124 progressBar :: MVar (Map Int ProgressInfo) -> String->ServerPart Response
180 progressBar ::
181 MVar (Map Int ProgressInfo) -> String -> ServerPart Response
125182 progressBar t sub
126 = do let theIndex=case (reads (Data.List.takeWhile (/= '.') sub)) :: [(Int,String)] of
127 ((k,_):_)->k
128 _ -> 1
129 m<-liftIO (takeMVar t)
130 let uu = (fromMaybe progressInfoZero (Data.Map.Strict.lookup theIndex m))
131 let nextKnownRelativeProgressToBeReached = gogo mylist (progress uu)
132 let lastReachedKnownRelativeProgress= progress uu
133 let timeOfLastReachedKnownRelativeProgress= lastStepTime uu
134 ttime <- liftIO getPOSIXTime
135 let time= (realToFrac ttime)
136 let expectedRuntime=if lastReachedKnownRelativeProgress<0.01 then 60.0 else (time-(startTime uu))/lastReachedKnownRelativeProgress
137 let runtime=time-startTime uu
138 let p=lastReachedKnownRelativeProgress+(1-exp(-(time-timeOfLastReachedKnownRelativeProgress)/(expectedRuntime*(nextKnownRelativeProgressToBeReached-lastReachedKnownRelativeProgress))))*(nextKnownRelativeProgressToBeReached-lastReachedKnownRelativeProgress)
139 let oldProgressBarValue = barValue uu
140 let progressBarValue=oldProgressBarValue+(max((p-oldProgressBarValue)*(runtime-(lastRuntime uu))/5.0) 0.0)
141 let prog=if lastReachedKnownRelativeProgress==1.0 then 1000 else if failed uu then 0 else round(progressBarValue*1000.0)::Integer
142 liftIO $ if not (member theIndex m) then putMVar t m else putMVar t $!! (Data.Map.Strict.insert theIndex uu{barValue=progressBarValue,lastRuntime=runtime} m )
143 case filename uu of
144 Nothing -> do method GET
145 ok $
146 template "Converting" $ pageFrame $ infoBox (if not (member theIndex m) then "Not enough resources availiable to process your request! Your request has been dropped! Please download the full version for free and run it on your own computer!" else (if not (failed uu) then"Conversion Running" else "Conversion Failed due to timeout or non zero exit code")) $
147 do H.meta H.! A.httpEquiv "refresh" H.! A.content "1"
148 H.table $
149 do H.tr $
150 do H.td $
151 do H.progress H.! A.style wwidth H.! A.value (stringValue (show (prog))) H.! A.max "1000" $ ""
152 Just _ -> do method GET
153 ok $
154 template "Conversion Finished" $ pageFrame $ infoBox "Conversion Finished" $
155 do H.meta H.! A.httpEquiv "refresh" H.! A.content (stringValue ("0;url=/file/"++(show theIndex)++"."++(extension uu)))
156 H.table $
157 do H.tr $
158 do H.td $
159 do H.progress H.! A.style wwidth H.! A.value (stringValue (show (1000::Integer))) H.! A.max "1000" $ ""
160
161
162 fileFrame :: MVar (Map Int ProgressInfo) -> String->ServerPart Response
163 fileFrame t sub
164 = do let theIndex=case (reads (Data.List.takeWhile (/= '.') sub)) :: [(Int,String)] of
165 ((k,_):_)->k
166 _ -> 1
167 m<-liftIO (takeMVar t)
168 liftIO $ putMVar t m
169 let uu = (fromMaybe progressInfoZero (Data.Map.Strict.lookup theIndex m))
170 case filename uu of
171 Just x-> do f <- serveFile (guessContentTypeM mimeTypes) x
172 _ <- liftIO (forkIO (do threadDelay 2000000
173 removeFile x))
174 return f
175 Nothing -> do method GET
176 ok $
177 template "Conversion Failed" $ pageFrame $ infoBox "Conversion Failed" $ (mytext "We are sorry the converion failed, please contact our us")
178
183 = do let theIndex
184 = case
185 (reads (Data.List.takeWhile (/= '.') sub)) :: [(Int, String)] of
186 ((k, _) : _) -> k
187 _ -> 1
188 m <- liftIO (takeMVar t)
189 let uu
190 = (fromMaybe progressInfoZero (Data.Map.Strict.lookup theIndex m))
191 let nextKnownRelativeProgressToBeReached
192 = gogo mylist (progress uu)
193 let lastReachedKnownRelativeProgress = progress uu
194 let timeOfLastReachedKnownRelativeProgress = lastStepTime uu
195 ttime <- liftIO getPOSIXTime
196 let time = (realToFrac ttime)
197 let expectedRuntime
198 = if lastReachedKnownRelativeProgress < 1.0e-2 then 60.0 else
199 (time - (startTime uu)) / lastReachedKnownRelativeProgress
200 let runtime = time - startTime uu
201 let p = lastReachedKnownRelativeProgress +
202 (1 -
203 exp
204 (-(time - timeOfLastReachedKnownRelativeProgress) /
205 (expectedRuntime *
206 (nextKnownRelativeProgressToBeReached -
207 lastReachedKnownRelativeProgress))))
208 *
209 (nextKnownRelativeProgressToBeReached -
210 lastReachedKnownRelativeProgress)
211 let oldProgressBarValue = barValue uu
212 let progressBarValue
213 = oldProgressBarValue +
214 (max
215 ((p - oldProgressBarValue) * (runtime - (lastRuntime uu)) / 5.0)
216 0.0)
217 let prog
218 = if lastReachedKnownRelativeProgress == 1.0 then 1000 else
219 if failed uu then 0 else
220 round (progressBarValue * 1000.0) :: Integer
221 liftIO $
222 if not (member theIndex m) then putMVar t m else
223 putMVar t $!!
224 (Data.Map.Strict.insert theIndex
225 uu{barValue = progressBarValue, lastRuntime = runtime}
226 m)
227 case filename uu of
228 Nothing -> do method GET
229 ok $
230 template "Converting" $
231 pageFrame $
232 infoBox
233 (if not (member theIndex m) then
234 "Not enough resources availiable to process your request! Your request has been dropped! Please download the full version for free and run it on your own computer!"
235 else
236 (if not (failed uu) then "Conversion Running" else
237 "Conversion Failed due to timeout or non zero exit code"))
238 $
239 do H.meta H.! A.httpEquiv "refresh" H.! A.content "1"
240 H.table $
241 do H.tr $
242 do H.td $
243 do H.progress H.! A.style wwidth H.!
244 A.value (stringValue (show (prog)))
245 H.! A.max "1000"
246 $ ""
247 Just _ -> do method GET
248 ok $
249 template "Conversion Finished" $
250 pageFrame $
251 infoBox "Conversion Finished" $
252 do H.meta H.! A.httpEquiv "refresh" H.!
253 A.content
254 (stringValue
255 ("0;url=/file/" ++
256 (show theIndex) ++ "." ++ (extension uu)))
257 H.table $
258 do H.tr $
259 do H.td $
260 do H.progress H.! A.style wwidth H.!
261 A.value (stringValue (show (1000 :: Integer)))
262 H.! A.max "1000"
263 $ ""
264
265 fileFrame ::
266 MVar (Map Int ProgressInfo) -> String -> ServerPart Response
267 fileFrame t sub
268 = do let theIndex
269 = case
270 (reads (Data.List.takeWhile (/= '.') sub)) :: [(Int, String)] of
271 ((k, _) : _) -> k
272 _ -> 1
273 m <- liftIO (takeMVar t)
274 liftIO $ putMVar t m
275 let uu
276 = (fromMaybe progressInfoZero (Data.Map.Strict.lookup theIndex m))
277 case filename uu of
278 Just x -> do f <- serveFile (guessContentTypeM mimeTypes) x
279 _ <- liftIO
280 (forkIO
281 (do threadDelay 2000000
282 removeFile x))
283 return f
284 Nothing -> do method GET
285 ok $
286 template "Conversion Failed" $
287 pageFrame $
288 infoBox "Conversion Failed" $
289 (mytext "We are sorry the converion failed, please contact our us")
179290
180291 currentlyrunning :: Map Int ProgressInfo -> Bool
181 currentlyrunning m = not (Data.List.all (\x->(isJust (filename x))||(failed x)) (Data.Map.Strict.elems m))
182
292 currentlyrunning m
293 = not
294 (Data.List.all (\ x -> (isJust (filename x)) || (failed x))
295 (Data.Map.Strict.elems m))
296
183297 wwidth2 :: [Char]
184 wwidth2="width:400px"
298 wwidth2 = "width:400px"
299
185300 wwidth :: AttributeValue
186 wwidth=stringValue wwidth2
187 formPage :: MVar (Map Int ProgressInfo) -> String -> ServerPart Response
301 wwidth = stringValue wwidth2
302
303 formPage ::
304 MVar (Map Int ProgressInfo) -> String -> ServerPart Response
188305 formPage m s
189306 = do decodeBody (defaultBodyPolicy "/tmp/" 0 1000000 1000000)
190307 msum [viewForm, processForm]
191 where
192 viewForm :: ServerPart Response
308 where viewForm :: ServerPart Response
193309 viewForm
194310 = do method GET
195311 ok $
196 template "MediaWiki2LaTeX" $ pageFrame $ infoBox "Create Your PDF" $
197 do mytext "To compile MediaWiki pages via LaTeX to PDF choose any URL from "
198 H.a H.! A.href "http://en.wikipedia.org/" $ "Wikibooks"
199 mytext " or any other website running MediaWiki. If you intent to compile a wikibook make sure you use the link to the printable version of the book."
200 H.br
201 H.div H.! A.style "font-size:20" $ H.b $ mytext "Send your request"
202 H.form H.! A.action "/form" H.! A.enctype "multipart/form-data" H.!
203 A.method "POST"
204 $ H.div $
205 do
206 H.table $
207 do H.tr $
208 do H.td "URL to the Wiki to be converted"
209 H.td $
210 do H.input H.! A.style wwidth H.! A.type_ "text" H.! A.id "msg" H.! A.name "msg" H.! A.value (stringValue s)
211 H.tr $
212 do H.td "Output Format"
213 H.td $ do
214 H.select H.! A.style wwidth H.! A.name "output" $
215 do H.option H.! A.value "pdf" $ "Compiled PDF"
216 H.option H.! A.value "zip" $ "Source ZIP"
217 H.option H.! A.value "epub" $ "EPUB File"
218 H.option H.! A.value "odt" $ "ODT File (Word Processor)"
219 H.tr $
220 do H.td "Template expansion"
221 H.td $ do
222 H.select H.! A.style wwidth H.! A.name "expansion" $
223 do H.option H.! A.value "Print" $ "Print"
224 H.option H.! A.value "BookMode" $ "BookMode"
225 H.option H.! A.value "MediaWiki" $ "Mediawiki"
226 H.option H.! A.value "Normal" $ "Normal"
227 H.tr $
228 do H.td "Paper"
229 H.td $ do
230 H.select H.! A.style wwidth H.! A.name "paper" $
231 do H.option H.! A.value "A4" $ "A4"
232 H.option H.! A.value "A5" $ "A5"
233 H.option H.! A.value "B5" $ "B5"
234 H.option H.! A.value "letter" $ "letter"
235 H.option H.! A.value "legal" $ "legal"
236 H.option H.! A.value "executive" $ "executive"
237 H.tr $
238 do H.td "Vector graphics"
239 H.td $ do
240 H.select H.! A.style wwidth H.! A.name "vector" $
241 do H.option H.! A.value "Rasterize" $ "Rasterize"
242 H.option H.! A.value "Keep Vector Form" $ "Keep Vector Form"
243 H.tr $
244 do H.td ""
245 H.td $
246 do H.input H.! A.style (stringValue (wwidth2++";height:60px")) H.! A.type_ "submit" H.! A.value "Start!"
247 H.div H.! A.style "text-decoration:underline" $ mytext "Please note:"
248 H.br
249 mytext "The LaTeX source code will be compiled several times to make sure all references are resolved. The whole process will usually take about one minute but can take up to an hour depending on the extend of your request."
250
251
252
312 template "MediaWiki2LaTeX" $
313 pageFrame $
314 infoBox "Create Your PDF" $
315 do mytext
316 "To compile MediaWiki pages via LaTeX to PDF choose any URL from "
317 H.a H.! A.href "http://en.wikipedia.org/" $ "Wikibooks"
318 mytext
319 " or any other website running MediaWiki. If you intent to compile a wikibook make sure you use the link to the printable version of the book."
320 H.br
321 H.div H.! A.style "font-size:20" $ H.b $ mytext "Send your request"
322 H.form H.! A.action "/form" H.! A.enctype "multipart/form-data" H.!
323 A.method "POST"
324 $
325 H.div $
326 do H.table $
327 do H.tr $
328 do H.td "URL to the Wiki to be converted"
329 H.td $
330 do H.input H.! A.style wwidth H.! A.type_ "text" H.!
331 A.id "msg"
332 H.! A.name "msg"
333 H.! A.value (stringValue s)
334 H.tr $
335 do H.td "Output Format"
336 H.td $
337 do H.select H.! A.style wwidth H.! A.name "output" $
338 do H.option H.! A.value "pdf" $ "Compiled PDF"
339 H.option H.! A.value "zip" $ "Source ZIP"
340 H.option H.! A.value "epub" $ "EPUB File"
341 H.option H.! A.value "odt" $
342 "ODT File (Word Processor)"
343 H.tr $
344 do H.td "Template expansion"
345 H.td $
346 do H.select H.! A.style wwidth H.! A.name "expansion" $
347 do H.option H.! A.value "Print" $ "Print"
348 H.option H.! A.value "BookMode" $ "BookMode"
349 H.option H.! A.value "MediaWiki" $ "Mediawiki"
350 H.option H.! A.value "Normal" $ "Normal"
351 H.tr $
352 do H.td "Paper"
353 H.td $
354 do H.select H.! A.style wwidth H.! A.name "paper" $
355 do H.option H.! A.value "A4" $ "A4"
356 H.option H.! A.value "A5" $ "A5"
357 H.option H.! A.value "B5" $ "B5"
358 H.option H.! A.value "letter" $ "letter"
359 H.option H.! A.value "legal" $ "legal"
360 H.option H.! A.value "executive" $ "executive"
361 H.tr $
362 do H.td "Vector graphics"
363 H.td $
364 do H.select H.! A.style wwidth H.! A.name "vector" $
365 do H.option H.! A.value "Rasterize" $ "Rasterize"
366 H.option H.! A.value "Keep Vector Form" $
367 "Keep Vector Form"
368 H.tr $
369 do H.td ""
370 H.td $
371 do H.input H.!
372 A.style (stringValue (wwidth2 ++ ";height:60px"))
373 H.! A.type_ "submit"
374 H.! A.value "Start!"
375 H.div H.! A.style "text-decoration:underline" $
376 mytext "Please note:"
377 H.br
378 do mytext "The LaTeX source code will be compiled several times to make sure all references are resolved. The whole process will usually take about one minute."
379 H.br
380 H.br
381 H.div H.! A.style "font-size:large" $ H.b "There is a time limit of one hour (≈ 200 pages in PDF) on this server!"
382 H.br
383 mytext "Requests taking longer will be terminated and a \"Conversion Failed due to timeout or non zero exit code\" message will be displayed. There is no limit in the downloadable versions shown on the right."
384
253385 getRunmode "Print" = ImperativeState.HTML
254 getRunmode "MediaWiki" = ImperativeState.ExpandedTemplates
386 getRunmode "MediaWiki" = ImperativeState.ExpandedTemplates
255387 getRunmode "Normal" = ImperativeState.StandardTemplates
256388 getRunmode "BookMode" = ImperativeState.Book
257389 getRunmode _ = ImperativeState.HTML
258
390
259391 processForm :: ServerPart Response
260392 processForm
261393 = do msg <- lookBS "msg"
264396 expansion <- lookBS "expansion"
265397 output <- lookBS "output"
266398 zzz <- liftIO $
267 do tmpDir <- getTemporaryDirectory
268 (name, handle) <- openTempFile tmpDir ("MediaWiki2LaTeX"++if (toString (toStrict output))== "zip" then "zip" else if (toString (toStrict output))== "epub" then "epub" else if (toString (toStrict output))== "odt" then "odt" else "pdf")
269 hClose handle >> removeFile name
270 act <- mainAction FullConfig{selfTest=Nothing,headers = Nothing, resolution = 300,
271 outputFilename = name, inputUrl = (toString (toStrict msg)),
272 runMode = if (isInfixOf ("Book:"::[Char]) (toString (toStrict msg))) then getRunmode ("BookMode"::[Char]) else getRunmode (toString (toStrict expansion)), paper = (toString (toStrict paperOpt)), vector = (toString (toStrict vectorOpt))=="Keep Vector Form",
273 ImperativeState.copy = Nothing, mainPath = "", server = Nothing,outputType=if (toString (toStrict output)) == "zip" then ImperativeState.ZipArchive else if (toString (toStrict output)) == "epub" then ImperativeState.EPubFile else if (toString (toStrict output)) == "odt" then ImperativeState.OdtFile else ImperativeState.PlainPDF, compile=Nothing, imgctrb=Nothing}
274 yy<-newEmptyMVar
275 mm<-takeMVar m
276 _<-if (not (currentlyrunning mm))
277 then
278 do _<-forkIO $ do (i,o,e,h) <- runInteractiveCommand ("mediawiki2latex -x "++act)
279 tt<-getPOSIXTime
280 let ss = (if (toString (toStrict output))== "zip" then "zip" else if (toString (toStrict output))== "epub" then "epub" else if (toString (toStrict output))== "odt" then "odt" else "pdf")
281 zz<-forkProgressDriver o m name ss
282 putMVar yy zz
283 ex h i e ((realToFrac tt)+3600.0) m zz ss
284 return ()
285 else
286 do putMVar yy (-1)
287 return ()
288 putMVar m mm
289
290 zzzz<-takeMVar yy
291 return zzzz
399 do tmpDir <- getTemporaryDirectory
400 (name, handle) <- openTempFile tmpDir
401 ("MediaWiki2LaTeX" ++
402 if (toString (toStrict output)) == "zip" then
403 "zip" else
404 if (toString (toStrict output)) == "epub" then
405 "epub" else
406 if (toString (toStrict output)) == "odt" then
407 "odt" else "pdf")
408 hClose handle >> removeFile name
409 act <- mainAction
410 FullConfig{selfTest = Nothing, headers = Nothing,
411 resolution = 300, outputFilename = name,
412 inputUrl = (toString (toStrict msg)),
413 runMode =
414 if
415 (isInfixOf ("Book:" :: [Char])
416 (toString (toStrict msg)))
417 then getRunmode ("BookMode" :: [Char]) else
418 getRunmode (toString (toStrict expansion)),
419 paper = (toString (toStrict paperOpt)),
420 vector =
421 (toString (toStrict vectorOpt)) ==
422 "Keep Vector Form",
423 ImperativeState.copy = Nothing, mainPath = "",
424 server = Nothing,
425 outputType =
426 if (toString (toStrict output)) == "zip" then
427 ImperativeState.ZipArchive else
428 if (toString (toStrict output)) == "epub" then
429 ImperativeState.EPubFile else
430 if (toString (toStrict output)) == "odt" then
431 ImperativeState.OdtFile else
432 ImperativeState.PlainPDF,
433 compile = Nothing, imgctrb = Nothing}
434 yy <- newEmptyMVar
435 mm <- takeMVar m
436 _ <- if (not (currentlyrunning mm)) then
437 do _ <- forkIO $
438 do (i, o, e, h) <- runInteractiveCommand
439 ("mediawiki2latex -x " ++ act)
440 tt <- getPOSIXTime
441 let ss
442 = (if (toString (toStrict output)) == "zip"
443 then "zip" else
444 if (toString (toStrict output)) == "epub"
445 then "epub" else
446 if (toString (toStrict output)) == "odt"
447 then "odt" else "pdf")
448 zz <- forkProgressDriver o m name ss
449 putMVar yy zz
450 ex h i e ((realToFrac tt) + 3600.0) m zz ss
451 return ()
452 else
453 do putMVar yy (-1)
454 return ()
455 putMVar m mm
456 zzzz <- takeMVar yy
457 return zzzz
292458 method POST
293 ok $ toResponse $ template "Redirect" $
294 do H.meta H.! A.httpEquiv "refresh" H.! A.content (stringValue ("0;url=/progress/"++(show zzz)++"."++(if (toString (toStrict output))== "zip" then "html" else "html")))
295
296 ex :: ProcessHandle -> Handle -> Handle -> Double -> MVar (Map Int ProgressInfo)-> Int -> String -> IO ()
459 ok $
460 toResponse $
461 template "Redirect" $
462 do H.meta H.! A.httpEquiv "refresh" H.!
463 A.content
464 (stringValue
465 ("0;url=/progress/" ++
466 (show zzz) ++
467 "." ++
468 (if (toString (toStrict output)) == "zip" then "html" else
469 "html")))
470
471 ex ::
472 ProcessHandle ->
473 Handle ->
474 Handle ->
475 Double -> MVar (Map Int ProgressInfo) -> Int -> String -> IO ()
297476 ex h i e t m n s
298 = do tt<-getPOSIXTime
299 threadDelay 100
300 if (realToFrac tt) > t
301 then
302 do System.IO.hPutStr i "\n"
303 mm<-takeMVar m
304 putMVar m (Data.Map.Strict.update (\x->Just x{failed=True}) n mm)
305 else return ()
306 xi<-hIsOpen i
307 if xi then hFlush i else return ()
308 xo<-hIsOpen e
309 _<- if xo then System.IO.hGetContents e else return ""
310 y<-getProcessExitCode h
311 case y of
312 Just ee -> if ExitSuccess == ee
313 then do threadDelay 3000000
314 mm<-takeMVar m
315 putMVar m (Data.Map.Strict.insertWith (\new old-> old{progress=progress new,lastStepTime=realToFrac tt}) n progressInfoZero{progress=1.0} mm)
316 mmm<-takeMVar m
317 putMVar m mmm
318 case Data.Map.Strict.lookup n mmm of
319 Just yy -> do _ <- takeMVar m
320 putMVar m (Data.Map.Strict.insert n yy{filename=Just s} mmm)
321 _ -> return ()
322 else
323 do mm<-takeMVar m
324 putMVar m (Data.Map.Strict.update (\x->Just x{failed=True}) n mm)
325
326 _-> if (realToFrac tt) > t then return () else ex h i e t m n s
477 = do tt <- getPOSIXTime
478 threadDelay 100
479 if (realToFrac tt) > t then
480 do System.IO.hPutStr i "\n"
481 mm <- takeMVar m
482 putMVar m
483 (Data.Map.Strict.update (\ x -> Just x{failed = True}) n mm)
484 else return ()
485 xi <- hIsOpen i
486 if xi then hFlush i else return ()
487 xo <- hIsOpen e
488 _ <- if xo then System.IO.hGetContents e else return ""
489 y <- getProcessExitCode h
490 case y of
491 Just ee -> if ExitSuccess == ee then
492 do threadDelay 3000000
493 mm <- takeMVar m
494 putMVar m
495 (Data.Map.Strict.insertWith
496 (\ new old ->
497 old{progress = progress new, lastStepTime = realToFrac tt})
498 n
499 progressInfoZero{progress = 1.0}
500 mm)
501 mmm <- takeMVar m
502 putMVar m mmm
503 case Data.Map.Strict.lookup n mmm of
504 Just yy -> do _ <- takeMVar m
505 putMVar m
506 (Data.Map.Strict.insert n yy{filename = Just s} mmm)
507 _ -> return ()
508 else
509 do mm <- takeMVar m
510 putMVar m
511 (Data.Map.Strict.update (\ x -> Just x{failed = True}) n mm)
512 _ -> if (realToFrac tt) > t then return () else ex h i e t m n s
513
327514 mylist :: [([Char], Double)]
328 mylist=[("downloading article and contributor information", 0.02540431143798292),("parsing article text", 0.057625916241286344),("forking threads to download of images and contributor information on them", 0.058045207449988465),("precompiling table columns", 0.08075814224942594),("joining threads to download the images and contributor information on them", 0.37003149457779727),("preparing for PDF generation", 0.5479855803098518),("preparing images for LaTeX document", 0.637605216120732),("generating PDF file. LaTeX run 1 of 4", 0.6911489294291799),("generating PDF file. LaTeX run 2 of 4", 0.7673758195622185),("generating PDF file. LaTeX run 3 of 4", 0.8463397892914045),("generating PDF file. LaTeX run 4 of 4", 0.9231746180088297),("finished", 1.0)]
329
515 mylist
516 = [("downloading article and contributor information",
517 2.540431143798292e-2),
518 ("parsing article text", 5.7625916241286344e-2),
519 ("forking threads to download of images and contributor information on them",
520 5.8045207449988465e-2),
521 ("precompiling table columns", 8.075814224942594e-2),
522 ("joining threads to download the images and contributor information on them",
523 0.37003149457779727),
524 ("preparing for PDF generation", 0.5479855803098518),
525 ("preparing images for LaTeX document", 0.637605216120732),
526 ("generating PDF file. LaTeX run 1 of 4", 0.6911489294291799),
527 ("generating PDF file. LaTeX run 2 of 4", 0.7673758195622185),
528 ("generating PDF file. LaTeX run 3 of 4", 0.8463397892914045),
529 ("generating PDF file. LaTeX run 4 of 4", 0.9231746180088297),
530 ("finished", 1.0)]
330531
331532 wwait :: Handle -> IO ()
332 wwait h = do b<-hIsEOF h
333 if b
334 then
335 do threadDelay 1000
336 wwait h
337 else return ()
338
339 progressDriver::Int->Handle->MVar (Map Int ProgressInfo)->String->IO ()
340 progressDriver n o t s = do xo<-hIsOpen o
341 threadDelay 100
342 tt <- getPOSIXTime
343 l<- if xo then do wwait o
344 hGetLine o
345 else return ""
346 case msum (Data.List.map (\(k,v)-> if isInfixOf k l then Just v else Nothing) mylist) of
347 Just x -> do m<-takeMVar t
348 putMVar t (Data.Map.Strict.insertWith (\new old-> old{progress=progress new,lastStepTime=realToFrac tt}) n progressInfoZero{progress=x} m)
349 _ -> return ()
350 m<-readMVar t
351 case Data.Map.Strict.lookup n m of
352 Just yy | (progress yy)==1.0 -> do _<-takeMVar t
353 putMVar t (Data.Map.Strict.insert n yy{filename=Just s} m)
354 Just yy | (failed yy)->return ()
355 _ -> progressDriver n o t s
356 progressInfoZero::ProgressInfo
357 progressInfoZero = ProgressInfo {progress=0.0,filename=Nothing,startTime=0.0,failed=False,barValue=0.0,lastStepTime=0.0,lastRuntime=0.0,extension=""}
358
359 forkProgressDriver::Handle->MVar (Map Int ProgressInfo)->String->String->IO Int
533 wwait h
534 = do b <- hIsEOF h
535 if b then
536 do threadDelay 1000
537 wwait h
538 else return ()
539
540 progressDriver ::
541 Int -> Handle -> MVar (Map Int ProgressInfo) -> String -> IO ()
542 progressDriver n o t s
543 = do xo <- hIsOpen o
544 threadDelay 100
545 tt <- getPOSIXTime
546 l <- if xo then
547 do wwait o
548 hGetLine o
549 else return ""
550 case
551 msum
552 (Data.List.map
553 (\ (k, v) -> if isInfixOf k l then Just v else Nothing)
554 mylist)
555 of
556 Just x -> do m <- takeMVar t
557 putMVar t
558 (Data.Map.Strict.insertWith
559 (\ new old ->
560 old{progress = progress new, lastStepTime = realToFrac tt})
561 n
562 progressInfoZero{progress = x}
563 m)
564 _ -> return ()
565 m <- readMVar t
566 case Data.Map.Strict.lookup n m of
567 Just yy | (progress yy) == 1.0 ->
568 do _ <- takeMVar t
569 putMVar t (Data.Map.Strict.insert n yy{filename = Just s} m)
570 Just yy | (failed yy) -> return ()
571 _ -> progressDriver n o t s
572
573 progressInfoZero :: ProgressInfo
574 progressInfoZero
575 = ProgressInfo{progress = 0.0, filename = Nothing, startTime = 0.0,
576 failed = False, barValue = 0.0, lastStepTime = 0.0,
577 lastRuntime = 0.0, extension = ""}
578
579 forkProgressDriver ::
580 Handle -> MVar (Map Int ProgressInfo) -> String -> String -> IO Int
360581 forkProgressDriver o t s ext
361 = do m<-takeMVar t
362 tt <- getPOSIXTime
363 let mm = case (keys m) of
364 (x:xs)-> Data.List.maximum (x:xs)
365 _ -> 0
366 putMVar t (Data.Map.Strict.insert (mm+1) ProgressInfo {progress=0.0,filename=Nothing,startTime=realToFrac tt,barValue=0.0,lastStepTime=0.0,lastRuntime=0.0,extension=ext,failed=False} m)
367 _<-forkIO (progressDriver (mm+1) o t s)
368 return (mm+1)
369
582 = do m <- takeMVar t
583 tt <- getPOSIXTime
584 let mm
585 = case (keys m) of
586 (x : xs) -> Data.List.maximum (x : xs)
587 _ -> 0
588 putMVar t
589 (Data.Map.Strict.insert (mm + 1)
590 ProgressInfo{progress = 0.0, filename = Nothing,
591 startTime = realToFrac tt, barValue = 0.0, lastStepTime = 0.0,
592 lastRuntime = 0.0, extension = ext, failed = False}
593 m)
594 _ <- forkIO (progressDriver (mm + 1) o t s)
595 return (mm + 1)
1717 import Licenses
1818 import Tools
1919 import ImperativeState
20
20
2121 makeUrl2 :: String -> String -> [Char]
2222 makeUrl2 theLemma theHost
2323 = (unify . exportURL)
2424 (URL{url_path = "w/index.php",
2525 url_params =
26 [("title", (replace2 theLemma "%" "%25")), ("offset", ""), ("limit", "500000"),
27 ("action", "history")],
26 [("title", (replace2 theLemma "%" "%25")), ("offset", ""),
27 ("limit", "500000"), ("action", "history")],
2828 url_type =
2929 Absolute
3030 (Host{protocol = HTTP True, host = theHost, port = Nothing})})
31
31
3232 makeUrl4 :: String -> [Char]
3333 makeUrl4 uuu
3434 = fromMaybe uuu
3838 (unify . exportURL)
3939 (URL{url_path = (url_path uu),
4040 url_params =
41 [("title", (replace2 ti "%" "%25")), ("offset", ""), ("limit", "500000"),
42 ("action", "history")],
41 [("title", (replace2 ti "%" "%25")), ("offset", ""),
42 ("limit", "500000"), ("action", "history")],
4343 url_type = url_type uu}))
44
44
4545 makeUrl3 :: String -> String -> [Char]
4646 makeUrl3 theLemma theHost
4747 = (unify . exportURL)
4949 url_type =
5050 Absolute
5151 (Host{protocol = HTTP True, host = theHost, port = Nothing})})
52
52
5353 deepGet2 :: [Char] -> [Anything a] -> [Anything a]
5454 deepGet2 tag ll = concat $ map go ll
5555 where go (Environment Tag (TagAttr t m) l)
5757 [Environment Tag (TagAttr tag m) l] ++ (deepGet2 tag l)
5858 go (Environment _ _ l) = (deepGet2 tag l)
5959 go _ = []
60
60
6161 getLicense :: [Anything Char] -> Maybe [Char]
6262 getLicense l = (go l)
63 where
64 go :: [Anything Char] -> Maybe String
63 where go :: [Anything Char] -> Maybe String
6564 go ll = msum (map (dg ll) licenses)
6665 dg ll (x, c)
6766 = case deepGet "a" "href" x ll of
6867 (_ : _) -> Just c
6968 _ -> Nothing
70
69
7170 getAuthor :: [Anything Char] -> Maybe [Anything Char]
7271 getAuthor x = listToMaybe (concat (map go (deepGet2 "tr" x)))
7372 where go (Environment _ _ l)
7877 _ -> []
7978 _ -> []
8079 go _ = []
81
80
8281 simpleContributors ::
8382 [Char] ->
84 [Char] -> Maybe URL -> ImperativeState-> IO [(String, String, Int, Maybe String)]
83 [Char] ->
84 Maybe URL ->
85 ImperativeState -> IO [(String, String, Int, Maybe String)]
8586 simpleContributors theLemma theHost uu st
8687 = do let theUrl3
8788 = case uu of
115116 let y = decodeString yy
116117 let x = decodeString xx
117118 let dd
118 = ((deepGet "a" "class" "new mw-userlink" (parseit minparsers x))++(deepGet "a" "class" "mw-userlink" (parseit minparsers x))) ::
119 [Anything Char]
119 = ((deepGet "a" "class" "new mw-userlink" (parseit minparsers x))
120 ++ (deepGet "a" "class" "mw-userlink" (parseit minparsers x)))
121 :: [Anything Char]
120122 let ll = (filter pre (map go dd))
121123 let n = (nub ll) :: [(String, String)]
122124 let out = map go2 (zip (map (count ll) n) n)
123125 let ht = (parseit htmlminparsers y)
124126 case (getAuthor ht) of
125127 Just zz -> return
126 [(fst (treeToLaTeX3 zz initialState{urld = analyseNetloc (hostname . fullUrl $ st)}), "", 1 :: Int, getLicense ht)]
128 [(fst
129 (treeToLaTeX3 zz
130 initialState{urld = analyseNetloc (hostname . fullUrl $ st)}),
131 "", 1 :: Int, getLicense ht)]
127132 _ -> return out
128
129 where
130 go :: Anything Char -> (String, String)
133 where go :: Anything Char -> (String, String)
131134 go (Environment Tag (TagAttr _ m) l)
132135 = ((shallowFlatten (deepFlatten l)), findWithDefault "" "href" m)
133136 go _ = ("", "")
134137 go2 (c, (a, h)) = (a, h, c, Nothing)
135
138
136139 count :: (Eq a) => [a] -> a -> Int
137140 count l s = length (filter (== s) l)
138
141
139142 pre :: (String, String) -> Bool
140143 pre s
141144 = case (runParser ipaddr () "" (fst s)) of
142145 Right _ -> False
143146 Left _ -> True
144
147
145148 intdigit :: Parser Int
146149 intdigit
147150 = do a <- digit
148151 case reads [a] of
149152 [(i, [])] -> return i
150153 _ -> pzero
151
154
152155 ipnum3 :: ParsecT String () Identity Int
153156 ipnum3
154157 = do a <- intdigit
155158 b <- intdigit
156159 c <- intdigit
157160 return (a * 100 + b * 10 + c)
158
161
159162 ipnum2 :: ParsecT String () Identity Int
160163 ipnum2
161164 = do a <- intdigit
162165 b <- intdigit
163166 return (a * 10 + b)
164
167
165168 ipnum1 :: Parser Int
166169 ipnum1 = do intdigit
167
170
168171 ipnum :: ParsecT [Char] () Identity ()
169172 ipnum
170173 = do n <- (try (ipnum3)) <|> (try (ipnum2)) <|> ipnum1
171174 if ((n <= 255) && (n >= 0)) then return () else pzero
172
175
173176 ipaddr ::
174177 Text.Parsec.Prim.ParsecT [Char] () Data.Functor.Identity.Identity
175178 ()
176179 ipaddr = try (ipv4addr) <|> ipv6addr
177
180
178181 ipv4addr ::
179182 Text.Parsec.Prim.ParsecT [Char] () Data.Functor.Identity.Identity
180183 ()
187190 _ <- char '.'
188191 _ <- ipnum
189192 return ()
190
193
191194 ipv6num ::
192195 Text.Parsec.Prim.ParsecT [Char] () Data.Functor.Identity.Identity
193196 ()
198201 _ <- try (hexDigit) <|> return '0'
199202 _ <- try (hexDigit) <|> return '0'
200203 return ()
201
204
202205 ipv6addr ::
203206 Text.Parsec.Prim.ParsecT [Char] () Data.Functor.Identity.Identity
204207 ()
7979 extract pathname
8080 = do _ <- createDirectories pathname
8181 writeFiles (pathname ++ "/document/headers/") headerFiles
82
88 import Data.Maybe
99 import Control.Monad
1010 import MyState
11
11
1212 {-DHUN| the width of a columns as float wrapped in a Just value of the maybe monad if it could be determined. Return the value Nothing of the maybe monad otherwise. The only parameter is of part of the parse tree that describe the opening part of the table cell element. DHUN-}
13
13
1414 widthOfColumn :: [Anything Char] -> Maybe Float
1515 widthOfColumn = msum . (map f)
1616 where f (Environment Attribute (Attr (k, v)) _)
2020 guard ('%' `elem` v)
2121 return (1.0e-2 * z)
2222 f _ = Nothing
23
23
2424 columnMultiplicityForSimple :: [Anything Char] -> Int
2525 columnMultiplicityForSimple x
2626 = case columnMultiplicity x of
2727 Just a -> a
2828 _ -> -1
29
29
3030 raggedArrayOfWidths ::
3131 [Anything Char] -> [Maybe Float] -> [[Maybe Float]]
3232 raggedArrayOfWidths ((Environment TableRowSep _ _) : xs) temp
4646 (replicate ((columnMultiplicityForSimple x) - 1) [Nothing]))))
4747 raggedArrayOfWidths (_ : xs) temp = (raggedArrayOfWidths xs temp)
4848 raggedArrayOfWidths [] temp = [temp]
49
49
5050 numberOfColumns :: [Anything Char] -> Int
5151 numberOfColumns a
5252 = (maximum ([length x | x <- (raggedArrayOfWidths a [])]))
53
53
5454 initialListofWidths :: [Anything Char] -> [Maybe Float]
5555 initialListofWidths x = replicate (numberOfColumns x) Nothing
56
56
5757 listMax :: [Maybe Float] -> [Maybe Float] -> [Maybe Float]
5858 listMax (Just x : xs) (Just y : ys)
5959 = Just (max x y) : listMax xs ys
6161 listMax [] (y : ys) = y : listMax [] ys
6262 listMax (x : xs) [] = x : listMax xs []
6363 listMax [] [] = []
64
64
6565 preliminaryWidths ::
6666 [[Maybe Float]] -> [Maybe Float] -> [Maybe Float]
6767 preliminaryWidths l k = foldl (listMax) k l
68
68
6969 standardColumnWitdh :: [Anything Char] -> Maybe Float
7070 standardColumnWitdh a
7171 = if columns > columnsWithDefinedWidth then
7878 columns = numberOfColumns a
7979 columnsWithDefinedWidth = length (filter isJust l)
8080 sumOfDefinedWidths = sum (map fromJust (filter isJust l))
81
81
8282 rawWidths :: [Anything Char] -> [Maybe Float]
8383 rawWidths a
8484 = (preliminaryWidths (raggedArrayOfWidths a [])
8585 (initialListofWidths a))
86
86
8787 {-DHUN fallback function for the width of columns if the precompilation procedure for the width of columns is not available yet. So particularly when the precompilation is just running. Takes the parse tree representation of the table as first input parameter. Returns the list of widths of the columns. I am not documenting its subfunctions since the final width of the columns don't have anything to do with the width calculated here, still these preliminary widths are needed for the precompilation procedure DHUN-}
88
88
8989 columnWidths :: [Anything Char] -> [Float]
9090 columnWidths a = w
9191 where l = rawWidths a
9595 w = fromMaybe (concat (replicate m [f / mf])) $
9696 do ww <- standardColumnWitdh a
9797 return [x * f | x <- map (fromMaybe ww) l]
98
98
9999 {-DHUN| part of the correction calculation for the space between columns inside a table. Takes the number of columns as first input parameter returns a scaling factor DHUN-}
100
100
101101 scalefactor :: (Fractional a, Ord a) => a -> a
102102 scalefactor n | n <= 10 = 12.8 * (n) / 448.0
103103 scalefactor _ = 12.8 * (11.0) / 448.0
104
104
105105 {-DHUN| part of the correction calculation for the space between columns inside a table. Takes the number of columns as first input parameter returns a scaling factor DHUN-}
106
106
107107 tableScale :: Int -> Float
108108 tableScale nColumns = (1.0 / n) * (1.0 - (scalefactor n))
109109 where n = fromIntegral nColumns
110
110
111111 {-DHUN| returns the latex environment name for a table environments. It the float passes as first parameter it 1.0 the result is longtable, otherwise it is tabular. This function is usually called with the width of the current cell in units of the line width as first parameter. Outside any table this value is 1.0 inside a table it is always smaller than 1.0. So this function will return tabular in case of a nested table, which is necessary since longtables can not be nested inside other longtables, but tabulars can be nested within longtables as well as tabulars. DHUN-}
112
112
113113 tableEnvironment :: Float -> String
114114 tableEnvironment 1.0 = "longtable"
115115 tableEnvironment _ = "tabular"
116
116
117117 innerTableSpecifier :: [Float] -> String -> String
118118 innerTableSpecifier (f : xs) t
119119 = ">{\\RaggedRight}p{" ++
120120 (printf "%0.5f" f) ++
121121 "\\linewidth}" ++ t ++ (innerTableSpecifier xs t)
122122 innerTableSpecifier [] _ = []
123
123
124124 {-DHUN| Returns the table header which represents the width of the columns of a table in units of the line width. It takes a list of width as second parameter. It is understood that necessary correction for the width to compensate for the space needed by separations of columns have already been applied. The is the first boolean parameter is true rules will be drawn in the table, otherwise they won't. See also documentation of the wdth3 function in the module LatexRenderer. DHUN-}
125
125
126126 tableSpecifier :: Bool -> [Float] -> String
127127 tableSpecifier True f = '|' : (innerTableSpecifier f "|")
128128 tableSpecifier False f = (innerTableSpecifier f "")
129
129
130130 {-DHUN| Takes the multirowmap as first input parameter. See documentation on the function multiRowDictChangeStart in this module for details on the multirowmap. It returns true if there are currently no multirow cells active in the given multirowdict DHUN-}
131
131
132132 myempty :: Map Int (Int, Int) -> Bool
133133 myempty d = [x | x <- Map.toList d, (fst (snd x)) /= 0] == []
134
134
135135 {-DHUN| takes the string found in the header symbol of a table or the opening tag of the html table tag. That is the place where the attributes are, but only understood as string so without parsing the attributes as map, and returns a boolean. It this boolean is true the rules of the table need to be drawn, otherwise they must not be drawn DHUN-}
136
136
137137 seperatingLinesRequested :: String -> Bool
138138 seperatingLinesRequested s
139139 = (isInfixOf2 "Prettytable" (map toLower s)) ||
140140 (isInfixOf2 "prettytable" (map toLower s)) ||
141141 (isInfixOf2 "wikitable" (map toLower s))
142
142
143143 {-DHUN| returns the latex symbol for a horizontal line on the last row of a table, that is a horizontal rule, if the first boolean parameter is true, otherwise the empty string is returned. This function is usually being called with the first parameter indicating whether or not rules should be drawn with the table DHUN-}
144
144
145145 rowDelimiter :: Bool -> String
146146 rowDelimiter True = "\\\\ \\hline"
147147 rowDelimiter False = ""
148
148
149149 {-DHUN| returns the latex symbol for a horizontal line in a table, that is a horizontal rule, if the first boolean parameter is true, otherwise the empty string is returned. This function is usually being called with the first parameter indicating whether or not rules should be drawn with the table DHUN-}
150
150
151151 horizontalLine :: Bool -> String
152152 horizontalLine True = " \\hline"
153153 horizontalLine False = ""
154
154
155155 {-DHUN| return the latex symbol for a partly drawn inner horizontal line in a table, that is a horizontal rule. It has to be drawn only partly since multirow cells intersect with it. The second parameter is the multirowmap (the documentation on the function multiRowDictChangeStart in this module for details). The third parameter is the total number of columns in the table. The first parameter is a and index that is incremented during the course of this function and has to be 1 when called this function from outside DHUN-}
156
156
157157 makeCLines :: Int -> Map Int (Int, Int) -> Int -> [Char]
158158 makeCLines m d t
159159 = if m <= t then
165165 where def
166166 = "\\cline{" ++
167167 (show m) ++ "-" ++ (show m) ++ "}" ++ (makeCLines (m + 1) d t)
168
168
169169 {-DHUN| return the latex symbol for an inner horizontal line in a table, that is a horizontal rule. If the first boolean parameter is true the rule is drawn otherwise it is not. If multirow cells interfere with this rule the rule is only drawn in parts as required. The second parameter is the multirowmap (the documentation on the function multiRowDictChangeStart in this module for details). The third parameter is the total number of columns in the table DHUN-}
170
170
171171 innerHorizontalLine :: Bool -> Map Int (Int, Int) -> Int -> String
172172 innerHorizontalLine b d m
173173 = if b then
174174 if myempty d then horizontalLine True else ' ' : makeCLines 1 d m
175175 else ""
176
176
177177 {-DHUN| the symbol in latex for separating columns. It is returned if the fist boolean parameter is true otherwise the empty string is returned. This function is usually called with the first parameter being true if the current column was not the first column of a row since the symbol is not needed before the start of the first column of a row. This is a contrast to html where the first cell of a row has its own td or th tag. The mediawiki markup notation is similar to html in this respect. DHUN-}
178
178
179179 columnSeperator :: Bool -> String
180180 columnSeperator True = "&"
181181 columnSeperator False = ""
182
182
183183 {-DHUN| takes the parse result of the attributes of a th tag or td tag or a corresponding header column separator or column separator as second parameter. It takes a key for this map of attributes as string as first parameter. If the key is present in the map, and the value found in the map at that key can be parsed as an Integer that integer is returned. If no value for the key could be found in the map or it could not be parsed as an integer the value Nothing of the Maybe monad is returned. DHUN-}
184
184
185185 genMultiplicity :: String -> [Anything Char] -> Maybe Int
186186 genMultiplicity s = msum . (map f)
187187 where f (Environment Attribute (Attr (k, v)) _)
190190 guard (k == s)
191191 return z
192192 f _ = Nothing
193
193
194194 {-DHUN| takes the parse result of the attributes of a th tag or td tag or a corresponding header column separator or column separator as second parameter. It takes a key for this map of attributes as string as first parameter. It returns a result of the lookup of the key in the map (so the value). as string wrapped into the Maybe monad. If no value for the key could be found in the map it returns the value Nothing of the Maybe monad. DHUN-}
195
195
196196 genLookup :: String -> [Anything Char] -> Maybe String
197197 genLookup s = msum . (map f)
198198 where f (Environment Attribute (Attr (k, v)) _)
201201 guard (k == s)
202202 return v
203203 f _ = Nothing
204
204
205205 {-DHUN| the column multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. The result is an integer wrapped into the maybe monad. The value Nothing of the Maybe monad is returned if the attribute colspan is not present (or did not have a value parseable as Integer) within the first parameter DHUN-}
206
206
207207 columnMultiplicity :: [Anything Char] -> Maybe Int
208208 columnMultiplicity = genMultiplicity "colspan"
209
209
210210 {-DHUN| the row multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. The result is an integer wrapped into the maybe monad. The value Nothing of the Maybe monad is returned if the attribute rowspan is not present (or did not have a value parseable as Integer) within the first parameter DHUN-}
211
211
212212 rowMultiplicity :: [Anything Char] -> Maybe Int
213213 rowMultiplicity = genMultiplicity "rowspan"
214
214
215215 {-DHUN| the column multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements.The result is an integer that default to zero DHUN-}
216
216
217217 columnMultiplicityForCounting :: [Anything Char] -> Int
218218 columnMultiplicityForCounting = (fromMaybe 1) . columnMultiplicity
219
219
220220 {-DHUN| return the symbol for the start of a multicolumn cell in latex. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. It takes the list of the final widths of all columns of the table as second parameter. It takes to the column index of the current column as third parameter. The fourth parameter is a boolean if it is true rules will be drawn in the table otherwise they won't. The fifth parameter is the table state. That is the mutable state that exists during rendering of a table. DHUN-}
221
221
222222 multiColumnStartSymbol ::
223223 [Anything Char] -> [Float] -> Int -> Bool -> TableState -> String
224224 multiColumnStartSymbol l f i t st
231231 _ -> "l"
232232 mylist nn
233233 = [min 1.0
234 (((1.0 - (scalefactor 1)) * (sum (take nn (drop (i - 1) f))))
235 / (1.0 - (scalefactor (fromIntegral nn))))]
236
234 (((1.0 - (scalefactor 1)) * (sum (take nn (drop (i - 1) f)))) /
235 (1.0 - (scalefactor (fromIntegral nn))))]
236
237237 {-DHUN| return the symbol for the end of a multicolumn cell in latex. The first boolean parameter tells if the cell is actually a multicolumn cell. If it is false the empty string is returned instead DHUN-}
238
238
239239 multiColumnEndSymbol :: Bool -> String
240240 multiColumnEndSymbol True = "}"
241241 multiColumnEndSymbol False = ""
242
242
243243 {-DHUN| return the symbol for the end of a multirow cell in latex. The first boolean parameter tells if the cell is actually a multirow cell. If it is false the empty string is returned instead DHUN-}
244
244
245245 multiRowEndSymbol :: Bool -> String
246246 multiRowEndSymbol True = "}"
247247 multiRowEndSymbol False = ""
248
248
249249 {-DHUN| This function takes a default value as first parameter. It takes a predicate as second parameter. It take a map from Int to a two tuple of Int as firth parameter. It takes a key for that map as fourth parameter. It takes a function mapping a two tuple of Int to the same type as the default value as third parameter. It tries to find a two tuple of Ints (that is value) in the map under the given key. If it finds one and the predicate returns true on the first element of that two tuple it returns the result of the function on the two tuple. In any other case it returns the default value DHUN-}
250
250
251251 withDefault ::
252252 t ->
253253 (Int -> Bool) ->
257257 do (a, b) <- Map.lookup i d
258258 guard $ p a
259259 return $ f a b
260
260
261261 {-DHUN| this function return the vertical separator for column for the table header in latex. The only parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't DHUN-}
262
262
263263 verticalSeperator :: Bool -> [Char]
264264 verticalSeperator True = "|"
265265 verticalSeperator False = ""
266
266
267267 {-DHUN| the function returns the a string to be inserted into a latex document for multirows at a when a new column (that is a new cell) starts, thats when a column separator, or header column separator is encountered. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't DHUN-}
268
268
269269 multiRowSymbol :: Int -> Map Int (Int, Int) -> Bool -> String
270270 multiRowSymbol i d t
271271 = withDefault "" (> 0)
278278 (verticalSeperator t) ++ "}{}&" ++ (multiRowSymbol (i + b) d t))
279279 i
280280 d
281
281
282282 {-DHUN| the function returns the a string to be inserted into a latex document for multirows at a when a new row starts, thats when a row separator is encountered. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't DHUN-}
283
283
284284 multiRowSymbolForRowSep ::
285285 Int -> Map Int (Int, Int) -> Bool -> String
286286 multiRowSymbolForRowSep i d t
295295 "}{}" ++ (multiRowSymbolForRowSep (i + b) d) t)
296296 i
297297 d
298
298
299299 {-DHUN| the function returns the a string to be inserted into a latex document for multirows at the end of the table. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't DHUN-}
300
300
301301 multiRowSymbolForTableEnd ::
302302 Int -> Map Int (Int, Int) -> Bool -> String
303303 multiRowSymbolForTableEnd i d t
312312 "}{}" ++ (multiRowSymbolForTableEnd (i + 1) d t))
313313 i
314314 d
315
315
316316 {-DHUN| in case of a multirow, that cell has to be skipped further down. So if I got a multirow in row 1 column 2 with a rowspan of 2 or more I need to expand row 2 column 1 by 1 . So if I passed row 2 column one I am not in row 2 column 2 since that is where the multirow cell resides, I am rather in row 2 cell 3. And if there are more multicolumns involved I am more possible even further right. So this function just tell me how many cells I have to skip. The first parameter is the index of the current column. The second parameter is the multirowdict. See also documentation on the function multiRowDictChangeStart in this module DHUN-}
317
317
318318 multiRowCount :: Int -> Map Int (Int, Int) -> Int
319319 multiRowCount i d
320320 = withDefault 0 (/= 0) (\ _ b -> b + (multiRowCount (i + 1) d)) i d
321
321
322322 {-DHUN| see documentation on multiRowDictChangeStart. This function take the index of the current column as first parameter. This function takes the multiRowDict as first parameter and returns the modified version of it. DHUN-}
323
323
324324 multiRowDictChangeEnd ::
325325 Int -> Map Int (Int, Int) -> Map Int (Int, Int)
326326 multiRowDictChangeEnd i d
329329 multiRowDictChangeEnd (i + 1) (Map.insert i (a - 1, b) d))
330330 i
331331 d
332
332
333333 {-DHUN| The multiRowDict is a facility for keeping track of cells spanning multiple rows. It is stored as mutable state in the type TableState in the parameter multiRowMap. It is passed to this function as second parameter. This function return an updated version of it. It is a map mapping and Int to a tuple whose both elements are also ints. It the key is the column index and the value is a pair (rowmultiplicity, columnmultiplicity). The rowmultiplicity is the number of rows the cell spans. This number is decrease every time a row ends. So it actually says how many rows the columns spans further down from the current column. The column multiplicity is the number of columns the cell spans. This function take the index of the current column as first parameter. This function takes the parse result of the opening part of the cell environment of the current cell as third input parameter. This function calculates only the changes in the multirowdict for the opening environment of the cell. You should not use this function but rather use multiRowDictChange since this also considers the effect by ending of cells DHUN-}
334
334
335335 multiRowDictChangeStart ::
336336 Int -> Map Int (Int, Int) -> [Anything Char] -> Map Int (Int, Int)
337337 multiRowDictChangeStart i d l
339339 do n <- rowMultiplicity l
340340 return (Map.insert i ((n - 1), c) d)
341341 where c = (columnMultiplicityForCounting l)
342
342
343343 {-DHUN| calculate the full change to the multirowdict. See documentation on the function multiRowDictChangeStart in this module for more information on the multirowdict. The first parameter is the index of the current column. The second parameter is the current multirowdict. This function takes the parse result of the opening part of the cell environment of the current cell as third input parameter. This function returns the updated multirowdict. DHUN-}
344
344
345345 multiRowDictChange ::
346346 Int -> Map Int (Int, Int) -> [Anything Char] -> Map Int (Int, Int)
347347 multiRowDictChange i d l
348348 = multiRowDictChangeStart n (multiRowDictChangeEnd i d) l
349349 where n = i + (multiRowCount i d)
350
350
351351 {-DHUN| returns the latex symbol for the start of a multirow cell. That is a cell spanning multiple rows. The second parameter is activeColumn. This is an integer wrapped in the maybe monad. If it is has the value Just then the row is to be renderer in a special mode. This mode is needed to determine the width of the columns. In this special mode no line break occur and width of the paper is infinite, so that the width of the of each column is its respective natural width. So there is no limit on the width of the column. If the value is Nothing this means that the table is typeset in normal mode and the width is to be limited. The first parameter is the inner parse result of the row separator containing information on whether or not the cell is multirow DHUN-}
352
352
353353 multiRowStartSymbol :: [Anything Char] -> Maybe Int -> String
354354 multiRowStartSymbol l m
355355 = fromMaybe "" $
358358 "\\multirow{" ++
359359 (show n) ++
360360 "}{" ++ (if isJust m then "*" else "\\linewidth") ++ "}{"
361
361
362362 {-DHUN| a symbol to be added at the end of header cell in order to make its content bold. The only boolean parameter is use to indicate whether the cell currently under consideration is a header cell. Otherwise the empty string is returned. DHUN-}
363
363
364364 headendsym :: Bool -> String
365365 headendsym False = ""
366366 headendsym True = "}"
367
367
368368 {-DHUN| a symbol to be added at the start of header cell in order to make its content bold DHUN-}
369
369
370370 headstartsym :: String
371371 headstartsym = "{\\bfseries "
372
372
373373 {-DHUN| the symbol to be inserted into the latex document, for the end of a row in a table. The first boolean parameter is should be true if the current table is not nested in an other one. The second boolean parameter should be true if the column was the last column of the header of the table. The header of the table is repeated by latex each time page wrapping inside the table occurs. See also documentation of the longtable latex package DHUN-}
374
374
375375 rowendsymb :: Bool -> Bool -> String
376376 rowendsymb True True = "\\endhead "
377377 rowendsymb _ _ = "\\\\"
77 import System.IO.Strict
88 import Data.Time.Clock.POSIX
99
10 pad :: Int -> String -> String
11 pad n s = if length s < n then pad n (' ':s) else s
10 {-DHUN| Pads a string with spaces. Adds space charactes to the beginning of a string until it has got a desired length. The first paramter is the desired length. The second paramter is the original string. The return value is the padded string. DHUN-}
1211
1312
13 pad :: Int -> String -> String
14 pad n s = if length s < n then pad n (' ' : s) else s
15
16 {-DHUN| creates a list of line numbers as padded strings. The first parameter is the length of length that each linenumber (a string) should have. The second paramter is the minimum linenumber to start with (inclusive). The third parameter is the maximum linenumber to end with (inclusive). The return value is the list of linenumbers as strings. DHUN-}
17
1418 linenumbers :: Int -> Int -> Int -> [String]
15 linenumbers n mini maxi = if mini == maxi then [pad n (show mini)] else (pad n (show mini)):(linenumbers n (mini+1) maxi)
19 linenumbers n mini maxi
20 = if mini == maxi then [pad n (show mini)] else
21 (pad n (show mini)) : (linenumbers n (mini + 1) maxi)
22
23 {-DHUN| Prints out a given message together with the current unix timestamp. The first parameter is the message to be printed DHUN-}
1624
1725 myprint :: String -> IO ()
1826 myprint s
2129 hFlush stdout
2230
2331 {-DHUN| Write a unicode string to a utf8 encoded file. The first parameter is the filename, the second the contend to be written to the file. DHUN-}
24
32
2533 writeFile :: FilePath -> String -> IO ()
2634 writeFile f s
2735 = do h <- openFile f WriteMode
2836 hSetEncoding h utf8
2937 hPutStr h s
3038 hClose h
31
39
3240 {-DHUN| read a utf8 encoded file fully as a unicode string. The first parameter is the filename. The return value is the content of the file wrapped in the IO monad. DHUN-}
33
41
3442 readFile :: FilePath -> IO String
3543 readFile f
3644 = do h <- openFile f ReadMode
3745 hSetEncoding h utf8
3846 z <- System.IO.Strict.hGetContents h
3947 return z
40
48
4149 {-DHUN| If the list is not empty it returns the list without the last item, otherwise the empty list- DHUN-}
42
50
4351 nullinit :: [a] -> [a]
4452 nullinit l = if null l then [] else init l
45
53
4654 {-DHUN| Returns the list with the first element stripped wrapped in a Just of the Maybe monad. If the list is empty returns the value Nothing of the maybe monad. DHUN-}
47
55
4856 maybeTail :: [a] -> Maybe [a]
4957 maybeTail [] = Nothing
5058 maybeTail (_ : xs) = Just xs
51
59
5260 {-DHUN| Returns the first element of a list wrapped in a Just of the Maybe monad. If the list is empty returns the value Nothing of the Maybe monad, DHUN-}
53
61
5462 maybeHead :: [a] -> Maybe a
5563 maybeHead [] = Nothing
5664 maybeHead (x : _) = Just x
57
65
5866 {-DHUN| The first parameter is and item. the second parameter is a list. If the list contains the item it returns the list up to the first occurrence of the item with the item itself excluded, otherwise it returns the empty list. DHUN-}
59
67
6068 headSplitEq :: (Eq a) => a -> [a] -> [a]
6169 headSplitEq c s
6270 = case splitOn [c] s of
6371 g : _ -> g
6472 [] -> []
65
73
6674 {-DHUN| Removes all white space characters trailing on the right hand side of a string DHUN-}
67
75
6876 rtrim :: String -> String
6977 rtrim = reverse . (dropWhile isSpace) . reverse
70
78
7179 {-DHUN| The first parameter is an original item. The second parameter is a replacement item. The third parameter is a list. This function replaces all occurrences of the original item, with the replacement item in the list. DHUN-}
72
80
7381 replace :: (Eq a) => a -> a -> [a] -> [a]
7482 replace src target = map (\ x -> if x == src then target else x)
75
83
7684 {-DHUN| The first parameter is an input list. The second parameter is an original list that may or may not or may several times be part of the input list. The third parameter is an a replacement list. This function replaces all occurrences of the original list in the input list, with the replacement list. DHUN-}
77
85
7886 replace2 :: (Eq a) => [a] -> [a] -> [a] -> [a]
7987 replace2 hay needle nail
8088 | needle `isPrefixOf` hay =
8290 replace2 (x : xs) needle nail
8391 | otherwise = x : (replace2 xs needle nail)
8492 replace2 [] _ _ = []
85
93
8694 {-DHUN| The first parameter is an input list. The second parameter is also a list, that might be contained in the input list. If this is the case this function returns true. Otherwise this function returns false DHUN-}
87
95
8896 isInfixOf2 :: (Eq a) => [a] -> [a] -> Bool
8997 isInfixOf2 needle haystack
9098 = any (needle `isPrefixOf`) (tails haystack)
91
99
92100 {-DHUN| Converts a single character in hex notation to an integer. The integer is wrapped in a mMybe monad. If an integer could be found it is wrapped in a Just of the Maybe monad. Otherwise the value Nothing of the Maybe monad is returned DHUN-}
93
101
94102 unhexChar :: Char -> Maybe Integer
95103 unhexChar c = lookup c hexTable
96104 where hexTable
97105 = zip ['0' .. '9'] [0 .. 9] ++
98106 zip ['a' .. 'f'] [10 .. 15] ++ zip ['A' .. 'F'] [10 .. 15]
99
107
100108 {-DHUN| Converts a sequence of characters in hex notation to an integer. The integer is wrapped in a Maybe monad. If an integer could be found it is wrapped in a Just of the Maybe monad. Otherwise the value Nothing of the Maybe monad is returned DHUN-}
101
109
102110 unhex :: String -> Maybe Integer
103111 unhex = foldM f 0
104112 where f acc ch
0 {-# LANGUAGE StandaloneDeriving #-}
01 {-DHUN| module for processing urls and downloading their content with repect to mediawiki DHUN-}
1 {-# LANGUAGE StandaloneDeriving #-}
22 module UrlAnalyse
33 (getpage, analyse, analyseFull, unify, WikiUrl, getLemma,
44 FullWikiUrl, hostname, url, alternatives, lemma, wikiUrl, geturl,
2828 deriving instance Read URL.Protocol
2929 deriving instance Read URL.URLType
3030
31 {-DHUN| This represents the main url of a wiki page. So the url of the wiki page that should be converted to latex. It is a tuple wrapped into the maybe monad, to deal with case in which the url could not be parses. The first element of the tuple is just the main url parser with Network.URL The second element is a list of urls. These URLs are possible base urls for wiki pages. So en.wikipedia.org/wiki/Foobar has got the main url en.wikipedia.org/wiki/Foobar and one of the base urls us en.wikipedia.org/wiki/. Base urls are important since the wiki source related to the main url might include subpage in wiki notation that is [[JohnDow]]. The actual url to look up JohnDow is the baseurl plus JohnDow so that is en.wikipedia.org/wiki/JohnDow Since also images in the commons and similar things are possible, the are usually some baseurls to be looked at. This way the base URl have to be a list. DHUN-}
31 {-DHUN| This represents the main url of a wiki page. So the url of the wiki page that should be converted to latex. It is a tuple wrapped into the maybe monad, to deal with case in which the url could not be parsed. The first element of the tuple is just the main url parsed with Network.URL The second element is a list of urls. These URLs are possible base urls for wiki pages. So en.wikipedia.org/wiki/Foobar has got the main url en.wikipedia.org/wiki/Foobar and one of the base urls us en.wikipedia.org/wiki/. Base urls are important since the wiki source related to the main url might include subpage in wiki notation that is [[JohnDow]]. The actual url to look up JohnDow is the baseurl plus JohnDow so that is en.wikipedia.org/wiki/JohnDow Since also images in the commons and similar things are possible, the are usually some baseurls to be looked at. This way the base URLs have to be a list. DHUN-}
3232
3333 type WikiUrl = Maybe (URL, [URL])
3434
3737 wikiUrl :: FullWikiUrl -> WikiUrl
3838 wikiUrl fu = return (url fu, alternatives fu)
3939
40 {-DHUN| A type describing a reference to an article on a MediaWiki server. The entry url is the url under which the article is located. The entry alternatives is a list of baseurls of the wiki. See documentation on type WikiUrl for more information about baseurls. The entry hostname contains the hostname of the server. The entry lemma contains the lemma (that is the name of the article on the wiki) DHUN-}
41
42
4043 data FullWikiUrl = FullWikiUrl{url :: URL, alternatives :: [URL],
4144 hostname :: String, lemma :: String}
4245 deriving (Eq, Ord)
4346
47
48 {-DHUN| base instance of type FullWikiUrl, to be filled with useful data using the record syntax DHUN-}
49
4450 fullWikiUrlZero :: FullWikiUrl
4551 fullWikiUrlZero
4652 = FullWikiUrl{url =
6773 "Special:Export"}
6874 where p = (url_path u)
6975
76
77 {-DHUN| modify an URL to point to the special page on the wiki to expand the templates useful for command line option -m DHUN-}
78
79
7080 modpathForExpansion :: URL -> URL
7181 modpathForExpansion u
7282 = u{url_path =
97107 statusExceptionHandler _ = (return (I.Response {responseBody=L.empty,responseStatus=T.Status {T.statusCode=200,T.statusMessage=BStr.empty}, responseVersion=V.http09,responseHeaders=[],responseCookieJar=I.CJ [],I.responseClose'=I.ResponseClose (return ())}))
98108
99109
110 {-DHUN| Loads the data stored under an URL from the web. Result will be a ByteString. Mainly useful for loading HTML for further processing, as well as binary image files. DHUN-}
111
112
100113 geturl2 :: String -> IO BStr.ByteString
101114 geturl2 u
102115 = if u=="" then return (BStr.pack []) else Control.Exception.catch
115128 fun _ = return (BStr.pack [])
116129 statusExceptionHandler :: SomeException -> IO (Network.HTTP.Conduit.Response L.ByteString)
117130 statusExceptionHandler _ = (return (I.Response {responseBody=L.empty,responseStatus=T.Status {T.statusCode=200,T.statusMessage=BStr.empty}, responseVersion=V.http09,responseHeaders=[],responseCookieJar=I.CJ [],I.responseClose'=I.ResponseClose (return ())}))
131
132 {-DHUN| loads the wiki sourcecode strored under a lemma in on a server running mediawiki. The first parameter is the lemma to look up. The second parameter is the URL to the special:export page on the server. The return value is the source wikitext DHUN-}
133
118134
119135 geturl4 :: String -> String -> IO String
120136 geturl4 s u
137153 statusExceptionHandler _ = (return (I.Response {responseBody=L.empty,responseStatus=T.Status {T.statusCode=200,T.statusMessage=BStr.empty}, responseVersion=V.http09,responseHeaders=[],responseCookieJar=I.CJ [],I.responseClose'=I.ResponseClose (return ())}))
138154
139155
140
156 {-DHUN| loads the wikisource of a wiki article from a server running mediawiki, with all mediawiki templates expanded into wiki text. The first parameter is the url to special:expand templates page on the server. The second parameter is the wikitext source including the mediawiki templates to be expanded. The third parameter is the name of the lemma on the server. DHUN-}
141157
142158
143159 geturl3 :: String -> String -> String -> IO String
183199 getTextContent2 z
184200 = catchJust myfun (getTextContent z) (\ _ -> return Nothing)
185201
202 {-DHUN| this function extracts the expanded wiki source of out of the result String returned by the Special:ExpandTemplates function of mediawiki. This function returns its result wrapped in a maybe monad so it can return the maybe value Nothing in case of failure but does not break the flow of control. It is also wrapped in the IO monad since the xml parser used is bound to the IO monad DHUN-}
203
186204 getExpandedTextContent :: String -> IO (Maybe String)
187205 getExpandedTextContent z
188206 = do h <- runX
193211 x <- return . toText $ h
194212 return (seq x x)
195213
214 {-DHUN| this function extracts the expanded wiki source of out of the result String returned by the Special:ExpandTemplates function of mediawiki. This function returns its result wrapped in a maybe monad so it can return the maybe value Nothing in case of failure but does not break the flow of control. It is also wrapped in the IO monad since the xml parser used is bound to the IO monad. Possible IO errors are caught an rethrown as Nothing in the Maybe Monad DHUN-}
215
216
196217 getExpandedTextContent2 :: String -> IO (Maybe String)
197218 getExpandedTextContent2 z
198219 = catchJust myfun (getExpandedTextContent z)
214235 (listToMaybe $ concat (map maybeToList lll)) >>=
215236 (return . decodeString)
216237
217
238 {-DHUN| Loads a page from a wiki when mediawiki2latex is running with command line option --bookmode. The first parameter is the lemma to load from the wiki the second parameter is the WikiUrl to the server hosting the wiki DHUN-}
218239
219240
220241 getBookpage :: String -> WikiUrl -> IO (Maybe String)
226247 (return)
227248 where
228249 go x =if (x==(UTF8Str.fromString [])) then Nothing else Just (UTF8Str.toString x)
250
251
252 {-DHUN| Loads the wikitext of an article form a mediawiki server when mediawiki2latex is running with the --mediawiki option. This function downloads the orignial wikitext source without expanding the templates. This is going to happen later by call to getExpandedPage. The first parmeter is lemma to load. The second paramerter is the WikiUrl to the server hosting the wiki. The return value is a pair. The first element of it is the wikitext source of the article. The second element of it is the URL under which the article was downloaded DHUN-}
253
229254 getpage2 :: String -> WikiUrl -> IO (Maybe (String, URL))
230255 getpage2 ss u
231256 = do l <- mapM ((geturl4 ss) . unify . exportURL . modpath) (parses u)
235260 where go (Just xx, uu) = [(decodeString xx, uu)]
236261 go _ = []
237262
263 {-DHUN| This function expands all templates in a wikitext source using MediaWiki. The first parameter is lemma to be processed. The second parameter is the wikitext source of the article stored under the lemma. The third parameter is url to Special:ExpandTemplates page on the mediawiki server. The return value is the wikitext source with all templates expanded by MediaWiki DHUN-}
264
238265 getExpandedPage :: String -> String -> URL -> IO (Maybe String)
239266 getExpandedPage ss d u
240267 = do l <- mapM
301328 case (url_type u) of
302329 Absolute h -> return (URL.host h)
303330 _ -> mzero
304
331
332 {-DHUN| Parse an URL supplied as string in the first parameter into a FullWikiUrl which is returned. See documentation on the types WikiUrl and FullWikiUrl for more information DHUN-}
333
334
305335 analyseFull :: String -> Maybe FullWikiUrl
306336 analyseFull theUrl
307337 = do ana <- analyse theUrl
4545 l = zip [0..] (map (\x->zip [0..] (map (bf.(strip "\n\r\t ")) ((drop 1) ((splitOn "|") x)))) (take 8 (drop 2 sp)))
4646
4747
48
49
50
51
52
53
54
55
88 import MyState
99 import Data.List
1010 import Data.String.HT (trim)
11
11
1212 {-DHUN| add latex label elements to the parse tree. chapter section subsection and so on will be referenced by these labels. It takes the initial UrlState as second parameter. And the parse tree to be processed as first parameter. It returns a tuple. The first element is them modified urlstate and the second is the modified parse tree with the labels added. The field sUrlState of UrlState contains the page name of the downloaded page currently being processed by the algorithm. the filed mUrlState of UrlState is the number of the currenty label. And ,UrlState is a mapping from combined page and chapter, section etc. names (like urls) to their label numbers. DHUN-}
13
13
1414 makeLables ::
1515 [Anything Char] -> UrlState -> (UrlState, [Anything Char])
1616 makeLables ll states
1717 = let (f, s) = mapAccumL makeLablesForNode states ll in
1818 (f, concat s)
19 where
20 makeLablesForNode ::
19 where makeLablesForNode ::
2120 UrlState -> Anything Char -> (UrlState, [Anything Char])
2221 makeLablesForNode st (Environment DhunUrl ss l)
2322 = (st{iUrlState = (iUrlState st) + 1, sUrlState = yy,
3736 = (fst zz, [Environment e s (snd $ zz)])
3837 where zz = makeLables l st
3938 makeLablesForNode st x = (st, [x])
40
39
4140 {-DHUN| remove superfluous br html tags from the parse tree. Always run before converting the parse tree to latex DHUN-}
42
41
4342 removeBr :: [Anything Char] -> [Anything Char]
4443 removeBr ((C '\n') : ((Environment Tag (TagAttr "br" _) _) : xs))
4544 = (C '\n') : removeBr xs
5857 = (Environment SpaceIndent x (removeBr l)) : removeBr xs
5958 removeBr (x : xs) = x : removeBr xs
6059 removeBr [] = []
61
60
6261 {-DHUN| checks if a given string is an image inclusion in wiki notation. In wiki notation an image is included by [[Image:FooBar.png]], but image may be replace by localized versions like Bild in German, so this function checks for those and return true if it seems to be an image DHUN-}
63
62
6463 isImage :: String -> Bool
6564 isImage x
6665 = ([z | z <- map (++ ":") imgtags, z `isPrefixOf` (map toLower x)]
6766 /= [])
68
67
6968 {-DHUN| flattens a parse tree shallowly. that is take all characters on the surface level of the parse tree and combines them into a single string. It does not decent into substructures of the parse and so neglects all characters there and does not return those with the exception of character in SpaceIndent environments directly attached to the surface level DHUN-}
70
69
7170 shallowFlatten :: [Anything Char] -> String
7271 shallowFlatten ((C a) : xs) = a : (shallowFlatten xs)
72 shallowFlatten ((Environment HtmlChar (Str "amp") a) : xs)
73 = '&' : (shallowFlatten xs)
74 shallowFlatten ((Environment HtmlChar (Str "lt") a) : xs)
75 = '<' : (shallowFlatten xs)
76 shallowFlatten ((Environment HtmlChar (Str "gt") a) : xs)
77 = '>' : (shallowFlatten xs)
7378 shallowFlatten ((Environment SpaceIndent _ l) : xs)
7479 = '\n' : ((shallowFlatten l) ++ (shallowFlatten xs))
7580 shallowFlatten (_ : xs) = shallowFlatten xs
7681 shallowFlatten [] = []
77
82
7883 {-DHUN| A link in wiki notation is given by [foorbar.com a caption]. This function returns the location the link points to so foobar.com as String. It takes the parse tree representation of the link as input parameter DHUN-}
79
84
8085 linkLocation :: [Anything Char] -> String
8186 linkLocation l
8287 = case yy of
8994 [] -> ""
9095 (g : _) -> g)
9196
92
9397 normalizeExtensionHtml :: String -> String
9498 normalizeExtensionHtml ('s' : ('v' : ('g' : _))) = "png"
9599 normalizeExtensionHtml ('j' : ('p' : ('e' : ('g' : _)))) = "jpg"
102106 normalizeExtensionHtml (x : xs) = x : (normalizeExtension xs)
103107 normalizeExtensionHtml [] = []
104108
105
106109 {-DHUN| changes the extension for a filename given in the wiki source to the extension to be used in the latex document. For example gif documents are converted to png documents. So this function converts the string 'gif' to the string 'png' DHUN-}
107
110
108111 normalizeExtension :: String -> String
109112 normalizeExtension ('s' : ('v' : ('g' : _))) = "\\SVGExtension"
110113 normalizeExtension ('j' : ('p' : ('e' : ('g' : _)))) = "jpg"
116119 normalizeExtension (' ' : xs) = normalizeExtension xs
117120 normalizeExtension (x : xs) = x : (normalizeExtension xs)
118121 normalizeExtension [] = []
119
122
120123 {-DHUN| changes the extension for a filename given in the wiki source to the extension to be used in as filename when storing the image in the latex tree. DHUN-}
121
124
122125 normalizeExtension2 :: String -> String
123126 normalizeExtension2 ('s' : ('v' : ('g' : _))) = "svg"
124127 normalizeExtension2 ('j' : ('p' : ('e' : ('g' : _)))) = "jpg"
130133 normalizeExtension2 (' ' : xs) = normalizeExtension xs
131134 normalizeExtension2 (x : xs) = x : (normalizeExtension xs)
132135 normalizeExtension2 [] = []
133
136
134137 {-DHUN| returns the extension of a filename. DHUN-}
135
138
136139 fileNameToExtension :: String -> String
137140 fileNameToExtension s = last (splitOn "." (map toLower s))
138
141
139142 {-DHUN| a predicate that can be run on an element of a parse tree that returns true if the element is a wikilink. A wikilink is denoted as [[Foobar]] in the wiki notation, an links to an other mediawiki page on the same or a different wiki DHUN-}
140
143
141144 isWikiLink :: (Anything Char) -> Bool
142145 isWikiLink (Environment Wikilink _ []) = False
143146 isWikiLink (Environment Wikilink _ _) = True
144147 isWikiLink _ = False
145
148
146149 {-DHUN| changes Math elements on the surface level of a parse tree to bigmath elements. Those will be rendered as equation environments. Normal math is usually only display be the dollar math environment in latex. DHUN-}
147
150
148151 shallowEnlargeMath :: [Anything Char] -> [Anything Char]
149152 shallowEnlargeMath ((Environment Math s l) : xs)
150153 = (Environment BigMath s l) : shallowEnlargeMath xs
151154 shallowEnlargeMath (x : xs) = x : shallowEnlargeMath xs
152155 shallowEnlargeMath [] = []
153
156
154157 {-DHUN| this function modified chapter headings. It is used when converting a chapter heading from the parse tree to latex. In the wiki often the page name is used as chapter heading and thus only the parts after the slashes and colons are to be taken into account. Also underscores have to be replaced by spaces DHUN-}
155
158
156159 chapterTransform :: String -> String
157160 chapterTransform s
158161 = replace '_' ' ' (last (splitOn ":" (last (splitOn "/" s))))
159
162
160163 {-DHUN| returns the separator to separate items in enumeration, itemizations and so on. Currently this is always \\item{} but this may change depending on which latex package is used to display enumerations and so on. Takes the char for this type of enumeration etc. in wiki notation. That is a hash for enumeration and a asterisk for itemization and so on DHUN-}
161
164
162165 itemSeperator :: Char -> String
163166 itemSeperator c = itemSeperator2 [c]
164
167
165168 {-DHUN| see documentation on itemSeperator. The only difference is that this function takes a string containing a single character instead of the single character itself DHUN-}
166
169
167170 itemSeperator2 :: String -> String
168171 itemSeperator2 "#" = "\\item{}"
169172 itemSeperator2 ":" = "\\item{}"
170173 itemSeperator2 ";" = "\\item{}"
171174 itemSeperator2 "*" = "\\item{}"
172175 itemSeperator2 _ = "\\item{}"
173
176
174177 {-DHUN| returns the name of a latex environment for an itemization enumeration etc.. The first parameter is a string in wiki notation and declare which type environment should be used. The second parameter is a float giving the width of the current cell in units of the line width when inside a table and is 1.0 if currently not inside any table. DHUN-}
175
178
176179 itemEnvironmentName :: String -> Float -> String
177180 itemEnvironmentName "#" _ = "myenumerate"
178181 itemEnvironmentName ":" _ = "myquote"
179182 itemEnvironmentName ";" _ = "mydescription"
180183 itemEnvironmentName "*" _ = "myitemize"
181184 itemEnvironmentName _ _ = "list"
182
185
183186 {-DHUN| returns additional parameter for the opening of a latex environment for an itemization enumeration etc. The second parameter is a float giving the width of the current cell in units of the line width when inside a table and is 1.0 if currently not inside any table. DHUN-}
184
187
185188 itemEnvironmentParameters :: String -> Float -> String
186189 itemEnvironmentParameters "#" _ = ""
187190 itemEnvironmentParameters ":" _ = ""
188191 itemEnvironmentParameters ";" _ = ""
189192 itemEnvironmentParameters "*" _ = ""
190193 itemEnvironmentParameters _ _ = "{\\labelitemi}{\\leftmargin=1em}"
191
194
192195 {-DHUN| do multple replacements in a row. The first argument is haystack. The second one a list of pair of a needle and a corresponding nail. The haystack with each needle replaced by a nail is returned DHUN-}
193
196
194197 multireplace :: (Eq a) => [a] -> [([a], [a])] -> [a]
195198 multireplace haystack ((needle, nail) : xs)
196199 = multireplace (replace2 haystack needle nail) xs
197200 multireplace haystack [] = haystack
198
201
199202 {-DHUN| converts a mathematical expression (that is something within a math tag in the wiki) to an latex expression. Essentially the wiki is using latex. But it allows for some extra features that are take care of in this transformation DHUN-}
200
203
201204 mathTransform :: [Anything Char] -> String
202205 mathTransform x
203206 = multireplace (replace '\n' ' ' (shallowFlatten x)) replist
204
207
205208 {-DHUN| list of replacements to be applied to contents of math tags in wiki notation for use in the latex equation environment DHUN-}
206
209
207210 replist :: [([Char], [Char])]
208211 replist
209212 = [("\\or", "\\vee{}"), ("%", "\\%"), ("\\and", "\\wedge{}"),
211214 ("\\end{align}", "\\end{aligned}"), ("\\\\%", "\\%"),
212215 ("\\part ", "\\partial "), ("\\part{", "\\partial{"), ("\\;", ""),
213216 ("\\|", "\\Vert"), ("\\!", ""), ("\\part\\", "\\partial\\")]
214
217
215218 {-DHUN| helper function for line breaking in source code and preformatted block. Not to be called from outside this module. Converts character to parse tree entities, to be process by breakLinesHelper3 DHUN-}
216
219
217220 breakLinesHelper4 :: [Anything Char] -> [Anything Char]
218221 breakLinesHelper4 ((C '\n') : xs)
219222 = (Environment Tag (TagAttr "br" Map.empty) []) :
222225 breakLinesHelper4 ((C ' ') : xs) = Quad : breakLinesHelper4 xs
223226 breakLinesHelper4 (x : xs) = x : breakLinesHelper4 xs
224227 breakLinesHelper4 [] = []
225
228
226229 {-DHUN| the width of a tab character in spaces DHUN-}
227
230
228231 tabwidth :: Int
229232 tabwidth = 4
230
233
231234 {-DHUN| helper function for line breaking in source code and preformatted block. Not to be called from outside this module. Inserts br tags where line breaks are needed, the first parameter is an integer which represents the current column in the text, it should be zero when this function is called externally. The second parameter is an integer an represents the maximum length of the line in characters. The third input parameter is the code block to which the line breaks should be added in parse tree notation. The function returns the code block with added br tags for the line breaks in parse tree notation DHUN-}
232
235
233236 breakLinesHelper3 ::
234237 Int -> Int -> [Anything Char] -> [Anything Char]
235238 breakLinesHelper3 _ m ((Environment Tag (TagAttr "br" y) []) : xs)
257260 = (xx /= Quad) &&
258261 (xx /= (Environment Tag (TagAttr "br" Map.empty) []))
259262 breakLinesHelper3 _ _ [] = []
260
263
261264 {-DHUN| Breaks lines in source code and preformatted block. Inserts br tags where line breaks are needed. The first parameter is an integer an represents the maximum length of the line in characters. The second input parameter is the code block to which the line breaks should be added in parse tree notation. The function returns the code block with added br tags for the line breaks in parse tree notation DHUN-}
262
265
263266 breakLines3 :: Int -> [Anything Char] -> [Anything Char]
264267 breakLines3 m s
265268 = rebreak (breakLinesHelper3 0 m (breakLinesHelper4 s))
266
269
267270 {-DHUN| Adds quads in between double br line breaks, needed since double \\newline is not allowed in latex DHUN-}
268
271
269272 rebreak :: [Anything Char] -> [Anything Char]
270273 rebreak
271274 ((Environment Tag (TagAttr "br" a) l) :
274277 Quad : (rebreak ((Environment Tag (TagAttr "br" a2) l2) : xs))
275278 rebreak (x : xs) = x : (rebreak xs)
276279 rebreak [] = []
277
280
278281 {-DHUN| Replaces several parse tree item representations of white space characters with the corresponding whitespace characters themselves in parse tree notation. DHUN-}
279
282
280283 renormalize :: Anything Char -> Anything Char
281284 renormalize (Environment Tag (TagAttr "br" _) []) = C '\n'
282285 renormalize Quad = C ' '
1111 import Data.Char
1212 import Data.List.Split
1313 import Control.Monad
14
14
1515 {-DHUN| takes a list of language prefixes and sister project prefixes and return a the first one or two elements of that list as tuple of Strings wrapped into maybe monads, returns nothing at the appropriate elements to the tuple if the list does not have enough elements. The first element of the tuple is populated first DHUN-}
16
16
1717 getprefixes :: [String] -> (Maybe String, Maybe String)
1818 getprefixes ss
1919 = case ss of
2222 case xs of
2323 [] -> Nothing
2424 (y : _) -> Just $ filter (not . isSpace) (map toLower y))
25
25
2626 {-DHUN| in the wiki source a heading is given by == foo bar == where the number of space gives the level of the heading. In latex this is done with string like 'section. This function translates from wiki notation to latex notation DHUN-}
27
27
2828 getsec :: String -> String
2929 getsec "=" = "chapter"
3030 getsec "==" = "section"
3333 getsec "=====" = "paragraph"
3434 getsec "======" = "subparagraph"
3535 getsec _ = "subparagraph"
36
36
3737 {-DHUN| in the wiki source a heading is given by == foo bar == where the number of space gives the level of the heading. In latex this is done with string like 'section'. Some of with need additional commands after the actual heading command. Those are returned by this function DHUN-}
38
38
3939 getsecpost :: String -> String
4040 getsecpost "=" = "\n\\myminitoc\n"
4141 getsecpost "==" = ""
4444 getsecpost "=====" = "{$\\text{ }$}\\newline"
4545 getsecpost "======" = "{$\\text{ }$}\\newline"
4646 getsecpost _ = "{$\\text{ }$}\\newline"
47
47
4848 {-DHUN| a predicate that returns true if the string is a valid image size in wiki notation. In the wiki an image is include like [[Image:FoorBar.png|300px]] where 300px means that the image should be displayed at a width of 300 pixel. This function takes strings and looks whether it looks like an integer number flowed by 'px' and is thus a valid width definition for an image in wiki notation DHUN-}
49
49
5050 isImageSize :: String -> Bool
5151 isImageSize x
5252 = if (isSuffixOf "px" x) then
5353 if (reads (take ((length x) - 2) x)) == ([] :: [(Float, [Char])])
5454 then False else True
5555 else False
56
56
5757 {-DHUN| this predicate returns true if the string seems to be a caption of an image in wiki notation. And image give like [[Image:JohnDow.png|foo bar]] where 'foo bar' is the caption of the image. So this function just tests whether the string looks like one of the reserved keyword of mediawiki or like an image size definition (see function isImageSize) and return true if none of that fits DHUN-}
58
58
5959 isCaption :: String -> Bool
6060 isCaption x
6161 = if isImageSize x then False else
6262 if x `elem` ["thumb", "right", "left", "center"] then False else
6363 True
64
64
6565 {-DHUN| escapes all charters of a string for use a a link with the hyperref package in latex DHUN-}
66
66
6767 escapelink :: String -> String
6868 escapelink s = concat (map chartransforlink s)
69
69
7070 {-DHUN| analyzes a host name. If it belongs to a wikimedia project it returns an UrlInfo value otherwise it returns a WikiBaseUrl value. In any case it is the type WikiUrlData. This information is needed when building writing down links to subpages in latex or downloading images from the wiki. DHUN-}
71
71
7272 analyseNetloc :: String -> WikiUrlData
7373 analyseNetloc nl = myurldata
7474 where langm
9494 splits = splitOn "." nl
9595 (prefix, prefix2) = getprefixes splits
9696 wikiset = Set.fromList . Map.elems . Map.fromList $ multilangwikis
97
97
9898 {-DHUN| splits a wikilink in caption and link target and returns the target. So a wkilink is given like [[FooBar|John Dow]]. This link links to the lemma FooBar and is displayed with the caption 'John Dow'. So this function returns FooBar for that link. DHUN-}
99
99
100100 localWikiLinkLocation :: String -> String
101101 localWikiLinkLocation s = headSplitEq '|' s
102
102
103103 {-DHUN| This function takes the contend of a wikilink in wiki markup notation as first input parameter in parse tree notation. It takes the state of the Latex Renderer as second input parameter. It returns the absolute url of the given wikilink as string DHUN-}
104
104
105105 wikiLinkLocationesc :: [Anything Char] -> MyState -> String
106106 wikiLinkLocationesc l st
107107 = getUrlFromWikiLinkInfoesc .
108108 getWikiLinkInfo (shallowFlatten l) . urld
109109 $ st
110
110
111111 {-DHUN| converts a WikiLinkInfo value pointing to a page on a wiki to the full url of that page as string. It is escaped for the use with the hyperref latex package DHUN-}
112
112
113113 getUrlFromWikiLinkInfoesc :: WikiLinkInfo -> String
114114 getUrlFromWikiLinkInfoesc i
115115 = case (urldata i) of
119119 (wikitype x) ++ ".org/wiki/" ++ (escapelink (urlEncode (page i)))
120120 BaseUrl (WikiBaseUrl x) -> "https://" ++
121121 x ++ "/wiki/" ++ (escapelink (urlEncode (page i)))
122
122
123123 {-DHUN| converts a WikiLinkInfo value pointing to a page on a wiki to the full url of that page as string. It is not is escaped in any way, so ready for use in a webbrowers DHUN-}
124
124
125125 getUrlFromWikiLinkInfo :: WikiLinkInfo -> String
126126 getUrlFromWikiLinkInfo i
127127 = case (urldata i) of
130130 "." ++ (wikitype x) ++ ".org/wiki/" ++ (urlEncode (page i))
131131 BaseUrl (WikiBaseUrl x) -> "https://" ++
132132 x ++ "/wiki/" ++ (urlEncode (page i))
133
133
134134 {-DHUN| converts a WikiUrlData value pointing to a wiki and a string (like a part of a url) pointing to a page relative to that wiki and returns the full url of that page as string DHUN-}
135
135
136136 wikiUrlDataToString :: WikiUrlData -> String -> String
137137 wikiUrlDataToString w i
138138 = case w of
139139 UrlInfo x -> "https://" ++
140140 (language x) ++ "." ++ (wikitype x) ++ ".org" ++ i
141141 BaseUrl (WikiBaseUrl x) -> "https://" ++ x ++ i
142
142
143143 {-DHUN| This function takes the contend of a wikilink in wiki markup notation as first input parameter. So for the wiki link (mediawiki markup notation) [[en:JohnDow|John Dow]] this is 'en:JohnDow|John Dow' (the single quotes were added to make the string distinguishable from the text and are not part of the passed parameter). It takes the WikiUrlData value describing the wiki the page currently being processed belongs to as second parameter. It returns a WikiLinkInfo value describing the target of the link given as first parameter when evaluated with respect to the second parameter DHUN-}
144
144
145145 getWikiLinkInfo :: String -> WikiUrlData -> WikiLinkInfo
146146 getWikiLinkInfo s i = WikiLinkInfo{urldata = udata, page = pagen}
147147 where udata
1414 import System.Info
1515 import Compiler (runCompile)
1616 import Tools (replace2)
17
17
1818 {-DHUN| Data structure to repesent a single option on the command line. DHUN-}
19
19
2020 data Flag = Verbose
2121 | Vector
2222 | Version
3939 | Odt
4040 | Server String
4141 deriving (Show, Eq)
42
42
4343 {-DHUN| String constant on for the version command line option. DHUN-}
44
44
4545 versionOption :: String
4646 versionOption = "version"
4747
5050 featuredOption :: String
5151 featuredOption = "featured"
5252
53
5453 {-DHUN| String constant on for the resolution command line option. DHUN-}
55
54
5655 resolutionOption :: String
5756 resolutionOption = "resolution"
58
57
5958 {-DHUN| String constant on for the output command line option. DHUN-}
60
59
6160 output :: String
6261 output = "output"
6362
6463 {-DHUN| String constant on for the zip command line option. DHUN-}
65
64
6665 zip :: String
6766 zip = "zip"
6867
69
7068 {-DHUN| String constant on for the hex command line option. DHUN-}
71
69
7270 hexen :: String
7371 hexen = "hex"
74
72
7573 {-DHUN| String constant on for the templates command line option. DHUN-}
76
74
7775 templates :: String
7876 templates = "templates"
79
77
8078 {-DHUN| String constant on for the headers command line option. DHUN-}
81
79
8280 headers :: String
8381 headers = "headers"
84
82
8583 {-DHUN| String constant on for the url command line option. DHUN-}
86
84
8785 url :: String
8886 url = "url"
89
87
9088 {-DHUN| String constant on for the medaiwiki command line option. DHUN-}
91
89
9290 mediawiki :: String
9391 mediawiki = "mediawiki"
94
92
9593 {-DHUN| String constant on for the book-namespace command line option. DHUN-}
96
94
9795 bookmode :: String
9896 bookmode = "bookmode"
9997
10098 {-DHUN| String constant on for the html command line option. DHUN-}
101
99
102100 html :: String
103101 html = "html"
104
102
105103 {-DHUN| String constant on for the paper command line option. DHUN-}
106
104
107105 paperOption :: String
108106 paperOption = "paper"
109
107
110108 {-DHUN| String constant on for the internal command line option. DHUN-}
111
109
112110 internal :: String
113111 internal = "internal"
114
112
115113 {-DHUN| String constant on for the vector command line option. DHUN-}
116
114
117115 vectorOption :: String
118116 vectorOption = "vector"
119
117
120118 {-DHUN| String constant on for the copy command line option. DHUN-}
121
119
122120 copyOption :: String
123121 copyOption = "copy"
124
122
125123 {-DHUN| String constant on for the server command line option. DHUN-}
126
124
127125 serverOption :: String
128126 serverOption = "server"
129127
130128 {-DHUN| String constant on for the epub command line option. DHUN-}
131
129
132130 epubOption :: String
133131 epubOption = "epub"
134132
135133 {-DHUN| String constant on for the odt command line option. DHUN-}
136
134
137135 odtOption :: String
138136 odtOption = "odt"
139137
140
141138 {-DHUN| Datastructure describing all possible command line options DHUN-}
142
139
143140 options :: [OptDescr Flag]
144141 options
145142 = [Option ['V', '?', 'v'] [versionOption, "help"] (NoArg Version)
167164 "use book-namespace mode for expansion",
168165 Option ['z'] [Main.zip] (NoArg Main.Zip)
169166 "output zip archive of latex source",
170 Option ['b'] [epubOption] (NoArg Main.EPub)
171 "output epub file",
172 Option ['d'] [odtOption] (NoArg Main.Odt)
173 "output odt file",
167 Option ['b'] [epubOption] (NoArg Main.EPub) "output epub file",
168 Option ['d'] [odtOption] (NoArg Main.Odt) "output odt file",
174169 Option ['g'] [vectorOption] (NoArg Main.Vector)
175170 "keep vector graphics in vector form",
176171 Option ['i'] [internal] (NoArg Main.InternalTemplates)
179174 "use user supplied latex headers",
180175 Option ['c'] [copyOption] (ReqArg Main.Copy "DIRECTORY")
181176 "copy LaTeX tree to DIRECTORY"]
182
177
183178 {-DHUN| parsed the options given on the command line via the getopt library DHUN-}
184
179
185180 compilerOpts :: [String] -> IO ([Flag], [String])
186181 compilerOpts argv
187182 = case getOpt Permute options argv of
188183 (o, n, []) -> return (o, n)
189184 (_, _, errs) -> ioError
190185 (userError (concat errs ++ usageInfo header options))
191
186
192187 {-DHUN| header string for the usage help DHUN-}
193
188
194189 header :: String
195190 header = "Usage: mediawiki2latex [OPTION...]"
196
191
197192 {-DHUN| header string giving the current version string of mediawiki2latex DHUN-}
198
193
199194 versionHeader :: String
200195 versionHeader
201 = "mediawiki2latex version 7.32\n" ++ (usageInfo header options)
202
196 = "mediawiki2latex version 7.33\n" ++ (usageInfo header options)
197
203198 {-DHUN| print the version string of mediawiki2latex. Takes the output of the compilerOpts function as input. Prints the version string if no options were given or the version command was given does noting otherwise DHUN-}
204
199
205200 printVersion :: (Eq a) => ([Flag], [a]) -> IO ()
206201 printVersion o
207202 = if (Version `elem` (fst o)) || o == ([], []) then
208203 putStrLn versionHeader >> exitSuccess else return ()
209
204
210205 {-DHUN| checks whether the given option exists exactly once in the given (getopt parsed) command line. Takes a predicate (returning a Maybe type) as first input parameter. Takes the decription string for the option under consideration as second parameter. Takes the (getopt parsed) command line as third input parameter. Return an either monad giving Right together with the value of the option in cases the option exists exacatly once in the command line, gives Left with an Error values otherwise DHUN-}
211
206
212207 exactlyOne :: (a -> Maybe b) -> String -> [a] -> Either MyError b
213208 exactlyOne predicate s o
214209 = case filter isJust (map predicate o) of
215210 ((Just x) : []) -> Right x
216211 _ -> Left (NotExcatlyOneError s)
217
212
218213 {-DHUN| checks whether the given option exists at most once in the given (getopt parsed) command line. Takes a predicate (returning a Maybe type) as first input parameter. Takes the decription string for the option under consideration as second parameter. Takes the (getopt parsed) command line as third input parameter. Return an either monad giving Right together with the value of the option in cases the option exists at most once once in the command line, gives Left with an Error values otherwise DHUN-}
219
214
220215 atMostOne ::
221216 (a1 -> Maybe a) -> String -> [a1] -> Either MyError (Maybe a)
222217 atMostOne predicate s o
224219 (x : []) -> Right x
225220 ([]) -> Right Nothing
226221 _ -> Left (NotAtMostOneError s)
227
222
228223 {-DHUN| predicate for the resolution option. see atMostOne and exactlyOne functions for details DHUN-}
229
224
230225 resolutionPredicate :: Flag -> Maybe String
231226 resolutionPredicate (Resolution x) = Just x
232227 resolutionPredicate _ = Nothing
233
228
234229 {-DHUN| predicate for the copy option. see atMostOne and exactlyOne functions for details DHUN-}
235
230
236231 copyPredicate :: Flag -> Maybe String
237232 copyPredicate (Copy x) = Just x
238233 copyPredicate _ = Nothing
239
234
240235 {-DHUN| predicate for the output option. see atMostOne and exactlyOne functions for details DHUN-}
241
236
242237 outputPredicate :: Flag -> Maybe String
243238 outputPredicate (Output x) = Just x
244239 outputPredicate _ = Nothing
245
240
246241 {-DHUN| predicate for the input option. see atMostOne and exactlyOne functions for details DHUN-}
247
242
248243 inputPredicate :: Flag -> Maybe String
249244 inputPredicate (Input x) = Just x
250245 inputPredicate _ = Nothing
251
246
252247 {-DHUN| predicate for the templates option. see atMostOne and exactlyOne functions for details DHUN-}
253
248
254249 templatesPredicate :: Flag -> Maybe String
255250 templatesPredicate (Templates x) = Just x
256251 templatesPredicate _ = Nothing
257
252
258253 {-DHUN| predicate for the headers option. see atMostOne and exactlyOne functions for details DHUN-}
259
254
260255 headersPredicate :: Flag -> Maybe String
261256 headersPredicate (Headers x) = Just x
262257 headersPredicate _ = Nothing
263
258
264259 {-DHUN| predicate for the hex option. see atMostOne and exactlyOne functions for details DHUN-}
265
260
266261 hexPredicate :: Flag -> Maybe String
267262 hexPredicate (Hex x) = Just x
268263 hexPredicate _ = Nothing
269
264
270265 {-DHUN| predicate for the server option. see atMostOne and exactlyOne functions for details DHUN-}
271
266
272267 serverPredicate :: Flag -> Maybe String
273268 serverPredicate (Server x) = Just x
274269 serverPredicate _ = Nothing
275
276270
277271 featuredPredicate :: Flag -> Maybe String
278272 featuredPredicate (Featured x) = Just x
279273 featuredPredicate _ = Nothing
280274
281275 {-DHUN| predicate for the paper option. see atMostOne and exactlyOne functions for details DHUN-}
282
276
283277 paperPredicate :: Flag -> Maybe String
284278 paperPredicate (Paper x) = Just x
285279 paperPredicate _ = Nothing
286
280
287281 {-DHUN| default images resolution. All images with a right resolution will be dithered to this resolution unless is is overridden with the resolution command line option DHUN-}
288
282
289283 defaultResolution :: Integer
290284 defaultResolution = 300
291
285
292286 {-DHUN| the default paper format DHUN-}
293
287
294288 defaultPaper :: String
295289 defaultPaper = "A4"
296
290
297291 {-DHUN| function to count the number of given command lines options which are part of a certain set of possible command line options. Takes a Maybe value representing whether the command line option is present and returns 1 on Just and 0 otherwise DHUN-}
298
292
299293 maybeToInt :: (Num a) => Maybe t -> a
300294 maybeToInt (Just _) = 1
301295 maybeToInt _ = 0
302
296
303297 {-DHUN| function to count the number of given command lines option which are part of a certain set of possible command line options. Takes a Bool values representing whether the command line option is present and returns 1 on True and 0 otherwise DHUN-}
304
298
305299 boolToInt :: (Num a) => Bool -> a
306300 boolToInt True = 1
307301 boolToInt _ = 0
308
302
309303 {-DHUN| Caculates a configuration information for the run of program from the options given on the command line. It takes the pathname of the current working directory as first input parameter. It takes the (getopt parsed) command line as second input parameter.It returns an Either Monad. In case the command line made sence the Right type containg the configuation is returned otherwise the Left values with a Error Values desribing the problem is returned DHUN-}
310
304
311305 checkOpts :: FilePath -> [Flag] -> Either MyError FullConfig
312306 checkOpts cwd o
313307 = do serverVal <- atMostOne serverPredicate serverOption o
314308 featuredVal <- atMostOne featuredPredicate featuredOption o
315309 case featuredVal of
316 Just x -> case (reads ("("++ (replace2 x ":" ",")++")"))::[((Integer,Integer),String)] of
317 [((s,e), _)] | (s<=e) -> return
318 FullConfig{ImperativeState.headers = Nothing,
319 resolution = 300, outputFilename = "",
320 inputUrl = "", runMode = ImperativeState.HTML,
321 paper = "A4", vector = False,
322 ImperativeState.copy = Nothing, mainPath = "",
323 server = Nothing, selfTest = Just (s,e), outputType=PlainPDF, compile=Nothing, imgctrb=Nothing}
324
310 Just x -> case
311 (reads ("(" ++ (replace2 x ":" ",") ++ ")")) ::
312 [((Integer, Integer), String)]
313 of
314 [((s, e), _)] | (s <= e) ->
315 return
316 FullConfig{ImperativeState.headers = Nothing,
317 resolution = 300, outputFilename = "",
318 inputUrl = "", runMode = ImperativeState.HTML,
319 paper = "A4", vector = False,
320 ImperativeState.copy = Nothing, mainPath = "",
321 server = Nothing, selfTest = Just (s, e),
322 outputType = PlainPDF, compile = Nothing,
323 imgctrb = Nothing}
325324 _ -> Left (NotIntegerPairError featuredOption)
326 _ -> case serverVal of
327 Just x -> case reads x of
328 [(z, _)] -> return
329 FullConfig{ImperativeState.headers = Nothing,
330 resolution = 300, outputFilename = "",
331 inputUrl = "", runMode = ImperativeState.HTML,
332 paper = "A4", vector = False,
333 ImperativeState.copy = Nothing, mainPath = "",
334 server = Just z, outputType=PlainPDF, selfTest=Nothing, compile=Nothing, imgctrb= Nothing}
335 _ -> Left (NotIntegerError serverOption)
336 _ -> do hexVal <- atMostOne hexPredicate hexen o
337 case hexVal of
338 Just x -> do return ((read . unhex) x)
339 _ -> do resolutionOpt <- atMostOne resolutionPredicate
340 resolutionOption
341 o
342 resolutionVal <- case resolutionOpt of
343 (Just x) -> case reads x of
344 [(z, _)] -> Right z
345 _ -> Left
346 (NotIntegerError
347 resolutionOption)
348 _ -> Right defaultResolution
349 outputVal <- exactlyOne outputPredicate output o
350 inputVal <- exactlyOne inputPredicate url o
351 templatesVal <- atMostOne templatesPredicate templates o
352 headersVal <- atMostOne headersPredicate templates o
353 copyVal <- atMostOne copyPredicate copyOption o
354 paperOpt <- atMostOne paperPredicate paperOption o
355 paperVal <- case paperOpt of
356 Just x -> if
357 x `elem`
358 ["A4", "A5", "B5", "letter", "legal",
359 "executive"]
360 then Right x else Left PaperError
361 _ -> Right defaultPaper
362 let mediaWikiVal = (MediaWiki `elem` o)
363 let bookModeVal = (BookMode `elem` o)
364 let htmlVal = (Main.HTML `elem` o)
365 let zipVal = (Main.Zip `elem` o)
366 let epubVal = (Main.EPub `elem` o)
367 let odtVal = (Main.Odt `elem` o)
368 let temVal = (Main.InternalTemplates `elem` o)
369 let vectorVal = (Main.Vector `elem` o)
370 let mysum
371 = (boolToInt temVal) + (boolToInt mediaWikiVal) +
372 (boolToInt htmlVal)
373 + (maybeToInt templatesVal) + (boolToInt bookModeVal)
374 if mysum > 1 then Left ToManyOptionsError else Right ()
375 if ((boolToInt zipVal) + (boolToInt epubVal) + (boolToInt odtVal)) > (1 :: Integer) then Left ToManyOutputOptionsError else Right ()
376 runModeVal <- if mysum == (1 :: Integer) then
377 case templatesVal of
378 Just xx -> Right (UserTemplateFile xx)
379 _ -> if mediaWikiVal then Right ExpandedTemplates
380 else
381 if htmlVal then Right ImperativeState.HTML
382 else if bookModeVal then Right ImperativeState.Book
383 else Right (StandardTemplates)
384 else Right ImperativeState.HTML
385 return
386 (FullConfig{ImperativeState.headers =
387 headersVal >>= (return . (cwd </>)),
388 resolution = resolutionVal, selfTest=Nothing,outputFilename = outputVal,
389 inputUrl = inputVal, runMode = runModeVal,
390 paper = paperVal, vector = vectorVal,
391 copy = copyVal >>= (return . (cwd </>)),
392 mainPath = cwd ++ (if os == "linux" then "" else "\\"),
393 server = Nothing, outputType=if zipVal then ZipArchive else if epubVal then EPubFile else if odtVal then OdtFile else PlainPDF,compile=Nothing,imgctrb=Nothing})
394
325 _ -> case serverVal of
326 Just x -> case reads x of
327 [(z, _)] -> return
328 FullConfig{ImperativeState.headers = Nothing,
329 resolution = 300, outputFilename = "",
330 inputUrl = "",
331 runMode = ImperativeState.HTML,
332 paper = "A4", vector = False,
333 ImperativeState.copy = Nothing,
334 mainPath = "", server = Just z,
335 outputType = PlainPDF,
336 selfTest = Nothing, compile = Nothing,
337 imgctrb = Nothing}
338 _ -> Left (NotIntegerError serverOption)
339 _ -> do hexVal <- atMostOne hexPredicate hexen o
340 case hexVal of
341 Just x -> do return ((read . unhex) x)
342 _ -> do resolutionOpt <- atMostOne resolutionPredicate
343 resolutionOption
344 o
345 resolutionVal <- case resolutionOpt of
346 (Just x) -> case reads x of
347 [(z, _)] -> Right z
348 _ -> Left
349 (NotIntegerError
350 resolutionOption)
351 _ -> Right defaultResolution
352 outputVal <- exactlyOne outputPredicate output o
353 inputVal <- exactlyOne inputPredicate url o
354 templatesVal <- atMostOne templatesPredicate templates o
355 headersVal <- atMostOne headersPredicate templates o
356 copyVal <- atMostOne copyPredicate copyOption o
357 paperOpt <- atMostOne paperPredicate paperOption o
358 paperVal <- case paperOpt of
359 Just x -> if
360 x `elem`
361 ["A4", "A5", "B5", "letter",
362 "legal", "executive"]
363 then Right x else
364 Left PaperError
365 _ -> Right defaultPaper
366 let mediaWikiVal = (MediaWiki `elem` o)
367 let bookModeVal = (BookMode `elem` o)
368 let htmlVal = (Main.HTML `elem` o)
369 let zipVal = (Main.Zip `elem` o)
370 let epubVal = (Main.EPub `elem` o)
371 let odtVal = (Main.Odt `elem` o)
372 let temVal = (Main.InternalTemplates `elem` o)
373 let vectorVal = (Main.Vector `elem` o)
374 let mysum
375 = (boolToInt temVal) + (boolToInt mediaWikiVal) +
376 (boolToInt htmlVal)
377 + (maybeToInt templatesVal)
378 + (boolToInt bookModeVal)
379 if mysum > 1 then Left ToManyOptionsError else Right ()
380 if
381 ((boolToInt zipVal) + (boolToInt epubVal) +
382 (boolToInt odtVal))
383 > (1 :: Integer)
384 then Left ToManyOutputOptionsError else Right ()
385 runModeVal <- if mysum == (1 :: Integer) then
386 case templatesVal of
387 Just xx -> Right (UserTemplateFile xx)
388 _ -> if mediaWikiVal then
389 Right ExpandedTemplates else
390 if htmlVal then
391 Right ImperativeState.HTML else
392 if bookModeVal then
393 Right ImperativeState.Book
394 else
395 Right (StandardTemplates)
396 else Right ImperativeState.HTML
397 return
398 (FullConfig{ImperativeState.headers =
399 headersVal >>= (return . (cwd </>)),
400 resolution = resolutionVal,
401 selfTest = Nothing,
402 outputFilename = outputVal,
403 inputUrl = inputVal, runMode = runModeVal,
404 paper = paperVal, vector = vectorVal,
405 copy = copyVal >>= (return . (cwd </>)),
406 mainPath =
407 cwd ++ (if os == "linux" then "" else "\\"),
408 server = Nothing,
409 outputType =
410 if zipVal then ZipArchive else
411 if epubVal then EPubFile else
412 if odtVal then OdtFile else PlainPDF,
413 compile = Nothing, imgctrb = Nothing})
414
395415 {-DHUN| main entry point of mediawiki2latex DHUN-}
396
416
397417 main :: IO ()
398418 main
399419 = do a <- getArgs
403423 cwd <- getCurrentDirectory
404424 case (checkOpts cwd (fst o)) of
405425 Right x -> case (compile x) of
406 Just dir -> do _ <- (runStateT (runExceptT (runCompile dir)) stz)
407 return ()
408 _-> case (imgctrb x) of
409 Just dir -> do _ <- (runStateT (runExceptT (runCtrb dir)) stz)
410 return ()
411 _-> case (server x) of
412 Just zz -> serve zz
413 _ -> do print x
414 (xx, _) <- (runStateT (runExceptT (All.all x))
415 stz)
416 case xx of
417 Left n -> print n
418 _ -> return ()
426 Just dir -> do _ <- (runStateT (runExceptT (runCompile dir)) stz)
427 return ()
428 _ -> case (imgctrb x) of
429 Just dir -> do _ <- (runStateT (runExceptT (runCtrb dir)) stz)
430 return ()
431 _ -> case (server x) of
432 Just zz -> serve zz
433 _ -> do print x
434 (xx, _) <- (runStateT (runExceptT (All.all x))
435 stz)
436 case xx of
437 Left n -> print n
438 _ -> return ()
419439 Left y -> print y
420440 return ()
33 import Language.Haskell.Exts.Pretty
44 import Data.Maybe
55 import System.Environment
6
6
77 {-DHUN| save function to access an list by index. If an element at that index exists the element is returned wrapped in a Just of the maybe monad. Otherwise the value Nothing of the Maybe monad is returned DHUN-}
88 ind l n = if length l > n then Just $ l !! n else Nothing
9
9
1010 {-DHUN| Takes a String as an single input parameter. It has to be the source code of module written in Haskell. It return a parse tree of that module DHUN-}
11
12 parseModuleFromFile :: String -> Module
1311 parseModuleFromFile inp = fromParseResult $ parseFileContents inp
14
12
1513 {-DHUN| the main function, see documentation at the head of this module DHUN-}
16
14
1715 main :: IO ()
1816 main
1917 = do args <- getArgs