New upstream version 7.33
Georges Khaznadar
5 years ago
0 | import Network.HTTP | |
1 | import Network.HTTP.Conduit | |
2 | import qualified Data.ByteString.Lazy as LB | |
0 | import UrlAnalyse | |
1 | import ImperativeState | |
2 | import UrlAnalyse | |
3 | 3 | import qualified Data.ByteString as BStr |
4 | import Network.URL as URL | |
4 | 5 | import qualified Data.ByteString.UTF8 as UTF8Str |
6 | import Data.List.Split (splitOn) | |
7 | import Control.Monad | |
8 | import Data.Maybe | |
9 | import Control.Monad.State | |
10 | import Data.List | |
11 | import System.FilePath | |
5 | 12 | import Codec.Binary.UTF8.String |
6 | import Data.ByteString.Lazy.Internal | |
7 | import Network.HTTP.Conduit.Browser | |
8 | import Control.Exception | |
9 | import Control.Monad.Trans.Resource | |
10 | import Control.Monad.IO.Class | |
11 | import Data.CaseInsensitive (mk) | |
12 | geturl3 :: String -> IO String | |
13 | geturl3 u | |
14 | = Control.Exception.catch | |
15 | (do req0 <- parseUrl u | |
16 | let req | |
17 | = (urlEncodedBody | |
18 | (map (\ (a, b) -> (UTF8Str.fromString a, UTF8Str.fromString b)) | |
19 | [("title","Spezial:Anmelden"),("action","submitlogin"),("type","login"),("returnto","Dirk Hünniger"),("wpName","Dirk Hünniger"),("wpPassword","rotationpeliped")])) | |
20 | req0 | |
21 | man <- newManager def | |
22 | req2b <- parseUrl "https://de.wikibooks.org/w/index.php" | |
23 | let cj = createCookieJar [] | |
24 | let req2 | |
25 | = (urlEncodedBody | |
26 | (map (\ (a, b) -> (UTF8Str.fromString a, UTF8Str.fromString b)) | |
27 | [("title","Spezial:Anmelden"),("returnto","Dirk Hünniger")])) | |
28 | req2b | |
13 | import Control.Concurrent.MVar | |
14 | import Parallel | |
15 | import Data.Map (member) | |
16 | import MediaWikiParser | |
17 | import MediaWikiParseTree | |
18 | import WikiHelper | |
19 | import Verben | |
20 | import Nomen | |
21 | import Adjektive | |
22 | import Adverbien | |
23 | import Gradpartikel | |
24 | import Hilfsverben | |
25 | deepGet2 :: [Char] -> [Anything a] -> [Anything a] | |
26 | deepGet2 tag ll = concat $ map go ll | |
27 | where | |
28 | | t == tag = | |
29 | [Environment Tag (TagAttr tag m) l] ++ (deepGet2 tag l) | |
30 | go (Environment _ _ l) = (deepGet2 tag l) | |
31 | go _ = [] | |
29 | 32 | |
30 | res <- runResourceT $ do browse man $ do setDefaultHeader (mk (UTF8Str.fromString "User-Agent")) $ Just (UTF8Str.fromString "mozialla 1.2.3 (compatible)") | |
31 | --setCookieJar cj | |
32 | makeRequestLbs req2 | |
33 | xx<-makeRequestLbs req | |
34 | yy<-makeRequestLbs req | |
35 | ccj <-getCookieJar | |
36 | --liftIO $ print ccj | |
37 | return yy | |
38 | return ((unpackChars (responseBody res)))) | |
39 | fun | |
40 | where | |
41 | fun :: ErrorCall -> IO String | |
42 | fun _ = return "" | |
33 | deepGet3 :: [Char] -> [Char] -> [Anything Char] -> [Anything Char] | |
34 | deepGet3 tag k ll = concat ( map go ll) | |
35 | where go (Environment Tag (TagAttr t m) l) | |
36 | | ((t == tag) && (member k m)) = | |
37 | [Environment Tag (TagAttr tag m) l] ++ (deepGet3 tag k l) | |
38 | go (Environment _ _ l) = (deepGet3 tag k l) | |
39 | go _ = [] | |
43 | 40 | |
44 | main = geturl3 "https://de.wikibooks.org/w/index.php" >>= putStr | |
41 | deepGo :: [Anything Char] -> [[Char]] | |
42 | deepGo ((Environment Tag (TagAttr t m) l):xs) = (shallowFlatten l):(deepGo xs) | |
43 | deepGo (_:xs) = deepGo xs | |
44 | deepGo [] = [] | |
45 | ||
46 | -- yy <- geturl theUrl3 | |
47 | -- let gg = (deepGet "li" "id" "ca-history" (parseit minparsers yy)) | |
48 | gourl url = do x<-geturl url | |
49 | return (reverse ( (reverse ( (deepGo (deepGet2 "a" (deepGet "div" "class" "mw-category-group" (deepGet "div" "id" "mw-pages"(parseit minparsers x))))))))) | |
50 | ||
51 | gourl2 url = do x<-geturl url | |
52 | return (reverse ((reverse ( (deepGo (deepGet2 "a" (deepGet "div" "class" "mw-category-group" (deepGet "div" "id" "mw-pages"(parseit minparsers x))))))))) | |
53 | ||
54 | gourls x = do y<-gourl2 ("https://de.wiktionary.org/w/index.php?title=Kategorie:Adverb_(Deutsch)&pagefrom="++(decodeString (last x))++"#mw-pages") | |
55 | if (length y)>0 then (print (last x))>>(print (last y))>> gourls (x++y) else return (x++y) | |
56 | ||
57 | main = do x <- gourl ("https://de.wiktionary.org/wiki/Kategorie:Adverb_%28Deutsch%29") | |
58 | y <- gourls x | |
59 | print y | |
45 | 60 | |
46 | 61 | |
47 | 62 | |
48 | 63 | |
49 | --main = simpleHttp "https://de.wikibooks.org/w/index.php?title=Spezial:Anmelden&returnto=Benutzer%3ADirk+Hünniger" | |
50 | 64 | |
51 | 65 | |
66 | ||
67 | ||
68 |
0 | Version 7.31 | |
0 | Version 7.33 | |
1 | * src | |
2 | amperand and "<" and ">" now work in sourcecode received via html | |
3 | some more toplevel function are now documented | |
4 | server now displayes running at wmflabs | |
5 | server now warns about time limit of one hour | |
6 | formulas inside image caption received of html work | |
7 | man page update | |
8 | source code pretty printed | |
9 | Version 7.32 | |
1 | 10 | * src |
2 | 11 | tables now work with formulas inside them also |
3 | 12 | webserver now links to installation instructions |
6 | 6 | .ds version MEDIAWIKI2LATEXVERSION |
7 | 7 | .ds year 2013 |
8 | 8 | .ds date \*[year]-12-25 |
9 | .ds appauthors Dirk Hünniger <hunniger@cip.physik.uni-bonn.de> | |
9 | .ds appauthors Dirk Hünniger <dirk.hunniger@googlemail.com> | |
10 | 10 | .ds manauthors Pierre Neidhardt <ambrevar@gmail.com> |
11 | 11 | . |
12 | 12 | .\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" |
52 | 52 | .BR -z ", " --zip |
53 | 53 | Output LaTeX Source Archive. |
54 | 54 | .TP |
55 | .BR -b ", " --epub | |
56 | Output epub file. | |
57 | .TP | |
58 | .BR -d ", " --odt | |
59 | Output odt file. Open Document Text (for wordprocessors) | |
60 | .TP | |
55 | 61 | .BR -i ", " --internal |
56 | 62 | Use internal template definitions. |
57 | 63 | .TP |
60 | 66 | .TP |
61 | 67 | .BR -m ", " --mediawiki |
62 | 68 | Use mediaWiki to expand the templates. |
69 | .TP | |
70 | .BR -k ", " --bookmode | |
71 | Use book-namespace mode for expansion. That means: Follow all links but not recursively. | |
63 | 72 | .TP |
64 | 73 | .BR -o ", " --output = \fIFILE\fR |
65 | 74 | Specify the PDF output file. |
82 | 91 | .BR -u ", " --url = \fIURL\fR |
83 | 92 | The input URI. It should point to a MediaWiki page. |
84 | 93 | .TP |
85 | .BR -h ", " -? ", " -v ", " --version ", " --help | |
94 | .BR -? ", " -v ", " --version ", " --help | |
86 | 95 | Show help options together with version number. |
87 | 96 | . |
88 | 97 | .\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" |
0 | 0 | Name: mediawiki2latex |
1 | Version: 7.32 | |
1 | Version: 7.33 | |
2 | 2 | License: GPL |
3 | 3 | License-File: LICENSE |
4 | 4 | Author: Dirk Hünniger <dirk.hunniger@googlemail.com> |
35 | 35 | import UrlAnalyse |
36 | 36 | import Network.URL |
37 | 37 | import System.FilePath |
38 | import Control.Concurrent.MVar() | |
38 | import Control.Concurrent.MVar () | |
39 | 39 | import Data.Hashable |
40 | 40 | import Hex |
41 | 41 | import Data.IORef |
44 | 44 | import Data.ByteString.Lazy (toStrict) |
45 | 45 | import System.Exit |
46 | 46 | import System.IO.Error |
47 | ||
47 | ||
48 | 48 | {-DHUN| takes a filename as input parameter and returns the so called normalized extension for it. This is and extension classifying the type of the file while used in this program. Its not necessarily the real extension of the filename. For example .jpeg images will be converted to jpg and so on. DHUN-} |
49 | ||
49 | ||
50 | 50 | getExtension :: String -> String |
51 | 51 | getExtension s |
52 | 52 | = normalizeExtension2 |
53 | 53 | (map toLower (reverse . (takeWhile (/= '.')) . reverse $ s)) |
54 | ||
54 | ||
55 | 55 | {-DHUN| returns that name of image magick convert command on the current operating system. It takes the path from which mediawiki2latex was started as input parameter. On Windows convert.exe has to reside in a path relative to it for this to work. On linux it does not matter DHUN-} |
56 | ||
56 | ||
57 | 57 | getConvert :: FilePath -> String |
58 | 58 | getConvert p |
59 | 59 | = if os == "linux" then convert else (getPathPrefix p) ++ convert |
60 | 60 | where convert |
61 | 61 | = if os == "linux" then "convert " else "convert.exe " |
62 | ||
62 | ||
63 | 63 | {-DHUN| generates the information for the titlepage. It takes the result of the wiki text compilation (CompileResult) as first input parameter and the FullWikiUrl to the article as second parameter. It title information is found in the CompileResult it is used, otherise it is generated from the FullWikiUrl DHUN-} |
64 | ||
64 | ||
65 | 65 | makeTitle :: CompileResult -> FullWikiUrl -> [Char] |
66 | 66 | makeTitle result fu = theTitle |
67 | 67 | where theTitle |
74 | 74 | = "\\title{" ++ |
75 | 75 | (concat (map (chartrans) ((removePrintVersion (lemma fu))))) ++ |
76 | 76 | "}\n" |
77 | ||
77 | 78 | makeTitle2 :: CompileResult -> FullWikiUrl -> [Char] |
78 | 79 | makeTitle2 result fu = theTitle |
79 | 80 | where theTitle |
80 | 81 | = if (Compiler.title result) == "" then tit else |
81 | 82 | (Compiler.title result) |
82 | tit | |
83 | = (concat (map (chartrans) ((removePrintVersion (lemma fu))))) | |
84 | ||
83 | tit = (concat (map (chartrans) ((removePrintVersion (lemma fu))))) | |
84 | ||
85 | 85 | {-DHUN| returns the prefix of the path where addional needed software resides depending on the operation system DHUN-} |
86 | ||
86 | ||
87 | 87 | getPathPrefix :: FilePath -> String |
88 | 88 | getPathPrefix p = if os == "linux" then "" else (p ++ "..\\lib\\") |
89 | ||
89 | ||
90 | 90 | {-DHUN| applied the necessary image processing to a file so that it can be included in a latex document and does not take too much discspace. It takes the path form which mediawiki2latex was started as first input parameter. It takes filename with its exension stiped as second input parameter. It takes the normalized exension of the file as third parameter (see also function getExtension in this module). It takes the maximum resolution that images should have as fourth parameter. It takes the imagenumbers of the images residing in image galleries in the wiki source text as fifth parameter (those images are given a lower absolute with in cm in the pdf document and thus can be dithered to a loweder absolute with in pixels). It takes the image number of the current image as sixth parameter. DHUN-} |
91 | ||
91 | ||
92 | 92 | runFileMods :: |
93 | 93 | FilePath -> |
94 | 94 | String -> |
138 | 138 | = system |
139 | 139 | ((getConvert p) ++ fn ++ " -background white -flatten " ++ fn) |
140 | 140 | pngfilename = filenamebase ++ "." ++ "png" |
141 | ||
141 | ||
142 | 142 | dither :: String -> IO () |
143 | 143 | dither fn |
144 | 144 | = do _ <- system |
172 | 172 | _ -> return () |
173 | 173 | _ -> return () |
174 | 174 | _ -> return () |
175 | ||
175 | ||
176 | 176 | runDither :: String -> Integer -> Integer -> IO () |
177 | 177 | runDither fn newSize oldSize |
178 | 178 | = if newSize < oldSize then |
183 | 183 | else return () |
184 | 184 | textWidth = 10.5 |
185 | 185 | galleryImageWidth = 5.0 |
186 | ||
186 | ||
187 | 187 | centimetersPerInch :: Double |
188 | 188 | centimetersPerInch = 2.54 |
189 | ||
189 | ||
190 | 190 | galleryWidth :: Integer |
191 | 191 | galleryWidth |
192 | 192 | = round |
193 | 193 | ((fromIntegral theResolution) * galleryImageWidth / |
194 | 194 | centimetersPerInch) |
195 | ||
195 | ||
196 | 196 | imageWidth :: Integer |
197 | 197 | imageWidth |
198 | 198 | = round |
199 | 199 | ((fromIntegral theResolution) * textWidth / centimetersPerInch) |
200 | ||
200 | ||
201 | 201 | {-DHUN| function to write the image files into the temporary directory and modify the files for use in a LaTeX document. It takes the pathname of temporary image download directory as first input parameter. Please not the the temporary image download directory is always different from the temporary directory. It takes the pathname of the directory in which mediawiki2latex was stated as second input parameter. It takes the name of the temporary directory as third input parameter. It takes the result of the image downloading process as fourth input parameter. This structure has a Maybe monad as outer type since the image download may fail. Inside is a 3 element tuple. The first one is the filename of the image on the wiki. The second one is the number under which the image was stored in the temporary image download directory. The third one is a list of possible urls where the image may be found on the wiki. It takes maximum resolution that images should have as fifth parameter. It takes the list of images number of images residing in galleries as sixth input parameter. DHUN-} |
202 | ||
202 | ||
203 | 203 | writeFiles :: |
204 | 204 | FilePath -> |
205 | 205 | FilePath -> |
206 | String -> | |
207 | [Maybe ImageInfo] -> | |
208 | Integer -> [Integer] -> IO () | |
206 | String -> [Maybe ImageInfo] -> Integer -> [Integer] -> IO () | |
209 | 207 | writeFiles dir p pathname theImages theResolution gals |
210 | 208 | = mapM_ go (Prelude.zip ([1 ..] :: [Integer]) theImages) |
211 | 209 | where go (i, Just x) |
212 | 210 | = do let filenamebase = (pathname ++ (show i)) |
213 | let filename = filenamebase ++ "." ++ (getExtension (wikiFilename x)) | |
214 | filecontent <- Data.ByteString.readFile (dir </> (show (imageNumber x))) | |
211 | let filename | |
212 | = filenamebase ++ "." ++ (getExtension (wikiFilename x)) | |
213 | filecontent <- Data.ByteString.readFile | |
214 | (dir </> (show (imageNumber x))) | |
215 | 215 | Data.ByteString.writeFile filename filecontent |
216 | runFileMods p filenamebase (getExtension (wikiFilename x)) theResolution gals i | |
216 | runFileMods p filenamebase (getExtension (wikiFilename x)) | |
217 | theResolution | |
218 | gals | |
219 | i | |
217 | 220 | pathname |
218 | 221 | go _ = return () |
219 | ||
220 | ||
221 | ||
222 | writeFiles2 :: String -> String -> [(String,Int)]-> IO () | |
223 | writeFiles2 tmpdir pathname forms | |
224 | = mapM_ go forms | |
225 | where go (x,_) | |
226 | = do filecontent <- Data.ByteString.readFile (tmpdir</>x) | |
227 | Data.ByteString.writeFile (pathname++(Prelude.last (splitOn "/" x))) filecontent | |
228 | ||
229 | data LatexConfig = LatexConfig{figures :: | |
230 | [Maybe ImageInfo], | |
222 | ||
223 | writeFiles2 :: String -> String -> [(String, Int)] -> IO () | |
224 | writeFiles2 tmpdir pathname forms = mapM_ go forms | |
225 | where go (x, _) | |
226 | = do filecontent <- Data.ByteString.readFile (tmpdir </> x) | |
227 | Data.ByteString.writeFile | |
228 | (pathname ++ (Prelude.last (splitOn "/" x))) | |
229 | filecontent | |
230 | ||
231 | data LatexConfig = LatexConfig{figures :: [Maybe ImageInfo], | |
231 | 232 | title :: String, fullConfig :: FullConfig, content :: String, |
232 | 233 | hostname :: String, theResult :: CompileResult, onlyTables :: Bool, |
233 | lang :: Maybe String, theTempDir :: String, formulas::[(String,Int)], figHTML:: String} | |
234 | ||
234 | lang :: Maybe String, theTempDir :: String, | |
235 | formulas :: [(String, Int)], figHTML :: String} | |
236 | ||
235 | 237 | runLaTeX :: LatexConfig -> ImperativeMonad ByteString |
236 | 238 | runLaTeX config |
237 | 239 | = liftIO |
238 | 240 | (withSystemTempDirectory "MediaWiki2LaTeX" |
239 | 241 | (runLaTeXCallback config)) |
240 | ||
242 | ||
241 | 243 | runLaTeXCallback :: LatexConfig -> FilePath -> IO ByteString |
242 | 244 | runLaTeXCallback config pathname |
243 | 245 | = do extract pathname |
248 | 250 | d{anchor = pathname </> "document/headers"} |
249 | 251 | return () |
250 | 252 | _ -> return () |
251 | if os=="linux" then return () else | |
252 | do d <- readDirectoryWith Data.ByteString.readFile ((mainPath (fullConfig config))++"../fonts/main/") | |
253 | _ <- writeDirectoryWith Data.ByteString.writeFile d{anchor = pathname ++ "/document/"} | |
254 | return () | |
255 | Tools.writeFile (pathname ++ "/document/main/main.tex") (content config) | |
256 | Tools.writeFile (pathname ++ "/document/index.html") ((html (theResult config))++"<h2>List of Figures</h2>"++(figHTML config)) | |
257 | ||
258 | ||
253 | if os == "linux" then return () else | |
254 | do d <- readDirectoryWith Data.ByteString.readFile | |
255 | ((mainPath (fullConfig config)) ++ "../fonts/main/") | |
256 | _ <- writeDirectoryWith Data.ByteString.writeFile | |
257 | d{anchor = pathname ++ "/document/"} | |
258 | return () | |
259 | Tools.writeFile (pathname ++ "/document/main/main.tex") | |
260 | (content config) | |
261 | Tools.writeFile (pathname ++ "/document/index.html") | |
262 | ((html (theResult config)) ++ | |
263 | "<h2>List of Figures</h2>" ++ (figHTML config)) | |
259 | 264 | Tools.writeFile (pathname ++ "/document/headers/svg.tex") |
260 | 265 | (if vector (fullConfig config) then |
261 | 266 | "\\newcommand{\\SVGExtension}{pdf}" else |
273 | 278 | (figures config) |
274 | 279 | (resolution (fullConfig config)) |
275 | 280 | (galleryNumbers (theResult config)) |
276 | All.writeFiles2 (theTempDir config) (pathname ++ "/document/formulas/") (formulas config) | |
281 | All.writeFiles2 (theTempDir config) | |
282 | (pathname ++ "/document/formulas/") | |
283 | (formulas config) | |
277 | 284 | cwd <- getCurrentDirectory |
278 | 285 | setCurrentDirectory (pathname ++ "/document/main") |
279 | 286 | case (ImperativeState.copy (fullConfig config)) of |
283 | 290 | return () |
284 | 291 | _ -> return () |
285 | 292 | _ <- forM |
286 | ((if onlyTables config then [1] else if ((outputType (fullConfig config)) /= PlainPDF) then [] else [1, 2]) :: [Integer]) | |
293 | ((if onlyTables config then [1] else | |
294 | if ((outputType (fullConfig config)) /= PlainPDF) then [] else | |
295 | [1, 2]) | |
296 | :: [Integer]) | |
287 | 297 | (\ r -> |
288 | 298 | do if (onlyTables config) then return () else |
289 | 299 | myprint (" generating PDF file. LaTeX run " ++ (show r) ++ " of 4") |
299 | 309 | "..\\miktex\\miktex\\bin\\makeindex.exe") |
300 | 310 | ++ " main") |
301 | 311 | >> return ())) |
302 | _ <- forM ((if (onlyTables config)||((outputType (fullConfig config)) /= PlainPDF) then [] else [3, 4]) :: [Integer]) | |
312 | _ <- forM | |
313 | ((if | |
314 | (onlyTables config) || | |
315 | ((outputType (fullConfig config)) /= PlainPDF) | |
316 | then [] else [3, 4]) | |
317 | :: [Integer]) | |
303 | 318 | (\ r -> |
304 | 319 | do if (onlyTables config) then return () else |
305 | 320 | myprint (" generating PDF file. LaTeX run " ++ (show r) ++ " of 4") |
318 | 333 | case splitOn "\n" te of |
319 | 334 | (x : _) -> return (pack (encode (strip "pt\r" x))) |
320 | 335 | _ -> return (pack (encode "")) |
321 | else case outputType (fullConfig config) of | |
322 | PlainPDF -> Data.ByteString.readFile "main.pdf" | |
323 | ZipArchive -> do setCurrentDirectory pathname | |
324 | a<-addFilesToArchive [OptRecursive] emptyArchive ["document"] | |
325 | Data.ByteString.writeFile "main.zip" (toStrict (fromArchive a)) | |
326 | Data.ByteString.readFile "main.zip" | |
327 | OdtFile -> do setCurrentDirectory ".." | |
328 | mysystem "libreoffice --headless --convert-to odt index.html" | |
329 | mysystem "libreoffice --headless --convert-to docx index.odt" | |
330 | mysystem "libreoffice --headless --convert-to odt index.docx" | |
331 | Data.ByteString.readFile "index.odt" | |
332 | EPubFile -> do mysystem "ebook-convert ../index.html .epub" | |
333 | Data.ByteString.readFile "index.epub" | |
334 | ||
335 | ||
336 | else | |
337 | case outputType (fullConfig config) of | |
338 | PlainPDF -> Data.ByteString.readFile "main.pdf" | |
339 | ZipArchive -> do setCurrentDirectory pathname | |
340 | a <- addFilesToArchive [OptRecursive] emptyArchive | |
341 | ["document"] | |
342 | Data.ByteString.writeFile "main.zip" | |
343 | (toStrict (fromArchive a)) | |
344 | Data.ByteString.readFile "main.zip" | |
345 | OdtFile -> do setCurrentDirectory ".." | |
346 | mysystem "libreoffice --headless --convert-to odt index.html" | |
347 | mysystem "libreoffice --headless --convert-to docx index.odt" | |
348 | mysystem "libreoffice --headless --convert-to odt index.docx" | |
349 | Data.ByteString.readFile "index.odt" | |
350 | EPubFile -> do mysystem "ebook-convert ../index.html .epub" | |
351 | Data.ByteString.readFile "index.epub" | |
336 | 352 | setCurrentDirectory cwd |
337 | 353 | return result |
338 | ex :: ProcessHandle -> Handle -> Handle -> IO () | |
354 | ||
355 | ex :: ProcessHandle -> Handle -> Handle -> IO () | |
339 | 356 | ex h o e |
340 | = do b<-hReady stdin | |
341 | if b then do terminateProcess h | |
342 | exitFailure | |
343 | else do return () | |
344 | v<-tryIOError (hWaitForInput e 0) | |
345 | let xo=case v of | |
357 | = do b <- hReady stdin | |
358 | if b then | |
359 | do terminateProcess h | |
360 | exitFailure | |
361 | else do return () | |
362 | v <- tryIOError (hWaitForInput e 0) | |
363 | let xo | |
364 | = case v of | |
346 | 365 | Right j -> j |
347 | 366 | _ -> False |
348 | _<- if xo then hGetChar e else return ' ' | |
349 | w<-tryIOError (hWaitForInput o 0) | |
350 | let yo=case w of | |
367 | _ <- if xo then hGetChar e else return ' ' | |
368 | w <- tryIOError (hWaitForInput o 0) | |
369 | let yo | |
370 | = case w of | |
351 | 371 | Right j -> j |
352 | 372 | _ -> False |
353 | _<- if yo then hGetChar o else return ' ' | |
354 | y<-getProcessExitCode h | |
355 | case y of | |
356 | Just _ -> do hClose o | |
357 | hClose e | |
358 | return () | |
359 | _-> ex h o e | |
373 | _ <- if yo then hGetChar o else return ' ' | |
374 | y <- getProcessExitCode h | |
375 | case y of | |
376 | Just _ -> do hClose o | |
377 | hClose e | |
378 | return () | |
379 | _ -> ex h o e | |
360 | 380 | |
361 | 381 | mysystem :: String -> IO () |
362 | mysystem x =if os == "linux" | |
363 | then | |
364 | do (_,o,e,h)<-runInteractiveCommand x | |
365 | ex h o e | |
366 | return () | |
367 | else | |
368 | do _<-system x | |
369 | return () | |
382 | mysystem x | |
383 | = if os == "linux" then | |
384 | do (_, o, e, h) <- runInteractiveCommand x | |
385 | ex h o e | |
386 | return () | |
387 | else | |
388 | do _ <- system x | |
389 | return () | |
370 | 390 | |
371 | 391 | getLang :: URL -> IO (Maybe String) |
372 | 392 | getLang u |
376 | 396 | ((Environment Tag (TagAttr _ m) _) : []) -> return $ |
377 | 397 | Data.Map.Strict.lookup "lang" m |
378 | 398 | _ -> return $ Nothing |
379 | ||
399 | ||
380 | 400 | catchFun :: IOException -> IO String |
381 | 401 | catchFun _ = return "" |
382 | ||
402 | ||
383 | 403 | strip :: (Eq a) => [a] -> [a] -> [a] |
384 | 404 | strip l = reverse . (dropWhile isBad) . reverse . dropWhile isBad |
385 | 405 | where isBad x = x `elem` l |
386 | ||
406 | ||
387 | 407 | latexPostamble :: String |
388 | 408 | latexPostamble = "\n\\end{longtable}\n\\pagebreak" |
389 | ||
390 | ||
391 | runCtrb ::String -> ImperativeMonad () | |
392 | runCtrb dir = do t<-liftIO $ Tools.readFile (dir </> "input") | |
393 | cr<-imgContribback ((read t)::(Maybe ImageInfo)) | |
394 | liftIO $ Tools.writeFile (dir </> "output") (show cr) | |
395 | ||
396 | ||
409 | ||
410 | runCtrb :: String -> ImperativeMonad () | |
411 | runCtrb dir | |
412 | = do t <- liftIO $ Tools.readFile (dir </> "input") | |
413 | cr <- imgContribback ((read t) :: (Maybe ImageInfo)) | |
414 | liftIO $ Tools.writeFile (dir </> "output") (show cr) | |
397 | 415 | |
398 | 416 | imgContrib :: |
399 | 417 | (Maybe ImageInfo) -> |
400 | 418 | ImperativeMonad ((Maybe (String, Maybe String))) |
401 | imgContrib x = do systempdir <- liftIO getTemporaryDirectory | |
402 | tempdir <- liftIO $ createTempDirectory systempdir "MediaWiki2LaTeXImgContrib" | |
403 | liftIO $ Tools.writeFile (tempdir </> "input") (show x) | |
404 | _ <- liftIO $ system("mediawiki2latex -x "++( Hex.hex (show (fullconfigbase{imgctrb=Just tempdir})))) | |
405 | t <-liftIO $ Tools.readFile (tempdir </> "output") | |
406 | return (read t) | |
419 | imgContrib x | |
420 | = do systempdir <- liftIO getTemporaryDirectory | |
421 | tempdir <- liftIO $ | |
422 | createTempDirectory systempdir "MediaWiki2LaTeXImgContrib" | |
423 | liftIO $ Tools.writeFile (tempdir </> "input") (show x) | |
424 | _ <- liftIO $ | |
425 | system | |
426 | ("mediawiki2latex -x " ++ | |
427 | (Hex.hex (show (fullconfigbase{imgctrb = Just tempdir})))) | |
428 | t <- liftIO $ Tools.readFile (tempdir </> "output") | |
429 | return (read t) | |
407 | 430 | |
408 | 431 | imgContribback :: |
409 | (Maybe ImageInfo) -> | |
410 | ImperativeMonad ((Maybe (String, Maybe String))) | |
432 | (Maybe ImageInfo) -> | |
433 | ImperativeMonad ((Maybe (String, Maybe String))) | |
411 | 434 | imgContribback z |
412 | 435 | = do x <- return z |
413 | 436 | xx <- imgContrib2 x |
414 | 437 | liftIO (go xx) |
415 | 438 | where go (Just xxx) = return (Just xxx) |
416 | 439 | go _ = return (Just ("", Nothing)) |
417 | ||
440 | ||
418 | 441 | imgContrib2 :: |
419 | Maybe ImageInfo -> | |
420 | ImperativeMonad ((Maybe (String, Maybe String))) | |
442 | Maybe ImageInfo -> ImperativeMonad ((Maybe (String, Maybe String))) | |
421 | 443 | imgContrib2 (Just x) |
422 | 444 | = do img <- getContributors (contributorUrls x) |
423 | 445 | ffi <- liftIO $ (return . fst) img |
425 | 447 | ssn <- liftIO $ (return . snd) img |
426 | 448 | sns <- liftIO ((return . msum) ssn) |
427 | 449 | liftIO ((fun fif sns)) |
428 | where | |
429 | ffun :: Map String Contributor -> String | |
450 | where ffun :: Map String Contributor -> String | |
430 | 451 | ffun i = intercalate ", " (keys (i)) |
431 | ||
452 | ||
432 | 453 | fun :: String -> Maybe String -> IO (Maybe (String, Maybe String)) |
433 | 454 | fun fi sn = return (Just (fi, sn)) |
434 | 455 | imgContrib2 _ = do liftIO (return Nothing) |
435 | ||
436 | makeImgList :: | |
437 | [(Maybe ImageInfo)] -> | |
438 | ImperativeMonad String | |
456 | ||
457 | makeImgList :: [(Maybe ImageInfo)] -> ImperativeMonad String | |
439 | 458 | makeImgList imgs2 |
440 | 459 | = do ccontrib <- mapM (imgContrib) imgs2 |
441 | 460 | cccontrib <- liftIO (mapM (return . id) ccontrib) |
446 | 465 | return ((toString latexPreamble) ++ z ++ (latexPostamble)) |
447 | 466 | where go ((i, Just (con, lic)), Just info) |
448 | 467 | = "\\href{" ++ |
449 | (replace2 (replace2 (concat (map chartransforlink (exportURL (descriptionUrl info)))) "//" "/") | |
468 | (replace2 | |
469 | (replace2 | |
470 | (concat (map chartransforlink (exportURL (descriptionUrl info)))) | |
471 | "//" | |
472 | "/") | |
450 | 473 | "https:/" |
451 | 474 | "https://") |
452 | 475 | ++ |
454 | 477 | (show i) ++ |
455 | 478 | "}& " ++ con ++ "&" ++ (fromMaybe "" lic) ++ "\\\\ \\hline \n" |
456 | 479 | go (((i, _), _)) = (show i) ++ "&&\\\\ \\hline \n" |
457 | ||
458 | ||
459 | makeImgListHTML :: | |
460 | [(Maybe ImageInfo)] -> | |
461 | ImperativeMonad String | |
480 | ||
481 | makeImgListHTML :: [(Maybe ImageInfo)] -> ImperativeMonad String | |
462 | 482 | makeImgListHTML imgs2 |
463 | 483 | = do ccontrib <- mapM (imgContrib) imgs2 |
464 | 484 | cccontrib <- liftIO (mapM (return . id) ccontrib) |
466 | 486 | imgs <- liftIO (return imgs2) |
467 | 487 | let z = concat |
468 | 488 | (map go (zip (zip ([1 ..] :: [Integer]) contrib) imgs)) |
469 | return ("<table rules=\"all\"><tr><td>Number</td><td>Contributors</td><td>License</td></tr>" ++ z ++ "</table>") | |
489 | return | |
490 | ("<table rules=\"all\"><tr><td>Number</td><td>Contributors</td><td>License</td></tr>" | |
491 | ++ z ++ "</table>") | |
470 | 492 | where go ((i, Just (con, lic)), Just info) |
471 | 493 | = "<tr><td><a href=\"" ++ |
472 | 494 | (replace2 (replace2 (((exportURL (descriptionUrl info)))) "//" "/") |
475 | 497 | ++ |
476 | 498 | "\">" ++ |
477 | 499 | (show i) ++ |
478 | "</a></td><td>" ++ con ++ "</td><td>" ++ (fromMaybe "" lic) ++ "</td></tr>" | |
500 | "</a></td><td>" ++ | |
501 | con ++ "</td><td>" ++ (fromMaybe "" lic) ++ "</td></tr>" | |
479 | 502 | go (((i, _), _)) = (show i) ++ "&&\\\\ \\hline \n" |
480 | 503 | |
481 | ||
482 | makeformulas:: String->String->[Anything Char]->ImperativeMonad [(String,Int)] | |
504 | makeformulas :: | |
505 | String -> | |
506 | String -> [Anything Char] -> ImperativeMonad [(String, Int)] | |
483 | 507 | makeformulas p tempdir ll |
484 | 508 | = do x <- allinfo |
485 | 509 | return $ concat x |
486 | where | |
487 | allinfo :: ImperativeMonad [[(String,Int)]] | |
510 | where allinfo :: ImperativeMonad [[(String, Int)]] | |
488 | 511 | allinfo = mapM processNode ll |
489 | processNode:: Anything Char -> ImperativeMonad [(String,Int)] | |
490 | processNode (Environment Math _ l) = do myname <- return (( (hex( show (hash(mathTransform l)))))) | |
491 | sz<-liftIO (do Tools.writeFile (tempdir </> ( myname++".tex")) ("\\documentclass{article}\n\\usepackage{color}\n\\input{./defaultcolors}\n\\usepackage{amsfonts}\n\\usepackage{amsmath}\n\\begin{document}\n\\thispagestyle{empty}\n$"++(mathTransform l)++"$\n\\end{document}") | |
492 | Static.writeFiles tempdir headerFiles | |
493 | mysystem ("latex2png " ++ (tempdir </> (myname++".tex"))) | |
494 | sz<-mysize (tempdir </> (myname++".png")) | |
495 | mysystem ("latex2png -c -d 1200 " ++ (tempdir </> (((myname))++".tex"))) | |
496 | print (mathTransform l) | |
497 | return sz) | |
498 | return [(myname++".png",sz)] | |
499 | processNode (Environment BigMath _ l) = do myname <- return (( (hex( show (hash(mathTransform l)))))) | |
500 | sz<-liftIO (do Tools.writeFile (tempdir </> ( myname++".tex")) ("\\documentclass{article}\n\\usepackage{color}\n\\input{./defaultcolors}\n\\usepackage{amsfonts}\n\\usepackage{amsmath}\n\\begin{document}\n\\thispagestyle{empty}\n$"++(mathTransform l)++"$\n\\end{document}") | |
501 | Static.writeFiles tempdir headerFiles | |
502 | mysystem ("latex2png " ++ (tempdir </> (myname++".tex"))) | |
503 | sz<-mysize (tempdir </> (myname++".png")) | |
504 | mysystem ("latex2png -c -d 1200 " ++ (tempdir </> (((myname))++".tex"))) | |
505 | print (mathTransform l) | |
506 | return sz) | |
507 | return [(myname++".png",sz)] | |
512 | ||
513 | processNode :: Anything Char -> ImperativeMonad [(String, Int)] | |
514 | processNode (Environment Math _ l) | |
515 | = do myname <- return (((hex (show (hash (mathTransform l)))))) | |
516 | sz <- liftIO | |
517 | (do Tools.writeFile (tempdir </> (myname ++ ".tex")) | |
518 | ("\\documentclass{article}\n\\usepackage{color}\n\\input{./defaultcolors}\n\\usepackage{amsfonts}\n\\usepackage{amsmath}\n\\begin{document}\n\\thispagestyle{empty}\n$" | |
519 | ++ (mathTransform l) ++ "$\n\\end{document}") | |
520 | Static.writeFiles tempdir headerFiles | |
521 | mysystem ("latex2png " ++ (tempdir </> (myname ++ ".tex"))) | |
522 | sz <- mysize (tempdir </> (myname ++ ".png")) | |
523 | mysystem | |
524 | ("latex2png -c -d 1200 " ++ (tempdir </> (((myname)) ++ ".tex"))) | |
525 | print (mathTransform l) | |
526 | return sz) | |
527 | return [(myname ++ ".png", sz)] | |
528 | processNode (Environment BigMath _ l) | |
529 | = do myname <- return (((hex (show (hash (mathTransform l)))))) | |
530 | sz <- liftIO | |
531 | (do Tools.writeFile (tempdir </> (myname ++ ".tex")) | |
532 | ("\\documentclass{article}\n\\usepackage{color}\n\\input{./defaultcolors}\n\\usepackage{amsfonts}\n\\usepackage{amsmath}\n\\begin{document}\n\\thispagestyle{empty}\n$" | |
533 | ++ (mathTransform l) ++ "$\n\\end{document}") | |
534 | Static.writeFiles tempdir headerFiles | |
535 | mysystem ("latex2png " ++ (tempdir </> (myname ++ ".tex"))) | |
536 | sz <- mysize (tempdir </> (myname ++ ".png")) | |
537 | mysystem | |
538 | ("latex2png -c -d 1200 " ++ (tempdir </> (((myname)) ++ ".tex"))) | |
539 | print (mathTransform l) | |
540 | return sz) | |
541 | return [(myname ++ ".png", sz)] | |
508 | 542 | processNode (Environment _ _ l) = (makeformulas p tempdir l) |
509 | processNode _ = return [] | |
543 | processNode _ = return [] | |
544 | ||
510 | 545 | mysize :: String -> IO Int |
511 | 546 | mysize fn |
512 | 547 | = do _ <- system |
525 | 560 | (x : _) -> case splitOn " " x of |
526 | 561 | (_ : (_ : (y : _))) -> case splitOn "x" y of |
527 | 562 | (z : _) -> case reads z of |
528 | [(ii, | |
529 | _)] -> do return ii | |
563 | [(ii, _)] -> do return | |
564 | ii | |
530 | 565 | _ -> return 0 |
531 | 566 | _ -> return 0 |
532 | 567 | _ -> return 0 |
533 | 568 | _ -> return 0 |
534 | ||
535 | ||
536 | ||
537 | ||
569 | ||
538 | 570 | jjoin :: String -> String -> String |
539 | 571 | jjoin theBody listOfFiguers |
540 | 572 | = ((toString (latexHeader)) ++ |
541 | 573 | theBody ++ listOfFiguers ++ (toString latexFooter)) |
542 | ||
574 | ||
543 | 575 | all :: FullConfig -> ImperativeMonad () |
544 | 576 | all cfg |
545 | 577 | = do liftIO $ myprint " processing started" |
565 | 597 | liftIO $ myprint " downloading article and contributor information" |
566 | 598 | text <- load (runMode cfg) |
567 | 599 | liftIO $ myprint " parsing article text" |
568 | ||
569 | theFormulas <- if (outputType cfg) `elem` [EPubFile, OdtFile] then makeformulas (mainPath cfg) tempdir (printPrepareTree (parseit (if (runMode cfg)==HTML then minparsers else parsers) text)) else return [] | |
600 | theFormulas <- if (outputType cfg) `elem` [EPubFile, OdtFile] then | |
601 | makeformulas (mainPath cfg) tempdir | |
602 | (printPrepareTree | |
603 | (parseit (if (runMode cfg) == HTML then minparsers else parsers) | |
604 | text)) | |
605 | else return [] | |
570 | 606 | liftIO |
571 | 607 | (myprint |
572 | 608 | (" number of bytes to be parsed: " ++ |
573 | 609 | (show (Data.List.length text)))) |
574 | result <- Compiler.compile (runMode cfg) text templates [] "" Nothing (Map.fromList theFormulas) ((outputType cfg) `elem` [EPubFile, OdtFile]) | |
610 | result <- Compiler.compile (runMode cfg) text templates [] "" | |
611 | Nothing | |
612 | (Map.fromList theFormulas) | |
613 | ((outputType cfg) `elem` [EPubFile, OdtFile]) | |
575 | 614 | liftIO $ |
576 | 615 | myprint |
577 | 616 | " forking threads to download of images and contributor information on them" |
580 | 619 | (" number of images going to be downloaded: " ++ |
581 | 620 | (show (Data.List.length (images result))))) |
582 | 621 | theImages <- getImages tempdir (images result) (wikiUrl purl) |
583 | --let theImages=[] | |
584 | 622 | let joined = jjoin (body result) "" |
585 | 623 | let theConfig |
586 | 624 | = LatexConfig{content = joined, figures = [], |
587 | 625 | All.title = (makeTitle result purl), fullConfig = cfg, |
588 | 626 | All.hostname = (UrlAnalyse.hostname purl), theResult = result, |
589 | onlyTables = True, lang = language, theTempDir = tempdir, formulas=theFormulas, figHTML=""} | |
627 | onlyTables = True, lang = language, theTempDir = tempdir, | |
628 | formulas = theFormulas, figHTML = ""} | |
590 | 629 | liftIO $ myprint " precompiling table columns" |
591 | 630 | let cols = (sum (map Data.List.length (tablelist result))) |
592 | 631 | ior <- liftIO (newIORef (0 :: Integer)) |
610 | 649 | (myprint |
611 | 650 | (" number of bytes to be parsed: " ++ |
612 | 651 | (show (Data.List.length text)))) |
613 | newResult <- Compiler.compile (runMode cfg) text templates tabs (makeTitle2 result purl) language (Map.fromList theFormulas) ((outputType cfg) `elem` [EPubFile, OdtFile]) | |
614 | ||
652 | newResult <- Compiler.compile (runMode cfg) text templates tabs | |
653 | (makeTitle2 result purl) | |
654 | language | |
655 | (Map.fromList theFormulas) | |
656 | ((outputType cfg) `elem` [EPubFile, OdtFile]) | |
615 | 657 | liftIO $ |
616 | 658 | myprint |
617 | 659 | " joining threads to download the images and contributor information on them" |
621 | 663 | (show (Data.List.length (images result))))) |
622 | 664 | pp <- makeImgList theImages |
623 | 665 | pphtml <- makeImgListHTML theImages |
624 | (contrib,contribHTML) <- makeContributors (Just (UrlAnalyse.url purl)) | |
666 | (contrib, contribHTML) <- makeContributors | |
667 | (Just (UrlAnalyse.url purl)) | |
625 | 668 | let newContent = jjoin (body newResult) (contrib ++ pp) |
626 | 669 | thetheImages <- liftIO $ |
627 | 670 | do ii <- return theImages |
629 | 672 | liftIO $ myprint " preparing for PDF generation" |
630 | 673 | pdf <- runLaTeX |
631 | 674 | theConfig{onlyTables = False, theResult = newResult, |
632 | content = newContent, figures = thetheImages, figHTML=pphtml++contribHTML++"</body></html>"} | |
675 | content = newContent, figures = thetheImages, | |
676 | figHTML = pphtml ++ contribHTML ++ "</body></html>"} | |
633 | 677 | liftIO (Data.ByteString.writeFile (outputFilename cfg) pdf) |
634 | 678 | liftIO $ removeDirectoryRecursive tempdir |
635 | 679 | liftIO $ myprint " finished" |
0 | ||
0 | 1 | module Babel where |
1 | 2 | import Static |
2 | 3 | import Data.List.Split (splitOn) |
5 | 6 | import Codec.Binary.UTF8.String |
6 | 7 | import Data.ByteString |
7 | 8 | hiding (take, reverse, dropWhile, takeWhile, drop, map, concat, |
8 | elem, zip, intercalate, init, tails, isPrefixOf, any, length, null, hPutStr) | |
9 | ||
9 | elem, zip, intercalate, init, tails, isPrefixOf, any, length, null, | |
10 | hPutStr) | |
10 | 11 | |
11 | 12 | makeBabel :: Maybe String -> [Char] -> String |
12 | 13 | makeBabel b x |
20 | 21 | = case splitOn "." x of |
21 | 22 | (z : _) -> z |
22 | 23 | _ -> "en" |
23 | ||
24 | ||
25 |
2 | 2 | import Data.Char |
3 | 3 | import Data.Array |
4 | 4 | import Data.Tuple |
5 | ||
5 | ||
6 | 6 | {-DHUN| Basic Fontstyle, may be either normal of monospaced or small caps DHUN-} |
7 | ||
7 | ||
8 | 8 | data FontStyleBase = Normal |
9 | 9 | | Mono |
10 | 10 | | Smallcaps |
11 | 11 | deriving (Eq, Ord, Show) |
12 | ||
12 | ||
13 | 13 | {-DHUN| Full discription of style of a font. Consists of Basic Fontstyle plus boolean for bold and/or italic DHUN-} |
14 | ||
14 | ||
15 | 15 | data FontStyle = FontStyle{stylebase :: FontStyleBase, |
16 | 16 | bold :: Bool, italic :: Bool} |
17 | 17 | deriving (Eq, Ord, Show) |
18 | ||
18 | ||
19 | 19 | {-DHUN| Font, a list of ttf font file currently used by mediawiki2latex DHUN-} |
20 | ||
20 | ||
21 | 21 | data Font = GnuUnifont |
22 | 22 | | WenQuanYiZenHei |
23 | 23 | | FreeMono |
37 | 37 | | ComputerModernRomanItalic |
38 | 38 | | ComputerModernRomanBoldItalic |
39 | 39 | deriving (Eq, Ord, Show, Ix) |
40 | ||
40 | ||
41 | 41 | {-DHUN| list of Fonts. The first element in the list is the fonts that is preferred to be used. If it can not be used for some reason the next font in the list is used. Note that the list is denoted in reverse order and put into the right order by the reverse function in the way it is just written at this point of the source file DHUN-} |
42 | ||
42 | ||
43 | 43 | fonts :: [Font] |
44 | 44 | fonts |
45 | 45 | = reverse |
50 | 50 | ComputerModernTypeWriterBoldItalic, ComputerModernRoman, |
51 | 51 | ComputerModernRomanBold, ComputerModernRomanItalic, |
52 | 52 | ComputerModernRomanBoldItalic] |
53 | ||
53 | ||
54 | 54 | {-DHUN| enumation of Fonts. To store the Fonts database efficiently on disc and to read it to memory quickly interger indices are used. The Integers are low in magnitude and thus stored as chars on disc. DHUN-} |
55 | ||
55 | ||
56 | 56 | fontList :: [(Font, Int)] |
57 | 57 | fontList = zip fonts [(ord 'A') ..] |
58 | ||
58 | ||
59 | 59 | {-DHUN| converts font to char. See also fontList in this source file DHUN-} |
60 | ||
60 | ||
61 | 61 | fromFontToChar :: Font -> Char |
62 | 62 | fromFontToChar f |
63 | 63 | = chr |
64 | 64 | ((array (GnuUnifont, ComputerModernRomanBoldItalic) fontList) ! f) |
65 | ||
65 | ||
66 | 66 | {-DHUN| converts from char to font. See also fontList in this source file DHUN-} |
67 | ||
67 | ||
68 | 68 | fromCharToFont :: Char -> Font |
69 | 69 | fromCharToFont c |
70 | 70 | = (array (ord ('A'), ord ('A') + (length fontList) - 1) |
71 | 71 | (map swap fontList)) |
72 | 72 | ! (ord c) |
73 | ||
73 | ||
74 | 74 | {-DHUN| converts from Fonts to path of ttf file on disc. DHUN-} |
75 | ||
75 | ||
76 | 76 | getttf :: Font -> [Char] |
77 | 77 | getttf ComputerModernTypeWriter |
78 | 78 | = "/usr/share/fonts/truetype/cmu/cmuntt.ttf" |
108 | 108 | getttf GnuUnifont = "/usr/share/fonts/truetype/unifont/unifont.ttf" |
109 | 109 | getttf WenQuanYiZenHei |
110 | 110 | = "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc" |
111 | ||
111 | ||
112 | 112 | {-DHUN| defines the FontStyle for each ttf font file. See also FontStyle in this source file. DHUN-} |
113 | ||
113 | ||
114 | 114 | getstyle :: Font -> FontStyle |
115 | 115 | getstyle GnuUnifont |
116 | 116 | = FontStyle{stylebase = Normal, bold = False, italic = False} |
15 | 15 | import Data.ByteString |
16 | 16 | hiding (take, reverse, dropWhile, takeWhile, drop, map, concat, |
17 | 17 | elem, length, zip, head, filter, minimum, isInfixOf) |
18 | ||
19 | 18 | |
20 | ||
21 | runCompile ::String -> ImperativeMonad () | |
22 | runCompile dir = do t<-liftIO $ Tools.readFile (dir </> "input") | |
23 | cr<-return (printPrepareTree (parseit minparsers t)) | |
24 | liftIO $ Tools.writeFile (dir </> "output") (show cr) | |
25 | ||
26 | ||
19 | runCompile :: String -> ImperativeMonad () | |
20 | runCompile dir | |
21 | = do t <- liftIO $ Tools.readFile (dir </> "input") | |
22 | cr <- return (printPrepareTree (parseit minparsers t)) | |
23 | liftIO $ Tools.writeFile (dir </> "output") (show cr) | |
27 | 24 | |
28 | 25 | {-DHUN| main function to compile mediawiki pages |DHUN-} |
29 | ||
26 | ||
30 | 27 | compile :: |
31 | 28 | RunMode -> |
32 | String -> String -> [[ByteString]] -> String -> Maybe String -> Map.Map String Int -> Bool ->ImperativeMonad CompileResult | |
33 | compile theRunMode text templates tabs mytitle mylanguage formulas b | |
29 | String -> | |
30 | String -> | |
31 | [[ByteString]] -> | |
32 | String -> | |
33 | Maybe String -> | |
34 | Map.Map String Int -> Bool -> ImperativeMonad CompileResult | |
35 | compile theRunMode text templates tabs mytitle mylanguage formulas | |
36 | b | |
34 | 37 | = do st <- get |
35 | 38 | case theRunMode of |
36 | 39 | StandardTemplates -> return |
37 | (run b mylanguage mytitle (parseit parsers text) (parseit parsers text) (hostname . fullUrl $ st) templates | |
38 | tabs formulas) | |
40 | (run b mylanguage mytitle (parseit parsers text) | |
41 | (parseit parsers text) | |
42 | (hostname . fullUrl $ st) | |
43 | templates | |
44 | tabs | |
45 | formulas) | |
39 | 46 | UserTemplateFile _ -> return |
40 | (run b mylanguage mytitle (parseit parsers text) (parseit parsers text) (hostname . fullUrl $ st) templates | |
41 | tabs formulas) | |
47 | (run b mylanguage mytitle (parseit parsers text) | |
48 | (parseit parsers text) | |
49 | (hostname . fullUrl $ st) | |
50 | templates | |
51 | tabs | |
52 | formulas) | |
42 | 53 | HTML -> return |
43 | (run b mylanguage mytitle (printPrepareTree (parseit minparsers text)) (printPrepareTree (parseit minparsers text)) | |
54 | (run b mylanguage mytitle | |
55 | (printPrepareTree (parseit minparsers text)) | |
56 | (printPrepareTree (parseit minparsers text)) | |
44 | 57 | (hostname . fullUrl $ st) |
45 | 58 | templates |
46 | tabs formulas) | |
47 | Book -> do | |
48 | return | |
49 | (run b mylanguage mytitle (loadacu st) (loadacu st) | |
50 | (hostname . fullUrl $ st) | |
51 | templates | |
52 | tabs formulas) | |
53 | ExpandedTemplates -> do return | |
54 | (run b mylanguage mytitle (parseit parsers text) (parseit parsers text) (hostname . fullUrl $ st) templates | |
55 | tabs formulas) | |
56 | ||
59 | tabs | |
60 | formulas) | |
61 | Book -> do return | |
62 | (run b mylanguage mytitle (loadacu st) (loadacu st) | |
63 | (hostname . fullUrl $ st) | |
64 | templates | |
65 | tabs | |
66 | formulas) | |
67 | ExpandedTemplates -> do return | |
68 | (run b mylanguage mytitle (parseit parsers text) | |
69 | (parseit parsers text) | |
70 | (hostname . fullUrl $ st) | |
71 | templates | |
72 | tabs | |
73 | formulas) | |
74 | ||
57 | 75 | {-DHUN| pathname of the temporary directory of the compiler |DHUN-} |
58 | ||
76 | ||
59 | 77 | dirpref :: [Char] |
60 | 78 | dirpref = "../tmp/compiler/" |
61 | ||
79 | ||
62 | 80 | {-DHUN| converts a wiki source document received from mediawiki when requesting it for Special:Export to a parse tree to be converted to LaTeX be treeToLaTeX3. It also signals to compiler.py that the source code was read using the temporary compiler directory |DHUN-} |
63 | ||
81 | ||
64 | 82 | shortparse :: String -> IO [Anything Char] |
65 | 83 | shortparse x |
66 | 84 | = do Tools.writeFile (dirpref ++ "done") "" |
67 | 85 | return (parseit parsers x) |
68 | ||
69 | ||
70 | ||
71 | ||
86 | ||
72 | 87 | {-DHUN| return a parse tree of a source file. The first argument is the source file. The second argument is a list of command line parameter. If it contains the keyword print. The source file is understood to be the HTML code returned by mediawiki when being requested for the print version of a wiki page, otherwise it is understood to be the wiki source code of a wiki page, that is what you get when issuing a Special:Export request to mediawiki. This function return a parse tree ready to be turned into a latex document by treeToLaTeX3 |DHUN-} |
73 | ||
88 | ||
74 | 89 | getparse :: String -> [String] -> IO [Anything Char] |
75 | 90 | getparse x args |
76 | 91 | = if ("print" `elem` args) then printparse x else shortparse x |
77 | ||
92 | ||
78 | 93 | {-DHUN| prepares a HTML document received from mediawiki when requesting it for the print version of a wiki page to a parse tree to be converted to LaTeX be treeToLaTeX3. It also signals to compiler.py that the source code was read using the temporary compiler directory |DHUN-} |
79 | ||
94 | ||
80 | 95 | printparse :: String -> IO [Anything Char] |
81 | 96 | printparse x |
82 | 97 | = do Tools.writeFile (dirpref ++ "done") "" |
83 | 98 | return (printPrepareTree (parseit minparsers x)) |
84 | ||
85 | ||
86 | ||
99 | ||
87 | 100 | {-DHUN| the pathname of the temporary directory |DHUN-} |
88 | ||
101 | ||
89 | 102 | tmppath :: [Char] |
90 | 103 | tmppath = "../tmp/" |
91 | ||
104 | ||
92 | 105 | {-DHUN| the function takes a list of the following format as first input [([table1,column1],width_x),([table1,column2],width_y),...,([table2,column1],width_z),...] parameter. Thats is a list containing the maximum width for each column of each table of the document. The maximum width of the column is the width that the column could have if it was printed on paper of infinite size. So the size without line breaks. The second parameter is the accumulator which should be the empty map when calling this function from outside. The function returns a map mapping tablenumber to a map mapping columnnumbers to maximum columns width. This data structure contains all information needed to make the decisions on the final width of the columns in the document |DHUN-} |
93 | ||
106 | ||
94 | 107 | maketabmap :: |
95 | 108 | [([Int], Double)] -> |
96 | 109 | Map.Map Int (Map.Map Int Double) -> |
104 | 117 | Nothing -> Just (Map.singleton s1 b1) |
105 | 118 | Just m1 -> Just (Map.insert s1 b1 m1) |
106 | 119 | maketabmap _ m = m |
107 | ||
120 | ||
108 | 121 | {-DHUN| prepare the result of maketabmap for further procession. Some indices are offset corrected. Space for the rules of the table is added to the width of the columns DHUN-} |
109 | ||
122 | ||
110 | 123 | postproctabmap :: |
111 | 124 | (Fractional a, Num k1, Ord k1, Ord a) => |
112 | 125 | Map.Map k (Map.Map k1 a) -> Map.Map k (Map.Map k1 a) |
115 | 128 | = Map.delete 0 |
116 | 129 | (Map.mapKeys (\ k -> k - 1) |
117 | 130 | (Map.map (\ x -> (x + 12.333748 - (minimum (Map.elems m1)))) m1)) |
118 | ||
131 | ||
119 | 132 | {-DHUN| datatype for the results of a compulation process. The field images contains the strings enlclose in double square brackes in the wiki used for image inclusion. The field body contains the body of the latex document compiled form the wiki documents. The field tablelist contains a list of lists of bodies of latex document containing the latex source for a single column. Those can be compiled with the propper headers and footers on arbitrary huge paper to determine the maximum reasonable width for a each column in each table of the document which is need for automatic calculation of column widths for the document. The field gallery numbers contain the image numbers of images include in the wiki source inside of galleries. These got a smaller dimension in cm in the final pdf document and can thus be dithers to a small width in pixels. The field title contains the title of the document if a template defining the title was part of the parse wiki source |DHUN-} |
120 | ||
133 | ||
121 | 134 | data CompileResult = CompileResult{images :: [String], |
122 | 135 | body :: String, tablelist :: [[String]], |
123 | galleryNumbers :: [Integer], title :: String, html::String} | |
124 | deriving Show | |
136 | galleryNumbers :: [Integer], title :: String, html :: String} | |
137 | deriving Show | |
138 | ||
125 | 139 | {-DHUN| the first parameter is the parse tree created by get parse of the document currently being processed. the second parameter is the URL under which the document was downloaded. the third parameter is the netloc describing the wiki this page belongs to. The fourth parameter is a mapping file defined by the user for the mapping of mediawiki templates to latex commands. the fifth parameter is a possible parse tree created by precious run the the should be added before the begging of the newly created parse tree. This function writes out all results to temporary files that will be further processed by compiler.py DHUN-} |
126 | 140 | |
127 | 141 | run :: |
128 | Bool -> Maybe String -> String -> [Anything Char] -> [Anything Char] -> | |
129 | String -> String -> [[ByteString]] -> Map.Map String Int-> CompileResult | |
130 | run bb mylanguage mytitle parsetree parsetree2 netloc tmpl someTables formulas | |
142 | Bool -> | |
143 | Maybe String -> | |
144 | String -> | |
145 | [Anything Char] -> | |
146 | [Anything Char] -> | |
147 | String -> | |
148 | String -> [[ByteString]] -> Map.Map String Int -> CompileResult | |
149 | run bb mylanguage mytitle parsetree parsetree2 netloc tmpl | |
150 | someTables formulas | |
131 | 151 | = CompileResult{images = img, body = bdy, tablelist = theTables, |
132 | galleryNumbers = gals, title = tit, html=if bb then trda3 else []} | |
152 | galleryNumbers = gals, title = tit, | |
153 | html = if bb then trda3 else []} | |
133 | 154 | where alldata2 g u |
134 | 155 | = (treeToLaTeX3 ((snd . newtree $ g)) |
135 | 156 | initialState{urld = analyseNetloc netloc}{tabmap = u, |
161 | 182 | bdy = doUnicode trda |
162 | 183 | gals = getGalleryNumbers trst |
163 | 184 | tit = getTitle trst |
164 | ||
185 | ||
165 | 186 | fun :: ByteString -> Double |
166 | 187 | fun x |
167 | 188 | = case reads (toString x) of |
6 | 6 | import Data.Map.Strict hiding ((!)) |
7 | 7 | import Data.Maybe |
8 | 8 | import System.Info |
9 | ||
9 | ||
10 | 10 | {-DHUN| a map taking a FontStyle as key. (see FontStyle in the BaseFont module). The Values of a maps is and array. The array has got an integer index covering 16 bit. The elements of the arrows are chars. These chars can be converted to the paths of ttf files on disc. (see functions fromCharToFont and getttf in the module BaseFont on how to converts the char to a path to a ttf file on disc). The idea is that you got a 16 Bit unicode charter with certain font style properties. It put in this information in the this map and the array you get from it you get the idea ttf file to print that character in that fontstyle in a latex document DHUN-} |
11 | ||
11 | ||
12 | 12 | megafont2 :: Map FontStyle (Array Int Char) |
13 | 13 | megafont2 |
14 | 14 | = Data.Map.Strict.fromList |
16 | 16 | array ((0, (2 :: Int) ^ (16 :: Int) - 1) :: (Int, Int)) |
17 | 17 | (zip ([0 .. (2 :: Int) ^ (16 :: Int) - 1] :: [Int]) f)) |
18 | 18 | | (s, f) <- megafont] |
19 | ||
19 | ||
20 | 20 | {-DHUN| takes a fontstyle and a 16 bis unicode charater and give you the ideal font to print this character in a LaTeX docuemnt. See also megafont2 in this module DHUN-} |
21 | ||
21 | ||
22 | 22 | getFont :: FontStyle -> Char -> Font |
23 | 23 | getFont fontStyle c |
24 | 24 | = fromMaybe GnuUnifont |
25 | 25 | ((Data.Map.Strict.lookup fontStyle megafont2) >>= |
26 | return . fromCharToFont . (! (if (ord c) >65535 then ord (' ') else (ord c)))) | |
27 | ||
26 | return . | |
27 | fromCharToFont . | |
28 | (! (if (ord c) > 65535 then ord (' ') else (ord c)))) | |
29 | ||
28 | 30 | {-DHUN| Takes a font and returns the LaTeX Command to switch to this particular font in xelatex DHUN-} |
29 | ||
31 | ||
30 | 32 | fontsetter :: Font -> [Char] |
31 | 33 | fontsetter f |
32 | = "\\setmainfont" ++ inner ++ "\\setmonofont" ++ innermono | |
34 | = "\\setmainfont" ++ inner ++ "\\setmonofont" ++ innermono | |
33 | 35 | where filename |
34 | 36 | = reverse ((takeWhile (/= '/')) (reverse (getttf f))) |
35 | 37 | pathname = reverse ((dropWhile (/= '/')) (reverse (getttf f))) |
36 | inner = "{" ++ filename ++ "}"++"[" ++ (if os=="linux" then "Path=" ++ pathname else "") ++ (mid f) ++ "]" | |
38 | inner | |
39 | = "{" ++ | |
40 | filename ++ | |
41 | "}" ++ | |
42 | "[" ++ | |
43 | (if os == "linux" then "Path=" ++ pathname else "") ++ | |
44 | (mid f) ++ "]" | |
37 | 45 | innermono |
38 | = "{" ++ filename ++ "}"++"[" ++(if os=="linux" then "Path=" ++ pathname else "")++ (midmono f) ++ "]" | |
46 | = "{" ++ | |
47 | filename ++ | |
48 | "}" ++ | |
49 | "[" ++ | |
50 | (if os == "linux" then "Path=" ++ pathname else "") ++ | |
51 | (midmono f) ++ "]" | |
39 | 52 | mid i |
40 | 53 | | i `elem` |
41 | 54 | [ComputerModernRoman, ComputerModernRomanBold, |
70 | 83 | ",UprightFont=FreeMono,BoldFont=FreeMonoBold," ++ |
71 | 84 | "ItalicFont=FreeMonoOblique,BoldItalicFont=FreeMonoBoldOblique" |
72 | 85 | midmono _ = "" |
73 | ||
86 | ||
74 | 87 | {-DHUN| Takes a FontStyle and returns the LaTeX Command to switch to that font. DHUN-} |
75 | ||
88 | ||
76 | 89 | fontstyler :: FontStyle -> [Char] |
77 | 90 | fontstyler s |
78 | 91 | = (if (stylebase s) == Mono then "\\ttfamily " else "") ++ |
11 | 11 | import Data.List |
12 | 12 | import Tools |
13 | 13 | import System.FilePath |
14 | ||
14 | ||
15 | 15 | modpath2 :: String -> URL -> URL |
16 | 16 | modpath2 s u |
17 | 17 | = u{url_path = |
20 | 20 | p = case reverse pp of |
21 | 21 | ('/' : xs) -> (reverse xs) |
22 | 22 | xs -> (reverse xs) |
23 | ||
23 | ||
24 | 24 | conv :: URL -> String -> String |
25 | 25 | conv u s |
26 | 26 | = if take 5 s == "http:" then s else |
34 | 34 | _ -> s}) |
35 | 35 | "%25" |
36 | 36 | "%" |
37 | ||
37 | ||
38 | 38 | getImageUrl2 :: (String, URL) -> Maybe String |
39 | 39 | getImageUrl2 (s, u) |
40 | 40 | = (getImageUrl "fullImageLink" u s) `mplus` |
41 | 41 | (getImageUrl "fullMedia" u s) |
42 | ||
42 | ||
43 | 43 | getImageUrl3 :: String -> Maybe String |
44 | 44 | getImageUrl3 s = return s |
45 | ||
45 | ||
46 | 46 | getImageUrl :: String -> URL -> String -> Maybe String |
47 | 47 | getImageUrl fi u ss |
48 | 48 | = if isInfixOf fil s then |
61 | 61 | theHref = BStr.unpack (UTF8Str.fromString "href=\"") |
62 | 62 | q = BStr.unpack (UTF8Str.fromString "\"") |
63 | 63 | |
64 | {-DHUN| downloads a single image form the wiki. It takes the temporary image download directory as first parameter. It takes the WikiUrl of the wiki website currently being processed as second parameter. It takes a tuple as third input parameter. The first element of the tuple is the image number so just an integer that can be used to identify the image uniquely) . The second element of the tupele is image include string of the image from the wiki source, that is the text in between the square brackets as second input parameter. It returns a tuple. The first element of the tuple is a list of urls under which the image may be found on the wiki. The second element of the tuple is the image number as described above. The third element of the tuple is the Url where the description page of the image on the wiki is located DHUN-} | |
64 | 65 | |
65 | {-DHUN| downloads a single image form the wiki. It takes the temporary image download directory as first parameter. It takes the WikiUrl of the wiki websitze currently being processed as second parameter. It takes a tuple as third input parameter. The first element of the tuple is the image number so just an integer that can be used to identify the image uniquely) . The second element of the tupele is image include string of the image from the wiki source, that is the text in between the square brackets as second input parameter. It returns a tuple. The first element of the tuple is a list of urls under which the image may be found on the wiki. The second element of the tuple is the image number as described above. The third element of the tuple is the Url where the description page of the image on the wiki is located DHUN-} | |
66 | ||
67 | 66 | getImagePage :: |
68 | 67 | String -> |
69 | 68 | WikiUrl -> (Integer, String) -> IO (Maybe ([String], Integer, URL)) |
79 | 78 | Just (du, x) -> do img <- (geturl2 x) :: (IO BStr.ByteString) |
80 | 79 | BStr.writeFile (dir </> (show i)) img |
81 | 80 | return |
82 | (Just (map (unify . exportURL . (modpath2 ss)) (parses u), i, modpath2 ss du)) | |
81 | (Just | |
82 | (map (unify . exportURL . (modpath2 ss)) (parses u), i, | |
83 | modpath2 ss du)) | |
83 | 84 | _ -> return Nothing |
84 | where | |
85 | go :: (URL, Maybe String) -> [(URL, String)] | |
85 | where go :: (URL, Maybe String) -> [(URL, String)] | |
86 | 86 | go (uu, Just x) = [(uu, x)] |
87 | 87 | go _ = [] |
88 | ||
89 | 88 | |
90 | 89 | {-DHUN| downloads a single image form the wiki. It takes the temporary image download directory as first parameter. It takes a tuple as second input parameter. The first element of the tuple is the image number so just an integer that can be used to identify the image uniquely) . The second element of the tupele is image include string of the image from the wiki source, that is the text in between the square brackets as second input parameter. It takes the WikiUrl of the wiki websitze currently being processed as thrird parameter. See function getImages in this module for documentation on the returned data type DHUN-} |
91 | 90 | |
92 | 91 | doImage :: |
93 | String -> | |
94 | WikiUrl -> | |
95 | (Integer, String) -> IO (Maybe ImageInfo) | |
92 | String -> WikiUrl -> (Integer, String) -> IO (Maybe ImageInfo) | |
96 | 93 | doImage dir theWikiUrl img |
97 | 94 | = do myprint (show img) |
98 | 95 | p <- getImagePage dir theWikiUrl (fst img, theName) |
99 | 96 | case p of |
100 | Just (u, pp, du) -> return (Just ImageInfo{wikiFilename=theName,imageNumber=pp,contributorUrls=u,descriptionUrl=du}) | |
97 | Just (u, pp, du) -> return | |
98 | (Just | |
99 | ImageInfo{wikiFilename = theName, imageNumber = pp, | |
100 | contributorUrls = u, descriptionUrl = du}) | |
101 | 101 | _ -> return Nothing |
102 | 102 | where theName |
103 | 103 | = case dropWhile (/= ':') (takeWhile (/= '|') (snd img)) of |
104 | 104 | (_ : xs) -> replace2 xs "%" "%25" |
105 | 105 | _ -> [] |
106 | ||
106 | ||
107 | 107 | {-DHUN| main function to download images. It takes the temporary image download directory as first parameter. It takes image include strings of the images from the wiki source, that is the text in between the square brackets as second input parameter. It takes the WikiUrl of the wiki websitze currently being processed as thrird parameter. This function runs as a background process. So it returns a list of empty MVars immediately when being called. They are later on filled with information on the downloaded images including their location in the temporary image download directory. The returend MVars contain ImageInfo values. See description in the module ImperativeState for a detailed description. DHUN-} |
108 | ||
108 | ||
109 | 109 | getImages :: |
110 | String -> | |
111 | [String] -> | |
112 | WikiUrl -> | |
113 | ImperativeMonad [Maybe ImageInfo] | |
110 | String -> [String] -> WikiUrl -> ImperativeMonad [Maybe ImageInfo] | |
114 | 111 | getImages dir images theWikiUrl |
115 | 112 | = do liftIO $ |
116 | 113 | do let ddir = dir |
0 | ||
0 | 1 | module Grammatik where |
1 | data Plingular = Singular | Plural | |
2 | deriving Show | |
3 | data Casus = Nominativ | Dativ | Genitiv | Akkusativ | |
4 | deriving Show | |
5 | data Person = Ich | Du | ErSieEs | |
6 | deriving Show | |
7 | data Verbform = VerbformPerson Person | Paeteritum | Partizip2 | Lonjunktiv | Imperativ Plingular | Grundform | |
8 | deriving Show | |
9 | data Adjektivfrom = Positiv | Koperativ | Superlativ | |
10 | deriving Show | |
11 | data Genus = Maskulin | Feminin | Neutrum | |
12 | deriving Show | |
13 | data GenusArticelis = MaskulinA | FemininA | NeutrumA | PluralA | |
14 | deriving Show | |
15 | data Bestimmung = Demonstativ | Definit | Unbestimmt | |
16 | deriving Show | |
17 | data Wort = Nomen String Genus Casus Plingular | Verb String Verbform | Adjektiv Adjektivfrom | Artikel Bestimmung GenusArticelis Casus | |
18 | deriving Show | |
19 | 2 | |
20 | data SatzTeil = Unbekannt [Wort] | Subjekt Wort | VerbS Wort | Objekt Wort | |
21 | deriving Show | |
3 | data Plingular = Singular | |
4 | | Plural | |
5 | deriving Show | |
6 | ||
7 | data Casus = Nominativ | |
8 | | Dativ | |
9 | | Genitiv | |
10 | | Akkusativ | |
11 | deriving Show | |
12 | ||
13 | data Person = Ich | |
14 | | Du | |
15 | | ErSieEs | |
16 | deriving Show | |
17 | ||
18 | data Verbform = VerbformPerson Person | |
19 | | Paeteritum | |
20 | | Partizip2 | |
21 | | Lonjunktiv | |
22 | | Imperativ Plingular | |
23 | | Grundform | |
24 | deriving Show | |
25 | ||
26 | data Adjektivfrom = Positiv | |
27 | | Koperativ | |
28 | | Superlativ | |
29 | deriving Show | |
30 | ||
31 | data Genus = Maskulin | |
32 | | Feminin | |
33 | | Neutrum | |
34 | deriving Show | |
35 | ||
36 | data GenusArticelis = MaskulinA | |
37 | | FemininA | |
38 | | NeutrumA | |
39 | | PluralA | |
40 | deriving Show | |
41 | ||
42 | data Bestimmung = Demonstativ | |
43 | | Definit | |
44 | | Unbestimmt | |
45 | deriving Show | |
46 | ||
47 | data Wort = Nomen String Genus Casus Plingular | |
48 | | Verb String Verbform | |
49 | | Adjektiv Adjektivfrom | |
50 | | Artikel Bestimmung GenusArticelis Casus | |
51 | deriving Show | |
52 | ||
53 | data SatzTeil = Unbekannt [Wort] | |
54 | | Subjekt Wort | |
55 | | VerbS Wort | |
56 | | Objekt Wort | |
57 | deriving Show |
2 | 2 | import Data.Char |
3 | 3 | import Data.Map.Strict hiding (map) |
4 | 4 | import Data.Maybe |
5 | ||
5 | ||
6 | 6 | {-DHUN| list of integer in the range from 0 to 15. So one hex digit DHUN-} |
7 | ||
7 | ||
8 | 8 | nums :: [Int] |
9 | 9 | nums = [0 .. 15] |
10 | ||
10 | ||
11 | 11 | {-DHUN| list of single digit hex numbers in ascending order DHUN-} |
12 | ||
12 | ||
13 | 13 | chars :: [Char] |
14 | 14 | chars = (['0' .. '9'] ++ ['A' .. 'F']) |
15 | ||
15 | ||
16 | 16 | {-DHUN| map from integer to hex digit DHUN-} |
17 | ||
17 | ||
18 | 18 | fromm :: Map Int Char |
19 | 19 | fromm = fromList $ zip nums chars |
20 | ||
20 | ||
21 | 21 | {-DHUN| map from hex digit to integer DHUN-} |
22 | ||
22 | ||
23 | 23 | tom :: Map Char Int |
24 | 24 | tom = fromList $ zip chars nums |
25 | ||
25 | ||
26 | 26 | {-DHUN| function to convert a single unicode character (Char) to a hex encodes string DHUN-} |
27 | ||
27 | ||
28 | 28 | hexChar :: Char -> String |
29 | 29 | hexChar c |
30 | 30 | = concat |
37 | 37 | fromm) |
38 | 38 | >>= (\ x -> return [x]))) |
39 | 39 | (reverse [0 .. 7] :: [Int])) |
40 | ||
40 | ||
41 | 41 | {-DHUN| function to convert a string of unicode characters to a hex encoded version of it DHUN-} |
42 | ||
42 | ||
43 | 43 | hex :: String -> String |
44 | 44 | hex s = concat (map hexChar s) |
45 | ||
45 | ||
46 | 46 | {-DHUN| function to decode a hex encoded unicode string DHUN-} |
47 | ||
47 | ||
48 | 48 | unhex :: String -> String |
49 | 49 | unhex (a : (b : (c : (d : (e : (f : (g : (h : xs)))))))) |
50 | 50 | = (chr |
51 | 51 | (sum |
52 | 52 | (map |
53 | (\ (i, cc) -> (16 ^ i) * (fromMaybe 0 (Data.Map.Strict.lookup cc tom))) | |
53 | (\ (i, cc) -> | |
54 | (16 ^ i) * (fromMaybe 0 (Data.Map.Strict.lookup cc tom))) | |
54 | 55 | (zip (reverse [0 .. 7] :: [Int]) [a, b, c, d, e, f, g, h])))) |
55 | 56 | : (unhex xs) |
56 | 57 | unhex _ = [] |
0 | ||
0 | 1 | module HtmlRenderer where |
1 | 2 | import MediaWikiParseTree |
2 | 3 | import MyState |
3 | 4 | import qualified Data.Map.Strict as Map |
4 | 5 | import Data.Map.Strict (Map) |
5 | import Control.Monad.Trans.State | |
6 | (State, state, runState, put, get) | |
6 | import Control.Monad.Trans.State (State, state, runState, put, get) | |
7 | 7 | import LatexRenderer |
8 | 8 | import WikiHelper |
9 | 9 | import Tools |
18 | 18 | import Data.Tuple |
19 | 19 | import Data.Hashable |
20 | 20 | import Hex |
21 | ||
21 | 22 | type HtmlRenderer = State MyState |
22 | 23 | |
23 | 24 | templateToHtml :: [Anything Char] -> String -> Renderer String |
26 | 27 | \ st -> swap $ templateHtmlProcessor st (prepateTemplate l s) |
27 | 28 | |
28 | 29 | templateHtmlProcessor :: |
29 | MyState -> | |
30 | (String, Map String [Anything Char]) -> (MyState, String) | |
31 | templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Warnung", ll) | |
30 | MyState -> | |
31 | (String, Map String [Anything Char]) -> (MyState, String) | |
32 | templateHtmlProcessor st | |
33 | ("Mathe f\252r Nicht-Freaks: Vorlage:Warnung", ll) | |
32 | 34 | = (st, |
33 | 35 | "<b>Warnung</b><br/>" ++ |
34 | (treeToHtml (Map.findWithDefault [] "1" ll) st)++"") | |
35 | templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Hinweis", ll) | |
36 | (treeToHtml (Map.findWithDefault [] "1" ll) st) ++ "") | |
37 | templateHtmlProcessor st | |
38 | ("Mathe f\252r Nicht-Freaks: Vorlage:Hinweis", ll) | |
36 | 39 | = (st, |
37 | 40 | "<b>Hinweis</b><br/>" ++ |
38 | (treeToHtml (Map.findWithDefault [] "1" ll) st)++"") | |
39 | ||
40 | templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Beispiel", ll) | |
41 | (treeToHtml (Map.findWithDefault [] "1" ll) st) ++ "") | |
42 | templateHtmlProcessor st | |
43 | ("Mathe f\252r Nicht-Freaks: Vorlage:Beispiel", ll) | |
41 | 44 | = (st, |
42 | 45 | "<b>Beispiel</b><br/>" ++ |
43 | (treeToHtml (Map.findWithDefault [] "beispiel" ll) st)++"") | |
44 | ||
45 | templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Satz", ll) | |
46 | (treeToHtml (Map.findWithDefault [] "beispiel" ll) st) ++ "") | |
47 | templateHtmlProcessor st | |
48 | ("Mathe f\252r Nicht-Freaks: Vorlage:Satz", ll) | |
46 | 49 | = (st, |
47 | 50 | "<b>Satz</b><br/>" ++ |
48 | (treeToHtml (Map.findWithDefault [] "satz" ll) st)++"") | |
49 | ||
50 | templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Lösungsweg", ll) | |
51 | (treeToHtml (Map.findWithDefault [] "satz" ll) st) ++ "") | |
52 | templateHtmlProcessor st | |
53 | ("Mathe f\252r Nicht-Freaks: Vorlage:L\246sungsweg", ll) | |
51 | 54 | = (st, |
52 | 55 | "<b>Wie kommt man auf den Beweis?</b>" ++ |
53 | (treeToHtml (Map.findWithDefault [] "lösungsweg" ll) st)++"") | |
54 | ||
55 | templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Beweis", ll) | |
56 | (treeToHtml (Map.findWithDefault [] "l\246sungsweg" ll) st) ++ "") | |
57 | templateHtmlProcessor st | |
58 | ("Mathe f\252r Nicht-Freaks: Vorlage:Beweis", ll) | |
56 | 59 | = (st, |
57 | 60 | "<b>Beweis: </b><br/>" ++ |
58 | (treeToHtml (Map.findWithDefault [] "beweis" ll) st)++"") | |
59 | ||
60 | templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Definition", ll) | |
61 | (treeToHtml (Map.findWithDefault [] "beweis" ll) st) ++ "") | |
62 | templateHtmlProcessor st | |
63 | ("Mathe f\252r Nicht-Freaks: Vorlage:Definition", ll) | |
61 | 64 | = (st, |
62 | 65 | "<b>Definition: </b><i>(" ++ |
63 | (treeToHtml (Map.findWithDefault [] "titel" ll) st)++")</i><br/>" | |
64 | ++ (treeToHtml (Map.findWithDefault [] "definition" ll) st)) | |
65 | templateHtmlProcessor st ("mathe für Nicht-Freaks: Vorlage:Definition", ll) | |
66 | (treeToHtml (Map.findWithDefault [] "titel" ll) st) ++ | |
67 | ")</i><br/>" ++ | |
68 | (treeToHtml (Map.findWithDefault [] "definition" ll) st)) | |
69 | templateHtmlProcessor st | |
70 | ("mathe f\252r Nicht-Freaks: Vorlage:Definition", ll) | |
66 | 71 | = (st, |
67 | 72 | "<b>Definition: </b><i>(" ++ |
68 | (treeToHtml (Map.findWithDefault [] "titel" ll) st)++")</i><br/>" | |
69 | ++ (treeToHtml (Map.findWithDefault [] "definition" ll) st)) | |
70 | ||
71 | ||
72 | ||
73 | (treeToHtml (Map.findWithDefault [] "titel" ll) st) ++ | |
74 | ")</i><br/>" ++ | |
75 | (treeToHtml (Map.findWithDefault [] "definition" ll) st)) | |
73 | 76 | templateHtmlProcessor st ("-", ll) |
74 | 77 | = (tempProcAdapter $ mnfindent ll) st |
75 | ||
76 | templateHtmlProcessor st ("Mathe f\252r Nicht-Freaks: Vorlage:Klapptext", ll) | |
78 | templateHtmlProcessor st | |
79 | ("Mathe f\252r Nicht-Freaks: Vorlage:Klapptext", ll) | |
77 | 80 | = (tempProcAdapter $ mnfklapptext ll) st |
78 | templateHtmlProcessor st ("Aufgabensammlung: Vorlage:Klapptext", ll) | |
81 | templateHtmlProcessor st | |
82 | ("Aufgabensammlung: Vorlage:Klapptext", ll) | |
79 | 83 | = (tempProcAdapter $ mnfklapptext ll) st |
80 | templateHtmlProcessor st ("Aufgabensammlung: Vorlage:Vollst\228ndige Induktion", ll) | |
84 | templateHtmlProcessor st | |
85 | ("Aufgabensammlung: Vorlage:Vollst\228ndige Induktion", ll) | |
81 | 86 | = (tempProcAdapter $ mnfinduktion ll) st |
82 | ||
83 | ||
84 | 87 | templateHtmlProcessor st ("Formel", ll) |
85 | 88 | = (st, |
86 | 89 | "<dl><dd>" ++ |
87 | (treeToHtml (Map.findWithDefault [] "1" ll) st)++"</dd></dl>") | |
88 | templateHtmlProcessor st ("Mathe für Nicht-Freaks: Vorlage:Frage", ll) | |
90 | (treeToHtml (Map.findWithDefault [] "1" ll) st) ++ "</dd></dl>") | |
91 | templateHtmlProcessor st | |
92 | ("Mathe f\252r Nicht-Freaks: Vorlage:Frage", ll) | |
89 | 93 | = (tempProcAdapter $ mnffrage ll) st |
90 | ||
91 | templateHtmlProcessor st ("Anker", _) | |
92 | = (st, "") | |
94 | templateHtmlProcessor st ("Anker", _) = (st, "") | |
93 | 95 | templateHtmlProcessor st ("Symbol", ll) |
94 | 96 | = (st, (treeToHtml (Map.findWithDefault [] "1" ll) st)) |
95 | ||
96 | templateHtmlProcessor st ("#invoke:Mathe für Nicht-Freaks/Seite", _) | |
97 | = (st, "") | |
98 | ||
97 | templateHtmlProcessor st | |
98 | ("#invoke:Mathe f\252r Nicht-Freaks/Seite", _) = (st, "") | |
99 | 99 | templateHtmlProcessor st ("Aufgabensammlung: Vorlage:Infobox", _) |
100 | 100 | = (st, "") |
101 | 101 | templateHtmlProcessor st ("Aufgabensammlung: Vorlage:Symbol", _) |
102 | 102 | = (st, "") |
103 | ||
104 | templateHtmlProcessor st ("Nicht l\246schen", _) | |
105 | = (st, "") | |
106 | ||
107 | ||
108 | templateHtmlProcessor st ("#ifeq:{{{include", _) | |
109 | = (st, "") | |
110 | ||
111 | ||
112 | ||
113 | templateHtmlProcessor st ("Druckversion Titelseite", _) | |
114 | = (st, "") | |
115 | templateHtmlProcessor st ("PDF-Version Gliederung", _) | |
116 | = (st, "") | |
117 | templateHtmlProcessor st ("#invoke:Liste", _) | |
118 | = (st, "") | |
119 | templateHtmlProcessor st ("Smiley", _) | |
120 | = (st, "☺") | |
121 | templateHtmlProcessor st ("", _) | |
122 | = (st, "") | |
123 | ||
124 | ||
125 | ||
126 | ||
127 | ||
128 | ||
129 | templateHtmlProcessor st (x, _) = (st,"UNKNOWN TEMPLATE "++x++" ") | |
103 | templateHtmlProcessor st ("Nicht l\246schen", _) = (st, "") | |
104 | templateHtmlProcessor st ("#ifeq:{{{include", _) = (st, "") | |
105 | templateHtmlProcessor st ("Druckversion Titelseite", _) = (st, "") | |
106 | templateHtmlProcessor st ("PDF-Version Gliederung", _) = (st, "") | |
107 | templateHtmlProcessor st ("#invoke:Liste", _) = (st, "") | |
108 | templateHtmlProcessor st ("Smiley", _) = (st, "\9786") | |
109 | templateHtmlProcessor st ("", _) = (st, "") | |
110 | templateHtmlProcessor st (x, _) | |
111 | = (st, "UNKNOWN TEMPLATE " ++ x ++ " ") | |
130 | 112 | |
131 | 113 | wikiLinkCaptionHtml :: [Anything Char] -> MyState -> String |
132 | 114 | wikiLinkCaptionHtml l st = if isCaption x then rebuild x else "" |
133 | 115 | where x = (treeToHtml (last (splitOn [C '|'] l)) st) |
134 | 116 | rebuild (':' : xs) = xs |
135 | 117 | rebuild b = b |
118 | ||
136 | 119 | wikiImageToHtml :: [Anything Char] -> Renderer String |
137 | 120 | wikiImageToHtml l |
138 | 121 | = do st <- get |
139 | mystr <- return ((if not (micro st) then "<p>"else "")++"<"++(if ext=="webm" then "video controls" else "img")++" src=\"./images/" ++ (n st) ++ "." ++ ext ++ "\" style=\"width: "++ (if (tb st) then "100.0" else (mysize st)) ++"%;\">"++ (if (not (micro st)) then "<br/> "++(getfig st)++" "++(n st) ++ " " ++ (s st) ++ "</p>"else "")) | |
122 | mystr <- return | |
123 | ((if not (micro st) then "<p>" else "") ++ | |
124 | "<" ++ | |
125 | (if ext == "webm" then "video controls" else "img") ++ | |
126 | " src=\"./images/" ++ | |
127 | (n st) ++ | |
128 | "." ++ | |
129 | ext ++ | |
130 | "\" style=\"width: " ++ | |
131 | (if (tb st) then "100.0" else (mysize st)) ++ | |
132 | "%;\">" ++ | |
133 | (if (not (micro st)) then | |
134 | "<br/> " ++ | |
135 | (getfig st) ++ " " ++ (n st) ++ " " ++ (s st) ++ "</p>" | |
136 | else "")) | |
140 | 137 | put |
141 | 138 | st{getImages = (getImages st) ++ [shallowFlatten l], |
142 | 139 | getJ = ((getJ st) + 1)} |
155 | 152 | s1 st |
156 | 153 | = if '|' `elem` (shallowFlatten l) then (s2 st) else |
157 | 154 | (treeToHtml [] st{getJ = ((getJ st) + 1)}) |
158 | mysize st = printf "%0.5f" ((mysizefloat2 st)*100.0) | |
155 | mysize st = printf "%0.5f" ((mysizefloat2 st) * 100.0) | |
159 | 156 | mysizefloat st = (min (getF st) (imageSize l)) |
160 | 157 | mysizefloat2 st = if (msb st) then 1.0 else (mysizefloat st) |
161 | 158 | msb st = (mysizefloat st) == (getF st) |
162 | 159 | micro st = ((mysizefloat st) < 0.17) || ((getInTab st) > 1) |
163 | 160 | n st = show (getJ st) |
164 | 161 | tb st = ((getInTab st) > 0) |
165 | getfig st = head (splitOn "}" (last (splitOn "\\newcommand{\\myfigurebabel}{" (makeBabel (langu st) "en")))) | |
166 | ||
167 | ||
168 | ||
162 | getfig st | |
163 | = head | |
164 | (splitOn "}" | |
165 | (last | |
166 | (splitOn "\\newcommand{\\myfigurebabel}{" | |
167 | (makeBabel (langu st) "en")))) | |
169 | 168 | |
170 | 169 | galleryContentToHtml :: [[Anything Char]] -> Renderer String |
171 | 170 | galleryContentToHtml (x : xs) |
173 | 172 | ss <- galleryContentToHtml xs |
174 | 173 | return $ s ++ "</tr><tr>" ++ ss |
175 | 174 | galleryContentToHtml [] = return [] |
176 | ||
175 | ||
177 | 176 | {-DHUN| converts a part of a gallery (image gallery, gallery tag) from parse tree to latex. A part are as many elements as fit into a single row in the resulting latex table DHUN-} |
178 | ||
177 | ||
179 | 178 | galleryRowToHtml :: [Anything Char] -> Renderer String |
180 | 179 | galleryRowToHtml [] = return [] |
181 | 180 | galleryRowToHtml (x : []) = treeToHtml2 [x] |
183 | 182 | = do s <- treeToHtml2 [x] |
184 | 183 | g <- galleryRowToHtml xs |
185 | 184 | return $ s ++ "</td><td>" ++ g |
186 | ||
185 | ||
187 | 186 | {-DHUN| Converts are gallery (image gallery, gallery tag) from parse tree to latex. Also writes table header and footer. This is the function you should use for converting galleries to latex DHUN-} |
188 | ||
187 | ||
189 | 188 | galleryToHtml :: [Anything Char] -> Renderer String |
190 | 189 | galleryToHtml x |
191 | 190 | = do st <- get |
195 | 194 | trim (treeToHtml z st) /= ""]) |
196 | 195 | st2 <- get |
197 | 196 | put st2{getF = (getF st)} |
197 | return ("<table><tr>" ++ s ++ "</tr></table>") | |
198 | ||
199 | mnffrage :: Map String [Anything Char] -> Renderer String | |
200 | mnffrage ll | |
201 | = do typ <- treeToHtml2 (Map.findWithDefault [] "typ" ll) | |
202 | frage <- treeToHtml2 (Map.findWithDefault [] "frage" ll) | |
203 | antwort <- treeToHtml2 (Map.findWithDefault [] "antwort" ll) | |
198 | 204 | return |
199 | ("<table><tr>" ++ s ++ "</tr></table>") | |
200 | ||
201 | mnffrage :: Map String [Anything Char] -> Renderer String | |
202 | mnffrage ll = do typ<-treeToHtml2 (Map.findWithDefault [] "typ" ll) | |
203 | frage<-treeToHtml2 (Map.findWithDefault [] "frage" ll) | |
204 | antwort<-treeToHtml2 (Map.findWithDefault [] "antwort" ll) | |
205 | return ("<dl><dd><b>" ++ typ++":</b> "++ frage++"</dd><dd>"++antwort ++"</dd></dl>") | |
205 | ("<dl><dd><b>" ++ | |
206 | typ ++ ":</b> " ++ frage ++ "</dd><dd>" ++ antwort ++ "</dd></dl>") | |
206 | 207 | |
207 | 208 | mnfindent :: Map String [Anything Char] -> Renderer String |
208 | mnfindent ll = do one<-treeToHtml2 (Map.findWithDefault [] "1" ll) | |
209 | return ("<dl><dd>" ++ one ++"</dd></dl>") | |
210 | ||
209 | mnfindent ll | |
210 | = do one <- treeToHtml2 (Map.findWithDefault [] "1" ll) | |
211 | return ("<dl><dd>" ++ one ++ "</dd></dl>") | |
211 | 212 | |
212 | 213 | mnfklapptext :: Map String [Anything Char] -> Renderer String |
213 | mnfklapptext ll = do kopf<-treeToHtml2 (Map.findWithDefault [] "kopf" ll) | |
214 | inhalt<-treeToHtml2 (Map.findWithDefault [] "inhalt" ll) | |
215 | return ("<b>" ++ kopf ++"</b><br/>"++inhalt) | |
216 | ||
214 | mnfklapptext ll | |
215 | = do kopf <- treeToHtml2 (Map.findWithDefault [] "kopf" ll) | |
216 | inhalt <- treeToHtml2 (Map.findWithDefault [] "inhalt" ll) | |
217 | return ("<b>" ++ kopf ++ "</b><br/>" ++ inhalt) | |
217 | 218 | |
218 | 219 | mnfinduktion :: Map String [Anything Char] -> Renderer String |
219 | mnfinduktion ll = do erf<-treeToHtml2 (Map.findWithDefault [] "erfuellungsmenge" ll) | |
220 | aus<-treeToHtml2 (Map.findWithDefault [] "aussageform" ll) | |
221 | anf<-treeToHtml2 (Map.findWithDefault [] "induktionsanfang" ll) | |
222 | vor<-treeToHtml2 (Map.findWithDefault [] "induktionsvoraussetzung" ll) | |
223 | beh<-treeToHtml2 (Map.findWithDefault [] "induktionsbehauptung" ll) | |
224 | sch<-treeToHtml2 (Map.findWithDefault [] "beweis_induktionsschritt" ll) | |
225 | return ("<b>Aussageform, deren Allgemeingültigkeit für "++erf++" bewiesen werden soll:</b><br/>"++aus++"<br/><b>1. Induktionsanfang</b><br/>"++anf++"<br/><b>2. Induktionsschritt</b><br><b>2a. Induktionsvoraussetzung</b><br/>"++vor++"<br/><b>2b. Induktionsbehauptung</b><br/>"++beh++"<br/><b>2c. Beweis des Induktionsschritts</b><br/>"++sch++"<br/>") | |
226 | ||
227 | ||
228 | ||
229 | writedict::[(String,String)]->String | |
220 | mnfinduktion ll | |
221 | = do erf <- treeToHtml2 | |
222 | (Map.findWithDefault [] "erfuellungsmenge" ll) | |
223 | aus <- treeToHtml2 (Map.findWithDefault [] "aussageform" ll) | |
224 | anf <- treeToHtml2 (Map.findWithDefault [] "induktionsanfang" ll) | |
225 | vor <- treeToHtml2 | |
226 | (Map.findWithDefault [] "induktionsvoraussetzung" ll) | |
227 | beh <- treeToHtml2 | |
228 | (Map.findWithDefault [] "induktionsbehauptung" ll) | |
229 | sch <- treeToHtml2 | |
230 | (Map.findWithDefault [] "beweis_induktionsschritt" ll) | |
231 | return | |
232 | ("<b>Aussageform, deren Allgemeing\252ltigkeit f\252r " ++ | |
233 | erf ++ | |
234 | " bewiesen werden soll:</b><br/>" ++ | |
235 | aus ++ | |
236 | "<br/><b>1. Induktionsanfang</b><br/>" ++ | |
237 | anf ++ | |
238 | "<br/><b>2. Induktionsschritt</b><br><b>2a. Induktionsvoraussetzung</b><br/>" | |
239 | ++ | |
240 | vor ++ | |
241 | "<br/><b>2b. Induktionsbehauptung</b><br/>" ++ | |
242 | beh ++ | |
243 | "<br/><b>2c. Beweis des Induktionsschritts</b><br/>" ++ | |
244 | sch ++ "<br/>") | |
245 | ||
246 | writedict :: [(String, String)] -> String | |
230 | 247 | writedict [] = [] |
231 | writedict ((k,v):xs) = k++"=\""++v++"\" " ++ (writedict xs) | |
232 | ||
233 | treeToHtml3 :: Map String Int->Maybe String -> String -> [Anything Char] -> MyState -> (String, MyState) | |
234 | treeToHtml3 formulas mylanguage title l st = let (a,b)=runState (treeToHtml2 l) st{langu=mylanguage, forms=formulas} in ("<html><head><meta charset=\"utf-8\"><title>"++title++"</title><style>table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { margin: 0; padding: 0; vertical-align: baseline; border: none; }\ntable.sourceCode { width: 100%; line-height: 100%; }\ntd.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; }\ntd.sourceCode { padding-left: 5px; }\ncode > span.kw { font-weight: bold; }\ncode > span.dt { text-decoration: underline; }\ncode > span.co { font-style: italic; }\ncode > span.al { font-weight: bold; }\ncode > span.er { font-weight: bold; }\n</style></head><body>"++a,b) | |
248 | writedict ((k, v) : xs) | |
249 | = k ++ "=\"" ++ v ++ "\" " ++ (writedict xs) | |
250 | ||
251 | treeToHtml3 :: | |
252 | Map String Int -> | |
253 | Maybe String -> | |
254 | String -> [Anything Char] -> MyState -> (String, MyState) | |
255 | treeToHtml3 formulas mylanguage title l st | |
256 | = let (a, b) | |
257 | = runState (treeToHtml2 l) st{langu = mylanguage, forms = formulas} | |
258 | in | |
259 | ("<html><head><meta charset=\"utf-8\"><title>" ++ | |
260 | title ++ | |
261 | "</title><style>table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { margin: 0; padding: 0; vertical-align: baseline; border: none; }\ntable.sourceCode { width: 100%; line-height: 100%; }\ntd.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; }\ntd.sourceCode { padding-left: 5px; }\ncode > span.kw { font-weight: bold; }\ncode > span.dt { text-decoration: underline; }\ncode > span.co { font-style: italic; }\ncode > span.al { font-weight: bold; }\ncode > span.er { font-weight: bold; }\n</style></head><body>" | |
262 | ++ a, | |
263 | b) | |
264 | ||
235 | 265 | treeToHtml :: [Anything Char] -> MyState -> String |
236 | 266 | treeToHtml l states = (fst $ runState (treeToHtml2 l) states) |
267 | ||
237 | 268 | treeToHtmlBak :: [Anything Char] -> MyState -> String |
238 | 269 | treeToHtmlBak _ _ = "" |
270 | ||
239 | 271 | treeToHtml2Bak :: [Anything Char] -> HtmlRenderer String |
240 | 272 | treeToHtml2Bak _ = return "" |
241 | 273 | |
243 | 275 | treeToHtml2 ll |
244 | 276 | = do x <- allinfo |
245 | 277 | return $ concat x |
246 | where | |
247 | allinfo :: HtmlRenderer [String] | |
278 | where allinfo :: HtmlRenderer [String] | |
248 | 279 | allinfo = mapM nodeToHtml ll |
249 | ||
280 | ||
250 | 281 | walk :: String -> [Anything Char] -> String -> HtmlRenderer String |
251 | 282 | walk prefix l postfix |
252 | 283 | = do d <- treeToHtml2 l |
253 | 284 | return $ prefix ++ d ++ postfix |
285 | ||
254 | 286 | nodeToHtml :: Anything Char -> HtmlRenderer String |
255 | 287 | nodeToHtml (C c) |
256 | = do st<-get | |
257 | x<-if (c=='\n') && ((lastChar st)==c) then return "</p><p>" else return [c] | |
258 | put st{lastChar=c} | |
288 | = do st <- get | |
289 | x <- if (c == '\n') && ((lastChar st) == c) then return "</p><p>" | |
290 | else return [c] | |
291 | put st{lastChar = c} | |
259 | 292 | return x |
260 | 293 | nodeToHtml (Environment Wikilink _ l) |
261 | 294 | = do st <- get |
278 | 311 | guard (not ((getInTab st) > 0)) |
279 | 312 | return aa |
280 | 313 | of |
281 | Just j -> (renderHtml ((formatHtmlBlock defaultFormatOpts) (highlightAs j f))) | |
314 | Just j -> (renderHtml | |
315 | ((formatHtmlBlock defaultFormatOpts) (highlightAs j f))) | |
282 | 316 | Nothing -> (rtrim d) |
283 | 317 | nodeToHtml (Environment Template (Str s) l) = templateToHtml l s |
284 | nodeToHtml (Environment Wikitable _ l) = walk "<table><tr>" l "</table></tr>" | |
318 | nodeToHtml (Environment Wikitable _ l) | |
319 | = walk "<table><tr>" l "</table></tr>" | |
285 | 320 | nodeToHtml (Environment TableRowSep _ _) = return "</tr><tr>" |
286 | 321 | nodeToHtml (Environment TableColSep _ _) = return "</td><td>" |
287 | 322 | nodeToHtml (Environment TableHeadColSep _ _) = return "</th><th>" |
288 | nodeToHtml (Environment TableCap _ l) = walk "<caption>" l "</caption>" | |
289 | nodeToHtml (Environment Wikiheading (Str x) l) = let y=(show (length x)) in walk ("<h"++y++">") l ("</h"++y++">") | |
323 | nodeToHtml (Environment TableCap _ l) | |
324 | = walk "<caption>" l "</caption>" | |
325 | nodeToHtml (Environment Wikiheading (Str x) l) | |
326 | = let y = (show (length x)) in | |
327 | walk ("<h" ++ y ++ ">") l ("</h" ++ y ++ ">") | |
290 | 328 | nodeToHtml (Environment ItemEnv (Str _) [Item _]) = return [] |
291 | nodeToHtml (Environment ItemEnv (Str s) l) = do tag<-return (case s of | |
292 | "*"-> "ul" | |
293 | _ -> "ol") | |
294 | walk ("<"++tag++">") l ("</li></"++tag++">") | |
329 | nodeToHtml (Environment ItemEnv (Str s) l) | |
330 | = do tag <- return | |
331 | (case s of | |
332 | "*" -> "ul" | |
333 | _ -> "ol") | |
334 | walk ("<" ++ tag ++ ">") l ("</li></" ++ tag ++ ">") | |
295 | 335 | nodeToHtml (Item _) = return "</li><li>" |
296 | ||
297 | 336 | nodeToHtml (Environment Tag (TagAttr "noscript" _) _) = return [] |
298 | 337 | nodeToHtml (Environment Tag (TagAttr "head" _) _) = return [] |
299 | 338 | nodeToHtml (Environment Tag (TagAttr "a" _) l) = walk "" l "" |
307 | 346 | || ((Map.findWithDefault [] "id" a) `elem` ["coordinates"]) |
308 | 347 | then return "" else walk "" l "" |
309 | 348 | else walk "" l "" |
310 | nodeToHtml (Environment Tag (TagAttr "img" m) _) | (Map.lookup "class" m) == (Just "mwe-math-fallback-image-inline") = return [] | |
349 | nodeToHtml (Environment Tag (TagAttr "img" m) _) | |
350 | | (Map.lookup "class" m) == (Just "mwe-math-fallback-image-inline") | |
351 | = return [] | |
311 | 352 | nodeToHtml (Environment Comment _ _) = return [] |
312 | nodeToHtml (Environment Preformat (TagAttr "pre" _) l) = walk "<pre>" l "</pre>" | |
313 | nodeToHtml (Environment Math (TagAttr "math" _) l) = do st<-get | |
314 | return ("<img src=\"./formulas/"++(hex(show (hash (mathTransform l))))++".png\" style=\" width:"++(show ((((forms st) Map.! ((hex(show (hash (mathTransform l))))++".png"))*2) `div` 3) )++"px;\" />") | |
315 | ||
316 | ||
317 | nodeToHtml (Environment Math _ l) = do st<-get | |
318 | --return (show (forms st)) | |
319 | return ("<img src=\"./formulas/"++(hex(show (hash (mathTransform l))))++".png\" style=\" width:"++(show ((((forms st) Map.! ((hex(show (hash (mathTransform l))))++".png"))*2) `div` 3) )++"px;\" />") | |
320 | ||
321 | nodeToHtml (Environment Tag (TagAttr "table" m) l) | |
353 | nodeToHtml (Environment Preformat (TagAttr "pre" _) l) | |
354 | = walk "<pre>" l "</pre>" | |
355 | nodeToHtml (Environment Math (TagAttr "math" _) l) | |
356 | = do st <- get | |
357 | return | |
358 | ("<img src=\"./formulas/" ++ | |
359 | (hex (show (hash (mathTransform l)))) ++ | |
360 | ".png\" style=\" width:" ++ | |
361 | (show | |
362 | ((((forms st) Map.! | |
363 | ((hex (show (hash (mathTransform l)))) ++ ".png")) | |
364 | * 2) | |
365 | `div` 3)) | |
366 | ++ "px;\" />") | |
367 | nodeToHtml (Environment Math _ l) | |
368 | = do st <- get | |
369 | return | |
370 | ("<img src=\"./formulas/" ++ | |
371 | (hex (show (hash (mathTransform l)))) ++ | |
372 | ".png\" style=\" width:" ++ | |
373 | (show | |
374 | ((((forms st) Map.! | |
375 | ((hex (show (hash (mathTransform l)))) ++ ".png")) | |
376 | * 2) | |
377 | `div` 3)) | |
378 | ++ "px;\" />") | |
379 | nodeToHtml (Environment Tag (TagAttr "table" m) l) | |
322 | 380 | = do st <- get |
323 | 381 | put $ st{getInTab = (getInTab st) + 1} |
324 | d <- walk ("<table "++(writedict (Map.toList m))++">") l ("</table>") | |
382 | d <- walk ("<table " ++ (writedict (Map.toList m)) ++ ">") l | |
383 | ("</table>") | |
325 | 384 | st2 <- get |
326 | 385 | put $ st2{getInTab = (getInTab st)} |
327 | 386 | return d |
337 | 396 | newst i |
338 | 397 | = (midst i){getGalleryNumbers = |
339 | 398 | (getGalleryNumbers (midst i)) ++ (map toInteger (gins i))} |
340 | nodeToHtml (Environment Tag (TagAttr "span" _) l) = walk "" l "" | |
341 | nodeToHtml (Environment Tag (TagAttr x m) l) = walk ("<"++x++" "++(writedict (Map.toList m))++">") l ("</"++x++">") | |
399 | nodeToHtml (Environment Tag (TagAttr "span" _) l) = walk "" l "" | |
400 | nodeToHtml (Environment Tag (TagAttr x m) l) | |
401 | = walk ("<" ++ x ++ " " ++ (writedict (Map.toList m)) ++ ">") l | |
402 | ("</" ++ x ++ ">") | |
342 | 403 | nodeToHtml (Environment _ _ l) = walk "" l "" |
343 | 404 | nodeToHtml _ = return [] |
344 |
7 | 7 | import Control.Concurrent.MVar |
8 | 8 | import Data.List |
9 | 9 | import Network.URL |
10 | ||
11 | {-DHUN| A type to for errors that might be thrown during the imperative calculation DHUN-} | |
12 | ||
10 | 13 | data MyError = DownloadError String String |
11 | 14 | | OtherError String |
12 | 15 | | WikiUrlParseError String |
18 | 21 | | ToManyOptionsError |
19 | 22 | | ToManyOutputOptionsError |
20 | 23 | | PaperError |
21 | ||
22 | type MyErrorMonad = Either MyError | |
24 | ||
25 | {-DHUN| A monad for dealing with errors DHUN-} | |
26 | ||
27 | type MyErrorMonad = Either MyError | |
28 | ||
29 | {-DHUN| printable error messages DHUN-} | |
23 | 30 | |
24 | 31 | instance Show MyError where |
25 | 32 | show (DownloadError theLemma theUrl) |
30 | 37 | show NotImplementedError |
31 | 38 | = "Error: The requested feature is not implemented yet" |
32 | 39 | show (NotIntegerPairError msg) |
33 | = "Error: The option --" ++ msg ++ "could not be parsed to a pair of integers (like -f 23:42)" | |
40 | = "Error: The option --" ++ | |
41 | msg ++ "could not be parsed to a pair of integers (like -f 23:42)" | |
34 | 42 | show PaperError |
35 | 43 | = "Error: The option paper may only be one of A4,A5,B5,letter,legal,executive" |
36 | 44 | show ToManyOptionsError |
47 | 55 | = "Error: The option --" ++ |
48 | 56 | msg ++ " could not be parsed as an integer." |
49 | 57 | show (OtherError msg) = msg |
50 | ||
58 | ||
59 | ||
60 | {-DHUN| A type to capture a contributor form the list of contributors needed for license reasons. The element name is username of the author, the element edits is the number of edits done by the user, href is a link to the users homepage on the wiki DHUN-} | |
61 | ||
51 | 62 | data Contributor = Contributor{name :: String, edits :: Integer, |
52 | 63 | href :: String} |
53 | 64 | deriving (Eq, Ord, Show, Read) |
54 | ||
65 | ||
66 | {-DHUN| The sum of two contribors is defined as the new contribor with the same name and homepage and the edits summed up. Of course this makes only sense when summing up edits for the same contributor, which is not checked but has to be ensured by hand DHUN-} | |
67 | ||
55 | 68 | myplus :: Contributor -> Contributor -> Contributor |
56 | 69 | myplus x y = x{edits = (edits x) + (edits y)} |
57 | ||
70 | ||
71 | {-DHUN| Build a map of contributors summing up the edits per contributor. The map takes the name of the contributor as key and the contributor records given above as value. The first parameter is a list of such maps the results is also such a map representing the sum of the maps in the list DHUN-} | |
72 | ||
58 | 73 | contribsum :: [Map String Contributor] -> Map String Contributor |
59 | contribsum x = Data.List.foldl (unionWith myplus) Data.Map.Strict.empty x | |
60 | ||
74 | contribsum x | |
75 | = Data.List.foldl (unionWith myplus) Data.Map.Strict.empty x | |
76 | ||
77 | {-DHUN| a defaults version of the record Imperative State DHUN-} | |
78 | ||
79 | ||
61 | 80 | imperativeStateZero :: IO ImperativeState |
62 | 81 | imperativeStateZero |
63 | = do v<-newMVar (0::Int) | |
64 | return ImperativeState{audict = [], fullUrl = fullWikiUrlZero, | |
65 | tmpPath = "" ,counter=v, loadacu=[]} | |
66 | ||
82 | = do v <- newMVar (0 :: Int) | |
83 | return | |
84 | ImperativeState{audict = [], fullUrl = fullWikiUrlZero, | |
85 | tmpPath = "", counter = v, loadacu = []} | |
86 | ||
67 | 87 | data ImperativeState = ImperativeState{audict :: |
68 | 88 | [(Map String Contributor)], |
69 | fullUrl :: FullWikiUrl, tmpPath :: String, counter::MVar Int, loadacu::[Anything Char]} | |
89 | fullUrl :: FullWikiUrl, tmpPath :: String, | |
90 | counter :: MVar Int, loadacu :: [Anything Char]} | |
70 | 91 | |
92 | data ImageInfo = ImageInfo{wikiFilename :: String, | |
93 | imageNumber :: Integer, contributorUrls :: [String], | |
94 | descriptionUrl :: URL} | |
95 | deriving (Show, Read) | |
71 | 96 | |
72 | data ImageInfo = ImageInfo{wikiFilename :: String,imageNumber::Integer,contributorUrls::[String],descriptionUrl::URL} | |
73 | deriving (Show,Read) | |
74 | ||
75 | 97 | type ImperativeMonad = ExceptT MyError (StateT ImperativeState IO) |
76 | ||
98 | ||
77 | 99 | data RunMode = HTML |
78 | 100 | | ExpandedTemplates |
79 | 101 | | StandardTemplates |
80 | 102 | | Book |
81 | 103 | | UserTemplateFile String |
82 | 104 | deriving (Show, Read, Eq) |
83 | ||
105 | ||
84 | 106 | data SourceMode = Included |
85 | 107 | | Excluded |
86 | 108 | deriving (Show, Read) |
87 | ||
88 | data OutputType = PlainPDF | ZipArchive | EPubFile | OdtFile | |
89 | deriving (Show, Read,Eq) | |
109 | ||
110 | data OutputType = PlainPDF | |
111 | | ZipArchive | |
112 | | EPubFile | |
113 | | OdtFile | |
114 | deriving (Show, Read, Eq) | |
115 | ||
90 | 116 | data FullConfig = FullConfig{headers :: Maybe String, |
91 | 117 | resolution :: Integer, outputFilename :: String, |
92 | 118 | inputUrl :: String, runMode :: RunMode, paper :: String, |
93 | 119 | vector :: Bool, copy :: Maybe String, mainPath :: String, |
94 | server :: Maybe Int, outputType::OutputType, selfTest ::Maybe (Integer,Integer), compile:: Maybe String, imgctrb:: Maybe String} | |
120 | server :: Maybe Int, outputType :: OutputType, | |
121 | selfTest :: Maybe (Integer, Integer), compile :: Maybe String, | |
122 | imgctrb :: Maybe String} | |
95 | 123 | deriving (Show, Read) |
96 | 124 | |
97 | fullconfigbase::FullConfig | |
98 | fullconfigbase= FullConfig{headers =Nothing, | |
99 | resolution =0, outputFilename ="", | |
100 | inputUrl ="", runMode =HTML, paper ="A4", | |
101 | vector = False, copy = Nothing, mainPath = "", | |
102 | server = Nothing, outputType=PlainPDF, selfTest =Nothing, compile= Nothing,imgctrb=Nothing} | |
125 | fullconfigbase :: FullConfig | |
126 | fullconfigbase | |
127 | = FullConfig{headers = Nothing, resolution = 0, | |
128 | outputFilename = "", inputUrl = "", runMode = HTML, paper = "A4", | |
129 | vector = False, copy = Nothing, mainPath = "", server = Nothing, | |
130 | outputType = PlainPDF, selfTest = Nothing, compile = Nothing, | |
131 | imgctrb = Nothing} |
4 | 4 | getGalleryNumbers, getTitle, initialState, getJ, urld, |
5 | 5 | analyseNetloc, templateMap, getUserTemplateMap, urls, mUrlState, |
6 | 6 | initialUrlState, makeLables, templateRegistry, baseUrl, |
7 | deepFlatten, wikiLinkCaption, imageSize, isCaption, linewidth, | |
8 | generateGalleryImageNumbers , splitToTuples, galleryTableScale, tempProcAdapter) | |
7 | deepFlatten, wikiLinkCaption, imageSize, isCaption, linewidth, | |
8 | generateGalleryImageNumbers, splitToTuples, galleryTableScale, | |
9 | tempProcAdapter) | |
9 | 10 | where |
10 | 11 | import Data.String.HT (trim) |
11 | 12 | import MyState |
32 | 33 | import Data.Maybe |
33 | 34 | import Data.Tuple (swap) |
34 | 35 | import MediaWikiParser hiding (prep) |
35 | ||
36 | ||
36 | 37 | {-DHUN| the maximum width of lines for preformat and source code DHUN-} |
37 | ||
38 | ||
38 | 39 | linewidth :: Int |
39 | 40 | linewidth = 80 |
40 | ||
41 | ||
41 | 42 | {-DHUN| The user can provide her own translation table for mediawiki templates to latex commands. this is done in the templates.user files. This function takes this file in list representation and converts it to the map representation to be able to look up the names of templates DHUN-} |
42 | ||
43 | ||
43 | 44 | getUserTemplateMap :: [[String]] -> Map String [String] |
44 | 45 | getUserTemplateMap input |
45 | 46 | = Map.fromList (map (\ (x : xs) -> (x, xs)) input) |
46 | ||
47 | ||
47 | 48 | {-DHUN| table may omit tailing columns in a row, but in latex they need to be written out, this function does so DHUN-} |
48 | ||
49 | ||
49 | 50 | rowaddsym :: TableState -> [Char] |
50 | 51 | rowaddsym st |
51 | 52 | = if (currentColumn st) < ((numberOfColumnsInTable st) + 1) then |
55 | 56 | replicate (((numberOfColumnsInTable st) + 1) - (currentColumn st)) |
56 | 57 | '&') |
57 | 58 | else [] |
58 | ||
59 | ||
59 | 60 | {-DHUN| This function renders the inner parts of a table to latex it does so by calling tableContentToLaTeX and additionally removes superfluous newlines which might cause compilation problems in latex when used inside tables DHUN-} |
60 | ||
61 | ||
61 | 62 | tableContentToLaTeX2 :: |
62 | 63 | [Anything Char] -> (StateT TableState (State MyState) String) |
63 | 64 | tableContentToLaTeX2 l |
64 | 65 | = do r <- tableContentToLaTeX l |
65 | 66 | return (killnl2 r) |
66 | ||
67 | ||
67 | 68 | varwidthbegin :: TableState -> [Char] |
68 | varwidthbegin st = if isJust (activeColumn st) then "\\begin{varwidth}{\\linewidth}" else "" | |
69 | varwidthbegin st | |
70 | = if isJust (activeColumn st) then "\\begin{varwidth}{\\linewidth}" | |
71 | else "" | |
69 | 72 | |
70 | 73 | varwidthend :: TableState -> [Char] |
71 | varwidthend st = if isJust (activeColumn st) then "\\end{varwidth}" else "" | |
74 | varwidthend st | |
75 | = if isJust (activeColumn st) then "\\end{varwidth}" else "" | |
72 | 76 | |
73 | 77 | {-DHUN| This function renders the inner parts of a table to latex, please always use tableContentToLaTeX2 since this also removes superfluous newlines DHUN-} |
74 | ||
78 | ||
75 | 79 | tableContentToLaTeX :: |
76 | 80 | [Anything Char] -> (StateT TableState (State MyState) String) |
77 | 81 | tableContentToLaTeX ((Environment TableRowSep _ _) : []) |
78 | 82 | = do st <- get |
79 | 83 | let cc = (currentColumn st) |
80 | 84 | let c = cc + (multiRowCount cc (multiRowMap st)) |
81 | return $ (varwidthend st)++ | |
82 | (headendsym (lastCellWasHeaderCell st)) ++ | |
83 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
84 | (rowaddsym st{currentColumn = c}) | |
85 | return $ | |
86 | (varwidthend st) ++ | |
87 | (headendsym (lastCellWasHeaderCell st)) ++ | |
88 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
89 | (rowaddsym st{currentColumn = c}) | |
85 | 90 | tableContentToLaTeX ((Environment TableRowSep _ l) : xs) |
86 | 91 | = do sst <- lift get |
87 | 92 | st <- get |
112 | 117 | if stillInTableHeader st then not mycond else False} |
113 | 118 | xx <- tableContentToLaTeX xs |
114 | 119 | return $ |
115 | if (not (isFirstRow st)) then (varwidthend st) ++ | |
116 | (headendsym (lastCellWasHeaderCell st)) ++ | |
117 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
118 | (multiRowEndSymbol (lastCellWasMultiRow st)) ++ | |
119 | (multiRowSymbolForRowSep (currentColumn st) (multiRowMap st) | |
120 | (seperatingLinesRequestedForTable st)) | |
121 | ++ | |
122 | (rowaddsym st{currentColumn = c}) ++ | |
123 | (rowendsymb ((getInTab sst) <= 1) | |
124 | ((rowCounter st) == (inputLastRowOfHeader st) - 2)) | |
125 | ++ | |
126 | (innerHorizontalLine (seperatingLinesRequestedForTable st) | |
127 | (multiRowMap st3) | |
128 | (numberOfColumnsInTable st)) | |
129 | ++ " \n" ++ (varwidthbegin st)++xx | |
120 | if (not (isFirstRow st)) then | |
121 | (varwidthend st) ++ | |
122 | (headendsym (lastCellWasHeaderCell st)) ++ | |
123 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
124 | (multiRowEndSymbol (lastCellWasMultiRow st)) ++ | |
125 | (multiRowSymbolForRowSep (currentColumn st) (multiRowMap st) | |
126 | (seperatingLinesRequestedForTable st)) | |
127 | ++ | |
128 | (rowaddsym st{currentColumn = c}) ++ | |
129 | (rowendsymb ((getInTab sst) <= 1) | |
130 | ((rowCounter st) == (inputLastRowOfHeader st) - 2)) | |
131 | ++ | |
132 | (innerHorizontalLine (seperatingLinesRequestedForTable st) | |
133 | (multiRowMap st3) | |
134 | (numberOfColumnsInTable st)) | |
135 | ++ " \n" ++ (varwidthbegin st) ++ xx | |
130 | 136 | else xx |
131 | 137 | tableContentToLaTeX ((Environment TableColSep _ l) : xs) |
132 | 138 | = do st <- get |
146 | 152 | (multiRowStartSymbol l (activeColumn st)) /= "", |
147 | 153 | isFirstRow = False, lastCellWasHeaderCell = False} |
148 | 154 | xx <- tableContentToLaTeX xxs |
149 | return $ (varwidthend st)++ | |
150 | (headendsym (lastCellWasHeaderCell st)) ++ | |
151 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
152 | (multiRowEndSymbol (lastCellWasMultiRow st)) ++ | |
153 | (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++ | |
154 | (multiRowSymbol (currentColumn st) (multiRowMap st) | |
155 | (seperatingLinesRequestedForTable st)) | |
156 | ++ | |
157 | (multiColumnStartSymbol l (columnsWidthList st) c | |
158 | (seperatingLinesRequestedForTable st) | |
159 | st) | |
155 | return $ | |
156 | (varwidthend st) ++ | |
157 | (headendsym (lastCellWasHeaderCell st)) ++ | |
158 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
159 | (multiRowEndSymbol (lastCellWasMultiRow st)) ++ | |
160 | (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++ | |
161 | (multiRowSymbol (currentColumn st) (multiRowMap st) | |
162 | (seperatingLinesRequestedForTable st)) | |
160 | 163 | ++ |
161 | (multiRowStartSymbol l (activeColumn st)) ++ | |
162 | (if rig then "\\RaggedLeft{}" else "") ++ | |
163 | (tablecolorsym l) ++ hypennothing ++(varwidthbegin st)++xx | |
164 | (multiColumnStartSymbol l (columnsWidthList st) c | |
165 | (seperatingLinesRequestedForTable st) | |
166 | st) | |
167 | ++ | |
168 | (multiRowStartSymbol l (activeColumn st)) ++ | |
169 | (if rig then "\\RaggedLeft{}" else "") ++ | |
170 | (tablecolorsym l) ++ hypennothing ++ (varwidthbegin st) ++ xx | |
164 | 171 | where rig |
165 | 172 | = isInfixOf2 |
166 | [Environment Attribute (Attr ("style", "text-align:right")) []] | |
173 | [Environment Attribute (Attr ("style", "text-align:right")) []] | |
167 | 174 | l |
168 | 175 | xxs |
169 | 176 | = if rig then (reverse . removesp . reverse . removesp) xs else xs |
188 | 195 | isFirstRow = False, lastCellWasHeaderCell = True, |
189 | 196 | currentRowIsHeaderRow = True} |
190 | 197 | xx <- tableContentToLaTeX xs |
191 | return $ (varwidthend st)++ | |
192 | (headendsym (lastCellWasHeaderCell st)) ++ | |
193 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
194 | (multiRowEndSymbol (lastCellWasMultiRow st)) ++ | |
195 | (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++ | |
196 | (multiRowSymbol (currentColumn st) (multiRowMap st) | |
197 | (seperatingLinesRequestedForTable st)) | |
198 | ++ | |
199 | (multiColumnStartSymbol l (columnsWidthList st) c | |
200 | (seperatingLinesRequestedForTable st) | |
201 | st) | |
198 | return $ | |
199 | (varwidthend st) ++ | |
200 | (headendsym (lastCellWasHeaderCell st)) ++ | |
201 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
202 | (multiRowEndSymbol (lastCellWasMultiRow st)) ++ | |
203 | (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++ | |
204 | (multiRowSymbol (currentColumn st) (multiRowMap st) | |
205 | (seperatingLinesRequestedForTable st)) | |
202 | 206 | ++ |
203 | (multiRowStartSymbol l (activeColumn st)) ++ | |
204 | headstartsym ++ (tablecolorsym l) ++ hypennothing ++ (varwidthbegin st)++ xx | |
207 | (multiColumnStartSymbol l (columnsWidthList st) c | |
208 | (seperatingLinesRequestedForTable st) | |
209 | st) | |
210 | ++ | |
211 | (multiRowStartSymbol l (activeColumn st)) ++ | |
212 | headstartsym ++ | |
213 | (tablecolorsym l) ++ hypennothing ++ (varwidthbegin st) ++ xx | |
205 | 214 | tableContentToLaTeX (x : xs) |
206 | 215 | = do st <- get |
207 | 216 | ele <- case (activeColumn st) of |
215 | 224 | = do st <- get |
216 | 225 | let cc = currentColumn st |
217 | 226 | let c = cc + (multiRowCount cc (multiRowMap st)) |
218 | return $ (varwidthend st)++ | |
219 | (headendsym (lastCellWasHeaderCell st)) ++ | |
220 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
221 | (multiRowSymbolForTableEnd (currentColumn st) (multiRowMap st) | |
222 | (seperatingLinesRequestedForTable st)) | |
223 | ++ (multiRowEndSymbol (lastCellWasMultiRow st)) ++ (rowaddsym st{currentColumn = (c + (columnMultiplicityForCounting []))}) | |
224 | ||
227 | return $ | |
228 | (varwidthend st) ++ | |
229 | (headendsym (lastCellWasHeaderCell st)) ++ | |
230 | (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ | |
231 | (multiRowSymbolForTableEnd (currentColumn st) (multiRowMap st) | |
232 | (seperatingLinesRequestedForTable st)) | |
233 | ++ | |
234 | (multiRowEndSymbol (lastCellWasMultiRow st)) ++ | |
235 | (rowaddsym | |
236 | st{currentColumn = (c + (columnMultiplicityForCounting []))}) | |
237 | ||
225 | 238 | {-DHUN| This string has to be added to each new cell in a latex table in order to allow for hyphenation of the first word in this cell DHUN-} |
226 | ||
239 | ||
227 | 240 | hypennothing :: [Char] |
228 | 241 | hypennothing = "\\hspace*{0pt}\\ignorespaces{}\\hspace*{0pt}" |
229 | ||
242 | ||
230 | 243 | {-DHUN| color cell in latex if HTML attribute bgcolor is present in the parse tree for the cell DHUN-} |
231 | ||
244 | ||
232 | 245 | tablecolorsym :: [Anything Char] -> [Char] |
233 | 246 | tablecolorsym ll |
234 | 247 | = case genLookup "bgcolor" ll of |
238 | 251 | "\\cellcolor{" ++ colname ++ "}" |
239 | 252 | _ -> "\\cellcolor{" ++ x ++ "}" |
240 | 253 | Nothing -> "" |
241 | ||
254 | ||
242 | 255 | {-DHUN| the caption of a table is given in |+ or <th> elements, it needs to be reformatted in the parse in oder to be rendered in latex as a multicolumn cell spanning the whole width of the table DHUN-} |
243 | ||
256 | ||
244 | 257 | reformatTableCaption :: |
245 | 258 | Int -> [Anything Char] -> MyState -> [Anything Char] |
246 | 259 | reformatTableCaption n |
266 | 279 | reformatTableCaption n (x : xs) st |
267 | 280 | = x : reformatTableCaption n xs st |
268 | 281 | reformatTableCaption _ [] _ = [] |
269 | ||
282 | ||
270 | 283 | {-DHUN| strips column separators out of a parse tree or part of which DHUN-} |
271 | ||
284 | ||
272 | 285 | stripColSep :: [Anything Char] -> [Anything Char] |
273 | 286 | stripColSep = filter go |
274 | 287 | where go (Environment TableHeadColSep _ _) = False |
275 | 288 | go (Environment TableColSep _ _) = False |
276 | 289 | go _ = True |
277 | ||
290 | ||
278 | 291 | {-DHUN| predicate to test if an element in the parse tree is a row separator DHUN-} |
279 | ||
292 | ||
280 | 293 | isRowSep :: Anything Char -> Bool |
281 | 294 | isRowSep (Environment TableRowSep _ _) = True |
282 | 295 | isRowSep _ = False |
283 | ||
296 | ||
284 | 297 | {-DHUN| strip empty rows out of the parse tree DHUN-} |
285 | ||
298 | ||
286 | 299 | stripempty :: [Anything Char] -> MyState -> [Anything Char] |
287 | 300 | stripempty [] _ = [] |
288 | 301 | stripempty ((Environment TableRowSep a b) : xs) s |
303 | 316 | pre = takeWhile (not . isRowSep) l |
304 | 317 | post = dropWhile (not . isRowSep) l |
305 | 318 | inside = (treeToLaTeX (stripColSep pre)) s |
306 | ||
319 | ||
307 | 320 | {-DHUN| In order to determine the maximum width of columns, each table is precompiled with latex several times, with only one column included each time. this function creates the list of the latex sources of these tables, for one table in the parse tree DHUN-} |
308 | ||
321 | ||
309 | 322 | maketablist :: |
310 | 323 | [Anything Char] -> TableState -> Int -> MyState -> [[Char]] |
311 | 324 | maketablist l tst nc mst = map tablo [1 .. (nc + 1)] |
312 | 325 | where tablo n |
313 | 326 | = "\\begin{tabular}{|" ++ |
314 | 327 | (replicate nc 'l') ++ |
315 | "|}" ++ "\\begin{varwidth}{\\linewidth}"++ | |
316 | (fst | |
317 | (fst | |
318 | (runState | |
319 | ((runStateT (tableContentToLaTeX2 l)) | |
320 | tst{inputLastRowOfHeader = -2, activeColumn = Just n}) | |
321 | mst))) | |
322 | ++ "\\end{tabular}" | |
323 | ||
328 | "|}" ++ | |
329 | "\\begin{varwidth}{\\linewidth}" ++ | |
330 | (fst | |
331 | (fst | |
332 | (runState | |
333 | ((runStateT (tableContentToLaTeX2 l)) | |
334 | tst{inputLastRowOfHeader = -2, activeColumn = Just n}) | |
335 | mst))) | |
336 | ++ "\\end{tabular}" | |
337 | ||
324 | 338 | {-DHUN| Takes a map from int to double finds the biggest double and removes the corresponding key value pair from the map. This way wide columns are set to smaller sizes in order to fit the whole table onto the page width DHUN-} |
325 | ||
339 | ||
326 | 340 | removehighest :: Map Int Double -> Map Int Double |
327 | 341 | removehighest m |
328 | 342 | | m /= Map.empty = Map.fromList (hlp (Map.toList m)) |
331 | 345 | hlp (x : xs) = x : (hlp xs) |
332 | 346 | hlp [] = [] |
333 | 347 | removehighest _ = Map.empty |
334 | ||
348 | ||
335 | 349 | {-DHUN| Returns a list of floats which represents the width of the columns of a table in units of the line width with the proper corrections for use in the a latex documents. If the boolean input parameter is true the table is understood to be written in landscape mode. It also take a map of Int to Double. This is the list of the maximum width of columns determined by previous runs of latex on the table with only one column included per run DHUN-} |
336 | ||
337 | wdth3 :: Bool -> Map Int Double -> ([Float],Double) | |
350 | ||
351 | wdth3 :: Bool -> Map Int Double -> ([Float], Double) | |
338 | 352 | wdth3 ls m |
339 | | m /= Map.empty = ( | |
340 | (map | |
341 | ((* (1.0 - (scalefactor (fromIntegral n)))) . | |
342 | double2Float . (/ (linew2 ls))) | |
343 | (Map.elems mm)),if d==1.0 then 1.0 else d*0.90) | |
353 | | m /= Map.empty = | |
354 | ((map | |
355 | ((* (1.0 - (scalefactor (fromIntegral n)))) . | |
356 | double2Float . (/ (linew2 ls))) | |
357 | (Map.elems mm)), | |
358 | if d == 1.0 then 1.0 else d * 0.9) | |
344 | 359 | where n = (maximum (Map.keys m)) |
345 | (mm,d) = wdth ls n m | |
346 | wdth3 _ _ = ([],1.0) | |
347 | ||
360 | (mm, d) = wdth ls n m | |
361 | wdth3 _ _ = ([], 1.0) | |
362 | ||
348 | 363 | {-DHUN| Returns a table header which represents the width of the columns of a table in units of the line width with the proper corrections for use in the a latex documents. If the first boolean input parameter is true the table is understood to be written in landscape mode. It also take a map of Int to Double. This is the list of the maximum width of columns determined by previous runs of latex on the table with only one column included per run. If second boolean parameter is true it is understood the the rule should be printed with the table, otherwise the table should be printed without rules DHUN-} |
349 | ||
364 | ||
350 | 365 | wdth2 :: Bool -> Map Int Double -> Bool -> String |
351 | 366 | wdth2 ls m b |
352 | 367 | | m /= Map.empty = |
356 | 371 | double2Float . (/ (linew2 ls))) |
357 | 372 | (Map.elems mm)) |
358 | 373 | where n = (maximum (Map.keys m)) |
359 | (mm,_) = wdth ls n m | |
374 | (mm, _) = wdth ls n m | |
360 | 375 | wdth2 _ _ _ = [] |
361 | ||
376 | ||
362 | 377 | {-DHUN| takes the list of maximum column widths created by previous runs of the latex compiler with only one columns included per run as map from Int to Double. Take the total number of columns of the table as Int. The table is understood to be printed in landscape mode if the boolean parameter is true. It returns a map from int to double representing the width of columns of the table to be used in the latex documents. So it takes raw widths. Which are just the width of the column if the width of the paper was infinite and return the width that fit on the finite width of the real paper DHUN-} |
363 | ||
364 | wdth :: Bool -> Int -> Map Int Double -> (Map Int Double,Double) | |
378 | ||
379 | wdth :: Bool -> Int -> Map Int Double -> (Map Int Double, Double) | |
365 | 380 | wdth ls n mm |
366 | 381 | = case |
367 | 382 | (Control.Monad.msum |
373 | 388 | mm)) |
374 | 389 | [0 .. (length (Map.keys mm))]))) |
375 | 390 | of |
376 | Just (x,Nothing) -> if (sum (Map.elems x)) < (linew2 ls) then | |
377 | (Map.map (\ y -> y * ((linew2 ls) / (sum (Map.elems x)))) x,1.0) else (x,1.0) | |
378 | Just (x,Just ddd) -> (Map.map (\ y -> y * ((linew2 ls) / (sum (Map.elems x)))) x, ddd) | |
379 | Nothing -> (myfill ((linew2 ls) / (fromIntegral n)) Map.empty,1.0) | |
380 | where | |
381 | hlp :: (Map Int Double, Int) -> Maybe ((Map Int Double),Maybe Double) | |
382 | hlp (m, i) | ((sum (Map.elems m)) :: Double)+((linew2 ls) / (fromIntegral n)) * (fromIntegral i)<(linew2 ls) = Just (myfill (((linew2 ls) - (sum (Map.elems m))) / ((fromIntegral i) :: Double)) m,Nothing) | |
383 | ||
391 | Just (x, Nothing) -> if (sum (Map.elems x)) < (linew2 ls) then | |
392 | (Map.map (\ y -> y * ((linew2 ls) / (sum (Map.elems x)))) x, 1.0) | |
393 | else (x, 1.0) | |
394 | Just (x, Just ddd) -> (Map.map | |
395 | (\ y -> y * ((linew2 ls) / (sum (Map.elems x)))) | |
396 | x, | |
397 | ddd) | |
398 | Nothing -> (myfill ((linew2 ls) / (fromIntegral n)) Map.empty, 1.0) | |
399 | where hlp :: | |
400 | (Map Int Double, Int) -> Maybe ((Map Int Double), Maybe Double) | |
401 | hlp (m, i) | |
402 | | ((sum (Map.elems m)) :: Double) + | |
403 | ((linew2 ls) / (fromIntegral n)) * (fromIntegral i) | |
404 | < (linew2 ls) | |
405 | = | |
406 | Just | |
407 | (myfill | |
408 | (((linew2 ls) - (sum (Map.elems m))) / | |
409 | ((fromIntegral i) :: Double)) | |
410 | m, | |
411 | Nothing) | |
384 | 412 | hlp (_, i) |
385 | | ((i == 3)||(i==n) ) = let (mmm,dd)=(wdth ls n (Map.map ((\x->x*0.95)) mm)) in Just (mmm,Just ((0.965)*dd)) | |
413 | | ((i == 3) || (i == n)) = | |
414 | let (mmm, dd) = (wdth ls n (Map.map ((\ x -> x * 0.95)) mm)) in | |
415 | Just (mmm, Just ((0.965) * dd)) | |
386 | 416 | hlp (m, i) |
387 | | ((sum (Map.elems m)) :: Double)+((linew2 ls) / (fromIntegral n)) * (fromIntegral i)>=(linew2 ls) = Nothing | |
388 | hlp _ = Just (myfill ((linew2 ls) / (fromIntegral n)) Map.empty, Nothing) | |
389 | ||
417 | | ((sum (Map.elems m)) :: Double) + | |
418 | ((linew2 ls) / (fromIntegral n)) * (fromIntegral i) | |
419 | >= (linew2 ls) | |
420 | = Nothing | |
421 | hlp _ | |
422 | = Just (myfill ((linew2 ls) / (fromIntegral n)) Map.empty, Nothing) | |
423 | ||
390 | 424 | myfill :: Double -> Map Int Double -> Map Int Double |
391 | 425 | myfill x m = Map.union m (Map.fromList (zip [1 .. n] (repeat x))) |
392 | ||
426 | ||
393 | 427 | {-DHUN| In landscape mode everything has to be multiplied by a factor of two. If the boolean parameter is true it is understood that the table should be printed in landscape mode. This function return the width of the line in latex using the units of latex DHUN-} |
394 | ||
428 | ||
395 | 429 | linew2 :: Bool -> Double |
396 | 430 | linew2 ls = if ls then linew * 1.414 else linew |
397 | ||
431 | ||
398 | 432 | {-DHUN| The width of the line in A4 paper with DIV margin factor of 13 in latex own units DHUN-} |
399 | ||
433 | ||
400 | 434 | linew :: Double |
401 | 435 | linew = 455.45742 |
402 | ||
436 | ||
403 | 437 | hasTable :: [Anything a] -> Bool |
404 | 438 | hasTable ll = or $ map go ll |
405 | where go::Anything a->Bool | |
406 | go (Environment Wikitable _ _) =True | |
439 | where go :: Anything a -> Bool | |
440 | go (Environment Wikitable _ _) = True | |
407 | 441 | go (Environment _ _ l) = (hasTable l) |
408 | 442 | go _ = False |
409 | 443 | |
410 | ||
411 | 444 | {-DHUN| convert a table form the parse tree to latex. The [Anything Char] parameter it the contend of the table represented as a parse tree. The String parameter contains the HTML attributes of the table element, or in wiki notation the HTML parameters of the line beginning with {| . This is evaluated in order to find out whether rules should be printed in the table. The return type is Renderer String. Which means that it returns a string but also take a state as additional monadic input parameter and returns a possible changed version of it as additional return parameter monadically DHUN-} |
412 | ||
445 | ||
413 | 446 | tableToLaTeX :: [Anything Char] -> String -> Renderer String |
414 | 447 | tableToLaTeX l1 s |
415 | 448 | = do st <- get |
435 | 468 | Just t -> wdth2 lsc t sep |
436 | 469 | sep = seperatingLinesRequested s |
437 | 470 | hline = horizontalLine sep |
438 | (widths,fontscalefactor) | |
471 | (widths, fontscalefactor) | |
439 | 472 | = case Map.lookup tbno (tabmap st) of |
440 | Nothing -> (columnWidths l,1.0) | |
473 | Nothing -> (columnWidths l, 1.0) | |
441 | 474 | Just t -> wdth3 lsc t |
442 | 475 | env = tableEnvironment (getF st) |
443 | scriptsize = (isInfixOf2 "latexfontsize=\"scriptsize\"" s)||((numberOfColumns l) > 5) | |
476 | scriptsize | |
477 | = (isInfixOf2 "latexfontsize=\"scriptsize\"" s) || | |
478 | ((numberOfColumns l) > 5) | |
444 | 479 | sb = if scriptsize then "{\\scriptsize{}" else "" |
445 | 480 | se = if scriptsize then "}" else "" |
446 | lsc = (env == "longtable") && (((numberOfColumns l) > 100)|| (hasTable l1)) | |
481 | lsc | |
482 | = (env == "longtable") && | |
483 | (((numberOfColumns l) > 100) || (hasTable l1)) | |
447 | 484 | lsb = if lsc then "\\begin{landscape}\n" else "" |
448 | 485 | lse = if lsc then "\n\\end{landscape}" else "" |
449 | 486 | tbno = (length (tablist st)) + 1 |
464 | 501 | lsb ++ |
465 | 502 | sb ++ |
466 | 503 | (if (env /= "tabular") then "\n" else "\\scalebox{0.85}{") ++ |
467 | (if fontscalefactor==1.0 then "" else "{\\scalefont{"++(printf "%0.5f" fontscalefactor)++"}")++"\\begin{" ++ | |
468 | env ++ | |
469 | "}{" ++ | |
470 | spec ++ | |
471 | "}" ++ | |
472 | hline ++ | |
473 | " \n" ++ | |
474 | t1 ++ | |
475 | (rowDelimiter sep) ++ " \n\\end{" ++ env ++ "}\n" ++ (if fontscalefactor==1.0 then "" else "}")++ (if (env /= "tabular") then "" else "}")++se ++ lse | |
504 | (if fontscalefactor == 1.0 then "" else | |
505 | "{\\scalefont{" ++ (printf "%0.5f" fontscalefactor) ++ "}") | |
506 | ++ | |
507 | "\\begin{" ++ | |
508 | env ++ | |
509 | "}{" ++ | |
510 | spec ++ | |
511 | "}" ++ | |
512 | hline ++ | |
513 | " \n" ++ | |
514 | t1 ++ | |
515 | (rowDelimiter sep) ++ | |
516 | " \n\\end{" ++ | |
517 | env ++ | |
518 | "}\n" ++ | |
519 | (if fontscalefactor == 1.0 then "" else "}") ++ | |
520 | (if (env /= "tabular") then "" else "}") ++ | |
521 | se ++ lse | |
476 | 522 | return r |
477 | ||
523 | ||
478 | 524 | {-DHUN| Converts an image from the parse tree to latex. The actual images is only referenced in the wiki source, as well as the parse tree, as well as the latex source. It takes a parse tree representation of the image as only input parameter. The return type is Renderer String. Which means that it returns a string but also take a state as additional monadic input parameter and returns a possible changed version of it as additional return parameter monadically DHUN-} |
479 | ||
525 | ||
480 | 526 | wikiImageToLaTeX :: [Anything Char] -> Renderer String |
481 | 527 | wikiImageToLaTeX l |
482 | 528 | = do st <- get |
543 | 589 | addit st |
544 | 590 | = if (getInTab st) > 0 then "" else |
545 | 591 | (if not (micro st) then "\\vspace{0.75cm}" else "") |
546 | ||
592 | ||
547 | 593 | {-DHUN| Returns the caption of a wikilink. Takes a parse tree representation of the wikilink and the current state of the renderer. Return the caption in LaTeX representation as string. A Wikilink is represented as [[FooBar]] in Wiki notation. DHUN-} |
548 | ||
594 | ||
549 | 595 | wikiLinkCaption :: [Anything Char] -> MyState -> String |
550 | 596 | wikiLinkCaption l st = if isCaption x then rebuild x else "" |
551 | 597 | where x = (treeToLaTeX (last (splitOn [C '|'] l)) st) |
552 | 598 | rebuild (':' : xs) = xs |
553 | 599 | rebuild b = b |
554 | ||
600 | ||
555 | 601 | {-DHUN| Returns the LaTeX representation of a wikilink. Takes a parse tree representation of the wikilink and the current state of the render. A Wikilink is represented as [[FooBar]] in Wiki notation. DHUN-} |
556 | ||
602 | ||
557 | 603 | wikiLinkToLaTeX :: [Anything Char] -> MyState -> String |
558 | 604 | wikiLinkToLaTeX l st |
559 | 605 | = case |
596 | 642 | killnl ('\n' : ('\n' : xs)) = killnl ('\n' : xs) |
597 | 643 | killnl (x : xs) = x : (killnl xs) |
598 | 644 | killnl [] = [] |
599 | ||
645 | ||
600 | 646 | {-DHUN| If repeated newlines appear in a string directly after each other. Each series of newlines is reduced to exactly one newline DHUN-} |
601 | ||
647 | ||
602 | 648 | killnl2 :: String -> String |
603 | 649 | killnl2 ('\n' : ('\n' : xs)) = killnl2 ('\n' : xs) |
604 | 650 | killnl2 ('\n' : xs) |
607 | 653 | post = (dropWhile (/= '\n') xs) |
608 | 654 | killnl2 (x : xs) = x : (killnl2 xs) |
609 | 655 | killnl2 [] = [] |
610 | ||
656 | ||
611 | 657 | {-DHUN| returns the caption of a link. A link is represented as [foobar.com mycaption] in wiki notation. It takes the parse tree representation of the link as first input parameter. The second input parameter is the current state of the renderer. The third parameter is the Uri scheme as string (See 'URI scheme' in the English wikipeda) usually this is 'http'. It returns the latex representation of the caption of the link as string DHUN-} |
612 | ||
658 | ||
613 | 659 | linkCaption :: |
614 | 660 | [Anything Char] -> MyState -> String -> Bool -> String |
615 | 661 | linkCaption l st s b |
616 | 662 | = case spl of |
617 | 663 | (_ : (gg : gs)) -> (treeToLaTeX |
618 | (concat (gg:(map (\ x -> (C ' ') : x) gs))) | |
664 | (concat (gg : (map (\ x -> (C ' ') : x) gs))) | |
619 | 665 | st) |
620 | 666 | _ -> if b then "" else s ++ (escapelink (linkLocation l)) |
621 | 667 | where spl = splitOn [C ' '] l |
622 | ||
668 | ||
623 | 669 | {-DHUN| returns the latex representation of a link. A link is represented as [foobar.com mycaption] in wiki notation. It takes the parse tree representation of the link as first input parameter. The second input parameter is the current state of the renderer. The third parameter is the Uri scheme as string (See 'URI scheme' in the English wikipeda) usually this is 'http'. It returns the latex representation of the link as string DHUN-} |
624 | ||
670 | ||
625 | 671 | linkToLaTeX :: [Anything Char] -> MyState -> String -> String |
626 | 672 | linkToLaTeX l st s |
627 | 673 | = if |
634 | 680 | where addit = if b then "fn" else "" |
635 | 681 | b = getInFootnote st |
636 | 682 | cap = (linkCaption l st s b) |
637 | ||
683 | ||
638 | 684 | {-DHUN| takes a list and splits it into sublist of equal length, allowing a possible smaller length for the last list in case the devision does not create an integer result. DHUN-} |
639 | ||
685 | ||
640 | 686 | splitToTuples :: [a] -> [[a]] |
641 | 687 | splitToTuples x |
642 | 688 | = map (take galleryNumberOfColumns) . |
643 | 689 | takeWhile (not . null) . iterate (drop galleryNumberOfColumns) |
644 | 690 | $ x |
645 | ||
691 | ||
646 | 692 | {-DHUN| the number of column to be used in latex documents for mediawikis gallery (image gallery) (gallery tags) DHUN-} |
647 | ||
693 | ||
648 | 694 | galleryNumberOfColumns :: Int |
649 | 695 | galleryNumberOfColumns = 1 |
650 | ||
696 | ||
651 | 697 | {-DHUN| the width of a column for the table of the latex version of mediawikis gallery (image gallery, gallery tags) DHUN-} |
652 | ||
698 | ||
653 | 699 | galleryTableScale :: Float |
654 | 700 | galleryTableScale |
655 | 701 | = (1.0 / (fromIntegral galleryNumberOfColumns)) - (scalefactor 1.0) |
656 | ||
702 | ||
657 | 703 | {-DHUN| the latex string for a single column in table header in the latex version of mediawikis gallery (image gallery, gallery tag) DHUN-} |
658 | ||
704 | ||
659 | 705 | galleryTableSpecifier :: String |
660 | 706 | galleryTableSpecifier |
661 | 707 | = concat $ |
662 | 708 | replicate galleryNumberOfColumns |
663 | 709 | ">{\\RaggedRight}p{0.5\\linewidth}" |
664 | ||
710 | ||
665 | 711 | {-DHUN| converts the inner parts gallery (image gallery, gallery tag) from parse tree notation to latex, does not write the latex table header and footer. This is only a helper function. Always use galleryToLatex if you want to convert a gallery to latex DHUN-} |
666 | ||
712 | ||
667 | 713 | galleryContentToLatex :: [[Anything Char]] -> Renderer String |
668 | 714 | galleryContentToLatex (x : xs) |
669 | 715 | = do s <- galleryRowToLaTex x |
670 | 716 | ss <- galleryContentToLatex xs |
671 | 717 | return $ s ++ "\\\\ \n" ++ ss |
672 | 718 | galleryContentToLatex [] = return [] |
673 | ||
719 | ||
674 | 720 | {-DHUN| converts a part of a gallery (image gallery, gallery tag) from parse tree to latex. A part are as many elements as fit into a single row in the resulting latex table DHUN-} |
675 | ||
721 | ||
676 | 722 | galleryRowToLaTex :: [Anything Char] -> Renderer String |
677 | 723 | galleryRowToLaTex [] = return [] |
678 | 724 | galleryRowToLaTex (x : []) = treeToLaTeX2 [x] |
680 | 726 | = do s <- treeToLaTeX2 [x] |
681 | 727 | g <- galleryRowToLaTex xs |
682 | 728 | return $ s ++ "&" ++ g |
683 | ||
729 | ||
684 | 730 | {-DHUN| Converts are gallery (image gallery, gallery tag) from parse tree to latex. Also writes table header and footer. This is the function you should use for converting galleries to latex DHUN-} |
685 | ||
731 | ||
686 | 732 | galleryToLatex :: [Anything Char] -> Renderer String |
687 | 733 | galleryToLatex x |
688 | 734 | = do st <- get |
695 | 741 | return |
696 | 742 | ("\\begin{longtable}{" ++ |
697 | 743 | galleryTableSpecifier ++ "} \n" ++ s ++ "\\end{longtable}") |
698 | ||
744 | ||
699 | 745 | {-DHUN| A function to drop all unnecessary elements of an HTML image map, so that it can be converted to latex by calling treeToLaTeX2 DHUN-} |
700 | ||
746 | ||
701 | 747 | imageMapClean :: [Anything Char] -> [Anything Char] |
702 | 748 | imageMapClean ((Environment Wikilink s l) : xs) |
703 | 749 | = (Environment Wikilink s l) : imageMapClean xs |
704 | 750 | imageMapClean (_ : xs) = imageMapClean xs |
705 | 751 | imageMapClean [] = [] |
706 | ||
752 | ||
707 | 753 | {-DHUN| Takes the parse tree representation of an image, and returns the its size in unit of the width of a line in latex. Images 400px or wider in wiki notation are understood to use the full width of the line. Smaller one are considered fractionally. That means 100px means 0.25 the width of the line and 200px means 0.5 width of the line DHUN-} |
708 | ||
754 | ||
709 | 755 | imageSize :: [Anything Char] -> Float |
710 | 756 | imageSize l = if [] == x then 1.0 else (minimum x) |
711 | 757 | where x = map readImageSize (imageSizeStrings (shallowFlatten l)) |
712 | ||
758 | ||
713 | 759 | {-DHUN| takes a candidate string for the width of an image in the wikis px notation. Returns 1.0 if the candidate could not be parsed, returns the width of the image in units of the width of a line in latex otherwise. Images 400px or wider in wiki notation are understood to use the full width of the line. Smaller one are considered fractionally. That means 100px means 0.25 the width of the line and 200px means 0.5 width of the line DHUN-} |
714 | ||
760 | ||
715 | 761 | readImageSize :: String -> Float |
716 | 762 | readImageSize y |
717 | 763 | = case (reads x) of |
720 | 766 | where x = removex y |
721 | 767 | removex ('x' : zs) = zs |
722 | 768 | removex z = z |
723 | ||
769 | ||
724 | 770 | {-DHUN| takes a flattend version of a parse tree represendtation of an image and retruns a list of substrings which are candidates for representing the width of the image in the wikis px notation DHUN-} |
725 | ||
771 | ||
726 | 772 | imageSizeStrings :: String -> [String] |
727 | 773 | imageSizeStrings s |
728 | 774 | = [take (length (x) - 2) (x) | |
729 | 775 | x <- ((splitOn ['|'] s) :: [String]), isSuffixOf "px" x] |
730 | ||
776 | ||
731 | 777 | {-DHUN| converts a mathematical fomula from the wiki to latex notation DHUN-} |
732 | ||
778 | ||
733 | 779 | mathToLatex :: [Anything Char] -> String |
734 | 780 | mathToLatex l |
735 | 781 | = if isInfixOf2 "\\begin{alignat}" (shallowFlatten l) then |
736 | 782 | mathTransform l else "{$" ++ (mathTransform l) ++ "$}" |
737 | ||
783 | ||
738 | 784 | {-DHUN| a predicate that returns true if and only if the input is a parse tree that contains only spaces but no other structures DHUN-} |
739 | ||
785 | ||
740 | 786 | onlySpaces :: [Anything Char] -> Bool |
741 | 787 | onlySpaces ((C ' ') : xs) = onlySpaces xs |
742 | 788 | onlySpaces [] = True |
743 | 789 | onlySpaces _ = False |
744 | ||
790 | ||
745 | 791 | {-DHUN| in the wiki notation pipe (|)inside temples are escaped as {!} and double pipes as {!!}}. this function undoes this escaping in a parse tree DHUN-} |
746 | ||
792 | ||
747 | 793 | prepateParameter :: [Anything Char] -> [Anything Char] |
748 | 794 | prepateParameter ((Environment Template _ [C '!']) : xs) |
749 | 795 | = (C '|') : prepateParameter xs |
751 | 797 | = (C '|') : (C '|') : prepateParameter xs |
752 | 798 | prepateParameter (x : xs) = x : prepateParameter xs |
753 | 799 | prepateParameter [] = [] |
754 | ||
800 | ||
755 | 801 | {-DHUN| this function prepares a template as parse by the parser in the parse tree notation into a other notation that can be further processes by the latex renderer and the function templateToLatex in particular. The first input parameter is the parse tree notation of the template. The second is the name of the template as string. It returns a tuple, the first element of this tuple is the name of the template (stripped of heading an tailing white space) and the second element of the tuple is a map from strings to parse trees. The strings are the names of the parameters of the template. These might be just numbers represented as string but also any other strings DHUN-} |
756 | ||
802 | ||
757 | 803 | prepateTemplate :: |
758 | 804 | [Anything Char] -> String -> (String, Map String [Anything Char]) |
759 | 805 | prepateTemplate ll x = (trim x, enum ll 1 (Map.fromList [])) |
760 | where | |
761 | enum :: | |
806 | where enum :: | |
762 | 807 | [Anything Char] -> |
763 | 808 | Integer -> Map String [Anything Char] -> Map String [Anything Char] |
764 | 809 | enum ((Environment TemplateInside (Str "") l) : zs) i d |
767 | 812 | = enum zs i (Map.insert (trim z) (prepateParameter l) d) |
768 | 813 | enum [] _ d = d |
769 | 814 | enum (_ : zs) i d = enum zs i d |
770 | ||
815 | ||
771 | 816 | {-DHUN| converts a template from the wiki to latex. The first parameter is the parse tree representation of the template as generated by the parse the second is the name of the template. It returns a Renderer String. That is it returns the latex representation of the template, but also takes a state as an additional monadic input parameter and returns a possible changed version of it as additional return parameter monadically DHUN-} |
772 | ||
817 | ||
773 | 818 | templateToLatex :: [Anything Char] -> String -> Renderer String |
774 | 819 | templateToLatex l s |
775 | 820 | = state $ |
785 | 830 | st) |
786 | 831 | ((C 'B') : ((C '|') : xs)) -> (wikiLinkToLaTeX xs st, st) |
787 | 832 | _ -> swap $ templateProcessor st (prepateTemplate l s) |
788 | ||
789 | ||
790 | ||
791 | 833 | |
792 | 834 | mnfindentlatex :: Map String [Anything Char] -> Renderer String |
793 | mnfindentlatex ll = do one<-treeToLaTeX2 (Map.findWithDefault [] "1" ll) | |
794 | st<-get | |
795 | return ("\n\\begin{" ++ (itemEnvironmentName ":" (getF st)) ++ "}" ++ (itemEnvironmentParameters ":" (getF st)) ++ "\n\\item{}" ++ one ++ "\n\\end{" ++ (itemEnvironmentName ":" (getF st)) ++ "}\n") | |
796 | ||
835 | mnfindentlatex ll | |
836 | = do one <- treeToLaTeX2 (Map.findWithDefault [] "1" ll) | |
837 | st <- get | |
838 | return | |
839 | ("\n\\begin{" ++ | |
840 | (itemEnvironmentName ":" (getF st)) ++ | |
841 | "}" ++ | |
842 | (itemEnvironmentParameters ":" (getF st)) ++ | |
843 | "\n\\item{}" ++ | |
844 | one ++ "\n\\end{" ++ (itemEnvironmentName ":" (getF st)) ++ "}\n") | |
797 | 845 | |
798 | 846 | {-DHUN| function to converts wikipedias citearticle template to latex DHUN-} |
799 | ||
847 | ||
800 | 848 | citearticle :: Map String [Anything Char] -> Renderer String |
801 | 849 | citearticle ll |
802 | 850 | = state $ |
843 | 891 | (treeToLaTeX (Map.findWithDefault [] "month" ll) st) else |
844 | 892 | "") |
845 | 893 | ++ |
846 | (treeToLaTeX (Map.findWithDefault [] "year" ll) st) ++ | |
894 | (treeToLaTeX (Map.findWithDefault [] "year" ll) st) ++ | |
847 | 895 | (if Map.member "url" ll then |
848 | (treeToLaTeX (Map.findWithDefault [] "url" ll) st) else | |
849 | "")++"\n", st) | |
850 | ||
896 | (treeToLaTeX (Map.findWithDefault [] "url" ll) st) | |
897 | else "") | |
898 | ++ "\n", | |
899 | st) | |
900 | ||
851 | 901 | {-DHUN| removes source structures from a parse tree, keeping the source inside the source structure in the parse tree, so flattens out the source structure. You need this if some parse tree contains source code but you don't know whether or not it is inside a source tag DHUN-} |
852 | ||
902 | ||
853 | 903 | flattensource :: [Anything Char] -> [Anything Char] |
854 | 904 | flattensource ((Environment Source (TagAttr _ _) l) : xs) |
855 | 905 | = l ++ (flattensource xs) |
856 | 906 | flattensource (x : xs) = x : (flattensource xs) |
857 | 907 | flattensource [] = [] |
858 | ||
908 | ||
859 | 909 | {-DHUN| prepare code for printing in latex. takes the current state of the renderer as first parameter. takes the map version of the template containing the code as second parameter. returns the latex representation as string DHUN-} |
860 | ||
910 | ||
861 | 911 | trilex :: MyState -> Map String [Anything Char] -> String |
862 | 912 | trilex st ll = trilexgen st ll "code" |
863 | ||
913 | ||
864 | 914 | {-DHUN| prepare code for printing in latex. Takes the map version of the template containing the code as first parameter. returns the latex representation as string. It returns a Render String so that it can access the current state of the renderer as additional monadic input parameter DHUN-} |
865 | ||
915 | ||
866 | 916 | trilex2 :: Map String [Anything Char] -> Renderer String |
867 | 917 | trilex2 ll |
868 | 918 | = do st <- get |
869 | 919 | return $ trilexgen st ll "code" |
870 | ||
920 | ||
871 | 921 | {-DHUN| prepare code for printing in latex. takes the current state of the renderer as first parameter. takes the map version of the template containing the code as second parameter. takes the name of the parameter containing the actual source in the map as third parameter. returns the latex representation as string DHUN-} |
872 | ||
922 | ||
873 | 923 | trilexgen :: |
874 | 924 | MyState -> Map String [Anything Char] -> String -> String |
875 | 925 | trilexgen st ll code |
879 | 929 | (killnewline (flattensource (Map.findWithDefault [] code ll)))) |
880 | 930 | st) |
881 | 931 | else "" |
882 | where | |
883 | killnewline :: [Anything Char] -> [Anything Char] | |
932 | where killnewline :: [Anything Char] -> [Anything Char] | |
884 | 933 | killnewline ((C '\n') : xs) = killnewline xs |
885 | 934 | killnewline x = x |
886 | ||
935 | ||
887 | 936 | {-DHUN| analyzes a color in HTML notation. It returns a triple. The first element is a boolean. If it is true the color has got rgb hex notation and the third parameter contains the rgb latex notation. If it is false, the color is not rgb and hopefully a HTML color name, which is returned a second element of the tuple DHUN-} |
888 | ||
937 | ||
889 | 938 | colinfo :: String -> (Bool, String, String) |
890 | 939 | colinfo colcode |
891 | 940 | = case col of |
907 | 956 | of |
908 | 957 | Just x -> x |
909 | 958 | Nothing -> colcode |
910 | ||
959 | ||
911 | 960 | ss :: String -> [Integer] |
912 | 961 | ss (a : (b : xs)) = (maybeToList . unhex $ [a, b]) ++ (ss xs) |
913 | 962 | ss _ = [] |
914 | ||
963 | ||
915 | 964 | ss3 :: String -> [Integer] |
916 | 965 | ss3 (a : xs) = (maybeToList . unhex $ [a, a]) ++ (ss3 xs) |
917 | 966 | ss3 _ = [] |
918 | ||
967 | ||
919 | 968 | ss2 :: [Integer] -> [Float] |
920 | 969 | ss2 (x : xs) = ((fromInteger x) / 255.0) : ss2 xs |
921 | 970 | ss2 [] = [] |
922 | 971 | ss4 x = if (((length . ss) x) == 3) then ss x else ss3 x |
923 | ||
972 | ||
924 | 973 | prettyp2 :: [String] -> String |
925 | 974 | prettyp2 (x : []) = x |
926 | 975 | prettyp2 (x : xs) = x ++ "," ++ (prettyp2 xs) |
927 | 976 | prettyp2 [] = [] |
928 | ||
977 | ||
929 | 978 | prettyp :: [String] -> String |
930 | 979 | prettyp x = "{" ++ (prettyp2 x) ++ "}" |
931 | ||
980 | ||
932 | 981 | makecol :: Maybe String -> Maybe String |
933 | 982 | makecol x |
934 | 983 | = do xx <- x |
936 | 985 | return $ |
937 | 986 | prettyp ((map (printf "%0.5f") ((ss2 . ss4) xx)) :: [String]) |
938 | 987 | mypred x = (((length . ss) x) == 3) || (((length . ss3) x) == 3) |
939 | ||
988 | ||
940 | 989 | {-DHUN| and adapter to convert between the monadic and non monadic version of render. A function returning renderer string means that it returns a string but takes a state as additional monadic input parameter and returns the a possibly modified version of it as an additional monadic output parameter. This function takes a monadic return value. That is renderer String and returns its non monadic version. DHUN-} |
941 | ||
990 | ||
942 | 991 | tempProcAdapter :: |
943 | 992 | Renderer String -> (MyState -> (MyState, String)) |
944 | 993 | tempProcAdapter = (swap .) . runState |
945 | ||
994 | ||
946 | 995 | {-DHUN| function for key strokes templates in the blender wikibook DHUN-} |
947 | ||
996 | ||
948 | 997 | key :: [Char] -> [Char] |
949 | 998 | key "AKEY" = "A" |
950 | 999 | key "BKEY" = "B" |
976 | 1025 | key "SEMICOLON" = ";" |
977 | 1026 | key "NUM-" = "NUM-{}" |
978 | 1027 | key x = x |
979 | ||
1028 | ||
980 | 1029 | {-DHUN| converts a template to latex. Takes the current state of the render as first input parameter. The second input parameter is a tuple. Its first element is the name of the template as string. Its second element is a map, mapping the names of the parameters of the template to their parse tree representations, it returns a tuple. The First element is the possible change state of the renderer the second one is the latex representation of the template DHUN-} |
981 | ||
1030 | ||
982 | 1031 | templateProcessor :: |
983 | 1032 | MyState -> |
984 | 1033 | (String, Map String [Anything Char]) -> (MyState, String) |
986 | 1035 | = (st, |
987 | 1036 | "Main Page: " ++ |
988 | 1037 | (wikiLinkToLaTeX (Map.findWithDefault [] "1" ll) st)) |
989 | ||
990 | ||
991 | templateProcessor st ("#invoke:Mathe für Nicht-Freaks/Seite", _) | |
1038 | templateProcessor st ("#invoke:Mathe f\252r Nicht-Freaks/Seite", _) | |
992 | 1039 | = (st, "") |
993 | templateProcessor st ("#invoke:Liste", _) | |
994 | = (st, "") | |
995 | ||
1040 | templateProcessor st ("#invoke:Liste", _) = (st, "") | |
996 | 1041 | templateProcessor st ("!", _) = (st, "|") |
997 | 1042 | templateProcessor st ("!!", _) = (st, "||") |
998 | 1043 | templateProcessor st |
1122 | 1167 | where go (C x) = [x] |
1123 | 1168 | go _ = [] |
1124 | 1169 | templateProcessor st ("Komplexe Zahlen/ Vorlage:Formel", ll) |
1125 | = (st, (treeToLaTeX (shallowEnlargeMath (Map.findWithDefault [] "1" ll)) st) ) | |
1170 | = (st, | |
1171 | (treeToLaTeX (shallowEnlargeMath (Map.findWithDefault [] "1" ll)) | |
1172 | st)) | |
1126 | 1173 | templateProcessor st ("Ada/95/RM", ll) |
1127 | 1174 | = (st, |
1128 | 1175 | (linkToLaTeX |
1251 | 1298 | (treeToLaTeX (Map.findWithDefault [] "Aufgabe" ll) st) ++ |
1252 | 1299 | "\n {\\bfseries Musterl\246sung} \n" ++ |
1253 | 1300 | (treeToLaTeX (Map.findWithDefault [] "L\246sung" ll) st) ++ " \n")) |
1254 | templateProcessor st ("-", ll) = (tempProcAdapter $ mnfindentlatex ll) st | |
1301 | templateProcessor st ("-", ll) | |
1302 | = (tempProcAdapter $ mnfindentlatex ll) st | |
1255 | 1303 | templateProcessor st ("Haskell speaker 2", ll) = (st, param "1") |
1256 | 1304 | where param n = (treeToLaTeX (Map.findWithDefault [] n ll) st) |
1257 | 1305 | templateProcessor st ("Vorlage:LaTeX Mehrspaltig Anfang", ll) |
1270 | 1318 | templateProcessor st ("Referenzbox Internet", ll) |
1271 | 1319 | = (st, |
1272 | 1320 | "{\\bfseries Themenbezogene} " ++ |
1273 | (treeToLaTeX [Environment Wikilink (Str "") ((Map.findWithDefault [] "1" ll)++(map C "|Webangebote"))] st)) | |
1321 | (treeToLaTeX | |
1322 | [Environment Wikilink (Str "") | |
1323 | ((Map.findWithDefault [] "1" ll) ++ (map C "|Webangebote"))] | |
1324 | st)) | |
1274 | 1325 | templateProcessor st ("Vorlage:Referenzbox Internet", ll) |
1275 | 1326 | = (st, |
1276 | 1327 | "{\\bfseries Themenbezogene} " ++ |
1277 | (treeToLaTeX [Environment Wikilink (Str "") ((Map.findWithDefault [] "1" ll)++(map C "|Webangebote"))] st)) | |
1328 | (treeToLaTeX | |
1329 | [Environment Wikilink (Str "") | |
1330 | ((Map.findWithDefault [] "1" ll) ++ (map C "|Webangebote"))] | |
1331 | st)) | |
1278 | 1332 | templateProcessor st ("Referenzbox IntraReihe", ll) |
1279 | 1333 | = (st, |
1280 | 1334 | "{\\bfseries Zum anderen Band der " ++ |
1281 | (treeToLaTeX [Environment Wikilink (Str "") ((Map.findWithDefault [] "1" ll)++[C '|']++(Map.findWithDefault [] "2" ll))] st)++"}"++(treeToLaTeX (Map.findWithDefault [] "3" ll) st) ) | |
1282 | ||
1335 | (treeToLaTeX | |
1336 | [Environment Wikilink (Str "") | |
1337 | ((Map.findWithDefault [] "1" ll) ++ | |
1338 | [C '|'] ++ (Map.findWithDefault [] "2" ll))] | |
1339 | st) | |
1340 | ++ "}" ++ (treeToLaTeX (Map.findWithDefault [] "3" ll) st)) | |
1283 | 1341 | templateProcessor st ("Vorlage:Referenzbox IntraReihe", ll) |
1284 | 1342 | = (st, |
1285 | 1343 | "{\\bfseries Zum anderen Band der " ++ |
1286 | (treeToLaTeX [Environment Wikilink (Str "") ((Map.findWithDefault [] "1" ll)++[C '|']++(Map.findWithDefault [] "2" ll))] st)++"}"++(treeToLaTeX (Map.findWithDefault [] "3" ll) st) ) | |
1344 | (treeToLaTeX | |
1345 | [Environment Wikilink (Str "") | |
1346 | ((Map.findWithDefault [] "1" ll) ++ | |
1347 | [C '|'] ++ (Map.findWithDefault [] "2" ll))] | |
1348 | st) | |
1349 | ++ "}" ++ (treeToLaTeX (Map.findWithDefault [] "3" ll) st)) | |
1287 | 1350 | templateProcessor st ("unicode", ll) |
1288 | 1351 | = (st, (treeToLaTeX (Map.findWithDefault [] "1" ll) st)) |
1289 | 1352 | templateProcessor st |
1397 | 1460 | where heading |
1398 | 1461 | = if Map.member "info" ll then |
1399 | 1462 | treeToLaTeX (Map.findWithDefault [] "info" ll) st else "Hinweis" |
1400 | templateProcessor st ("Wie_mein_Buch_auf_die_Welt_kommt/_Vorlage:Zitat", ll) = templateProcessor st ("Zitat", ll) | |
1463 | templateProcessor st | |
1464 | ("Wie_mein_Buch_auf_die_Welt_kommt/_Vorlage:Zitat", ll) | |
1465 | = templateProcessor st ("Zitat", ll) | |
1401 | 1466 | templateProcessor st ("Zitat", ll) |
1402 | 1467 | = (st, |
1403 | 1468 | "\\begin{longtable}{|p{\\linewidth}|}\\hline\n" ++ |
1407 | 1472 | else "") |
1408 | 1473 | ++ |
1409 | 1474 | " \\uline{" ++ |
1410 | (treeToLaTeX ((Map.findWithDefault (Map.findWithDefault [] "Autor" ll) "autor" ll)) st) ++ | |
1475 | (treeToLaTeX | |
1476 | ((Map.findWithDefault (Map.findWithDefault [] "Autor" ll) "autor" | |
1477 | ll)) | |
1478 | st) | |
1479 | ++ | |
1411 | 1480 | "}\\\\\\textit{\n" ++ |
1412 | (treeToLaTeX ((Map.findWithDefault (Map.findWithDefault [] "Zitat" ll) "zitat" ll)) st) ++ | |
1481 | (treeToLaTeX | |
1482 | ((Map.findWithDefault (Map.findWithDefault [] "Zitat" ll) "zitat" | |
1483 | ll)) | |
1484 | st) | |
1485 | ++ | |
1413 | 1486 | "}\\scriptsize \\\\ \\RaggedLeft \\scriptsize \n" ++ |
1414 | (treeToLaTeX ((Map.findWithDefault (Map.findWithDefault [] "Quelle" ll) "quelle" ll)) st) ++ | |
1415 | "\\\\ \\hline \n\\end{longtable}\n") | |
1487 | (treeToLaTeX | |
1488 | ((Map.findWithDefault (Map.findWithDefault [] "Quelle" ll) "quelle" | |
1489 | ll)) | |
1490 | st) | |
1491 | ++ "\\\\ \\hline \n\\end{longtable}\n") | |
1416 | 1492 | templateProcessor st ("java web api", ll) |
1417 | 1493 | = (st, |
1418 | 1494 | "\\myhref{http://java.sun.com/javase/6/docs/api/" ++ |
1419 | 1495 | loc ++ "}{" ++ cap ++ "}") |
1420 | where | |
1421 | loc :: String | |
1496 | where loc :: String | |
1422 | 1497 | loc = (shallowFlatten (Map.findWithDefault [] "1" ll)) |
1423 | ||
1498 | ||
1424 | 1499 | cap :: String |
1425 | 1500 | cap |
1426 | 1501 | = "Java API: " ++ (treeToLaTeX (Map.findWithDefault [] "2" ll) st) |
1468 | 1543 | st) |
1469 | 1544 | ++ "}}\n") |
1470 | 1545 | templateProcessor st ("BigJava", ll) |
1471 | = tempProcAdapter (do x<-return ((replace2 (replace2 ("{\\ttfamily { " ++ ((formatLaTeXBlock defaultFormatOpts) (highlightAs "java" (shallowFlatten (map renormalize (breakLines3 96 (Map.findWithDefault [] "code" ll))))))++ "}}\n" )"\\begin{Shaded}" "") "\\end{Shaded}" "")) | |
1472 | lll<-mapM doFonts x | |
1473 | return (concat lll) ) st | |
1546 | = tempProcAdapter | |
1547 | (do x <- return | |
1548 | ((replace2 | |
1549 | (replace2 | |
1550 | ("{\\ttfamily { " ++ | |
1551 | ((formatLaTeXBlock defaultFormatOpts) | |
1552 | (highlightAs "java" | |
1553 | (shallowFlatten | |
1554 | (map renormalize | |
1555 | (breakLines3 96 (Map.findWithDefault [] "code" ll)))))) | |
1556 | ++ "}}\n") | |
1557 | "\\begin{Shaded}" | |
1558 | "") | |
1559 | "\\end{Shaded}" | |
1560 | "")) | |
1561 | lll <- mapM doFonts x | |
1562 | return (concat lll)) | |
1563 | st | |
1474 | 1564 | templateProcessor st |
1475 | 1565 | ("C++-Programmierung/ Vorlage:Kapitelanhang", ll) |
1476 | 1566 | = (st, |
1492 | 1582 | templateProcessor st ("code:Output", ll) |
1493 | 1583 | = (st, |
1494 | 1584 | ("{{" ++ |
1495 | ||
1496 | (if (Map.member "1" ll) then | |
1497 | "{ {" ++ | |
1498 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "1" ll)) | |
1499 | st) | |
1585 | (if (Map.member "1" ll) then | |
1586 | "{ {" ++ | |
1587 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "1" ll)) st) | |
1588 | ++ "}}" | |
1589 | else "") | |
1590 | ++ | |
1591 | "}}\n$\\text{ }$\\newline{}\n{\\bfseries Code}\\newline{}" ++ | |
1592 | (if (Map.member "2" ll) then | |
1593 | "{\\ttfamily {\\scriptsize" ++ | |
1594 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "2" ll)) st) | |
1500 | 1595 | ++ "}}" |
1501 | 1596 | else "") |
1597 | ++ | |
1598 | (if (Map.member "3" ll) then | |
1599 | "\n{\\bfseries Output}\\newline{}{\\ttfamily {\\scriptsize" ++ | |
1600 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "3" ll)) st) | |
1601 | ++ "}}" | |
1602 | else ""))) | |
1603 | templateProcessor st ("bcode:Example", ll) | |
1604 | = (st, | |
1605 | ("{\\bfseries Code}\\newline{}{\\ttfamily {\\scriptsize" ++ | |
1606 | (if (Map.member "1" ll) then | |
1607 | "{\\ttfamily {\\scriptsize" ++ | |
1608 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "1" ll)) st) | |
1609 | ++ "}}" | |
1610 | else "") | |
1502 | 1611 | ++ |
1503 | ||
1504 | "}}\n$\\text{ }$\\newline{}\n{\\bfseries Code}\\newline{}" ++ | |
1612 | "}}\n" ++ | |
1505 | 1613 | (if (Map.member "2" ll) then |
1506 | 1614 | "{\\ttfamily {\\scriptsize" ++ |
1507 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "2" ll)) | |
1508 | st) | |
1615 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "2" ll)) st) | |
1509 | 1616 | ++ "}}" |
1510 | 1617 | else "") |
1511 | ++ | |
1512 | (if (Map.member "3" ll) then | |
1513 | "\n{\\bfseries Output}\\newline{}{\\ttfamily {\\scriptsize" ++ | |
1514 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "3" ll)) | |
1515 | st) | |
1516 | ++ "}}" | |
1517 | else ""))) | |
1518 | ||
1519 | templateProcessor st ("bcode:Example", ll) | |
1520 | = (st, | |
1521 | ("{\\bfseries Code}\\newline{}{\\ttfamily {\\scriptsize" ++ | |
1522 | ||
1523 | (if (Map.member "1" ll) then | |
1524 | "{\\ttfamily {\\scriptsize" ++ | |
1525 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "1" ll)) | |
1526 | st) | |
1527 | ++ "}}" | |
1528 | else "") | |
1529 | ++ | |
1530 | ||
1531 | "}}\n" ++ | |
1532 | (if (Map.member "2" ll) then | |
1533 | "{\\ttfamily {\\scriptsize" ++ | |
1534 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "2" ll)) | |
1535 | st) | |
1536 | ++ "}}" | |
1537 | else "") | |
1538 | ++ | |
1539 | (if (Map.member "3" ll) then | |
1540 | "{\\ttfamily {\\scriptsize" ++ | |
1541 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "3" ll)) | |
1542 | st) | |
1543 | ++ "}}" | |
1544 | else ""))) | |
1545 | ||
1618 | ++ | |
1619 | (if (Map.member "3" ll) then | |
1620 | "{\\ttfamily {\\scriptsize" ++ | |
1621 | (treeToLaTeX (breakLines3 96 (Map.findWithDefault [] "3" ll)) st) | |
1622 | ++ "}}" | |
1623 | else ""))) | |
1546 | 1624 | templateProcessor st ("C++-Programmierung/ Vorlage:Code", ll) |
1547 | 1625 | = (st, |
1548 | 1626 | ("{\\ttfamily {\\scriptsize" ++ |
1559 | 1637 | st) |
1560 | 1638 | ++ "}}" |
1561 | 1639 | else ""))) |
1562 | ||
1563 | 1640 | templateProcessor st ("cite web", ll) = (st, mainer) |
1564 | 1641 | where mainer |
1565 | 1642 | = "\\myhref{" ++ |
1572 | 1649 | accessdate |
1573 | 1650 | = treeToLaTeX (Map.findWithDefault [] "accessdate" ll) st |
1574 | 1651 | templateProcessor st ("code", ll) = (st, mainer) |
1575 | where | |
1576 | mainer :: String | |
1652 | where mainer :: String | |
1577 | 1653 | mainer |
1578 | 1654 | = "\\TemplateCode{" ++ |
1579 | 1655 | header ++ |
1580 | 1656 | "}{" ++ "}{" ++ "}{" ++ "}{" ++ lang ++ "}{" ++ code ++ "}{}{}{}" |
1581 | ||
1657 | ||
1582 | 1658 | header :: String |
1583 | 1659 | header |
1584 | 1660 | = if Map.member "header" ll then |
1585 | 1661 | (treeToLaTeX (Map.findWithDefault [] "header" ll) |
1586 | 1662 | st{getInCode = True}) |
1587 | 1663 | else "" |
1588 | ||
1664 | ||
1589 | 1665 | lang :: String |
1590 | 1666 | lang |
1591 | 1667 | = if Map.member "lang" ll then |
1593 | 1669 | st{getInCode = True}) |
1594 | 1670 | ++ " Source" |
1595 | 1671 | else "" |
1596 | ||
1672 | ||
1597 | 1673 | code :: String |
1598 | 1674 | code = trilexgen st{getInCode = True} ll "source" |
1599 | 1675 | templateProcessor st ("Java_Code_File", ll) = (st, mainer) |
1600 | where | |
1601 | mainer :: String | |
1676 | where mainer :: String | |
1602 | 1677 | mainer |
1603 | 1678 | = "\\TemplateCode{" ++ |
1604 | 1679 | header ++ |
1605 | 1680 | "}{" ++ "}{" ++ "}{" ++ "}{" ++ lang ++ "}{" ++ code ++ "}{}{}{}" |
1606 | ||
1681 | ||
1607 | 1682 | header :: String |
1608 | 1683 | header |
1609 | 1684 | = if Map.member "header" ll then |
1610 | 1685 | (treeToLaTeX (Map.findWithDefault [] "header" ll) |
1611 | 1686 | st{getInCode = True}) |
1612 | 1687 | else "" |
1613 | ||
1688 | ||
1614 | 1689 | lang :: String |
1615 | 1690 | lang = if Map.member "lang" ll then "Java Source" else "" |
1616 | ||
1691 | ||
1617 | 1692 | code :: String |
1618 | 1693 | code = trilexgen st{getInCode = True} ll "source" |
1619 | 1694 | templateProcessor st ("Syntax", ll) = (st, mainer) |
1620 | where | |
1621 | mainer :: String | |
1695 | where mainer :: String | |
1622 | 1696 | mainer |
1623 | 1697 | = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n" |
1624 | 1698 | ++ code ++ "\n\\end{TemplateCodeInside}\n" |
1625 | ||
1699 | ||
1626 | 1700 | code :: String |
1627 | 1701 | code = trilex st{getInCode = True} ll |
1628 | 1702 | templateProcessor st ("syntax", ll) = (st, mainer) |
1629 | where | |
1630 | mainer :: String | |
1703 | where mainer :: String | |
1631 | 1704 | mainer |
1632 | 1705 | = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n" |
1633 | 1706 | ++ code ++ "\n\\end{TemplateCodeInside}\n" |
1634 | ||
1707 | ||
1635 | 1708 | code :: String |
1636 | 1709 | code = trilex st{getInCode = True} ll |
1637 | 1710 | templateProcessor st ("HaskellGHCi", ll) = (st, mainer) |
1638 | where | |
1639 | mainer :: String | |
1711 | where mainer :: String | |
1640 | 1712 | mainer |
1641 | 1713 | = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n" |
1642 | 1714 | ++ code ++ "\n\\end{TemplateCodeInside}\n" |
1643 | ||
1715 | ||
1644 | 1716 | code :: String |
1645 | 1717 | code = trilexgen st{getInCode = True} ll "1" |
1646 | 1718 | templateProcessor st ("Java://", ll) = (st, mainer) |
1647 | where | |
1648 | mainer :: String | |
1719 | where mainer :: String | |
1649 | 1720 | mainer |
1650 | 1721 | = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n" |
1651 | 1722 | ++ code ++ "\n\\end{TemplateCodeInside}\n" |
1652 | ||
1723 | ||
1653 | 1724 | code :: String |
1654 | 1725 | code = trilexgen st{getInCode = True} ll "1" |
1655 | 1726 | templateProcessor st ("LaTeX/Usage", ll) |
1673 | 1744 | (x, st2) |
1674 | 1745 | = runState (treeToLaTeX2 (Map.findWithDefault [] "render" ll)) st |
1675 | 1746 | templateProcessor st ("java://", ll) = (st, mainer) |
1676 | where | |
1677 | mainer :: String | |
1747 | where mainer :: String | |
1678 | 1748 | mainer |
1679 | 1749 | = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n" |
1680 | 1750 | ++ code ++ "\n\\end{TemplateCodeInside}\n" |
1681 | ||
1751 | ||
1682 | 1752 | code :: String |
1683 | 1753 | code = trilexgen st{getInCode = True} ll "1" |
1684 | 1754 | templateProcessor st ("java", ll) = (st, mainer) |
1685 | where | |
1686 | mainer :: String | |
1755 | where mainer :: String | |
1687 | 1756 | mainer |
1688 | 1757 | = "\\begin{TemplateCodeInside}{}{\\baselineskip}{\\baselineskip}{}{}{}\n" |
1689 | 1758 | ++ code ++ "\n\\end{TemplateCodeInside}\n" |
1690 | ||
1759 | ||
1691 | 1760 | code :: String |
1692 | 1761 | code = trilex st{getInCode = True} ll |
1693 | 1762 | templateProcessor st ("DOI", ll) |
1813 | 1882 | = (tempProcAdapter $ citearticle ll) st |
1814 | 1883 | templateProcessor st ("cite news", ll) |
1815 | 1884 | = (tempProcAdapter $ citearticle ll) st |
1816 | ||
1817 | 1885 | templateProcessor st ("Druckversionsnotiz", _) = (st, "") |
1818 | ||
1819 | 1886 | templateProcessor st ("meta", ll) |
1820 | 1887 | = (st, |
1821 | 1888 | wikiLinkToLaTeX |
1852 | 1919 | (((C 's') : (C ':') : (Map.findWithDefault [] "1" ll)) ++ |
1853 | 1920 | [C '|'] ++ (Map.findWithDefault [] "2" ll)) |
1854 | 1921 | st) |
1855 | ||
1856 | 1922 | templateProcessor st ("wiktionary", ll) |
1857 | 1923 | = (st, |
1858 | 1924 | wikiLinkToLaTeX |
1859 | ((C 'w') :(C 'i'): (C 'k'): (C 't') : (C ':') : (Map.findWithDefault [] "1" ll)) | |
1925 | ((C 'w') : | |
1926 | (C 'i') : | |
1927 | (C 'k') : (C 't') : (C ':') : (Map.findWithDefault [] "1" ll)) | |
1860 | 1928 | st) |
1861 | templateProcessor st ("indent", ll) | |
1862 | = (st, go) | |
1863 | where | |
1864 | go = case reads (shallowFlatten (Map.findWithDefault [] "1" ll)) :: [(Integer,String)] of | |
1865 | [(n, _)] -> "\\newline{}"++(concat (genericReplicate n "{$\\text{ }$}")) | |
1929 | templateProcessor st ("indent", ll) = (st, go) | |
1930 | where go | |
1931 | = case | |
1932 | reads (shallowFlatten (Map.findWithDefault [] "1" ll)) :: | |
1933 | [(Integer, String)] | |
1934 | of | |
1935 | [(n, _)] -> "\\newline{}" ++ | |
1936 | (concat (genericReplicate n "{$\\text{ }$}")) | |
1866 | 1937 | _ -> "\\newline{}" |
1867 | 1938 | templateProcessor st ("wikipedia", ll) |
1868 | 1939 | = (st, |
1902 | 1973 | templateProcessor st ("Wiktionary", ll) |
1903 | 1974 | = (st, |
1904 | 1975 | wikiLinkToLaTeX |
1905 | ((map C "wiktionary") ++ ((C ':') : (Map.findWithDefault [] "1" ll))) | |
1976 | ((map C "wiktionary") ++ | |
1977 | ((C ':') : (Map.findWithDefault [] "1" ll))) | |
1906 | 1978 | st) |
1907 | ||
1908 | 1979 | templateProcessor st ("B3D:N2P/VTT1", ll) = (st2, r) |
1909 | 1980 | where (r, st2) |
1910 | 1981 | = runState |
1960 | 2031 | ll)) |
1961 | 2032 | st) |
1962 | 2033 | templateProcessor st ("Kasten", ll) |
1963 | = (st, "\\LaTeXZeroBoxTemplate{"++( | |
1964 | treeToLaTeX | |
1965 | (Map.findWithDefault (Map.findWithDefault [] "inhalt" ll) "1" ll) | |
1966 | st)++"}") | |
1967 | ||
2034 | = (st, | |
2035 | "\\LaTeXZeroBoxTemplate{" ++ | |
2036 | (treeToLaTeX | |
2037 | (Map.findWithDefault (Map.findWithDefault [] "inhalt" ll) "1" ll) | |
2038 | st) | |
2039 | ++ "}") | |
1968 | 2040 | templateProcessor st ("Druckversion Titeleintrag", ll) |
1969 | 2041 | = (st, |
1970 | 2042 | "\\pagebreak{}\\begin{longtable}{|p{0.3\\linewidth}|p{0.7\\linewidth}|}\\hline\n" |
2068 | 2140 | = (tempProcAdapter $ javakeyword xs ll "Java:") st |
2069 | 2141 | templateProcessor st ("Haskell lib", ll) |
2070 | 2142 | = (st, linkToLaTeX link st "") |
2071 | where | |
2072 | param :: String -> Maybe [Anything Char] | |
2143 | where param :: String -> Maybe [Anything Char] | |
2073 | 2144 | param name = Map.lookup name ll |
2074 | 2145 | package |
2075 | 2146 | = fromMaybe (map (C) "base") $ param "p" `mplus` param "package" |
2076 | 2147 | version |
2077 | 2148 | = fromMaybe (map (C) "4.1.0.0") $ param "v" `mplus` param "version" |
2078 | ||
2149 | ||
2079 | 2150 | unnamed :: Integer -> [Anything Char] |
2080 | 2151 | unnamed i = fromMaybe [] $ param (show i) |
2081 | 2152 | unnPars = takeWhile (not . null) $ map unnamed [1 ..] |
2108 | 2179 | "\\myhref{http://commons.wikimedia.org/wiki/" ++ |
2109 | 2180 | loc ++ "}{" ++ cap ++ "}") |
2110 | 2181 | where loc = (shallowFlatten (Map.findWithDefault [] "1" ll)) |
2111 | ||
2182 | ||
2112 | 2183 | cap :: String |
2113 | 2184 | cap |
2114 | 2185 | = if (Map.member "2" ll) then |
2119 | 2190 | "\\myhref{http://commons.wikimedia.org/wiki/" ++ |
2120 | 2191 | loc ++ "}{" ++ cap ++ "}") |
2121 | 2192 | where loc = (shallowFlatten (Map.findWithDefault [] "1" ll)) |
2122 | ||
2193 | ||
2123 | 2194 | cap :: String |
2124 | 2195 | cap |
2125 | 2196 | = if (Map.member "2" ll) then |
2130 | 2201 | "\\myhref{http://commons.wikimedia.org/wiki/Category:" ++ |
2131 | 2202 | loc ++ "}{" ++ cap ++ "}") |
2132 | 2203 | where loc = (shallowFlatten (Map.findWithDefault [] "1" ll)) |
2133 | ||
2204 | ||
2134 | 2205 | cap :: String |
2135 | 2206 | cap |
2136 | 2207 | = if (Map.member "2" ll) then |
2141 | 2212 | "\\myhref{http://commons.wikimedia.org/wiki/" ++ |
2142 | 2213 | loc ++ "}{" ++ cap ++ "}") |
2143 | 2214 | where loc = (shallowFlatten (Map.findWithDefault [] "1" ll)) |
2144 | ||
2215 | ||
2145 | 2216 | cap :: String |
2146 | 2217 | cap |
2147 | 2218 | = if (Map.member "2" ll) then |
2164 | 2235 | st) |
2165 | 2236 | templateProcessor st ("Reaktion", ll) |
2166 | 2237 | = (st, edukte ++ " $\\rightarrow$ " ++ produkte) |
2167 | where | |
2168 | reput :: [String] -> [[Anything Char]] -> [[Anything Char]] | |
2238 | where reput :: [String] -> [[Anything Char]] -> [[Anything Char]] | |
2169 | 2239 | reput (k : ks) out |
2170 | 2240 | = if Map.member k ll then |
2171 | 2241 | reput ks ((Map.findWithDefault [] k ll) : out) else reput ks out |
2172 | 2242 | reput [] out = out |
2173 | ||
2243 | ||
2174 | 2244 | myjoin :: [[Anything Char]] -> String -> String |
2175 | 2245 | myjoin (x : xs) acu |
2176 | 2246 | = if xs == [] then |
2189 | 2259 | = templateProcessor st ("C++-Programmierung/ Vorlage:Code", ll) |
2190 | 2260 | templateProcessor st ("Regal:Programmierung: Vorlage:Code", ll) |
2191 | 2261 | = (st, mainer) |
2192 | where | |
2193 | mainer :: String | |
2262 | where mainer :: String | |
2194 | 2263 | mainer |
2195 | 2264 | = "\\TemplateCode{" ++ |
2196 | 2265 | header ++ |
2199 | 2268 | "}{" ++ |
2200 | 2269 | marker ++ |
2201 | 2270 | "}{}{" ++ output ++ "}{" ++ lang ++ "}{" ++ code ++ "}{}{}{}" |
2202 | ||
2271 | ||
2203 | 2272 | marker :: String |
2204 | 2273 | marker |
2205 | 2274 | = if Map.member "error" ll then "e" else |
2206 | 2275 | if Map.member "valid" ll then "valid" else "" |
2207 | ||
2276 | ||
2208 | 2277 | header :: String |
2209 | 2278 | header |
2210 | 2279 | = if Map.member "kopf" ll then |
2211 | 2280 | (treeToLaTeX (Map.findWithDefault [] "kopf" ll) |
2212 | 2281 | st{getInCode = True}) |
2213 | 2282 | else "" |
2214 | ||
2283 | ||
2215 | 2284 | lang :: String |
2216 | 2285 | lang |
2217 | 2286 | = if Map.member "lang" ll then |
2219 | 2288 | st{getInCode = True}) |
2220 | 2289 | ++ " Quelltext" |
2221 | 2290 | else "" |
2222 | ||
2291 | ||
2223 | 2292 | code :: String |
2224 | 2293 | code = trilex st{getInCode = True} ll |
2225 | ||
2294 | ||
2226 | 2295 | output :: String |
2227 | 2296 | output |
2228 | 2297 | = if Map.member "output" ll then |
2229 | 2298 | (treeToLaTeX (killnbsp (Map.findWithDefault [] "output" ll)) |
2230 | 2299 | st{getInCode = True}) |
2231 | 2300 | else "" |
2232 | ||
2301 | ||
2233 | 2302 | footer :: String |
2234 | 2303 | footer |
2235 | 2304 | = if Map.member "fuss" ll then |
2240 | 2309 | killnbsp x = x |
2241 | 2310 | templateProcessor st (x, ll) |
2242 | 2311 | = (tempProcAdapter $ unknownTemplate x ll) st |
2243 | ||
2312 | ||
2244 | 2313 | {-DHUN| This function is currently nearly unused, it is essentially the same as templateProcessor. But with this way of writing it down you can find out for which templates handler functions are registered. DHUN-} |
2245 | ||
2314 | ||
2246 | 2315 | templateRegistry :: |
2247 | 2316 | [(String, Map String [Anything Char] -> Renderer String)] |
2248 | 2317 | templateRegistry |
2249 | 2318 | = [("Regal:Programmierung: Vorlage:Code", |
2250 | 2319 | \ ll -> |
2251 | let | |
2252 | marker :: String | |
2320 | let marker :: String | |
2253 | 2321 | marker |
2254 | 2322 | = if Map.member "error" ll then "e" else |
2255 | 2323 | if Map.member "valid" ll then "valid" else "" |
2277 | 2345 | "}{" ++ |
2278 | 2346 | marker ++ |
2279 | 2347 | "}{" ++ output ++ "}{" ++ lang ++ "}{" ++ code ++ "}{}{}{}")] |
2280 | ||
2348 | ||
2281 | 2349 | {-DHUN| processing of Java keywords for the English wikibook on Java, each Java keyword got its own template there DHUN-} |
2282 | ||
2350 | ||
2283 | 2351 | javakeyword :: |
2284 | 2352 | [Char] -> Map String [Anything Char] -> [Char] -> Renderer String |
2285 | 2353 | javakeyword xs ll j |
2294 | 2362 | "abstract", "false", "else", "switch", "continue", "import", |
2295 | 2363 | "final", "break", "implements", "finally", "while", "string", |
2296 | 2364 | "float", "do", "for", "case", "default", "package", "this"]) |
2297 | ||
2365 | ||
2298 | 2366 | {-DHUN| Handler for the unknown template. That is the ones that no handler was registered for DHUN-} |
2299 | ||
2367 | ||
2300 | 2368 | unknownTemplate :: |
2301 | 2369 | String -> Map String [Anything Char] -> Renderer String |
2302 | 2370 | unknownTemplate xx ll2 |
2324 | 2392 | (drop 1 . nullinit $ show x) ++ |
2325 | 2393 | "\n\n" ++ "{" ++ (intercalate "}{" uparams) ++ "}" ++ "\n\n" |
2326 | 2394 | in fromMaybe unknown_sf maybe_known_sf |
2327 | ||
2395 | ||
2328 | 2396 | {-DHUN| helper function to generate image numbers for image in image galleries (gallery tag in mediawiki) the fist input parameter is the start number, the second the end number. A list of all numbers between start and end is returned DHUN-} |
2329 | ||
2397 | ||
2330 | 2398 | generateGINsHelper :: Int -> Int -> [Int] |
2331 | 2399 | generateGINsHelper b e |
2332 | 2400 | = if b == e then [] else b : (generateGINsHelper (b + 1) e) |
2333 | ||
2401 | ||
2334 | 2402 | {-DHUN| function to generate image numbers for image in image galleries (gallery tag in mediawiki). The renderer start before the start of the gallery is given as first parameter. The state of the renderer after the end of the gallery is given as second parameter. A list containing the numbers of the images in the gallery is returned DHUN-} |
2335 | ||
2403 | ||
2336 | 2404 | generateGalleryImageNumbers :: MyState -> MyState -> [Int] |
2337 | 2405 | generateGalleryImageNumbers oldst newst |
2338 | 2406 | = generateGINsHelper (getJ oldst) (getJ newst) |
2339 | ||
2407 | ||
2340 | 2408 | {-DHUN| strips center tags of the parse tree keeping the data inside the center tags in the parse tree, so just flattens out the center tags DHUN-} |
2341 | ||
2409 | ||
2342 | 2410 | uncenter :: [Anything t] -> [Anything t] |
2343 | 2411 | uncenter ((Environment Tag (TagAttr "center" _) l) : xs) |
2344 | 2412 | = l ++ (uncenter xs) |
2346 | 2414 | = (Environment e s (uncenter l)) : (uncenter xs) |
2347 | 2415 | uncenter (x : xs) = x : (uncenter xs) |
2348 | 2416 | uncenter [] = [] |
2349 | ||
2350 | ||
2351 | 2417 | |
2352 | 2418 | doFonts :: Char -> Renderer String |
2353 | 2419 | doFonts c |
2354 | = do st <- get | |
2355 | case (fontStack st) of | |
2356 | (x : _) -> if (getFont x c) == (font st) then return [c] | |
2357 | else | |
2358 | do put st{font = (getFont x c)} | |
2359 | return | |
2360 | ((fontsetter (getFont x c)) ++ (fontstyler x) ++ [c]) | |
2361 | _ -> return [c] | |
2420 | = do st <- get | |
2421 | case (fontStack st) of | |
2422 | (x : _) -> if (getFont x c) == (font st) then return [c] else | |
2423 | do put st{font = (getFont x c)} | |
2424 | return ((fontsetter (getFont x c)) ++ (fontstyler x) ++ [c]) | |
2425 | _ -> return [c] | |
2362 | 2426 | |
2363 | 2427 | {-DHUN| converts a parse tree to latex. Takes the parse tree as first parameter. Takes the current state of the renderer as second input parameter. Returns the latex representation of the tree as return value. This function should only be used internally in latex renderer since it does not generate the table of names references for the ref tags. DHUN-} |
2364 | ||
2428 | ||
2365 | 2429 | treeToLaTeX :: [Anything Char] -> MyState -> String |
2366 | 2430 | treeToLaTeX l states = fst $ runState (treeToLaTeX2 l) states |
2367 | ||
2431 | ||
2368 | 2432 | {-DHUN| converts a parse tree to latex. Takes the parse tree as first parameter. Takes the current state of the renderer as second input parameter. Returns a tuple. the first element is the latex representation of the tree. The second is the new state of the renders. Does one run before the actual run, in order to generate a table of names references for the ref tags in mediawiki. This function should be called by the main program after the parser. DHUN-} |
2369 | ||
2433 | ||
2370 | 2434 | treeToLaTeX3 :: [Anything Char] -> MyState -> (String, MyState) |
2371 | 2435 | treeToLaTeX3 l st = runState ttl2twice st |
2372 | 2436 | where ttl2twice |
2374 | 2438 | b <- get |
2375 | 2439 | put st{fndict = fndict b} |
2376 | 2440 | treeToLaTeX2 l |
2377 | ||
2441 | ||
2378 | 2442 | {-DHUN| converts a parse tree to latex. Takes the parse tree as first parameter. Takes the current state of the renderer as second input parameter. Returns the latex representation of the tree as Renderer String. So it actually takes the current state of the renderer as additional monadic input parameter and returns a possible modified version of it as additional monadic return parameter. This function should only be used internally in latex renderer since it does not generate the table of names references for the ref tags. DHUN-} |
2379 | ||
2443 | ||
2380 | 2444 | treeToLaTeX2 :: [Anything Char] -> Renderer String |
2381 | 2445 | treeToLaTeX2 ll |
2382 | 2446 | = do x <- allinfo |
2383 | 2447 | return $ concat x |
2384 | where | |
2385 | allinfo :: Renderer [String] | |
2448 | where allinfo :: Renderer [String] | |
2386 | 2449 | allinfo = mapM nodeToLaTeX (removeBr ll) |
2387 | ||
2450 | ||
2388 | 2451 | walk :: String -> [Anything Char] -> String -> Renderer String |
2389 | 2452 | walk prefix l postfix |
2390 | 2453 | = do d <- treeToLaTeX2 l |
2391 | 2454 | return $ prefix ++ d ++ postfix |
2392 | ||
2455 | ||
2393 | 2456 | walktrim :: String -> [Anything Char] -> String -> Renderer String |
2394 | 2457 | walktrim prefix l postfix |
2395 | 2458 | = do st <- get |
2398 | 2461 | st2 <- get |
2399 | 2462 | put $ st2{getInHeading = getInHeading st} |
2400 | 2463 | return $ prefix ++ (trim d) ++ postfix |
2401 | ||
2464 | ||
2402 | 2465 | walkbf :: [Anything Char] -> Renderer String |
2403 | 2466 | walkbf l |
2404 | 2467 | = do st <- get |
2405 | 2468 | put $ |
2406 | st{lastFontChanged=True, fontStack = | |
2469 | st{lastFontChanged = True, | |
2470 | fontStack = | |
2407 | 2471 | ((fromMaybe |
2408 | 2472 | FontStyle{stylebase = Normal, bold = True, italic = False} |
2409 | 2473 | (maybeHead (fontStack st))){bold = True} |
2412 | 2476 | st2 <- get |
2413 | 2477 | put $ st2{fontStack = drop 1 (fontStack st2)} |
2414 | 2478 | return $ "{\\bfseries " ++ (trim d) ++ "}" |
2415 | ||
2479 | ||
2416 | 2480 | walkit :: [Anything Char] -> Renderer String |
2417 | 2481 | walkit l |
2418 | 2482 | = do st <- get |
2419 | 2483 | put $ |
2420 | st{lastFontChanged=True,fontStack = | |
2484 | st{lastFontChanged = True, | |
2485 | fontStack = | |
2421 | 2486 | ((fromMaybe |
2422 | 2487 | FontStyle{stylebase = Normal, bold = False, italic = True} |
2423 | 2488 | (maybeHead (fontStack st))){italic = True} |
2426 | 2491 | st2 <- get |
2427 | 2492 | put $ st2{fontStack = drop 1 (fontStack st2)} |
2428 | 2493 | return $ "{\\itshape " ++ (trim d) ++ "}" |
2429 | ||
2494 | ||
2430 | 2495 | walktt :: [Anything Char] -> Renderer String |
2431 | 2496 | walktt l |
2432 | 2497 | = do st <- get |
2433 | 2498 | put $ |
2434 | st{lastFontChanged=True,fontStack = | |
2499 | st{lastFontChanged = True, | |
2500 | fontStack = | |
2435 | 2501 | ((fromMaybe |
2436 | 2502 | FontStyle{stylebase = Mono, bold = False, italic = False} |
2437 | 2503 | (maybeHead (fontStack st))){stylebase = Mono} |
2440 | 2506 | st2 <- get |
2441 | 2507 | put $ st2{fontStack = drop 1 (fontStack st2)} |
2442 | 2508 | return $ "{\\ttfamily " ++ (trim d) ++ "}" |
2443 | ||
2509 | ||
2444 | 2510 | walkfn :: String -> [Anything Char] -> String -> Renderer String |
2445 | 2511 | walkfn prefix l postfix |
2446 | 2512 | = do st <- get |
2449 | 2515 | st2 <- get |
2450 | 2516 | put $ st2{getInFootnote = (getInFootnote st)} |
2451 | 2517 | return $ prefix ++ d ++ postfix |
2452 | ||
2518 | ||
2453 | 2519 | nodeToLaTeX :: Anything Char -> Renderer String |
2454 | 2520 | nodeToLaTeX (C c) |
2455 | 2521 | = do st <- get |
2456 | 2522 | case (fontStack st) of |
2457 | 2523 | (x : _) -> if (getFont x c) == (font st) then return (chartrans c) |
2458 | 2524 | else |
2459 | do put st{font = (getFont x c),lastFontChanged=(lastFontChanged st)&&( not (c==' ')) } | |
2525 | do put | |
2526 | st{font = (getFont x c), | |
2527 | lastFontChanged = (lastFontChanged st) && (not (c == ' '))} | |
2460 | 2528 | return |
2461 | ((if ((lastFontChanged st)&& (c==' ')) then "{$\\text{ }$}" else "")++(fontsetter (getFont x c)) ++ (fontstyler x) ++ (chartrans c)) | |
2529 | ((if ((lastFontChanged st) && (c == ' ')) then "{$\\text{ }$}" | |
2530 | else "") | |
2531 | ++ | |
2532 | (fontsetter (getFont x c)) ++ | |
2533 | (fontstyler x) ++ (chartrans c)) | |
2462 | 2534 | _ -> return (chartrans c) |
2463 | 2535 | nodeToLaTeX (Environment ForbiddenTag (Str s) _) |
2464 | 2536 | = return $ s >>= chartrans |
2529 | 2601 | = do prep <- treeToLaTeX2 . prepart $ v |
2530 | 2602 | post <- treeToLaTeX2 . postpart $ v |
2531 | 2603 | return (prep, post) |
2532 | ||
2604 | ||
2533 | 2605 | fulllist :: Renderer [(String, String)] |
2534 | 2606 | fulllist = mapM texit vv |
2535 | ||
2607 | ||
2536 | 2608 | vv :: [[Anything Char]] |
2537 | 2609 | vv = [x | x <- splitOn [Item ';'] l, x /= []] |
2538 | 2610 | prepart v = takeWhile ((/=) (C ':')) v |
2540 | 2612 | = case dropWhile ((/=) (C ':')) v of |
2541 | 2613 | (_ : xs) -> xs |
2542 | 2614 | x -> x |
2543 | ||
2615 | ||
2544 | 2616 | prolist :: [(String, String)] -> MyState -> String |
2545 | 2617 | prolist lis st |
2546 | 2618 | = do (prd, pod) <- lis |
2576 | 2648 | nodeToLaTeX (Item c) = return $ "\n" ++ (itemSeperator c) ++ " " |
2577 | 2649 | nodeToLaTeX (Environment Itemgroup _ l) = walk "" l "" |
2578 | 2650 | nodeToLaTeX (Environment Wikiheading (Str s) l) |
2579 | = do st<-get | |
2580 | if (getInTab st) > 0 then walktrim ("{\\Large ") (uncenter l) | |
2581 | ("}\n") else walktrim ("\\" ++ (getsec s) ++ "{") (uncenter l) ("}\n" ++ (getsecpost s)) | |
2651 | = do st <- get | |
2652 | if (getInTab st) > 0 then | |
2653 | walktrim ("{\\Large ") (uncenter l) ("}\n") else | |
2654 | walktrim ("\\" ++ (getsec s) ++ "{") (uncenter l) | |
2655 | ("}\n" ++ (getsecpost s)) | |
2582 | 2656 | nodeToLaTeX (Environment Tag (TagAttr ('h' : (x : [])) _) l) |
2583 | 2657 | = if x `elem` "123456" then |
2584 | 2658 | case reads [x] of |
2605 | 2679 | linkToLaTeX |
2606 | 2680 | ((map (C) |
2607 | 2681 | (case g of |
2608 | '/' : '/' : gx -> "http://" ++ gx | |
2682 | '/' : ('/' : gx) -> "http://" ++ gx | |
2609 | 2683 | '/' : _ -> wikiUrlDataToString (urld st) g |
2610 | 2684 | _ -> g)) |
2611 | 2685 | ++ [C ' '] ++ l) |
2770 | 2844 | = do let g = case reverse l of |
2771 | 2845 | [] -> [] |
2772 | 2846 | (x : xs) -> if x == (C '\n') then reverse xs else l |
2773 | let xg = case g of | |
2774 | (C '\n'): xs -> xs | |
2775 | _ -> g | |
2847 | let xg | |
2848 | = case g of | |
2849 | (C '\n') : xs -> xs | |
2850 | _ -> g | |
2776 | 2851 | let f = shallowFlatten (map renormalize (breakLines3 linewidth xg)) |
2777 | 2852 | let glines = (Map.lookup "line" a) /= Nothing |
2778 | let spg= splitOn "\n" f | |
2853 | let spg = splitOn "\n" f | |
2779 | 2854 | let spgl = length (show (length spg)) |
2780 | 2855 | let lino = linenumbers spgl 1 (length spg) |
2781 | let newlines = if glines then intercalate "\n" (map (\(k,v)->k++" "++v) (zip lino spg)) else f | |
2782 | d <- treeToLaTeX2 (breakLines3 linewidth (if glines then map C newlines else xg)) | |
2856 | let newlines | |
2857 | = if glines then | |
2858 | intercalate "\n" (map (\ (k, v) -> k ++ " " ++ v) (zip lino spg)) | |
2859 | else f | |
2860 | d <- treeToLaTeX2 | |
2861 | (breakLines3 linewidth (if glines then map C newlines else xg)) | |
2783 | 2862 | st <- get |
2784 | 2863 | case |
2785 | do aa <- Map.lookup "lang" a | |
2786 | guard (not (getInFootnote st)) | |
2787 | guard (not ((getInTab st) > 0)) | |
2788 | return aa | |
2789 | of | |
2790 | Just j -> do gg<-return $ '\n' : | |
2791 | replace2 (replace2 (replace2 | |
2792 | (replace2 | |
2793 | (replace2 | |
2794 | (replace2 | |
2864 | do aa <- Map.lookup "lang" a | |
2865 | guard (not (getInFootnote st)) | |
2866 | guard (not ((getInTab st) > 0)) | |
2867 | return aa | |
2868 | of | |
2869 | Just j -> do gg <- return $ | |
2870 | '\n' : | |
2871 | replace2 | |
2795 | 2872 | (replace2 |
2796 | 2873 | (replace2 |
2797 | 2874 | (replace2 |
2798 | 2875 | (replace2 |
2799 | 2876 | (replace2 |
2800 | 2877 | (replace2 |
2801 | ((formatLaTeXBlock defaultFormatOpts) | |
2802 | (highlightAs j newlines)) | |
2803 | "'" | |
2804 | "\\textquotesingle{}") | |
2805 | "\n" | |
2806 | "\\newline\n") | |
2807 | "{Shaded}\\newline\n" | |
2808 | "{Shaded}\n") | |
2809 | "{Highlighting}[]\\newline\n\\newline\n" | |
2878 | (replace2 | |
2879 | (replace2 | |
2880 | (replace2 | |
2881 | (replace2 | |
2882 | (replace2 | |
2883 | ((formatLaTeXBlock | |
2884 | defaultFormatOpts) | |
2885 | (highlightAs j | |
2886 | newlines)) | |
2887 | "'" | |
2888 | "\\textquotesingle{}") | |
2889 | "\n" | |
2890 | "\\newline\n") | |
2891 | "{Shaded}\\newline\n" | |
2892 | "{Shaded}\n") | |
2893 | "{Highlighting}[]\\newline\n\\newline\n" | |
2894 | "{Highlighting}[]\n") | |
2895 | "{Highlighting}\\newline\n" | |
2896 | "{Highlighting}\n") | |
2897 | " " | |
2898 | "\\ensuremath{\\text{ }}") | |
2899 | "%" | |
2900 | "\\%") | |
2901 | "$" | |
2902 | "\\$") | |
2903 | "{Highlighting}[]\\newline\n" | |
2810 | 2904 | "{Highlighting}[]\n") |
2811 | "{Highlighting}\\newline\n" | |
2812 | "{Highlighting}\n") | |
2813 | " " | |
2814 | "\\ensuremath{\\text{ }}") | |
2815 | "%" | |
2816 | "\\%") | |
2817 | "$" | |
2818 | "\\$") | |
2819 | "{Highlighting}[]\\newline\n" | |
2820 | "{Highlighting}[]\n") | |
2821 | "&" | |
2822 | "\\&") "_" "\\_") "^" "\\^{}" | |
2823 | lll<-mapM doFonts gg | |
2824 | return (concat lll) | |
2825 | Nothing -> return ("\\TemplateSource{" ++ (rtrim d) ++ "}\n") | |
2905 | "&" | |
2906 | "\\&") | |
2907 | "_" | |
2908 | "\\_") | |
2909 | "^" | |
2910 | "\\^{}" | |
2911 | lll <- mapM doFonts gg | |
2912 | return (concat lll) | |
2913 | Nothing -> return ("\\TemplateSource{" ++ (rtrim d) ++ "}\n") | |
2826 | 2914 | nodeToLaTeX (Environment Tag (TagAttr "font" a) l) |
2827 | 2915 | = if Map.member "style" a then |
2828 | 2916 | if |
2897 | 2985 | nodeToLaTeX (Environment Label (Str s) _) |
2898 | 2986 | = return $ "\\label{" ++ s ++ "}" |
2899 | 2987 | nodeToLaTeX _ = return [] |
2900 | ||
2988 | ||
2901 | 2989 | {-DHUN| Unicode escaping for latex strings DHUN-} |
2902 | ||
2990 | ||
2903 | 2991 | doUnicode :: String -> String |
2904 | 2992 | doUnicode ('\206' : ('\178' : xs)) |
2905 | 2993 | = "\\ensuremath{\\beta}" ++ doUnicode xs |
0 | 0 | {-DHUN| module storing information on image licensing on wikipedia wikimedia commons and so on. DHUN-} |
1 | 1 | module Licenses where |
2 | ||
2 | ||
3 | 3 | {-DHUN| a map (written as list) to map an url to a license to an abbriviated text of the license. In contrast to the map minlicenses in this module the presence of a leading http: is optional DHUN-} |
4 | ||
4 | ||
5 | 5 | licenses :: [(String, String)] |
6 | 6 | licenses = minlicenses ++ (map go minlicenses) |
7 | where | |
8 | go :: (String, String) -> (String, String) | |
7 | where go :: (String, String) -> (String, String) | |
9 | 8 | go (x, y) = ((drop 5) x, y) |
10 | ||
9 | ||
11 | 10 | {-DHUN| a map (written as list) to map an url to a license to an abbriviated text of the license DHUN-} |
12 | ||
11 | ||
13 | 12 | minlicenses :: [(String, String)] |
14 | 13 | minlicenses |
15 | 14 | = [("http://en.wikipedia.org/wiki/public_domain", "PD"), |
16 | 15 | ("https://en.wikipedia.org/wiki/Public_domain", "PD"), |
17 | ("https://de.wikibooks.org/wiki/GNU_Freie_Dokumentationslizenz", "GFDL"), | |
18 | ("https://de.wikipedia.org/wiki/GNU-Lizenz_f%C3%BCr_freie_Dokumentation", "GFDL"), | |
19 | ("https://de.wikipedia.org/wiki/GNU-Lizenz_für_freie_Dokumentation", "GFDL"), | |
20 | ||
16 | ("https://de.wikibooks.org/wiki/GNU_Freie_Dokumentationslizenz", | |
17 | "GFDL"), | |
18 | ("https://de.wikipedia.org/wiki/GNU-Lizenz_f%C3%BCr_freie_Dokumentation", | |
19 | "GFDL"), | |
20 | ("https://de.wikipedia.org/wiki/GNU-Lizenz_f\252r_freie_Dokumentation", | |
21 | "GFDL"), | |
21 | 22 | ("http://en.wikipedia.org/wiki/en:GNU_Free_Documentation_License", |
22 | 23 | "GFDL"), |
23 | 24 | ("http://en.wikipedia.org/wiki/en:GNU_General_Public_License", |
24 | 25 | "GPL"), |
25 | ("http://de.wikipedia.org/wiki/GNU_General_Public_License", | |
26 | "GPL"), | |
26 | ("http://de.wikipedia.org/wiki/GNU_General_Public_License", "GPL"), | |
27 | 27 | ("http://de.wikipedia.org/wiki/Gemeinfreiheit", "PD"), |
28 | 28 | ("http://en.wikipedia.org/wiki/de:Gemeinfreiheit", "PD"), |
29 | 29 | ("http://de.wikipedia.org/wiki/Gemeinfreiheit", "PD"), |
24 | 24 | import Network.URL |
25 | 25 | import Control.Monad.Except |
26 | 26 | import System.Process |
27 | ||
27 | ||
28 | 28 | notendyet :: |
29 | 29 | (String -> ImperativeMonad String) -> |
30 | 30 | ParsecT String () ImperativeMonad String -> |
43 | 43 | <|> |
44 | 44 | do a <- anyChar |
45 | 45 | notendyet action sstart eend (aku ++ [a]) |
46 | ||
46 | ||
47 | 47 | beginning :: |
48 | 48 | (String -> ImperativeMonad String) -> |
49 | 49 | ParsecT String () ImperativeMonad String -> |
57 | 57 | do _ <- sstart |
58 | 58 | ne <- notendyet action sstart eend [] |
59 | 59 | return (ne) |
60 | ||
60 | ||
61 | 61 | startToEnd :: |
62 | 62 | (String -> ImperativeMonad String) -> |
63 | 63 | ParsecT String () ImperativeMonad String -> |
72 | 72 | do a <- anyChar |
73 | 73 | s <- startToEnd action sstart eend |
74 | 74 | return (a : s) |
75 | ||
75 | ||
76 | 76 | zeroAction :: (Monad m) => t -> t1 -> m [Char] |
77 | 77 | zeroAction _ _ = return "" |
78 | ||
78 | ||
79 | 79 | runAction :: |
80 | 80 | String -> |
81 | 81 | String -> |
90 | 90 | case x of |
91 | 91 | Left _ -> return "" |
92 | 92 | Right xs -> return xs |
93 | ||
93 | ||
94 | 94 | chapterAction :: WikiUrl -> String -> ImperativeMonad String |
95 | 95 | chapterAction wurl text |
96 | 96 | = do pp <- liftIO (getpage d (wurl)) |
99 | 99 | noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p) |
100 | 100 | _ -> return "" |
101 | 101 | where d = (trim (takeWhile (/= '|') text)) |
102 | ||
102 | ||
103 | 103 | chapterAction2 :: FullWikiUrl -> String -> ImperativeMonad String |
104 | 104 | chapterAction2 fu text |
105 | 105 | = do pp <- liftIO (getpage d (wurl)) |
111 | 111 | where e = (trim (takeWhile (/= '|') text)) |
112 | 112 | d = (removePrintVersion (lemma fu)) ++ "/" ++ e |
113 | 113 | wurl = wikiUrl fu |
114 | ||
115 | 114 | |
116 | 115 | chapterAction3 :: FullWikiUrl -> String -> ImperativeMonad String |
117 | 116 | chapterAction3 fu text |
127 | 126 | |
128 | 127 | includeAction :: WikiUrl -> String -> ImperativeMonad String |
129 | 128 | includeAction = qIncludeAction |
130 | ||
129 | ||
131 | 130 | qIncludeAction :: WikiUrl -> String -> ImperativeMonad String |
132 | 131 | qIncludeAction wurl text |
133 | = if isInfixOf "Vorlage" text then return ("{{"++text++"}}") | |
134 | else | |
135 | do pp <- (liftIO (print d)) >>liftIO (getpage d (wurl)) | |
132 | = if isInfixOf "Vorlage" text then return ("{{" ++ text ++ "}}") | |
133 | else | |
134 | do pp <- (liftIO (print d)) >> liftIO (getpage d (wurl)) | |
136 | 135 | case pp of |
137 | Just p -> do _ <- addContributors d Nothing | |
138 | noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p) | |
139 | _ -> return "" | |
136 | Just p -> do _ <- addContributors d Nothing | |
137 | noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p) | |
138 | _ -> return "" | |
140 | 139 | where d = (trim (takeWhile (/= '|') text)) |
141 | 140 | |
142 | 141 | qBookIncludeAction :: WikiUrl -> String -> ImperativeMonad String |
143 | 142 | qBookIncludeAction wurl text |
144 | = if isInfixOf "Vorlage" text then return ("{{"++text++"}}") | |
145 | else | |
146 | do pp <- (liftIO (print d)) >>liftIO (getBookpage d (wurl)) | |
143 | = if isInfixOf "Vorlage" text then return ("{{" ++ text ++ "}}") | |
144 | else | |
145 | do pp <- (liftIO (print d)) >> liftIO (getBookpage d (wurl)) | |
147 | 146 | case pp of |
148 | Just p -> do _ <- addContributors d Nothing | |
149 | x <- noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p) | |
150 | st<- get | |
151 | systempdir <- liftIO getTemporaryDirectory | |
152 | tempdir <- liftIO $ createTempDirectory systempdir "MediaWiki2LaTeXParser" | |
153 | liftIO $ Tools.writeFile (tempdir </> "input") x | |
154 | _ <- liftIO $ system("mediawiki2latex -x "++( Hex.hex (show (fullconfigbase{compile=Just tempdir})))) | |
155 | t<-liftIO $ Tools.readFile (tempdir </> "output") | |
156 | put st{loadacu=((read t)++(loadacu st)::[Anything Char])} | |
157 | return x | |
158 | _ -> return "" | |
147 | Just p -> do _ <- addContributors d Nothing | |
148 | x <- noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p) | |
149 | st <- get | |
150 | systempdir <- liftIO getTemporaryDirectory | |
151 | tempdir <- liftIO $ | |
152 | createTempDirectory systempdir "MediaWiki2LaTeXParser" | |
153 | liftIO $ Tools.writeFile (tempdir </> "input") x | |
154 | _ <- liftIO $ | |
155 | system | |
156 | ("mediawiki2latex -x " ++ | |
157 | (Hex.hex (show (fullconfigbase{compile = Just tempdir})))) | |
158 | t <- liftIO $ Tools.readFile (tempdir </> "output") | |
159 | put st{loadacu = ((read t) ++ (loadacu st) :: [Anything Char])} | |
160 | return x | |
161 | _ -> return "" | |
159 | 162 | where d = (trim (takeWhile (/= '|') text)) |
160 | 163 | |
161 | ||
162 | 164 | makeUrl :: String -> String -> String -> [Char] |
163 | 165 | makeUrl lang theFam thePage |
164 | 166 | = (unify . exportURL) |
182 | 184 | Absolute |
183 | 185 | (Host{protocol = HTTP True, host = "toolserver.org", |
184 | 186 | port = Nothing})})) |
185 | ||
187 | ||
186 | 188 | langau :: Map String String |
187 | 189 | langau |
188 | 190 | = fromList |
203 | 205 | ("ce", "\1071\1079\1076\1072\1088\1093\1086"), ("nl", "Auteurs"), |
204 | 206 | ("es", "Autores"), ("eu", "Egile"), ("fr", "Auteurs"), |
205 | 207 | ("cs", "Autor"), ("br", "Aozer")] |
206 | ||
208 | ||
207 | 209 | makeHeader :: FullWikiUrl -> Maybe String -> [Char] |
208 | 210 | makeHeader fullurl m |
209 | 211 | = let mmm = m >>= (\ yy -> Map.lookup yy langau) in |
222 | 224 | "\\label{Contributors}\n" ++ |
223 | 225 | "\\begin{longtable}{rp{0.6\\linewidth}}\n" ++ |
224 | 226 | "\\textbf{Edits}&\\textbf{User}\\\\\n" |
225 | ||
227 | ||
226 | 228 | makeHeaderHTML :: FullWikiUrl -> Maybe String -> [Char] |
227 | 229 | makeHeaderHTML fullurl m |
228 | 230 | = let mmm = m >>= (\ yy -> Map.lookup yy langau) in |
238 | 240 | _ -> "Contributors") |
239 | 241 | ++ |
240 | 242 | "</h2>\n" ++ |
241 | "<table rules=\"all\">" ++ | |
242 | "<tr><td>Edits</td><td>User</td></tr>" | |
243 | "<table rules=\"all\">" ++ "<tr><td>Edits</td><td>User</td></tr>" | |
243 | 244 | |
244 | 245 | makeBody :: (Ord t) => Map t Contributor -> URL -> [Char] |
245 | 246 | makeBody m u = concat (map go (sort (toList m))) |
251 | 252 | (concat |
252 | 253 | (map chartransforlink (exportURL (u{url_path = (fun (href v))})))) |
253 | 254 | ++ "}{" ++ (concat (map chartrans (name v))) ++ "}\\\\\n" |
254 | ||
255 | ||
255 | 256 | makeBodyHTML :: (Ord t) => Map t Contributor -> URL -> [Char] |
256 | 257 | makeBodyHTML m u = concat (map go (sort (toList m))) |
257 | 258 | where fun ('/' : xs) = xs |
258 | 259 | fun xs = xs |
259 | 260 | go (_, v) |
260 | = "<tr><td>"++(show (edits v)) ++ | |
261 | "</td><td><a href=\"" ++ | |
262 | (((exportURL (u{url_path = (fun (href v))})))) | |
263 | ++ "\">" ++ (concat (map chartrans (name v))) ++ "</a></td></tr>" | |
264 | ||
261 | = "<tr><td>" ++ | |
262 | (show (edits v)) ++ | |
263 | "</td><td><a href=\"" ++ | |
264 | (((exportURL (u{url_path = (fun (href v))})))) ++ | |
265 | "\">" ++ (concat (map chartrans (name v))) ++ "</a></td></tr>" | |
265 | 266 | |
266 | 267 | makeContributors :: Maybe URL -> ImperativeMonad (String, String) |
267 | 268 | makeContributors uu |
277 | 278 | = case (deepGet2 "html" (parseit minparsers yy)) of |
278 | 279 | ((Environment Tag (TagAttr _ m) _) : []) -> Map.lookup "lang" m |
279 | 280 | _ -> Nothing |
280 | return ( | |
281 | ((makeHeader (fullUrl st) lang) ++ | |
282 | (makeBody (myaudict) (url (fullUrl st))) ++ | |
283 | "\\end{longtable}\n" ++ "\\pagebreak\n"),(makeHeaderHTML (fullUrl st) lang) ++ | |
284 | (makeBodyHTML (myaudict) (url (fullUrl st))) ++"</table>") | |
285 | ||
281 | return | |
282 | (((makeHeader (fullUrl st) lang) ++ | |
283 | (makeBody (myaudict) (url (fullUrl st))) ++ | |
284 | "\\end{longtable}\n" ++ "\\pagebreak\n"), | |
285 | (makeHeaderHTML (fullUrl st) lang) ++ | |
286 | (makeBodyHTML (myaudict) (url (fullUrl st))) ++ "</table>") | |
287 | ||
286 | 288 | parseUrl :: String -> ImperativeMonad FullWikiUrl |
287 | 289 | parseUrl u |
288 | 290 | = case analyseFull u of |
289 | 291 | Just x -> return x |
290 | 292 | _ -> throwError (WikiUrlParseError u) |
291 | ||
293 | ||
292 | 294 | getContributors :: |
293 | 295 | [String] -> |
294 | ImperativeMonad | |
295 | (([(Map String Contributor)], [(Maybe String)])) | |
296 | ImperativeMonad (([(Map String Contributor)], [(Maybe String)])) | |
296 | 297 | getContributors u |
297 | 298 | = do st <- get |
298 | stz<- liftIO imperativeStateZero | |
299 | put stz {counter=counter st} | |
299 | stz <- liftIO imperativeStateZero | |
300 | put stz{counter = counter st} | |
300 | 301 | au <- mapM go u |
301 | 302 | newState <- get |
302 | 303 | put st |
307 | 308 | sst <- get |
308 | 309 | put sst{fullUrl = purl} |
309 | 310 | addContributors (lemma purl) (Just (UrlAnalyse.url purl)) |
310 | ||
311 | ||
311 | 312 | addContributors :: |
312 | 313 | [Char] -> Maybe URL -> ImperativeMonad ((Maybe String)) |
313 | 314 | addContributors theLemma uu |
314 | 315 | = do sst <- get |
315 | 316 | let st = fullUrl sst |
316 | 317 | thetheLemma <- liftIO $ return theLemma |
317 | thetheHostname <- liftIO $ return (hostname st) | |
318 | thetheHostname <- liftIO $ return (hostname st) | |
318 | 319 | thetheUU <- liftIO $ return uu |
319 | 320 | au <- (liftIO ((((fun sst)) thetheLemma thetheHostname thetheUU))) |
320 | 321 | :: ImperativeMonad ((Map String Contributor, Maybe String)) |
322 | 323 | lic <- liftIO (((return . snd)) au) |
323 | 324 | put sst{audict = auau : (audict sst)} |
324 | 325 | return lic |
325 | where fun ssst lem ho uuu | |
326 | where fun ssst lem ho uuu | |
326 | 327 | = do xx <- simpleContributors lem ho uuu ssst |
327 | 328 | return (Data.List.foldl runGo2 Map.empty xx, myvalue xx) |
328 | 329 | runGo2 mymap (author, theHref, theEdits, _) |
332 | 333 | = case yy of |
333 | 334 | [(_, __, _, Just lic)] -> (Just lic) |
334 | 335 | _ -> Nothing |
335 | ||
336 | ||
336 | 337 | infun :: |
337 | 338 | String -> |
338 | 339 | String -> Integer -> Maybe Contributor -> Maybe Contributor |
340 | 341 | = case xx of |
341 | 342 | Nothing -> Just Contributor{name = a, href = h, edits = e} |
342 | 343 | Just old -> Just old{edits = (edits old) + e} |
343 | ||
344 | ||
344 | 345 | noinclude :: t -> String -> ImperativeMonad [Char] |
345 | 346 | noinclude wurl |
346 | 347 | = runAction "<noinclude>" "</noinclude>" (zeroAction wurl) |
347 | ||
348 | ||
348 | 349 | runActions :: FullWikiUrl -> String -> ImperativeMonad String |
349 | 350 | runActions fu text |
350 | 351 | = do x <- noinclude wurl text |
351 | y <- runAction "{{Druckversion Kapitel|" "}}" (chapterAction3 fu) | |
352 | x | |
352 | y <- runAction "{{Druckversion Kapitel|" "}}" (chapterAction3 fu) x | |
353 | 353 | v <- runAction "{{Print entry|" "}}" (chapterAction2 fu) y |
354 | 354 | z <- runAction "{{print entry|" "}}" (chapterAction2 fu) v |
355 | 355 | a <- runAction "{{Print entry|" "}}" (chapterAction wurl) z |
365 | 365 | _ <- runAction "{{:" "}}" (qIncludeAction wurl) i |
366 | 366 | runAction "{{:" "}}" (qIncludeAction wurl) j |
367 | 367 | where wurl = wikiUrl fu |
368 | ||
368 | ||
369 | 369 | runBookActions :: FullWikiUrl -> String -> ImperativeMonad String |
370 | 370 | runBookActions fu text |
371 | 371 | = do x <- noinclude wurl text |
372 | 372 | runAction "[[" "]]" (qBookIncludeAction wurl) x |
373 | 373 | where wurl = wikiUrl fu |
374 | ||
375 | 374 | |
376 | 375 | replacements :: String -> String |
377 | 376 | replacements x |
386 | 385 | "\n|") |
387 | 386 | "{{Fortran:Vorlage: Table}}" |
388 | 387 | "prettytable" |
389 | ||
388 | ||
390 | 389 | loadPlain :: ImperativeState -> Maybe URL -> ImperativeMonad [Char] |
391 | 390 | loadPlain st uu |
392 | 391 | = let fu = fullUrl st in |
405 | 404 | runBookActions fu p |
406 | 405 | _ -> throwError (DownloadError (lemma fu) (exportURL (url fu))) |
407 | 406 | |
408 | ||
409 | 407 | loadHTML :: ImperativeState -> ImperativeMonad String |
410 | 408 | loadHTML st |
411 | 409 | = let fu = fullUrl st in |
412 | do midst<-get | |
413 | (res,newst)<-liftIO (runStateT (runExceptT (loadPlain st (Just (url fu)))) midst) | |
414 | case res of | |
415 | Right _->put newst | |
416 | _ -> return () | |
410 | do midst <- get | |
411 | (res, newst) <- liftIO | |
412 | (runStateT (runExceptT (loadPlain st (Just (url fu)))) midst) | |
413 | case res of | |
414 | Right _ -> put newst | |
415 | _ -> return () | |
417 | 416 | x <- liftIO (geturl2 (exportURL (url fu))) |
418 | 417 | return . decode . unpack $ x |
419 | ||
420 | 418 | |
421 | 419 | loadBookHTML :: ImperativeState -> ImperativeMonad String |
422 | 420 | loadBookHTML st |
423 | 421 | = let fu = fullUrl st in |
424 | do midst<-get | |
425 | (res,newst)<-liftIO (runStateT (runExceptT (loadPlain st (Just (url fu)))) midst) | |
426 | case res of | |
427 | Right _->put newst | |
428 | _ -> return () | |
422 | do midst <- get | |
423 | (res, newst) <- liftIO | |
424 | (runStateT (runExceptT (loadPlain st (Just (url fu)))) midst) | |
425 | case res of | |
426 | Right _ -> put newst | |
427 | _ -> return () | |
429 | 428 | x <- liftIO (geturl2 (exportURL (url fu))) |
430 | 429 | return . decode . unpack $ x |
431 | ||
432 | 430 | |
433 | 431 | loadMediaWiki :: ImperativeState -> ImperativeMonad [Char] |
434 | 432 | loadMediaWiki st |
457 | 455 | throwError |
458 | 456 | (DownloadError (lemma fu) (exportURL (url fu))) |
459 | 457 | _ -> throwError (DownloadError (lemma fu) (exportURL (url fu))) |
460 | ||
458 | ||
461 | 459 | {-DHUN| main function to download images form the wiki. It takes the RunMode as only parameter. In case of HTML the html from the website is loaded. In all other cases the wiki source text is downloaded. In case of ExpandedTemplates the templates are also expanded by mediawiki running on the wiki website DHUN-} |
462 | ||
460 | ||
463 | 461 | load :: RunMode -> ImperativeMonad String |
464 | 462 | load theRunMode |
465 | 463 | = do st <- get |
0 | 0 | {-DHUN| module for logging currently unused DHUN-} |
1 | 1 | module Logger where |
2 | 2 | import ImperativeState |
3 | ||
3 | ||
4 | 4 | {-DHUN| initilaize logging facility DHUN-} |
5 | ||
5 | ||
6 | 6 | minInit :: ImperativeMonad () |
7 | 7 | minInit = return () |
3 | 3 | import Network.URI |
4 | 4 | import Data.List.Split |
5 | 5 | import qualified Data.Map as Map |
6 | ||
6 | ||
7 | 7 | {-DHUN| Wikimedia project prefixes so 'de' from de.wikipedia.org DHUN-} |
8 | ||
8 | ||
9 | 9 | foreignPrefixes :: [String] |
10 | 10 | foreignPrefixes |
11 | 11 | = ["af", "als", "an", "roa-rup", "ast", "gn", "av", "ay", "az", |
31 | 31 | "kn", "ks", "ml", "mr", "ne", "new", "or", "pa", "sa", "si", "ta", |
32 | 32 | "te", "km", "lo", "th", "am", "ti", "iu", "chr", "ko", "ja", "zh", |
33 | 33 | "wuu", "lzh", "yue"] |
34 | ||
34 | ||
35 | 35 | {-DHUN| Wikimedia projects for interwiki links [[w:Foobar]] means en.wikipedia.org/wiki/Foobar DHUN-} |
36 | ||
36 | ||
37 | 37 | multilangwikis :: [(String, String)] |
38 | 38 | multilangwikis |
39 | 39 | = ([("w", "wikipedia"), ("wikipedia", "wikipedia"), |
47 | 47 | ++ |
48 | 48 | [("wikispecies", "wikispecies"), ("v", "wikiversity"), |
49 | 49 | ("wikiversity", "wikiversity")]) |
50 | ||
50 | ||
51 | 51 | {-DHUN| Wikimedia projects for interwiki links to wikis which only have got a single language version DHUN-} |
52 | ||
52 | ||
53 | 53 | singlelangwikis :: [(String, String)] |
54 | 54 | singlelangwikis |
55 | 55 | = [("wikimedia", "wikimediafoundation"), |
56 | 56 | ("foundation", "wikimediafoundation"), |
57 | 57 | ("wmf", "wikimediafoundation"), ("mw", "mediawiki")] |
58 | ||
58 | ||
59 | 59 | {-DHUN| Wikimedia projects for interwiki links to wikis which only have got a single language version DHUN-} |
60 | ||
60 | ||
61 | 61 | wikimediasingellangwikis :: [(String, String)] |
62 | 62 | wikimediasingellangwikis |
63 | 63 | = [("commons", "commons"), ("metawikipedia", "meta"), |
64 | 64 | ("meta", "meta"), ("m", "meta"), ("incubator", "incubator"), |
65 | 65 | ("strategy", "strategy")] |
66 | ||
66 | ||
67 | 67 | {-DHUN| All Wikis DHUN-} |
68 | ||
68 | ||
69 | 69 | allwikis :: [(String, String)] |
70 | 70 | allwikis |
71 | 71 | = multilangwikis ++ singlelangwikis ++ wikimediasingellangwikis |
72 | ||
72 | ||
73 | 73 | {-DHUN| Prefixes for including images in wikis DHUN-} |
74 | ||
74 | ||
75 | 75 | imgtags :: [[Char]] |
76 | 76 | imgtags |
77 | 77 | = [map toLower x | |
95 | 95 | "\1057\1083\1080\1082\1072", "Bild", "\3652\3615\3621\3660", |
96 | 96 | "Talaksan", "Dosya", "\1496\1506\1511\1506", "\22294\20687", |
97 | 97 | "\22294\20687"]] |
98 | ||
98 | ||
99 | 99 | {-DHUN| lower Greek letter for HTML entity to latex so δ to \\delta DHUN-} |
100 | ||
100 | ||
101 | 101 | lowergreek :: [[Char]] |
102 | 102 | lowergreek |
103 | 103 | = ["alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", |
104 | 104 | "theta", "iota", "kappa", "lambda", "mu", "nu", "xi", "pi", "rho", |
105 | 105 | "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega"] |
106 | ||
106 | ||
107 | 107 | {-DHUN| not Greek but to be processed like the Greeks above DHUN-} |
108 | ||
108 | ||
109 | 109 | notsogreek :: [[Char]] |
110 | 110 | notsogreek = ["cap", "cup", "sim"] |
111 | ||
111 | ||
112 | 112 | {-DHUN| Full list of characters with Greek like processing explained above DHUN-} |
113 | ||
113 | ||
114 | 114 | greek :: [[Char]] |
115 | 115 | greek |
116 | 116 | = concat |
119 | 119 | (x : xs) -> return [(toUpper x) : xs] |
120 | 120 | [] -> return []) |
121 | 121 | ++ lowergreek ++ notsogreek |
122 | ||
122 | ||
123 | 123 | {-DHUN| HTML entities to latex DHUN-} |
124 | ||
124 | ||
125 | 125 | htmlchars :: [([Char], [Char])] |
126 | 126 | htmlchars |
127 | 127 | = [("thetasym", "{\\mbox{$\\vartheta$}}"), |
504 | 504 | ("zwnj", "{}"), ("zwj", ""), ("lrm", ""), ("rlm", ""), |
505 | 505 | ("gt", "{\\mbox{$>$}}"), ("Mu", "{\\mbox{$\\Mu$}}"), |
506 | 506 | ("#151", "{--}"), ("Nu", "{\\mbox{$\\Nu$}}"), ("frasl", "\8260")] |
507 | ||
507 | ||
508 | 508 | {-DHUN| get latex representation of HTML entity like & DHUN-} |
509 | ||
509 | ||
510 | 510 | getHtmlChar :: String -> String |
511 | 511 | getHtmlChar x = Map.findWithDefault [] x (Map.fromList htmlchars) |
512 | ||
512 | ||
513 | 513 | {-DHUN| a function to remove the taling print version string from urls (usually on wikibooks). This function is needed so the name of the book without the tailing print version string will be printed on the titlepage of the book DHUN-} |
514 | ||
514 | ||
515 | 515 | removePrintVersion :: [Char] -> [Char] |
516 | 516 | removePrintVersion lem |
517 | 517 | = fun |
518 | ["/Druckversion","/ Druckversion","/Print version", "/Complete Wikibook", "/All Chapters", | |
519 | "/Print Version", "/print version", "/Printable version", | |
520 | "/The Whole Book", "/print", ": Druckversion"] | |
518 | ["/Druckversion", "/ Druckversion", "/Print version", | |
519 | "/Complete Wikibook", "/All Chapters", "/Print Version", | |
520 | "/print version", "/Printable version", "/The Whole Book", | |
521 | "/print", ": Druckversion"] | |
521 | 522 | lem |
522 | 523 | where fun (y : ys) x |
523 | 524 | = case splitOn y x of |
524 | 525 | (z : _) -> fun ys z |
525 | 526 | _ -> [] |
526 | 527 | fun [] x = x |
527 | ||
528 | ||
528 | 529 | {-DHUN| Nearly all HTML tags DHUN-} |
529 | ||
530 | ||
530 | 531 | goodtags1 :: [[Char]] |
531 | 532 | goodtags1 |
532 | 533 | = ["includeonly", "references", "blockquote", "noinclude", |
536 | 537 | "strike", "object", "input", "center", "legend", "iframe", "small", |
537 | 538 | "video", "audio", "style", "input", "label", "tbody", "thead", |
538 | 539 | "title", "frame", "param", "base", "area", "font", "code", "span", |
539 | "abbr", "body", "link", "menu", "math", "meta", "samp", "cite", "head", | |
540 | "html", "poem", "form", "cite", "ref", "div", "pre", "sub", "sup", | |
541 | "big", "del", "map", "bdo", "var", "dfn", "kbd", "col", "ins", "bdi", | |
542 | "dir", "img", "h1", "h2", "h3", "h4", "h5", "h6", "li", "ul", "ol", | |
543 | "tt", "dd", "dl", "dt", "hr", "em", "b", "i", "s", "u", "p", "q", | |
544 | "a"] | |
545 | ||
540 | "abbr", "body", "link", "menu", "math", "meta", "samp", "cite", | |
541 | "head", "html", "poem", "form", "cite", "ref", "div", "pre", "sub", | |
542 | "sup", "big", "del", "map", "bdo", "var", "dfn", "kbd", "col", | |
543 | "ins", "bdi", "dir", "img", "h1", "h2", "h3", "h4", "h5", "h6", | |
544 | "li", "ul", "ol", "tt", "dd", "dl", "dt", "hr", "em", "b", "i", | |
545 | "s", "u", "p", "q", "a"] | |
546 | ||
546 | 547 | {-DHUN| HTML tags for tables rows in tables and so on, only lower case DHUN-} |
547 | ||
548 | ||
548 | 549 | tabletags :: [[Char]] |
549 | 550 | tabletags = ["table", "td", "th", "tr"] |
550 | ||
551 | ||
551 | 552 | {-DHUN| HTML tags for tables rows in tables and so on, lower case as well as upper case DHUN-} |
552 | ||
553 | ||
553 | 554 | listOfTableTags :: [[Char]] |
554 | 555 | listOfTableTags = tabletags ++ (map (map toUpper) tabletags) |
555 | ||
556 | ||
556 | 557 | {-DHUN| All HTML tags DHUN-} |
557 | ||
558 | ||
558 | 559 | listOfTags :: [[Char]] |
559 | 560 | listOfTags = goodtags1 ++ (map (map toUpper) goodtags1) |
560 | ||
561 | ||
561 | 562 | {-DHUN| Character escaping from Unicode to latex DHUN-} |
562 | ||
563 | ||
563 | 564 | chartrans :: Char -> String |
564 | 565 | chartrans '\'' = "\\textquotesingle{}" |
565 | 566 | chartrans '[' = "{$\\text{[}$}" |
578 | 579 | chartrans '<' = "<{}" |
579 | 580 | chartrans '>' = ">{}" |
580 | 581 | chartrans '-' = "-{}" |
581 | chartrans ' ' = "\\," | |
582 | chartrans '\8239' = "\\," | |
582 | 583 | chartrans c = c : [] |
583 | ||
584 | ||
584 | 585 | {-DHUN| Character escaping from Unicode to web links inside latex with the URL package DHUN-} |
585 | ||
586 | ||
586 | 587 | chartransforlink :: Char -> String |
587 | 588 | chartransforlink '&' = "\\&" |
588 | 589 | chartransforlink '%' = "\\%" |
0 | 0 | {-DHUN| A module providing all necessary types of a parse tree for the representation of source written in the MediaWiki markup language DHUN-} |
1 | 1 | module MediaWikiParseTree where |
2 | 2 | import Data.Map.Strict (Map) |
3 | ||
3 | ||
4 | 4 | {-DHUN| Lists the different environment possible in the mediawiki markup language and example of an environment is an HTML tag with everything included between the its opening and closing tags. Her is is called Tag DHUN-} |
5 | ||
5 | ||
6 | 6 | data EnvType = Wikilink |
7 | 7 | | IncludeOnly |
8 | 8 | | ImageMap |
50 | 50 | | Parameter |
51 | 51 | | NumHtml |
52 | 52 | deriving (Show, Eq, Read) |
53 | ||
53 | ||
54 | 54 | {-DHUN| A type representing a node in a the parse tree. Open and Close represent opening and closing bracket. They will be replace by environments (look at 'Environment' in this data structure) before the parse tree is processed further. The C represents a single character. S stands for a String. Tab is a special elements used like the tabulator character for line breaking purposes. Quad is similar to that. The Item... data construction are for processing itemization enumerations and so on and well be replace be environments before further processing DHUN-} |
55 | ||
55 | ||
56 | 56 | data Anything a = Environment EnvType StartData [Anything a] |
57 | 57 | | Open Int EnvType StartData Int |
58 | 58 | | Close Int EnvType |
63 | 63 | | Quad |
64 | 64 | | Tab |
65 | 65 | deriving (Show, Eq, Read) |
66 | ||
66 | ||
67 | 67 | {-DHUN| represents the result of a parser for the begin of an environment. A parser for an opening HTML tag is an example. TagAttr means tag with attributes. And is thus a string for the element and a map from string to string for it attributes. Str is a String. And Attr is key value pair and used for attribute in tables. DHUN-} |
68 | ||
68 | ||
69 | 69 | data StartData = Str [Char] |
70 | 70 | | TagAttr String (Map String String) |
71 | 71 | | Attr (String, String) |
13 | 13 | import Data.Maybe |
14 | 14 | import Network.URI |
15 | 15 | import WikiHelper |
16 | ||
16 | 17 | {-DHUN| flattens out the HTML 'a' tags. That is it replaces each 'a' element with its content. That is everything that is between its opening and its closing tag. The only parameter of this function is the parse tree to be processed. This function returns the parse tree with the 'a' HTML elements flattened |DHUN-} |
17 | ||
18 | ||
18 | 19 | reducea :: [Anything Char] -> [Anything Char] |
19 | 20 | reducea ll = concat (map go ll) |
20 | where | |
21 | go :: Anything Char -> [Anything Char] | |
21 | where go :: Anything Char -> [Anything Char] | |
22 | 22 | go (Environment Tag (TagAttr "a" _) l) = l |
23 | 23 | go (Environment x y l) = [Environment x y (reducea l)] |
24 | 24 | go x = [x] |
25 | ||
25 | ||
26 | 26 | {-DHUN| flattens out the HTML 'div' tags, which have got a 'class' attributes present with the value 'noresize'. That is it replaces each 'div' element with the properties mentioned above by its content. That is everything that is between its opening and its closing tag. The only parameter of this function is the parse tree to be processed. This function returns the parse tree with the 'div' HTML elements with the properties given above flattened |DHUN-} |
27 | ||
27 | ||
28 | 28 | reducediv :: [Anything Char] -> [Anything Char] |
29 | 29 | reducediv ll = concat (map go ll) |
30 | where | |
31 | go :: Anything Char -> [Anything Char] | |
30 | where go :: Anything Char -> [Anything Char] | |
32 | 31 | go (Environment Tag (TagAttr "div" m) l) |
33 | 32 | | (Map.lookup "class" m) == (Just "noresize") = l |
34 | 33 | go (Environment x y l) = [Environment x y (reducea l)] |
35 | 34 | go x = [x] |
36 | 35 | |
37 | 36 | {-DHUN| a function to get HTML elements out of a parse tree. The first parameter is name of the tag to be looked for. The second parameter is a key in the attributes of that element that has to be present for the element to be considered. The third parameter is a value that has to be found under the given key in the attributes of the element, in order for the element to be part of the returned output list. The fourth parameter is the parse tree. |DHUN-} |
38 | ||
37 | ||
39 | 38 | deepGet :: |
40 | 39 | [Char] -> String -> [Char] -> [Anything a] -> [Anything a] |
41 | 40 | deepGet t k v ll = concat $ map go ll |
46 | 45 | go _ = [] |
47 | 46 | |
48 | 47 | {-DHUN| flattens a part of the parse tree, that is takes the characters found in the tree and turns them into a string dropping all other information in the tree DHUN-} |
49 | ||
48 | ||
50 | 49 | deepFlatten :: [Anything t] -> [Anything t] |
51 | 50 | deepFlatten ll = concat $ map go ll |
52 | where go (Environment _ _ l) = (deepFlatten l) | |
51 | where go (Environment HtmlChar s l) = [Environment HtmlChar s l] | |
52 | go (Environment _ _ l) = (deepFlatten l) | |
53 | 53 | go x = [x] |
54 | 54 | |
55 | 55 | {-DHUN| converts a wiki source document to a parse tree to be converted to LaTeX be treeToLaTeX3. The first parameter is that list of parsers. That is the list of environments to be recognized by the parser. This is usually either only plain HTML, or HTML mixed with mediawiki markup. The second parameter is the source code to be parsed. This function returns a parse tree |DHUN-} |
56 | ||
56 | ||
57 | 57 | parseit :: [MyParser Char] -> String -> [Anything Char] |
58 | 58 | parseit pp x |
59 | 59 | = (parseit2 |
66 | 66 | [])) |
67 | 67 | ('\n' : x)) |
68 | 68 | |
69 | ||
70 | 69 | {-DHUN| helper function of parseit, not to be called directly. This function takes the parser for the grammar, in the sense of a parser of the parsec library, (so that is the final combined parser) as first argument. It takes the source code to be parsed (usually HTML of mediawiki markup mixed with HTML) as second and runs the parser on the source code. It returns the resulting parse tree. |DHUN-} |
71 | ||
70 | ||
72 | 71 | parseit2 :: Parser [Anything Char] -> String -> [Anything Char] |
73 | 72 | parseit2 p input |
74 | 73 | = case (parse p "" input) of |
76 | 75 | Right x -> x |
77 | 76 | |
78 | 77 | {-DHUN| A parser for one particular element of the mediawiki grammar DHUN-} |
79 | ||
78 | ||
80 | 79 | data MyParser tok = MyParser{bad :: |
81 | 80 | [Anything tok] -> GenParser tok () (), |
82 | 81 | start :: MyStack tok -> GenParser tok () StartData, |
84 | 83 | self :: EnvType, |
85 | 84 | modify :: StartData -> [Anything tok] -> [Anything tok], |
86 | 85 | reenv :: EnvType -> EnvType} |
87 | ||
86 | ||
88 | 87 | {-DHUN| the stack of the parser. See documentation on MyStackFrame in this module for details. DHUN-} |
89 | ||
88 | ||
90 | 89 | type MyStack tok = [MyStackFrame tok] |
91 | ||
90 | ||
92 | 91 | {-DHUN| A stack frame on the parsers stack. A stack frame represents an environment that was opened. So something like an opening HTML tag. The value endparser. Is a parser that should match exactly the closing bracket of the environment. The value startdata is the return value of the start parser of the enviroment. This stack frame is created immediately after the startparser of an environment has matches and is given the return value of that startparser as startdata. The value environment is the environment this stack frame belongs to in case of an HTML tag this would be Tag. See the type EnvType in the module MediaWikiParseTree for a full list of possible environments. In the parse tree that is finally generated each node with arbitrarily nested children has got an EnvType associated with it. The badparser is a parser that is repeatedly tries while processing the current environment, if it matches the current environment is considered to be invalid. Backtracking occurs an the characters currently under consideration are parser (possible very) different manner. So with badparser you can signal that an environment is invalid if a creating parser (the badparser) matches within the environment. The parserenumber is just a number that uniquely identifies each parsers in the list of parsers active for the whole parsing process. These numbers are usually generated by the remake function. The nestingdepth is a bit of a misnomer. It is a unique number for each stack frame. So each stack frame that is newly created gets a different number. DHUN-} |
93 | ||
92 | ||
94 | 93 | data MyStackFrame tok = MyStackFrame{endparser :: |
95 | 94 | GenParser tok () (), |
96 | 95 | startdata :: StartData, environment :: EnvType, |
97 | 96 | badparser :: [Anything tok] -> GenParser tok () (), |
98 | 97 | parsernumber :: Int, nestingdepth :: Int} |
99 | ||
98 | ||
100 | 99 | {-DHUN| takes a result returned by parseAnything3 and converts it into a parse tree for further processing. The only purpose of this function is to convert the notation of bracket. The bracket a denoted by the Open and Close parse tree elements of the type Anything be the function parseAnything3. The need to be converted to environments, that is node with children in the parse tree. The environments will be denoted by the 'Environment' data constructors of the type Anything|DHUN-} |
101 | ||
100 | ||
102 | 101 | decon2 :: |
103 | 102 | (Monad m) => |
104 | 103 | [(a1, MyParser a)] -> m (t, [Anything a]) -> m [Anything a] |
105 | 104 | decon2 l x |
106 | 105 | = do (_, s) <- x |
107 | 106 | return (findMatchingBrackets l (reverse s)) |
108 | ||
107 | ||
109 | 108 | {-DHUN| Usually bracket can be close in an order different from the reverse one in which they were opened. But for certain environments this is not allowed, and the order has to be strictly followed. This value is the list of those environments. DHUN-} |
110 | ||
109 | ||
111 | 110 | preserving :: [EnvType] |
112 | 111 | preserving |
113 | 112 | = [Math, Source, Comment, Gallery, NoWiki, NoInclude, BigMath, |
114 | 113 | Preformat, TableCap, TableRowSep, TableColSep, TableHeadColSep, |
115 | 114 | TemplateInside, Wikitable, TableTag] |
116 | ||
115 | ||
117 | 116 | {-DHUN| Helper function for parseAnyClosingBracket. Should not be called directly. The only parameter is the current parser stack. Returns the depth on the stack of the stack frame whose closing bracket matched. DHUN-} |
118 | ||
117 | ||
119 | 118 | parseAnyClosingBracket2 :: |
120 | 119 | (Show tok, Eq tok, Read tok) => |
121 | 120 | MyStack tok -> GenParser tok () Integer |
122 | 121 | parseAnyClosingBracket2 |
123 | 122 | = (parseAnyClosingBracket3 0) . (List.map (\ x -> endparser x)) |
124 | ||
123 | ||
125 | 124 | {-DHUN| Helper function for parseAnyClosingBracket. Should not be called directly. The this will take stack frame by stack frame of the stack. The first parameter is an integer and indicates how many stack frames have allready been take of the stack. Returns the depth on the stack of the stack frame whose closing bracket matched. DHUN-} |
126 | ||
125 | ||
127 | 126 | parseAnyClosingBracket3 :: |
128 | 127 | Integer -> [GenParser tok () ()] -> GenParser tok () Integer |
129 | 128 | parseAnyClosingBracket3 i (x : xs) |
132 | 131 | return i) |
133 | 132 | <|> (parseAnyClosingBracket3 (i + 1) xs) |
134 | 133 | parseAnyClosingBracket3 _ [] = pzero |
135 | ||
134 | ||
136 | 135 | {-DHUN| Remove the n'th elements from a list. n is an integer an given is first parameter. The list to be processed is given as second parameter DHUN-} |
137 | ||
136 | ||
138 | 137 | myremove :: Integer -> [a] -> [a] |
139 | 138 | myremove _ [] = [] |
140 | 139 | myremove 0 (_ : xs) = myremove (-1) xs |
141 | 140 | myremove i (x : xs) = x : (myremove (i - 1) xs) |
142 | ||
141 | ||
143 | 142 | {-DHUN| Enumerates a list of parsers. needed to prepare a list of parsers for use with parseAnything2 DHUN-} |
144 | ||
143 | ||
145 | 144 | remake :: [a] -> [(Int, a)] |
146 | 145 | remake x = zip (iterate (+ 1) 0) x |
147 | ||
146 | ||
148 | 147 | {-DHUN| predicate to test whether the current stack-frame-index is in a stack. The first parameter is the stack-frame-index the second parameter is the stack. Returns true if it could be found DHUN-} |
149 | ||
148 | ||
150 | 149 | isin :: |
151 | 150 | (Show tok, Eq tok, Read tok) => Int -> (MyStack tok) -> Bool |
152 | 151 | isin i s = i `elem` (List.map nestingdepth s) |
153 | ||
152 | ||
154 | 153 | {-DHUN| tries to parse exactly one specific opening bracket. The parameters are identical to the ones of parseAnyOpeningBracket, which the exception of the second parameter. The second parameter is that parser for the bracket currently under consideration. This function 'catch' the BBad 'exception' 'thrown' by parseAnything. In this case it returns pzero, causing the parser to backtrack. DHUN-} |
155 | ||
154 | ||
156 | 155 | parseSpecificOpeningBracket :: |
157 | 156 | (Show tok, Eq tok, Read tok) => |
158 | 157 | Int -> |
173 | 172 | case r of |
174 | 173 | BBad (ss, y) -> if isin v ss then pzero else return (BBad (ss, y)) |
175 | 174 | _ -> return r |
176 | ||
175 | ||
177 | 176 | {-DHUN| tried to parse any of the opening brackets given by the parsers passed as the third parameter. The first parameter is the stack number (see documentation of the parseAnything function for more details on that). The second parameter is the current parser stack (see documentation of the parseAnything2 function for more details on that). The forth parameter is the list of parsers to b taken into account by the general parsing process. In contrast the third parameter contains only a list of parsers that are allowed to match in the current step of the parsing process. The fifth parameter is the current parser output stream. That is the information returned by the parser up to the current step. It is kind of an accumulator for parser results. DHUN-} |
178 | ||
177 | ||
179 | 178 | parseAnyOpeningBracket :: |
180 | 179 | (Show tok, Eq tok, Read tok) => |
181 | 180 | Int -> |
188 | 187 | parseAnyOpeningBracket v s (x : xs) l i |
189 | 188 | = try (parseSpecificOpeningBracket v x s l i) <|> |
190 | 189 | parseAnyOpeningBracket v s xs l i |
191 | ||
190 | ||
192 | 191 | {-DHUN| insert a list of closing brackets into the parser output stream. Later on matching opening and closing brackets will be found and parse tree will be generated this way. The first parameter is an integer it is the number of brackets which should be close. The second parameter is the parser stack. It says which kind of brackets should be closed. It returns a parser output stream just containing the opening brackets DHUN-} |
193 | ||
192 | ||
194 | 193 | generateClosingBrackets :: |
195 | 194 | (Num a, Eq a, Show tok, Eq tok, Read tok) => |
196 | 195 | a -> MyStack tok -> [Anything tok] |
200 | 199 | = (Close (length xs) (environment s)) : |
201 | 200 | (generateClosingBrackets (mi - 1) xs) |
202 | 201 | generateClosingBrackets _ _ = [] |
203 | ||
202 | ||
204 | 203 | {-DHUN| insert a list of opening brackets into the parser output stream. Later on matching opening and closing brackets will be found and parse tree will be generated this way. The first parameter is an integer it is the number of brackets which should be opened. The second parameter is the parser stack. It says which kind of brackets should be opened. It returns a parser output stream just containing the opening brackets DHUN-} |
205 | ||
204 | ||
206 | 205 | generateOpeningBrackets :: |
207 | 206 | (Num a, Eq a, Show tok, Eq tok, Read tok) => |
208 | 207 | a -> MyStack tok -> [Anything tok] |
211 | 210 | = (Open (length xs) (environment s) (startdata s) (parsernumber s)) |
212 | 211 | : (generateOpeningBrackets (mi - 1) xs) |
213 | 212 | generateOpeningBrackets _ _ = [] |
214 | ||
213 | ||
215 | 214 | {-DHUN| a version of either with the difference that the left and right types are the same. RRight stands for sucessful parse of a token. BBad stands for parse failure in which the next possiblity is tried. DHUN-} |
216 | ||
215 | ||
217 | 216 | data Either2 b = RRight b |
218 | 217 | | BBad b |
219 | ||
218 | ||
220 | 219 | {-DHUN| tries to match any of the currently possible closing brackets. Brackets closed in a order different from the reverse to the one in which they were opened are usually possible. And exception are the so called preserving elements, they can only be closed in the correct order. In the general case of this kind of crossbracketing it is necessary to add some opening and closing brackets to the output stream and to take the right stack frame of the stack keeping all others on it in the right order. DHUN-} |
221 | ||
220 | ||
222 | 221 | parseAnyClosingBracket :: |
223 | 222 | (Show tok, Eq tok, Read tok) => |
224 | 223 | Int -> |
245 | 244 | ((generateClosingBrackets mi s) ++ |
246 | 245 | (reverse (generateOpeningBrackets mi ss)))) |
247 | 246 | ++ i) |
248 | ||
247 | ||
249 | 248 | {-DHUN| this function tries to match the bad parser of the current environment. If it matches it returns BBAD, otherwise it returns RRight. See also comment of the parserAnything function. DHUN-} |
250 | ||
249 | ||
251 | 250 | trybadparser :: |
252 | 251 | (Show tok, Eq tok, Read tok) => |
253 | 252 | MyStack tok -> |
260 | 259 | [] -> return False |
261 | 260 | if x == True then return (BBad (s, [])) else |
262 | 261 | return (RRight (s, [])) |
263 | ||
262 | ||
264 | 263 | {-DHUN| this is the main function of the parser which calls itself recursively. To run the parser you should not call this function directly but rather use parseAnything2. The parameter are the same as the parameters to the parameters to the function parseAnything2. So look at the documentation for their meaning. But there is one additional parameter namely the first one. This is the stack frame number, it is increase for every stack frame and never decreased this way each stack frame has got a unique identifier this way. An other difference is the return type this function returns the always same type as the function parseAnything2, but wrapped in the Either2 monad. The Either2 monad has an additional bit to signal whether the parse was good or bad. The bad bit signals so called bad parser of the current environment has matched signaling that the environment is to be considered invalid, and we have to backtrack. But what we do here is just stop paring and return a successful parse, but return the bad flag as set in the return type. This will propagate through to the parser that was trying to open the environment that caused the current problem. If that recognizes the problem it can flag the environment as failed by returning pzero. So again here we just return BBad. So we kind of throw an exception. And in parseSpecificOpeningBracket we will catch BBad and signal the actual problem by returning pzero and that way kick of backtracking. DHUN-} |
265 | ||
264 | ||
266 | 265 | parseAnything :: |
267 | 266 | (Show tok, Eq tok, Read tok) => |
268 | 267 | Int -> |
296 | 295 | i) |
297 | 296 | <|> return (BBad (s, i)) |
298 | 297 | [] -> pzero |
299 | ||
298 | ||
300 | 299 | {-DHUN| This is the main entry point of the parse. So the function you need to call when you want to convert the source into the parse tree. The first parameter is the stack. I usually should contain only and exactly the root stack frame. The second parameter is an enumerated list of parsers. You usually take a list like the list parsers from this module and enumerate it by running remake on it. So thats the list of environments the parser is able to recognize. The third parameter is the parse results that have been created so far. Since we are just starting the parse this has to be the empty list. The function returns a parser. See the documentation of the parse module for more details on the type GenParser. Roughly is means that this parser takes an input list whose items are of type tok and that the parsers does not have state (hence the void type '()') and return a tuple. The first elements of that tuple is a stack. Where a new stack frame is added to the stack for each new environment that is found to open by the parser, like an opening HTML tag. And the second elements of the tuple is a parse tree, that is a list of parse tree elements, where each parse tree element may contain sublists of parse tree element. This way it is a real tree. DHUN-} |
301 | ||
300 | ||
302 | 301 | parseAnything2 :: |
303 | 302 | (Show tok, Eq tok, Read tok) => |
304 | 303 | MyStack tok -> |
309 | 308 | case x of |
310 | 309 | BBad (_, b) -> return (s, b) |
311 | 310 | RRight b -> return b |
312 | ||
311 | ||
313 | 312 | {-DHUN| this find the matching closing bracket for an opening bracket. It returns a tuple. Its first element is the environment created form the given opening bracket together with its closing bracket and the content between opening and closing bracket. Its second elements is the remaining list of parsed elements, after the closing bracket. This list does still contain the Open and Close parser tree elements for opening and closing bracket, and those are not yet converted to environments. This function takes the list of parse tree elements after the opening bracket as first input parameter. It takes the index of the parser that created the opening bracket as second input parameter. That is the index created by the remake function in this module. It takes the size of the stack at the time when the opening bracket was found as third input parameter. It takes the EnvType of the environment of the opening bracket as fourth input parameter. It takes the StartData parse result associated with the opening bracket as fifth parameter. The sixth parameter is the accumulator an should be the empty list when calling this function externally. The seventh parameter is the remaining parse tree after the opening bracket without the opening and closing brackets converted to environments DHUN-} |
314 | ||
313 | ||
315 | 314 | findMatchingClosingBracket :: |
316 | 315 | [(a1, MyParser a)] -> |
317 | 316 | Int -> |
331 | 330 | = (Environment ((reenv (snd (l !! n))) e) s |
332 | 331 | ((modify (snd (l !! n))) s (findMatchingBrackets l (reverse b))), |
333 | 332 | []) |
334 | ||
333 | ||
335 | 334 | {-DHUN| run findMatchingBrackets on the inner part environment given as second parameter. This function takes the enumerated list of parsers created by remake as first input parameter DHUN-} |
336 | ||
335 | ||
337 | 336 | findMatchingBrackets2 :: |
338 | 337 | [(a1, MyParser a)] -> Anything a -> Anything a |
339 | 338 | findMatchingBrackets2 l (Environment e s b) |
340 | 339 | = Environment e s (findMatchingBrackets l b) |
341 | 340 | findMatchingBrackets2 l xs |
342 | 341 | = Environment Root (Str "") (findMatchingBrackets l [xs]) |
343 | ||
342 | ||
344 | 343 | {-DHUN| the parser (Anything3) creates a list of parser elements, which is not a tree. The environments which will form the nodes with children in the final tree are denoted as opening and closing brackets in this list. This function takes that list as second input parameter, finds matching pairs of opening and closing brackets and converts the to environments. The opening an closing brackets are already balanced because of the way Anything3 works, that means there is exactly one matching closing bracket for each opening one and they open and close in to proper order. This function takes the enumerated list of parsers as first input parameter, that is the same list also given to the function Anything. DHUN-} |
345 | ||
344 | ||
346 | 345 | findMatchingBrackets :: |
347 | 346 | [(a1, MyParser a)] -> [Anything a] -> [Anything a] |
348 | 347 | findMatchingBrackets l ((Open i e s n) : xs) |
350 | 349 | (findMatchingBrackets2 l t) : (findMatchingBrackets l xxs) |
351 | 350 | findMatchingBrackets l (x : xs) = x : (findMatchingBrackets l xs) |
352 | 351 | findMatchingBrackets _ [] = [] |
353 | ||
352 | ||
354 | 353 | {-DHUN| a list of environments. Most parsers use this list as their 'allowed' variable. Meaning that the parser is only allowed to match within the environments given in the 'allowed' list DHUN-} |
355 | ||
354 | ||
356 | 355 | everywhere :: [EnvType] |
357 | 356 | everywhere = [Wikitable] ++ everywheretbl |
358 | ||
357 | ||
359 | 358 | {-DHUN| list containing the Italic and Bold environments, see documentation on the list 'everywhere' in this module DHUN-} |
360 | ||
359 | ||
361 | 360 | bi :: [EnvType] |
362 | 361 | bi = [Italic, Bold] |
363 | ||
362 | ||
364 | 363 | {-DHUN| list containing the same environments as the list 'everywhere' except the Wikitable environment, see documentation on the list 'everywhere' in this module DHUN-} |
365 | ||
364 | ||
366 | 365 | everywheretbl :: [EnvType] |
367 | 366 | everywheretbl = bi ++ everywherebi |
368 | ||
367 | ||
369 | 368 | {-DHUN| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold and Italic, see documentation on the list 'everywhere' in this module DHUN-} |
370 | ||
369 | ||
371 | 370 | everywherebi :: [EnvType] |
372 | 371 | everywherebi = basicwhere ++ [Wikilink] |
373 | ||
372 | ||
374 | 373 | {-DHUN| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold, Italic and Wikilink see documentation on the list 'everywhere' in this module DHUN-} |
375 | ||
374 | ||
376 | 375 | basicwhere :: [EnvType] |
377 | 376 | basicwhere = [Link] ++ verybasicwhere |
378 | ||
377 | ||
379 | 378 | {-DHUN| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold, Italic, Wikilink and Link see documentation on the list 'everywhere' in this module DHUN-} |
380 | ||
379 | ||
381 | 380 | verybasicwhere :: [EnvType] |
382 | 381 | verybasicwhere |
383 | 382 | = [Itemgroup, Root, Wikiheading, TableCap, Chapter, Tag, TableTag, |
384 | 383 | TemplateInside, IncludeOnly] |
385 | ||
384 | ||
386 | 385 | {-DHUN| list containing the environments where the parser linkp is allowed to match. Currently this seems to be everywhere. So this possibly can go away DHUN-} |
387 | ||
386 | ||
388 | 387 | everywherel :: [EnvType] |
389 | 388 | everywherel = basicwhere ++ bi ++ [Wikitable, Wikilink] |
390 | ||
389 | ||
391 | 390 | {-DHUN| list containing the same environments as the list 'everywhere' except the Link environment see documentation on the list 'everywhere' in this module DHUN-} |
392 | ||
391 | ||
393 | 392 | everywherel2 :: [EnvType] |
394 | 393 | everywherel2 = verybasicwhere ++ bi ++ [Wikitable, Wikilink] |
395 | ||
394 | ||
396 | 395 | {-DHUN| list containing the TableColSep and TableHeadColSep environments, see documentation on the list 'everywhere' in this module. the environments mean table header column separator and table column separator DHUN-} |
397 | ||
396 | ||
398 | 397 | wikilinkwhere :: [EnvType] |
399 | 398 | wikilinkwhere = [TableColSep, TableHeadColSep] |
400 | ||
399 | ||
401 | 400 | {-DHUN| the list of parsers needed for processing the HTML output created by MediaWiki DHUN-} |
402 | ||
401 | ||
403 | 402 | minparsers :: [MyParser Char] |
404 | 403 | minparsers |
405 | 404 | = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser, |
406 | 405 | pagebreakp, htmlcharp, p302p, attrp, greekp, brparser, mytablep, |
407 | mytrsepp, mytcolsepp, mytcapp, mythcolsepp, annop, tagparser, tagparserp, | |
408 | tagparser2, tagparser2p, tagparsert, tagparsert, tagparser2t, | |
409 | tagparsers, stagparser, commentp, numhtmlp, rtagparser] | |
410 | ||
406 | mytrsepp, mytcolsepp, mytcapp, mythcolsepp, annop, tagparser, | |
407 | tagparserp, tagparser2, tagparser2p, tagparsert, tagparsert, | |
408 | tagparser2t, tagparsers, stagparser, commentp, numhtmlp, | |
409 | rtagparser] | |
410 | ||
411 | 411 | {-DHUN| the list of parsers for parsing contributor information for images on MediaWiki websites DHUN-} |
412 | ||
412 | ||
413 | 413 | htmlminparsers :: [MyParser Char] |
414 | 414 | htmlminparsers |
415 | 415 | = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser, |
418 | 418 | tagparserp, tagparser2, tagparser2p, tagparsert, tagparsert, |
419 | 419 | tagparser2t, tagparsers, stagparser, commentp, numhtmlp, |
420 | 420 | rtagparser] |
421 | ||
421 | ||
422 | 422 | {-DHUN| the list of parsers needed for processing the image title description so that is the content a html attibutes DHUN-} |
423 | ||
423 | ||
424 | 424 | imgparsers :: [MyParser Char] |
425 | 425 | imgparsers = [supp, subp, htmlcharp, p302p, greekp, numhtmlp] |
426 | ||
426 | ||
427 | 427 | {-DHUN| the list of parsers needed for parsing source code in the MediaWiki markup language DHUN-} |
428 | ||
428 | ||
429 | 429 | parsers :: [MyParser Char] |
430 | 430 | parsers |
431 | 431 | = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser, |
439 | 439 | tagparser, tagparser2, tagparsert, tagparser2t, tagparsers, |
440 | 440 | stagparser, commentp, reservedp, templatewikilinkp, wikiparamp, |
441 | 441 | wikitemplatep, templateinsideverbatimp, templateinsidep, |
442 | gallerywlp, imagemapwlp, hdevlinep, linkp, linkp2, presectionp, presectionpt, | |
443 | numhtmlp, rtagparser] | |
444 | ||
442 | gallerywlp, imagemapwlp, hdevlinep, linkp, linkp2, presectionp, | |
443 | presectionpt, numhtmlp, rtagparser] | |
444 | ||
445 | 445 | {-DHUN| the parser record, with some fields initialized with default values DHUN-} |
446 | ||
446 | ||
447 | 447 | baseParser :: MyParser tok |
448 | 448 | baseParser |
449 | 449 | = MyParser{bad = \ _ -> pzero, start = undefined, |
450 | 450 | end = \ _ -> return (), allowed = everywhere, self = undefined, |
451 | 451 | modify = \ _ x -> x, reenv = id} |
452 | ||
452 | ||
453 | 453 | {-DHUN| this function takes a string and returns a parser that matches any of the given strings DHUN-} |
454 | ||
454 | ||
455 | 455 | oneOfTheStrings :: [String] -> Parser String |
456 | 456 | oneOfTheStrings (x : xs) = try (string x) <|> (oneOfTheStrings xs) |
457 | 457 | oneOfTheStrings [] = pzero |
458 | ||
458 | ||
459 | 459 | {-DHUN| parses a HTML entity, that is a character escaped with the ampersand notation DHUN-} |
460 | ||
460 | ||
461 | 461 | htmlcharp :: MyParser Char |
462 | 462 | htmlcharp |
463 | 463 | = baseParser{start = |
468 | 468 | return (Str (s)), |
469 | 469 | allowed = Preformat : SpaceIndent : NoWiki : everywhere, |
470 | 470 | self = HtmlChar} |
471 | ||
471 | ||
472 | 472 | {-DHUN| parses a HTML entity, escaped with numeric ampersand notation DHUN-} |
473 | ||
473 | ||
474 | 474 | numhtmlp :: MyParser Char |
475 | 475 | numhtmlp |
476 | 476 | = baseParser{start = |
484 | 484 | return (Str (s)), |
485 | 485 | allowed = Preformat : SpaceIndent : NoWiki : everywhere, |
486 | 486 | self = NumHtml} |
487 | ||
487 | ||
488 | 488 | {-DHUN| parses a HTML #302 character. Special parser needed since it acts on the receding character DHUN-} |
489 | ||
489 | ||
490 | 490 | p302p :: MyParser Char |
491 | 491 | p302p |
492 | 492 | = baseParser{start = |
496 | 496 | return (Str (c : [])), |
497 | 497 | self = P302} |
498 | 498 | |
499 | ||
500 | ||
501 | 499 | {-DHUN| parses a HTML &sub entity. DHUN-} |
502 | ||
500 | ||
503 | 501 | subp :: MyParser Char |
504 | 502 | subp |
505 | 503 | = baseParser{start = |
509 | 507 | _ <- string ";" |
510 | 508 | return (Str (c : [])), |
511 | 509 | self = Sub} |
512 | ||
510 | ||
513 | 511 | {-DHUN| parses a HTML &sup entity. DHUN-} |
514 | ||
512 | ||
515 | 513 | supp :: MyParser Char |
516 | 514 | supp |
517 | 515 | = baseParser{start = |
521 | 519 | _ <- string ";" |
522 | 520 | return (Str (c : [])), |
523 | 521 | self = Sup} |
524 | ||
522 | ||
525 | 523 | {-DHUN| parses the start of a new URL. That is the place where a page begin that was downloaded from an URL different from the previous one DHUN-} |
526 | ||
524 | ||
527 | 525 | dhunurlp :: MyParser Char |
528 | 526 | dhunurlp |
529 | 527 | = baseParser{start = |
532 | 530 | return (Str ""), |
533 | 531 | end = \ _ -> string "\n" >> return (), self = DhunUrl, |
534 | 532 | allowed = [Root, Tag]} |
535 | ||
533 | ||
536 | 534 | {-DHUN| parses a Greek HTML entity. So a Greek letter or something similar DHUN-} |
537 | ||
535 | ||
538 | 536 | greekp :: MyParser Char |
539 | 537 | greekp |
540 | 538 | = baseParser{start = |
544 | 542 | _ <- char ';' |
545 | 543 | return (Str (s)), |
546 | 544 | self = Greek} |
547 | ||
545 | ||
548 | 546 | {-DHUN| parses the mediawiki math tag. That is a latex formula in the wiki DHUN-} |
549 | ||
547 | ||
550 | 548 | mathp :: MyParser Char |
551 | 549 | mathp |
552 | 550 | = (maketagparser ["math"]){allowed = |
553 | 551 | SpaceIndent : everywhere ++ wikilinkwhere, |
554 | 552 | self = Math} |
555 | ||
553 | ||
556 | 554 | annop :: MyParser Char |
557 | 555 | annop |
558 | 556 | = (maketagparser ["annotation"]){allowed = |
559 | SpaceIndent : everywhere ++ wikilinkwhere, | |
560 | self = Math, reenv = const Tag} | |
557 | SpaceIndent : everywhere ++ wikilinkwhere, | |
558 | self = Math, reenv = const Tag} | |
561 | 559 | |
562 | 560 | {-DHUN| parses a new chapter heading DHUN-} |
563 | ||
561 | ||
564 | 562 | chapterp :: MyParser Char |
565 | 563 | chapterp |
566 | 564 | = baseParser{start = |
571 | 569 | string "dhunincludechaper" >> return (Str ""), |
572 | 570 | end = \ _ -> string "/dhunincludechaper" >> return (), |
573 | 571 | self = Chapter} |
574 | ||
572 | ||
575 | 573 | {-DHUN| parses a horizontal dividing line DHUN-} |
576 | ||
574 | ||
577 | 575 | hdevlinep :: MyParser Char |
578 | 576 | hdevlinep |
579 | 577 | = baseParser{start = |
582 | 580 | skipMany (string "-") |
583 | 581 | return (Str ""), |
584 | 582 | allowed = [Root], self = HDevLine} |
585 | ||
583 | ||
586 | 584 | {-DHUN| parses the mediawiki 'nowiki' tag DHUN-} |
587 | ||
585 | ||
588 | 586 | nowikip :: MyParser Char |
589 | 587 | nowikip |
590 | 588 | = baseParser{start = \ _ -> string "<nowiki>" >> return (Str ""), |
591 | 589 | end = \ _ -> string "</nowiki>" >> return (), |
592 | 590 | allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent], |
593 | 591 | self = NoWiki} |
594 | ||
592 | ||
595 | 593 | {-DHUN| parses the mediawiki 'noinclude' tag DHUN-} |
596 | ||
594 | ||
597 | 595 | noincludep :: MyParser Char |
598 | 596 | noincludep |
599 | 597 | = baseParser{start = |
603 | 601 | try (string "</noinclude>" >> return ()) <|> |
604 | 602 | lookAhead (eof >> return ()), |
605 | 603 | self = NoInclude} |
606 | ||
604 | ||
607 | 605 | {-DHUN| parses the mediawiki 'includeonly' tag DHUN-} |
608 | ||
606 | ||
609 | 607 | includep :: MyParser Char |
610 | 608 | includep |
611 | 609 | = baseParser{start = |
612 | 610 | \ _ -> string "<includeonly>" >> return (Str ""), |
613 | 611 | end = \ _ -> string "</includeonly>" >> return (), |
614 | 612 | self = IncludeOnly} |
615 | ||
613 | ||
616 | 614 | {-DHUN| parses the mediawiki 'onlyinclude' tag DHUN-} |
617 | ||
615 | ||
618 | 616 | includep2 :: MyParser Char |
619 | 617 | includep2 |
620 | 618 | = baseParser{start = |
621 | 619 | \ _ -> string "<onlyinclude>" >> return (Str ""), |
622 | 620 | end = \ _ -> string "</onlyinclude>" >> return (), |
623 | 621 | self = IncludeOnly} |
624 | ||
622 | ||
625 | 623 | {-DHUN| parses the mediawiki 'gallery' tag DHUN-} |
626 | ||
624 | ||
627 | 625 | galleryp :: MyParser Char |
628 | 626 | galleryp |
629 | 627 | = baseParser{start = |
634 | 632 | _ <- char '>' |
635 | 633 | return (Str ""), |
636 | 634 | end = \ _ -> string "</gallery>" >> return (), self = Gallery} |
637 | ||
635 | ||
638 | 636 | {-DHUN| parses a wikilink inside a gallery DHUN-} |
639 | ||
637 | ||
640 | 638 | gallerywlp :: MyParser Char |
641 | 639 | gallerywlp |
642 | 640 | = baseParser{bad = |
650 | 648 | return (), |
651 | 649 | modify = \ _ x -> dropWhile (== (C ' ')) x, allowed = [Gallery], |
652 | 650 | self = Wikilink} |
653 | ||
651 | ||
654 | 652 | {-DHUN| parses the mediawiki 'imagemap' tag DHUN-} |
655 | ||
653 | ||
656 | 654 | imagemapp :: MyParser Char |
657 | 655 | imagemapp |
658 | 656 | = baseParser{start = |
663 | 661 | _ <- char '>' |
664 | 662 | return (Str ""), |
665 | 663 | end = \ _ -> string "</imagemap>" >> return (), self = ImageMap} |
666 | ||
664 | ||
667 | 665 | {-DHUN| parses a wikilink inside and imagemap DHUN-} |
668 | ||
666 | ||
669 | 667 | imagemapwlp :: MyParser Char |
670 | 668 | imagemapwlp |
671 | 669 | = baseParser{bad = |
684 | 682 | do _ <- lookAhead (string "\n") |
685 | 683 | return (), |
686 | 684 | allowed = [ImageMap], self = Wikilink} |
687 | ||
685 | ||
688 | 686 | {-DHUN| matches a sequence of arbitrary characters up to the character (an excluding it) where one of the strings given as first parameter matches DHUN-} |
689 | ||
687 | ||
690 | 688 | myany :: [String] -> Parser String |
691 | 689 | myany x |
692 | 690 | = do b <- (try (lookAhead (oneOfTheStrings x) >> return False)) <|> |
696 | 694 | cs <- (myany x) |
697 | 695 | return (c : cs) |
698 | 696 | else return "" |
699 | ||
697 | ||
700 | 698 | {-DHUN| parses the mediawiki template DHUN-} |
701 | ||
699 | ||
702 | 700 | wikitemplatep :: MyParser Char |
703 | 701 | wikitemplatep |
704 | 702 | = baseParser{start = |
711 | 709 | everywhere ++ |
712 | 710 | wikilinkwhere ++ [TemplateInsideVerbatim, SpaceIndent], |
713 | 711 | self = Template} |
714 | ||
712 | ||
715 | 713 | {-DHUN| a special stack frame for parsing the inside of a template DHUN-} |
716 | ||
714 | ||
717 | 715 | madframe :: MyStackFrame Char |
718 | 716 | madframe |
719 | 717 | = MyStackFrame{endparser = |
720 | 718 | (try (lookAhead (oneOfTheStrings ["}}", "|", "="]))) >> return (), |
721 | 719 | startdata = Str "", environment = TemplateInside, |
722 | 720 | badparser = \ _ -> pzero, parsernumber = 0, nestingdepth = 0} |
723 | ||
721 | ||
724 | 722 | {-DHUN| parses the inside of a mediawiki template DHUN-} |
725 | ||
723 | ||
726 | 724 | templateinsidep :: MyParser Char |
727 | 725 | templateinsidep |
728 | 726 | = baseParser{start = |
746 | 744 | else return (Str ""), |
747 | 745 | end = \ _ -> lookAhead (oneOfTheStrings ["|", "}}"]) >> return (), |
748 | 746 | allowed = [Template], self = TemplateInside} |
749 | ||
747 | ||
750 | 748 | {-DHUN| parses the inside of a mediawiki template, it is parser verbatim that means inner structures are parsed, but returned as plain characters in the parse tree. Needed for templates use for source codes DHUN-} |
751 | ||
749 | ||
752 | 750 | templateinsideverbatimp :: MyParser Char |
753 | 751 | templateinsideverbatimp |
754 | 752 | = baseParser{start = |
759 | 757 | (trim gg) `elem` |
760 | 758 | ["HaskellGHCiExample", |
761 | 759 | "\"HaskellGHCi\",Visual Basic .NET: Vorlage:Code", |
762 | "C++-Programmierung/ Vorlage:Syntax","syntax","Syntax", | |
763 | "C++-Programmierung/ Vorlage:Code","BigJava", | |
764 | "LaTeX/Usage", "LaTeX/LaTeX", "Latex Index"] | |
760 | "C++-Programmierung/ Vorlage:Syntax", "syntax", | |
761 | "Syntax", "C++-Programmierung/ Vorlage:Code", | |
762 | "BigJava", "LaTeX/Usage", "LaTeX/LaTeX", | |
763 | "Latex Index"] | |
765 | 764 | then |
766 | 765 | do _ <- string "|" |
767 | 766 | try |
776 | 775 | end = \ _ -> lookAhead (oneOfTheStrings ["|", "}}"]) >> return (), |
777 | 776 | allowed = [Template], self = TemplateInsideVerbatim, |
778 | 777 | reenv = const TemplateInside} |
779 | ||
778 | ||
780 | 779 | {-DHUN| parses the inside of a template parameter DHUN-} |
781 | ||
780 | ||
782 | 781 | wikiparamp :: MyParser Char |
783 | 782 | wikiparamp |
784 | 783 | = baseParser{start = \ _ -> string "{{{" >> return (Str ""), |
785 | 784 | end = \ _ -> string "}}}" >> return (), |
786 | 785 | allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent], |
787 | 786 | self = Parameter} |
788 | ||
787 | ||
789 | 788 | {-DHUN| parses a wikilink DHUN-} |
790 | ||
789 | ||
791 | 790 | wikilinkp :: MyParser Char |
792 | 791 | wikilinkp |
793 | 792 | = baseParser{start = \ _ -> string "[[" >> return (Str ""), |
812 | 811 | else pzero, |
813 | 812 | allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent], |
814 | 813 | self = Wikilink} |
815 | ||
814 | ||
816 | 815 | {-DHUN| parses a wikilink template for wikipedia links DHUN-} |
817 | ||
816 | ||
818 | 817 | templatewikilinkp :: MyParser Char |
819 | 818 | templatewikilinkp |
820 | 819 | = baseParser{start = \ _ -> string "{{w|" >> return (Str ""), |
821 | 820 | end = \ _ -> string "}}" >> return (), |
822 | 821 | allowed = everywhere ++ wikilinkwhere, |
823 | 822 | modify = \ _ x -> (C 'w') : (C ':') : x, self = Wikilink} |
824 | ||
823 | ||
825 | 824 | {-DHUN| parses a link DHUN-} |
826 | ||
825 | ||
827 | 826 | linkp :: MyParser Char |
828 | 827 | linkp |
829 | 828 | = baseParser{bad = |
838 | 837 | return (Str (if s == "//" then "http://" else s)), |
839 | 838 | end = \ _ -> string "]" >> return (), allowed = everywherel, |
840 | 839 | self = Link} |
841 | ||
840 | ||
842 | 841 | {-DHUN| parses a link, in contrast to linkp this does not match inside links and does not require the square bracket notation DHUN-} |
843 | ||
842 | ||
844 | 843 | linkp2 :: MyParser Char |
845 | 844 | linkp2 |
846 | 845 | = baseParser{start = |
849 | 848 | return (Str s), |
850 | 849 | end = \ _ -> lookAhead (oneOf " \n\r\t<>|\"") >> return (), |
851 | 850 | allowed = everywherel2, self = Link2, reenv = const Link} |
852 | ||
851 | ||
853 | 852 | {-DHUN| parses a wikitable DHUN-} |
854 | ||
853 | ||
855 | 854 | wikitablep :: MyParser Char |
856 | 855 | wikitablep |
857 | 856 | = baseParser{start = |
858 | 857 | \ _ -> |
859 | do _ <- try (char '\n') <|> return '\n' | |
858 | do _ <- try (char '\n') <|> return '\n' | |
860 | 859 | skipMany (char ' ') |
861 | 860 | _ <- try (string "{|") <|> try (string "{{(!}}") |
862 | 861 | s <- many (noneOf "\n") |
868 | 867 | _ <- try (string "|}") <|> try (string "{{!)}}") |
869 | 868 | return (), |
870 | 869 | self = Wikitable} |
871 | ||
870 | ||
872 | 871 | {-DHUN| parses a heading, can be a chapter heading as well as a section heading and so on DHUN-} |
873 | ||
872 | ||
874 | 873 | wikiheadingp :: MyParser Char |
875 | 874 | wikiheadingp |
876 | 875 | = baseParser{bad = |
895 | 894 | _ <- notFollowedBy (char '=') |
896 | 895 | return (), |
897 | 896 | self = Wikiheading, allowed = everywherel2} |
898 | ||
897 | ||
899 | 898 | {-DHUN| parses an italic text DHUN-} |
900 | ||
899 | ||
901 | 900 | italicp :: MyParser Char |
902 | 901 | italicp |
903 | 902 | = baseParser{start = |
913 | 912 | return (), |
914 | 913 | allowed = SpaceIndent : Wikitable : Bold : everywherebi, |
915 | 914 | self = Italic} |
916 | ||
915 | ||
917 | 916 | {-DHUN| parses a bold text DHUN-} |
918 | ||
917 | ||
919 | 918 | boldp :: MyParser Char |
920 | 919 | boldp |
921 | 920 | = baseParser{start = |
930 | 929 | return (), |
931 | 930 | allowed = SpaceIndent : Wikitable : Italic : everywherebi, |
932 | 931 | self = Bold} |
933 | ||
932 | ||
934 | 933 | {-DHUN| parses a table caption DHUN-} |
935 | ||
934 | ||
936 | 935 | tablecapp :: MyParser Char |
937 | 936 | tablecapp |
938 | 937 | = baseParser{bad = |
964 | 963 | notFollowedBy . char $ '}' |
965 | 964 | return ()), |
966 | 965 | allowed = [Wikitable], self = TableCap} |
967 | ||
966 | ||
968 | 967 | {-DHUN| parses a table caption with additional parameter given to the beginning of the caption element in the wiki source DHUN-} |
969 | ||
968 | ||
970 | 969 | tablecapp2 :: MyParser Char |
971 | 970 | tablecapp2 |
972 | 971 | = baseParser{start = |
989 | 988 | <|> |
990 | 989 | (try ((string "||" >> return ())) <|> (string "!!" >> return ()))), |
991 | 990 | allowed = [Wikitable], self = TableCap} |
992 | ||
991 | ||
993 | 992 | {-DHUN| parses a table caption DHUN-} |
994 | ||
993 | ||
995 | 994 | tablecapp3 :: MyParser Char |
996 | 995 | tablecapp3 |
997 | 996 | = baseParser{start = |
1002 | 1001 | _ <- notFollowedBy (oneOf "-}") |
1003 | 1002 | return (Str "2"), |
1004 | 1003 | allowed = [Wikitable], self = TableCap} |
1005 | ||
1004 | ||
1006 | 1005 | {-DHUN| parses a table row separator DHUN-} |
1007 | ||
1006 | ||
1008 | 1007 | rowsepp :: MyParser Char |
1009 | 1008 | rowsepp |
1010 | 1009 | = baseParser{start = |
1015 | 1014 | s <- many (noneOf "\n") |
1016 | 1015 | return (Str s), |
1017 | 1016 | allowed = [Wikitable], self = TableRowSep} |
1018 | ||
1017 | ||
1019 | 1018 | {-DHUN| parses a table column separator, with additional parameters parser to the beginning of the environment. This parser actually parses only the beginning elements and whats inside it See DHUN-} |
1020 | ||
1019 | ||
1021 | 1020 | colsepp :: MyParser Char |
1022 | 1021 | colsepp |
1023 | 1022 | = baseParser{bad = |
1052 | 1051 | _ <- notFollowedBy (oneOf "}|") |
1053 | 1052 | return ()), |
1054 | 1053 | allowed = [Wikitable], self = TableColSep} |
1055 | ||
1054 | ||
1056 | 1055 | {-DHUN| parses a column separator without anything inside it DHUN-} |
1057 | ||
1056 | ||
1058 | 1057 | colsepp2 :: MyParser Char |
1059 | 1058 | colsepp2 |
1060 | 1059 | = baseParser{start = |
1069 | 1068 | do _ <- try (string "||") <|> try (string "{{!!}}") |
1070 | 1069 | return (Str "2"), |
1071 | 1070 | allowed = [Wikitable], self = TableColSep} |
1072 | ||
1071 | ||
1073 | 1072 | {-DHUN| parses a table header column separator, with additional parameters parser to the beginning of the environment. This parser actually parses only the beginning elements and whats inside it See DHUN-} |
1074 | ||
1073 | ||
1075 | 1074 | headcolsepp :: MyParser Char |
1076 | 1075 | headcolsepp |
1077 | 1076 | = baseParser{bad = |
1104 | 1103 | notFollowedBy (oneOf "-}|") |
1105 | 1104 | return (), |
1106 | 1105 | allowed = [Wikitable], self = TableHeadColSep} |
1107 | ||
1106 | ||
1108 | 1107 | {-DHUN| parses a header column separator without anything inside it DHUN-} |
1109 | ||
1108 | ||
1110 | 1109 | headcolsepp2 :: MyParser Char |
1111 | 1110 | headcolsepp2 |
1112 | 1111 | = baseParser{start = |
1126 | 1125 | attrinside x = try (string "&" >> return '&') <|> (noneOf x) |
1127 | 1126 | |
1128 | 1127 | {-DHUN| matches a key value pair . So an attribute of an HTML element DHUN-} |
1129 | ||
1128 | ||
1130 | 1129 | attr :: GenParser Char () ([Char], [Char]) |
1131 | 1130 | attr |
1132 | 1131 | = do skipMany1 (oneOf " \n") |
1133 | 1132 | k <- many1 (try (alphaNum) <|> oneOf ":-") |
1134 | 1133 | v <- try |
1135 | 1134 | (do skipMany (oneOf " \n") |
1136 | _ <- char '=' | |
1135 | _ <- char '=' | |
1137 | 1136 | skipMany (oneOf " \n") |
1138 | 1137 | vv <- try |
1139 | 1138 | (do _ <- try (char '"') |
1154 | 1153 | return vv) |
1155 | 1154 | <|> return "" |
1156 | 1155 | return (k, v) |
1157 | ||
1156 | ||
1158 | 1157 | {-DHUN| Matches a key value pair. So an attribute of an HTML element DHUN-} |
1159 | ||
1158 | ||
1160 | 1159 | attrns :: GenParser Char u ([Char], [Char]) |
1161 | 1160 | attrns |
1162 | 1161 | = do k <- many1 (try (alphaNum) <|> oneOf ":-") |
1176 | 1175 | return vv |
1177 | 1176 | _ <- try (many (oneOf " \n")) <|> return [] |
1178 | 1177 | return (k, v) |
1179 | ||
1178 | ||
1180 | 1179 | {-DHUN| Matches a list of key value pairs . So all attributes of an HTML element DHUN-} |
1181 | ||
1180 | ||
1182 | 1181 | attrp :: MyParser Char |
1183 | 1182 | attrp |
1184 | 1183 | = baseParser{start = |
1187 | 1186 | return (Attr atr), |
1188 | 1187 | allowed = [TableHeadColSep, TableColSep, TableCap], |
1189 | 1188 | self = Attribute} |
1190 | ||
1189 | ||
1191 | 1190 | {-DHUN| Parses the HTML 'pre' tag DHUN-} |
1192 | ||
1191 | ||
1193 | 1192 | prep :: MyParser Char |
1194 | 1193 | prep |
1195 | 1194 | = baseParser{start = |
1210 | 1209 | _ <- char '>' |
1211 | 1210 | return (), |
1212 | 1211 | allowed = everywhere, self = Preformat} |
1213 | ||
1212 | ||
1214 | 1213 | {-DHUN| Parses the HTML 'br' tag DHUN-} |
1215 | ||
1214 | ||
1216 | 1215 | brparser :: MyParser Char |
1217 | 1216 | brparser |
1218 | 1217 | = baseParser{start = |
1227 | 1226 | _ <- char '>' |
1228 | 1227 | return (TagAttr "br" Map.empty), |
1229 | 1228 | allowed = SpaceIndent : everywhere, self = Tag} |
1230 | ||
1229 | ||
1231 | 1230 | {-DHUN| Returns a Parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser does not match inside tables. Use makettagparser for that DHUN-} |
1232 | ||
1231 | ||
1233 | 1232 | maketagparser :: [String] -> MyParser Char |
1234 | 1233 | maketagparser tags |
1235 | 1234 | = baseParser{start = |
1250 | 1249 | _ <- char '>' |
1251 | 1250 | return (), |
1252 | 1251 | allowed = SpaceIndent : everywheretbl, self = Tag} |
1253 | ||
1252 | ||
1254 | 1253 | {-DHUN| Parser for the 'meta' tag of HTML DHUN-} |
1255 | ||
1254 | ||
1256 | 1255 | metatagparser :: MyParser Char |
1257 | 1256 | metatagparser |
1258 | 1257 | = baseParser{start = |
1265 | 1264 | _ <- try (char '>') <|> (return '>') |
1266 | 1265 | return (TagAttr (t) (Map.fromList atr)), |
1267 | 1266 | allowed = SpaceIndent : everywhere, self = Tag} |
1268 | ||
1267 | ||
1269 | 1268 | {-DHUN| Parser for the !DOCTYPE tag of HTML DHUN-} |
1270 | ||
1269 | ||
1271 | 1270 | doctagparser :: MyParser Char |
1272 | 1271 | doctagparser |
1273 | 1272 | = baseParser{start = |
1278 | 1277 | _ <- char '>' |
1279 | 1278 | return (TagAttr (t) (Map.fromList [])), |
1280 | 1279 | allowed = SpaceIndent : everywhere, self = Tag} |
1281 | ||
1280 | ||
1282 | 1281 | {-DHUN| Parser for closing HTML tags that have not matching opening tag. DHUN-} |
1283 | ||
1282 | ||
1284 | 1283 | ctagparser :: [String] -> GenParser Char () () |
1285 | 1284 | ctagparser tags |
1286 | 1285 | = do _ <- string "</" |
1288 | 1287 | _ <- try (many (oneOf " \n")) <|> return [] |
1289 | 1288 | _ <- char '>' |
1290 | 1289 | return () |
1291 | ||
1290 | ||
1292 | 1291 | {-DHUN| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may also self closing like <br/> because of the tailing /. DHUN-} |
1293 | ||
1292 | ||
1294 | 1293 | maketagparser2 :: [String] -> MyParser Char |
1295 | 1294 | maketagparser2 tags |
1296 | 1295 | = baseParser{start = |
1303 | 1302 | _ <- char '>' |
1304 | 1303 | return (TagAttr (t) (Map.fromList atr)), |
1305 | 1304 | allowed = Wikitable : SpaceIndent : everywheretbl, self = Tag} |
1306 | ||
1305 | ||
1307 | 1306 | {-DHUN| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may not be self closing so not like <br/> because of the tailing /. DHUN-} |
1308 | ||
1307 | ||
1309 | 1308 | maketagparser3 :: [String] -> MyParser Char |
1310 | 1309 | maketagparser3 tags |
1311 | 1310 | = baseParser{start = |
1317 | 1316 | _ <- char '>' |
1318 | 1317 | return (TagAttr (t) (Map.fromList atr)), |
1319 | 1318 | allowed = SpaceIndent : everywheretbl, self = Tag} |
1320 | ||
1319 | ||
1321 | 1320 | {-DHUN| tags that can not be nested and are thus allowed to closed by an opening tag instead of a closing one DHUN-} |
1322 | ||
1321 | ||
1323 | 1322 | nonNestTags :: [String] |
1324 | 1323 | nonNestTags |
1325 | 1324 | = ["tt", "pre", "TT", "PRE", "b", "B", "i", "I", "sc", "SC", |
1326 | 1325 | "code", "CODE"] |
1327 | ||
1326 | ||
1328 | 1327 | {-DHUN| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only inside tables. Use maketagparser for that DHUN-} |
1329 | ||
1328 | ||
1330 | 1329 | makettagparser :: [String] -> MyParser Char |
1331 | 1330 | makettagparser tags |
1332 | 1331 | = baseParser{bad = |
1351 | 1350 | _ <- char '>' |
1352 | 1351 | return (), |
1353 | 1352 | allowed = [Wikitable], self = Tag} |
1354 | ||
1353 | ||
1355 | 1354 | {-DHUN| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may also self closing like <br/> because of the tailing /. Matches only inside tables DHUN-} |
1356 | ||
1355 | ||
1357 | 1356 | makettagparser2 :: [String] -> MyParser Char |
1358 | 1357 | makettagparser2 tags |
1359 | 1358 | = baseParser{bad = |
1372 | 1371 | _ <- char '>' |
1373 | 1372 | return (TagAttr (t) (Map.fromList atr)), |
1374 | 1373 | allowed = [Wikitable], self = Tag} |
1375 | ||
1374 | ||
1376 | 1375 | {-DHUN| maketagparser for all HTML elements see documentation on function maketagparser DHUN-} |
1377 | ||
1376 | ||
1378 | 1377 | tagparser :: MyParser Char |
1379 | 1378 | tagparser = maketagparser listOfTags |
1380 | ||
1379 | ||
1381 | 1380 | {-DHUN| maketagparser for the 'pre' HTML tag see documentation on function maketagparser DHUN-} |
1382 | ||
1381 | ||
1383 | 1382 | tagparserp :: MyParser Char |
1384 | 1383 | tagparserp = maketagparser ["pre"] |
1385 | ||
1384 | ||
1386 | 1385 | {-DHUN| maketagparser for the HTML tags for HTML tables see documentation on function maketagparser DHUN-} |
1387 | ||
1386 | ||
1388 | 1387 | tagparsert :: MyParser Char |
1389 | 1388 | tagparsert |
1390 | 1389 | = (maketagparser listOfTableTags){self = TableTag, |
1391 | 1390 | reenv = const Tag} |
1392 | ||
1391 | ||
1393 | 1392 | {-DHUN| makettagparser for all HTML elements see documentation on function makettagparser DHUN-} |
1394 | ||
1393 | ||
1395 | 1394 | ttagparser :: MyParser Char |
1396 | 1395 | ttagparser = makettagparser listOfTags |
1397 | ||
1396 | ||
1398 | 1397 | {-DHUN| makettagparser for the 'pre' HTML tag see documentation on function makettagparser DHUN-} |
1399 | ||
1398 | ||
1400 | 1399 | ttagparserp :: MyParser Char |
1401 | 1400 | ttagparserp = makettagparser ["pre"] |
1402 | ||
1401 | ||
1403 | 1402 | {-DHUN| makettagparser for the HTML tags for HTML tables see documentation on function makettagparser DHUN-} |
1404 | ||
1403 | ||
1405 | 1404 | ttagparsert :: MyParser Char |
1406 | 1405 | ttagparsert |
1407 | 1406 | = (makettagparser listOfTableTags){self = TableTag, |
1408 | 1407 | reenv = const Tag} |
1409 | ||
1408 | ||
1410 | 1409 | {-DHUN| maketagparser2 for all HTML elements see documentation on function maketagparser2 DHUN-} |
1411 | ||
1410 | ||
1412 | 1411 | tagparser2 :: MyParser Char |
1413 | 1412 | tagparser2 = maketagparser2 listOfTags |
1414 | ||
1413 | ||
1415 | 1414 | {-DHUN| maketagparser2 for the 'pre' HTML tag see documentation on function maketagparser2 DHUN-} |
1416 | ||
1415 | ||
1417 | 1416 | tagparser2p :: MyParser Char |
1418 | 1417 | tagparser2p = maketagparser2 ["pre"] |
1419 | ||
1418 | ||
1420 | 1419 | {-DHUN| maketagparser2 for the HTML tags for HTML tables see documentation on function maketagparser2 DHUN-} |
1421 | ||
1420 | ||
1422 | 1421 | tagparser2t :: MyParser Char |
1423 | 1422 | tagparser2t |
1424 | 1423 | = (maketagparser2 listOfTableTags){self = TableTag, |
1425 | 1424 | reenv = const Tag} |
1426 | ||
1425 | ||
1427 | 1426 | {-DHUN| makettagparser2 for all HTML elements see documentation on function makettagparser2 DHUN-} |
1428 | ||
1427 | ||
1429 | 1428 | ttagparser2 :: MyParser Char |
1430 | 1429 | ttagparser2 = makettagparser2 listOfTags |
1431 | ||
1430 | ||
1432 | 1431 | {-DHUN| makettagparser2 for the 'pre' HTML tag see documentation on function makettagparser2 DHUN-} |
1433 | ||
1432 | ||
1434 | 1433 | ttagparser2p :: MyParser Char |
1435 | 1434 | ttagparser2p = makettagparser2 ["pre"] |
1436 | ||
1435 | ||
1437 | 1436 | {-DHUN| makettagparser2 for the HTML tags for HTML tables see documentation on function makettagparser2 DHUN-} |
1438 | ||
1437 | ||
1439 | 1438 | ttagparser2t :: MyParser Char |
1440 | 1439 | ttagparser2t |
1441 | 1440 | = (makettagparser2 listOfTableTags){self = TableTag, |
1442 | 1441 | reenv = const Tag} |
1443 | ||
1442 | ||
1444 | 1443 | {-DHUN| a parser for mediawiki source extension tags DHUN-} |
1445 | ||
1444 | ||
1446 | 1445 | tagparsers :: MyParser Char |
1447 | 1446 | tagparsers |
1448 | 1447 | = (maketagparser ["source", "syntaxhighlight"]){self = Source} |
1449 | ||
1448 | ||
1450 | 1449 | {-DHUN| a parser for mediawiki source extension tags inside tables DHUN-} |
1451 | ||
1450 | ||
1452 | 1451 | ttagparsers :: MyParser Char |
1453 | 1452 | ttagparsers |
1454 | 1453 | = (maketagparser ["source", "syntaxhighlight"]){self = Source, |
1455 | 1454 | allowed = [Wikitable]} |
1456 | ||
1455 | ||
1457 | 1456 | {-DHUN| a parser for HTML tables DHUN-} |
1458 | ||
1457 | ||
1459 | 1458 | mytablep :: MyParser Char |
1460 | 1459 | mytablep |
1461 | 1460 | = (maketagparser ["table"]){self = TableTag, |
1462 | 1461 | reenv = const Wikitable, |
1463 | 1462 | allowed = Wikitable : SpaceIndent : everywheretbl} |
1464 | ||
1463 | ||
1465 | 1464 | {-DHUN| a parser for HTML table rows DHUN-} |
1466 | ||
1465 | ||
1467 | 1466 | mytrsepp :: MyParser Char |
1468 | 1467 | mytrsepp |
1469 | 1468 | = (maketagparser3 ["tr"]){reenv = const TableRowSep, |
1470 | 1469 | allowed = everywhere} |
1471 | ||
1470 | ||
1472 | 1471 | {-DHUN| a parser for normal HTML table cells DHUN-} |
1473 | ||
1472 | ||
1474 | 1473 | mytcolsepp :: MyParser Char |
1475 | 1474 | mytcolsepp |
1476 | 1475 | = (maketagparser3 ["td"]){reenv = const TableColSep, |
1477 | 1476 | allowed = everywhere} |
1478 | ||
1477 | ||
1479 | 1478 | {-DHUN| a parser for HTML table captions cells DHUN-} |
1480 | ||
1479 | ||
1481 | 1480 | mytcapp :: MyParser Char |
1482 | 1481 | mytcapp |
1483 | 1482 | = (maketagparser ["caption"]){reenv = const TableCap, |
1484 | 1483 | allowed = everywhere} |
1485 | ||
1484 | ||
1486 | 1485 | {-DHUN| a parser for HTML table header cells, so th tags DHUN-} |
1487 | ||
1486 | ||
1488 | 1487 | mythcolsepp :: MyParser Char |
1489 | 1488 | mythcolsepp |
1490 | 1489 | = (maketagparser3 ["th"]){reenv = const TableHeadColSep, |
1491 | 1490 | allowed = everywhere} |
1492 | ||
1491 | ||
1493 | 1492 | {-DHUN| a parser for HTML tables for html parse mode only DHUN-} |
1494 | ||
1493 | ||
1495 | 1494 | htmytablep :: MyParser Char |
1496 | 1495 | htmytablep = (maketagparser ["table"]) |
1497 | ||
1496 | ||
1498 | 1497 | {-DHUN| a parser for HTML table rows for html parse mode only DHUN-} |
1499 | ||
1498 | ||
1500 | 1499 | htmytrsepp :: MyParser Char |
1501 | 1500 | htmytrsepp = (maketagparser ["tr"]) |
1502 | ||
1501 | ||
1503 | 1502 | {-DHUN| a parser for normal HTML table cells for html parse mode only DHUN-} |
1504 | ||
1503 | ||
1505 | 1504 | htmytcolsepp :: MyParser Char |
1506 | 1505 | htmytcolsepp = (maketagparser ["td"]) |
1507 | ||
1506 | ||
1508 | 1507 | {-DHUN| a parser for HTML table captions cells for html parse mode only DHUN-} |
1509 | ||
1508 | ||
1510 | 1509 | htmytcapp :: MyParser Char |
1511 | 1510 | htmytcapp = (maketagparser ["caption"]) |
1512 | ||
1511 | ||
1513 | 1512 | {-DHUN| a parser for HTML table header cells, so th tags for html parse mode only DHUN-} |
1514 | ||
1513 | ||
1515 | 1514 | htmythcolsepp :: MyParser Char |
1516 | 1515 | htmythcolsepp = (maketagparser ["th"]) |
1517 | ||
1516 | ||
1518 | 1517 | {-DHUN| a parser for closing HTML tags which don't have an opening partner. This parser is only allowed to match within itemization enumerations etc. DHUN-} |
1519 | ||
1518 | ||
1520 | 1519 | itagparser :: MyParser Char |
1521 | 1520 | itagparser |
1522 | 1521 | = baseParser{start = |
1529 | 1528 | _ <- char '>' |
1530 | 1529 | return (TagAttr (t) (Map.fromList atr)), |
1531 | 1530 | allowed = [Itemgroup], self = Tag} |
1532 | ||
1531 | ||
1533 | 1532 | {-DHUN| a parser for closing HTML tags which don't have an opening partner DHUN-} |
1534 | ||
1533 | ||
1535 | 1534 | rtagparser :: MyParser Char |
1536 | 1535 | rtagparser |
1537 | 1536 | = baseParser{start = |
1542 | 1541 | atr <- many (try (attr)) |
1543 | 1542 | _ <- try (many (oneOf " \n")) <|> return [] |
1544 | 1543 | _ <- char '>' |
1545 | return (TagAttr ("bad"++s) (Map.fromList atr)), | |
1544 | return (TagAttr ("bad" ++ s) (Map.fromList atr)), | |
1546 | 1545 | allowed = everywhere, self = Tag} |
1547 | ||
1546 | ||
1548 | 1547 | {-DHUN| a parser for HTML opening tags which might be self closing but never have a matching closing partner DHUN-} |
1549 | ||
1548 | ||
1550 | 1549 | stagparser :: MyParser Char |
1551 | 1550 | stagparser |
1552 | 1551 | = baseParser{start = |
1559 | 1558 | _ <- char '>' |
1560 | 1559 | return (TagAttr t (Map.fromList atr)), |
1561 | 1560 | self = Tag, allowed = []} |
1562 | ||
1561 | ||
1563 | 1562 | {-DHUN| a parser for HTML page breaks DHUN-} |
1564 | ||
1563 | ||
1565 | 1564 | pagebreakp :: MyParser Char |
1566 | 1565 | pagebreakp |
1567 | 1566 | = baseParser{start = |
1569 | 1568 | do _ <- string "<div style=\"page-break-before:always\"/>" |
1570 | 1569 | return (Str ""), |
1571 | 1570 | end = \ _ -> return (), allowed = everywhere, self = PageBreak} |
1572 | ||
1571 | ||
1573 | 1572 | {-DHUN| a parser for HTML comments DHUN-} |
1574 | ||
1573 | ||
1575 | 1574 | commentp :: MyParser Char |
1576 | 1575 | commentp |
1577 | 1576 | = baseParser{start = |
1584 | 1583 | do _ <- string "-->" |
1585 | 1584 | return (), |
1586 | 1585 | self = Comment} |
1587 | ||
1586 | ||
1588 | 1587 | {-DHUN| a parser for mediawiki reserved words DHUN-} |
1589 | ||
1588 | ||
1590 | 1589 | reservedp :: MyParser Char |
1591 | 1590 | reservedp |
1592 | 1591 | = baseParser{start = |
1602 | 1601 | <|> string " " |
1603 | 1602 | return (Str s), |
1604 | 1603 | self = Reserved, allowed = SpaceIndent : everywhere} |
1605 | ||
1604 | ||
1606 | 1605 | {-DHUN| See documentation on evaluateItemgroup and itemParserLevelTwoDHUN-} |
1607 | ||
1606 | ||
1608 | 1607 | itemStartString :: Anything Char -> String |
1609 | 1608 | itemStartString (ItemStart x) = x : [] |
1610 | 1609 | itemStartString _ = "" |
1611 | ||
1610 | ||
1612 | 1611 | {-DHUN| See documentation on evaluateItemgroup and itemParserLevelTwo DHUN-} |
1613 | ||
1612 | ||
1614 | 1613 | itemStopString :: Anything Char -> String |
1615 | 1614 | itemStopString (ItemStop x) = x : [] |
1616 | 1615 | itemStopString _ = "" |
1617 | ||
1616 | ||
1618 | 1617 | {-DHUN| see documentation on evaluateItemgroup. Parser to parse the ItemStart and ItemStop elements which are essentially bracket into environments which are essentially nodes with children in the parse tree DHUN-} |
1619 | ||
1618 | ||
1620 | 1619 | itemParserLevelTwo :: MyParser (Anything Char) |
1621 | 1620 | itemParserLevelTwo |
1622 | 1621 | = MyParser{bad = \ _ -> pzero, |
1634 | 1633 | return (), |
1635 | 1634 | allowed = [Root, ItemEnv], self = ItemEnv, modify = \ _ x -> x, |
1636 | 1635 | reenv = id} |
1637 | ||
1636 | ||
1638 | 1637 | {-DHUN| see documentation on evaluateItemgroup. helper function to generate a list of only opening or only closing brackets, that is ItemStart or ItemStop values. The string given as first parameter states which kinds of bracket shall be opened or closed. For Openning brackets you have to call this function ItemStart as second and True as third parameter. For closing brackets you have to use ItemStop and False DHUN-} |
1639 | ||
1638 | ||
1640 | 1639 | generateEnvironmentTagsHelper :: |
1641 | 1640 | String -> (Char -> Anything Char) -> Bool -> [Anything Char] |
1642 | 1641 | generateEnvironmentTagsHelper (c : cs) t b |
1644 | 1643 | (t c) : ([Item c]) ++ (generateEnvironmentTagsHelper cs t b) else |
1645 | 1644 | (generateEnvironmentTagsHelper cs t b) ++ [(t c)] |
1646 | 1645 | generateEnvironmentTagsHelper [] _ _ = [] |
1647 | ||
1646 | ||
1648 | 1647 | {-DHUN| see documentation on evaluateItemgroup and insertEnvironmentTags. The first parameter is a string consisting of the characters #:* that is the begging of the last Itemline. The second is the same for the current ItemLine. This function return a list of ItemStart and ItemStop values for the difference between the first and the second parameter DHUN-} |
1649 | ||
1648 | ||
1650 | 1649 | generateEnvironmentTags :: String -> String -> [Anything Char] |
1651 | 1650 | generateEnvironmentTags (o : os) (n : ns) |
1652 | 1651 | = if (o == n) then generateEnvironmentTags os ns else |
1657 | 1656 | generateEnvironmentTags [] (n : ns) |
1658 | 1657 | = generateEnvironmentTagsHelper (n : ns) ItemStart True |
1659 | 1658 | generateEnvironmentTags [] [] = [] |
1660 | ||
1659 | ||
1661 | 1660 | {-DHUN| see documentation on evaluateItemgroup. The first parameter is a string consisting of the characters #:* that is the begging of the last Itemline. The second parameter is the content of an Itemgroup. This function returns the parse tree with the ItemStart and ItemStop values inserted DHUN-} |
1662 | ||
1661 | ||
1663 | 1662 | insertEnvironmentTags :: |
1664 | 1663 | String -> [Anything Char] -> [Anything Char] |
1665 | 1664 | insertEnvironmentTags s ((Environment ItemLine (Str x) _) : xs) |
1677 | 1676 | ++ insertEnvironmentTags x xs) |
1678 | 1677 | insertEnvironmentTags s (x : xs) = x : insertEnvironmentTags s xs |
1679 | 1678 | insertEnvironmentTags _ [] = [] |
1680 | ||
1679 | ||
1681 | 1680 | {-DHUN| see documentation on evaluateItemgroup. Inserts the ItemStart and ItemStop values as preparation for running the second level parse on the content of an ItemGroup. The first pararmenter is a string consisting of the characters #:* that is the begging of the first Itemline. The second parameter is the content of an Itemgroup. This function returns the parse tree with the ItemStart and ItemStop values inserted DHUN-} |
1682 | ||
1681 | ||
1683 | 1682 | toEnvironmentTags :: String -> [Anything Char] -> [Anything Char] |
1684 | 1683 | toEnvironmentTags s l |
1685 | 1684 | = insertEnvironmentTags "" |
1686 | 1685 | ((Environment ItemLine (Str s) []) : |
1687 | 1686 | l ++ [(Environment ItemLine (Str "") [])]) |
1688 | ||
1687 | ||
1689 | 1688 | {-DHUN| see documentation on evaluateItemgroup. Get the second level parse result, back to the first level parse result DHUN-} |
1690 | ||
1689 | ||
1691 | 1690 | convertFromParsingLevelTwoToLevelOne :: |
1692 | 1691 | [Anything (Anything Char)] -> [Anything Char] |
1693 | 1692 | convertFromParsingLevelTwoToLevelOne ((C x) : xs) |
1704 | 1703 | convertFromParsingLevelTwoToLevelOne (_ : xs) |
1705 | 1704 | = (convertFromParsingLevelTwoToLevelOne xs) |
1706 | 1705 | convertFromParsingLevelTwoToLevelOne [] = [] |
1707 | ||
1706 | ||
1708 | 1707 | {-DHUN| see documentation on evaluateItemgroup. Runs the parser itemParserLevelTwo on the inner part of an ItemGroup DHUN-} |
1709 | ||
1708 | ||
1710 | 1709 | runItemGroupPraserLevelTwo :: |
1711 | 1710 | GenParser (Anything Char) () [Anything (Anything Char)] -> |
1712 | 1711 | [Anything Char] -> [Anything (Anything Char)] |
1714 | 1713 | = case (parse p "" input) of |
1715 | 1714 | Left _ -> [] |
1716 | 1715 | Right x -> x |
1717 | ||
1716 | ||
1718 | 1717 | {-DHUN| this is for parsing itemization. It is implemented as two step process in the first step Itemgroup is created with ItemLine s in it. And ItemLine is a line starting with any combination of *#: . And Itemgroup is a sequence of lines of that kind. After this is done this function gets called with the content of an itemgroup. In the second step ItemStart and ItemStop elements are created, those are essentially the bracketed of the opening an closing part of the enumerations itemizations etc. . The function toEnvironmentTags does the insertion of the ItemStop and ItemStart values and remove the ItemLine values. parseAnything2 is run on it using the itemParserLevelTwo. This is turned into the parse tree by being passed to decon2 and put out of the parser monad by runItemGroupPraserLevelTwo. The resulting parse tree is one of type [Anything (Anything Char)] this needs to be but one level down to [Anything Char] this is done be convertFromParsingLevelTwoToLevelOne. The first parameter is a string consisting of the characters #:* that is the begging of the first Itemline. The second parameter is the content of an Itemgroup DHUN-} |
1719 | ||
1718 | ||
1720 | 1719 | evaluateItemgroup :: String -> [Anything Char] -> [Anything Char] |
1721 | 1720 | evaluateItemgroup s l |
1722 | 1721 | = convertFromParsingLevelTwoToLevelOne |
1729 | 1728 | (remake [itemParserLevelTwo]) |
1730 | 1729 | [])) |
1731 | 1730 | (toEnvironmentTags s l)) |
1732 | ||
1731 | ||
1733 | 1732 | {-DHUN| a parser for a group of lines starting with one of *:;# representing in enumeration itemization etc. . This particular parser is allowed to match nearly everywhere DHUN-} |
1734 | ||
1733 | ||
1735 | 1734 | itempgroupp :: MyParser Char |
1736 | 1735 | itempgroupp |
1737 | 1736 | = MyParser{bad = \ _ -> pzero, |
1748 | 1747 | notFollowedBy ((oneOf ['*', ':', ';', '#'])))), |
1749 | 1748 | allowed = [Root, Wikitable, TemplateInside, Tag], self = Itemgroup, |
1750 | 1749 | modify = \ (Str x) -> evaluateItemgroup x, reenv = id} |
1751 | ||
1750 | ||
1752 | 1751 | {-DHUN| a parser for a group of lines starting with one of *:;# representing in enumeration itemization etc. . This particular parser is only allowed to match within templates DHUN-} |
1753 | ||
1752 | ||
1754 | 1753 | itempgrouppt :: MyParser Char |
1755 | 1754 | itempgrouppt |
1756 | 1755 | = MyParser{bad = \ _ -> pzero, |
1761 | 1760 | return (Str a), |
1762 | 1761 | end = |
1763 | 1762 | \ _ -> |
1764 | (try (lookAhead (string "}}") <|> (lookAhead (string "\n">>(many (char ' ')) >>string"|"))) >> | |
1765 | return ()), | |
1763 | (try | |
1764 | (lookAhead (string "}}") <|> | |
1765 | (lookAhead (string "\n" >> (many (char ' ')) >> string "|"))) | |
1766 | >> return ()), | |
1766 | 1767 | allowed = [TemplateInside], self = Itemgroup, |
1767 | 1768 | modify = \ (Str x) -> evaluateItemgroup x, reenv = id} |
1768 | ||
1769 | ||
1769 | 1770 | {-DHUN| a parser for a preformat created by indenting with space DHUN-} |
1770 | ||
1771 | ||
1771 | 1772 | presectionp :: MyParser Char |
1772 | 1773 | presectionp |
1773 | 1774 | = MyParser{bad = \ _ -> try (string "}}") >> (return ()), |
1785 | 1786 | modify = \ _ x -> (C ' ') : x, reenv = id} |
1786 | 1787 | |
1787 | 1788 | {-DHUN| a parser for a preformat created by indenting with space withing templates DHUN-} |
1788 | ||
1789 | ||
1789 | 1790 | presectionpt :: MyParser Char |
1790 | 1791 | presectionpt |
1791 | 1792 | = MyParser{bad = \ _ -> try (string "}}") >> (return ()), |
1792 | 1793 | start = |
1793 | 1794 | \ _ -> |
1794 | 1795 | do _ <- string "\n" |
1795 | _ <- notFollowedBy ((many1 (char ' '))>>(char '|')) | |
1796 | _ <- notFollowedBy ((many1 (char ' ')) >> (char '|')) | |
1796 | 1797 | _ <- char ' ' |
1797 | 1798 | return (Str ""), |
1798 | 1799 | end = |
1799 | 1800 | \ _ -> |
1800 | 1801 | lookAhead |
1801 | 1802 | (do _ <- string "\n" |
1802 | notFollowedBy (char ' ')) , | |
1803 | notFollowedBy (char ' ')), | |
1803 | 1804 | allowed = [TemplateInside], self = SpaceIndent, |
1804 | 1805 | modify = \ _ x -> (C ' ') : x, reenv = id} |
1805 | 1806 | |
1806 | ||
1807 | 1807 | {-DHUN| a parser for a line starting with one of *:;# representing in enumeration itemization etc. DHUN-} |
1808 | ||
1808 | ||
1809 | 1809 | itemlinep :: MyParser Char |
1810 | 1810 | itemlinep |
1811 | 1811 | = MyParser{bad = \ _ -> pzero, |
1817 | 1817 | end = \ _ -> (return ()), allowed = [Itemgroup], self = ItemLine, |
1818 | 1818 | modify = \ _ x -> x, reenv = id} |
1819 | 1819 | |
1820 | ||
1821 | 1820 | {-DHUN| takes a parse tree that was created form the HTML returned by MediaWiki when being requested for the print version of a wiki page. And returns a modified version of that parse tree ready for being converted to LaTeX with treeToLaTeX3 |DHUN-} |
1822 | ||
1821 | ||
1823 | 1822 | printPrepareTree :: [Anything Char] -> [Anything Char] |
1824 | 1823 | printPrepareTree ll = concat (map printPrepareNode ll) |
1825 | where | |
1826 | printPrepareNode :: Anything Char -> [Anything Char] | |
1824 | where printPrepareNode :: Anything Char -> [Anything Char] | |
1827 | 1825 | printPrepareNode (Environment Tag (TagAttr "div" mm) l) |
1828 | 1826 | | (Map.lookup "class" mm) == (Just "thumbinner") = |
1829 | 1827 | case |
1840 | 1838 | . (filter magnpred) |
1841 | 1839 | $ tt |
1842 | 1840 | _ -> mzero |
1843 | return $ imgfun m llll (Just tt) | |
1841 | return $ imgfun m (printPrepareTree llll) (Just (printPrepareTree tt)) | |
1844 | 1842 | of |
1845 | 1843 | Just x -> x |
1846 | 1844 | _ -> printPrepareTree l |
1847 | 1845 | printPrepareNode (Environment Wikitable (TagAttr "table" m) _) |
1848 | 1846 | | (Map.lookup "class" m) == (Just "toc") = [] |
1849 | printPrepareNode (Environment Tag (TagAttr "tbody" _) x) = printPrepareTree x | |
1847 | printPrepareNode (Environment Tag (TagAttr "tbody" _) x) | |
1848 | = printPrepareTree x | |
1850 | 1849 | printPrepareNode (Environment Tag (TagAttr "div" m) _) |
1851 | 1850 | | ((Map.lookup "class" m) == (Just "toc") || |
1852 | 1851 | (Map.lookup "id" m) == (Just "toc")) |
1891 | 1890 | te <- case deepGet "div" "class" "gallerytext" l of |
1892 | 1891 | [Environment Tag (TagAttr "div" _) te] -> return te |
1893 | 1892 | _ -> mzero |
1894 | return $ imgfun mmm lll (Just te)) | |
1893 | return $ imgfun mmm (printPrepareTree lll) (Just te)) | |
1895 | 1894 | of |
1896 | 1895 | Just x -> x |
1897 | 1896 | _ -> printPrepareTree l |
1915 | 1914 | | (Map.lookup "class" m) == (Just "mw-editsection") = [] |
1916 | 1915 | printPrepareNode (Environment Tag (TagAttr "a" m) l) |
1917 | 1916 | = case (Map.lookup "class" m) of |
1918 | (Just "image") -> imgfun m l Nothing | |
1917 | (Just "image") -> imgfun m (printPrepareTree l) Nothing | |
1919 | 1918 | _ -> case (Map.lookup "class" m) of |
1920 | 1919 | (Just "external free") -> [Environment Tag (TagAttr "a" m) []] |
1921 | 1920 | _ -> [Environment Tag (TagAttr "a" m) l] |
1922 | 1921 | printPrepareNode (Environment Tag (TagAttr "div" m) _) |
1923 | 1922 | | (Map.lookup "class" m) == (Just "bodyContent") = [] |
1924 | ||
1925 | 1923 | printPrepareNode (Environment Tag (TagAttr "math" m) l) |
1926 | = case deepGet "annotation" "encoding" "application/x-tex" l of | |
1927 | [Environment Tag (TagAttr "annotation" _) x] -> [Environment Math (TagAttr "math" m) | |
1928 | (map C | |
1929 | (replace2 (replace2 (replace2 (shallowFlatten x) "&" "&") "<" "<") "gt;" | |
1930 | ">"))] | |
1931 | _ -> [] | |
1924 | = case deepGet "annotation" "encoding" "application/x-tex" l of | |
1925 | [Environment Tag (TagAttr "annotation" _) x] -> [Environment Math | |
1926 | (TagAttr "math" m) | |
1927 | (map C | |
1928 | (replace2 | |
1929 | (replace2 | |
1930 | (replace2 | |
1931 | (shallowFlatten x) | |
1932 | "&" | |
1933 | "&") | |
1934 | "<" | |
1935 | "<") | |
1936 | ">" | |
1937 | ">"))] | |
1938 | _ -> [] | |
1932 | 1939 | printPrepareNode (Environment Tag (TagAttr "img" m) l) |
1933 | | (Map.lookup "class" m) `elem` (map Just ["tex","mwe-math-fallback-png-inline tex","mwe-math-fallback-image-inline tex"]) = | |
1940 | | (Map.lookup "class" m) `elem` | |
1941 | (map Just | |
1942 | ["tex", "mwe-math-fallback-png-inline tex", | |
1943 | "mwe-math-fallback-image-inline tex"]) | |
1944 | = | |
1934 | 1945 | case Map.lookup "alt" m of |
1935 | 1946 | Just x -> [Environment Math (TagAttr "math" m) |
1936 | 1947 | (map C |
1941 | 1952 | printPrepareNode (Environment Tag (TagAttr "div" m) l) |
1942 | 1953 | = case |
1943 | 1954 | do c <- Map.lookup "class" m |
1944 | guard $ ((isInfixOf "source" c)||(isInfixOf "highlight" c)) | |
1955 | guard $ ((isInfixOf "source" c) || (isInfixOf "highlight" c)) | |
1945 | 1956 | return $ |
1946 | 1957 | Environment Source |
1947 | 1958 | (TagAttr "source" |
1953 | 1964 | printPrepareNode (Environment x y l) |
1954 | 1965 | = [Environment x y (printPrepareTree l)] |
1955 | 1966 | printPrepareNode x = [x] |
1956 | ||
1967 | ||
1957 | 1968 | mypred :: String -> Anything Char -> Bool |
1958 | 1969 | mypred x y |
1959 | 1970 | = case y of |
1960 | 1971 | (Environment Tag (TagAttr z _) _) | z == x -> True |
1961 | 1972 | _ -> False |
1962 | ||
1973 | ||
1963 | 1974 | magnpred :: Anything Char -> Bool |
1964 | 1975 | magnpred y |
1965 | 1976 | = case y of |
2020 | 2031 | Just x -> return $ [C '|'] ++ (map C (x ++ "px")) |
2021 | 2032 | Nothing -> return [] |
2022 | 2033 | return (Environment Wikilink (Str "") ((map C h) ++ w ++ t)) |
2023 | ||
2024 |
0 | 0 | {-DHUN| module storing information on which ttf font should be used for which character and fontstyle DHUN-} |
1 | 1 | module MegaFont where |
2 | 2 | import BaseFont |
3 | ||
3 | ||
4 | 4 | {-DHUN| map (written as list) storing information on which ttf font should be used for which character and fontstyle DHUN-} |
5 | ||
5 | ||
6 | 6 | megafont :: [(FontStyle, [Char])] |
7 | 7 | megafont |
8 | 8 | = [(FontStyle{stylebase = Normal, bold = False, italic = False}, |
4 | 4 | import Control.Monad.Trans.State (State) |
5 | 5 | import MediaWikiParseTree |
6 | 6 | import BaseFont |
7 | ||
7 | ||
8 | 8 | {-DHUN| a type used as mutable state while processing a table. See documentation of the TableHelper module DHUN-} |
9 | ||
9 | ||
10 | 10 | data TableState = TableState{rowCounter :: Int, |
11 | 11 | inputLastRowOfHeader :: Int, outputLastRowOfHeader :: Int, |
12 | 12 | outputTableHasHeaderRows :: Bool, |
18 | 18 | currentRowIsHeaderRow :: Bool, |
19 | 19 | lastCellWasNotFirstCellOfRow :: Bool, columnsWidthList :: [Float], |
20 | 20 | lastCellWasMultiColumn :: Bool, activeColumn :: Maybe Int} |
21 | ||
21 | ||
22 | 22 | {-DHUN| see documentation of the makeLables function in WikiHelper module DHUN-} |
23 | ||
23 | ||
24 | 24 | data UrlState = UrlState{iUrlState :: Int, sUrlState :: String, |
25 | 25 | mUrlState :: Map String String} |
26 | 26 | deriving (Show, Eq, Read) |
27 | ||
27 | ||
28 | 28 | {-DHUN| see initial value of the type UrlState DHUN-} |
29 | ||
29 | ||
30 | 30 | initialUrlState :: UrlState |
31 | 31 | initialUrlState |
32 | 32 | = UrlState{iUrlState = 0, sUrlState = "", mUrlState = Map.empty} |
33 | ||
33 | ||
34 | 34 | {-DHUN| a type used as mutable state during the course of the LaTeXRederer DHUN-} |
35 | ||
35 | ||
36 | 36 | data MyState = MyState{getImages :: [String], getJ :: Int, |
37 | 37 | getF :: Float, getC :: Int, getInTab :: Int, getInGallery :: Bool, |
38 | 38 | getInFootnote :: Bool, getInHeading :: Bool, getInCenter :: Bool, |
41 | 41 | urld :: WikiUrlData, getGalleryNumbers :: [Integer], |
42 | 42 | currentUrl :: String, fndict :: Map String [Anything Char], |
43 | 43 | tablist :: [[String]], tabmap :: Map Int (Map Int Double), |
44 | fontStack :: [FontStyle], font :: Font, langu:: Maybe String, forms::Map String Int, lastChar::Char, lastFontChanged::Bool} | |
44 | fontStack :: [FontStyle], font :: Font, langu :: Maybe String, | |
45 | forms :: Map String Int, lastChar :: Char, lastFontChanged :: Bool} | |
45 | 46 | deriving (Show, Eq) |
46 | ||
47 | ||
47 | 48 | {-DHUN| Renderer is the State monad using MyState as mutable state DHUN-} |
48 | ||
49 | ||
49 | 50 | type Renderer = State MyState |
50 | ||
51 | ||
51 | 52 | {-DHUN| the initial value for MyState DHUN-} |
52 | ||
53 | ||
53 | 54 | initialState :: MyState |
54 | 55 | initialState |
55 | 56 | = MyState{getImages = [], getJ = 1, getF = 1, getC = 1, |
61 | 62 | tabmap = Map.empty, |
62 | 63 | fontStack = |
63 | 64 | [FontStyle{stylebase = Normal, bold = False, italic = False}], |
64 | font = ComputerModernRoman, langu=Nothing, forms=Map.empty , lastChar=' ', lastFontChanged=False} | |
65 | ||
65 | font = ComputerModernRoman, langu = Nothing, forms = Map.empty, | |
66 | lastChar = ' ', lastFontChanged = False} | |
67 | ||
66 | 68 | {-DHUN| represents an URL to a wiki (not to a page thereof), which is not a sister project of wikipedia, so not wikibooks wikisource, etc. DHUN-} |
67 | ||
69 | ||
68 | 70 | data WikiBaseUrl = WikiBaseUrl{baseUrl :: String} |
69 | 71 | deriving (Show, Eq) |
70 | ||
72 | ||
71 | 73 | {-DHUN| represents an URL to a wiki (not to a page thereof), which is a sister project of wikipedia, so wikibooks wikisource, etc. DHUN-} |
72 | ||
74 | ||
73 | 75 | data WikiUrlInfo = WikiUrlInfo{language :: String, |
74 | 76 | wikitype :: String} |
75 | 77 | deriving (Show, Eq) |
76 | ||
78 | ||
77 | 79 | {-DHUN| represents an URL to a wiki (not to a page thereof), which is either a sister project of wikipedia, so wikibooks wikisource, etc. or isn't a sister project of wikipedia DHUN-} |
78 | ||
80 | ||
79 | 81 | data WikiUrlData = BaseUrl WikiBaseUrl |
80 | 82 | | UrlInfo WikiUrlInfo |
81 | 83 | deriving (Show, Eq) |
82 | ||
84 | ||
83 | 85 | {-DHUN| represents an URL to a page on a wiki DHUN-} |
84 | ||
86 | ||
85 | 87 | data WikiLinkInfo = WikiLinkInfo{urldata :: WikiUrlData, |
86 | 88 | page :: String} |
87 | 89 | deriving (Show, Eq) |
0 | 0 | {-DHUN| module for parallel computations. The paralellism is reached by forking threads with liftIO. When starting a thread and empty MVar is returned immediately. The MVar is filled on completing of the thread. Normal function can be lifted to threaded function operation on MVar of the parameters instead of the parameters themself. Thus it is possible to define dependencies of function on the return values of other functions and let the order of excecution evolove automatically, so a function is called as soon as all its parameters have been calculated. All functions considered in this module are understood to have an IO return type. This module is mainly used for paraller downloading from the web. DHUN-} |
1 | 1 | module Parallel where |
2 | 2 | import Control.Concurrent.MVar |
3 | ||
3 | ||
4 | 4 | {-DHUN| takes a function of return type IO which takes on parameter and an MVar containing at the same type as the parameter of the function and returns and IO action containing an MVar containing the same type as the tyoe contained in the IO Action returned by the function. This function retuns immediately and passed an empty MVar as return type. As soon as the MVar containing the same type as the parameter of the function is ready for reading the the function is executed in a new thread. As soon as the function return a value, the it is written into the returned MVar. DHUN-} |
5 | ||
5 | ||
6 | 6 | (<$$>) :: MVar Int -> (a -> IO b) -> MVar a -> IO (MVar b) |
7 | 7 | (<$$>) vv f x |
8 | 8 | = do var <- newEmptyMVar |
12 | 12 | = do xx <- readMVar x |
13 | 13 | result <- f xx |
14 | 14 | putMVar v result |
15 | ||
15 | ||
16 | 16 | {-DHUN| takes a value and returns an IO action that contain a filled MVar conatining the value DHUN-} |
17 | ||
17 | ||
18 | 18 | base :: MVar Int -> a -> IO (MVar a) |
19 | base v x = ((ppure v). return) x | |
20 | ||
19 | base v x = ((ppure v) . return) x | |
20 | ||
21 | 21 | {-DHUN| takes an IO action and return an MVar containing the and IO action the returns an MVar containing the same type as contained by the IO action passed to this function. The IO action passed as first parameter to this function is executed in a new thread. As soon as it finishes its result is written into the returned MVar DHUN-} |
22 | ||
23 | ppure :: MVar Int -> IO a -> IO (MVar a) | |
22 | ||
23 | ppure :: MVar Int -> IO a -> IO (MVar a) | |
24 | 24 | ppure vv x |
25 | 25 | = do var <- newEmptyMVar |
26 | 26 | _ <- myFork vv (go var) |
31 | 31 | |
32 | 32 | {-DHUN| alias to switch between forkOS and forkIO for testing DHUN-} |
33 | 33 | |
34 | myFork :: MVar Int -> IO () -> IO () | |
34 | myFork :: MVar Int -> IO () -> IO () | |
35 | 35 | myFork _ x = x |
36 | 36 | |
37 | {-DHUN| takes a function which takes a list and return an IO action as first parameter. It takes a list of MVar containing the same type as the type contained in the list as second parameter. It returnes an empty MVar immediately. As soon as all MVar in the list given as second parameter could be read, the function is started in a new thread. As soon as it finishes, the returend MVar is filled with the value returend by the function DHUN-} | |
37 | 38 | |
38 | ||
39 | {- | |
40 | do i<-takeMVar v | |
41 | if i<1 | |
42 | then do putMVar v $!! (i+1) | |
43 | forkIO $ do x | |
44 | ii<-takeMVar v | |
45 | putMVar v $!! (ii-1) | |
46 | else do putMVar v $!! i | |
47 | threadDelay 1000 | |
48 | myFork v x | |
49 | ||
50 | -} | |
51 | {-DHUN| takes a function which takes a list and return an IO action as first parameter. It takes a list of MVar containing the same type as the type contained in the list as second parameter. It returnes an empty MVar immediately. As soon as all MVar in the list given as second parameter could be read, the function is started in a new thread. As soon as it finishes, the returend MVar is filled with the value returend by the function DHUN-} | |
52 | ||
53 | 39 | liftList :: MVar Int -> ([a] -> IO b) -> [MVar a] -> IO (MVar b) |
54 | 40 | liftList vv f x |
55 | 41 | = do var <- newEmptyMVar |
59 | 45 | = do xx <- mapM readMVar x |
60 | 46 | result <- f xx |
61 | 47 | putMVar v result |
62 | ||
48 | ||
63 | 49 | {-DHUN| the same as liftList. Only difference is that the seconds parameter is not a list of MVars containing values but an MVar containing a list of MVars containing values. So an addinal step is made to read the out MVar DHUN-} |
64 | ||
65 | liftList2 :: MVar Int -> ([a] -> IO b) -> MVar [MVar a] -> IO (MVar b) | |
50 | ||
51 | liftList2 :: | |
52 | MVar Int -> ([a] -> IO b) -> MVar [MVar a] -> IO (MVar b) | |
66 | 53 | liftList2 vv f x |
67 | 54 | = do var <- newEmptyMVar |
68 | 55 | _ <- myFork vv (go var) |
72 | 59 | xx <- mapM readMVar xxx |
73 | 60 | result <- f xx |
74 | 61 | putMVar v result |
75 | ||
62 | ||
76 | 63 | {-DHUN| prefix version of the angle bracked dollar operator described above DHUN-} |
77 | ||
78 | liftA :: MVar Int -> (a -> IO b) -> MVar a -> IO (MVar b) | |
64 | ||
65 | liftA :: MVar Int -> (a -> IO b) -> MVar a -> IO (MVar b) | |
79 | 66 | liftA v f x = (<$$>) v f x |
80 | ||
67 | ||
81 | 68 | {-DHUN| same as liftA. Just function must have exactly two parameter instead of only one DHUN-} |
82 | ||
83 | liftA2 :: MVar Int -> (a -> b -> IO c) -> MVar a -> MVar b -> IO (MVar c) | |
69 | ||
70 | liftA2 :: | |
71 | MVar Int -> (a -> b -> IO c) -> MVar a -> MVar b -> IO (MVar c) | |
84 | 72 | liftA2 vv f x y |
85 | 73 | = do var <- newEmptyMVar |
86 | 74 | _ <- myFork vv (go var) |
90 | 78 | yy <- readMVar y |
91 | 79 | result <- f xx yy |
92 | 80 | putMVar v result |
93 | ||
81 | ||
94 | 82 | {-DHUN| same as liftA. Just function must have exactly three parameter instead of only one DHUN-} |
95 | ||
83 | ||
96 | 84 | liftA3 :: |
97 | MVar Int -> (a -> b -> c -> IO d) -> MVar a -> MVar b -> MVar c -> IO (MVar d) | |
85 | MVar Int -> | |
86 | (a -> b -> c -> IO d) -> MVar a -> MVar b -> MVar c -> IO (MVar d) | |
98 | 87 | liftA3 vv f x y z |
99 | 88 | = do var <- newEmptyMVar |
100 | 89 | _ <- myFork vv (go var) |
0 | ||
0 | 1 | module SelfTest where |
1 | runSelfTest::Integer->Integer->IO () | |
2 | ||
3 | runSelfTest :: Integer -> Integer -> IO () | |
2 | 4 | runSelfTest _ _ = return () |
8 | 8 | import System.IO |
9 | 9 | import Control.Monad.Trans |
10 | 10 | import Data.ByteString.UTF8 |
11 | import Data.ByteString.Lazy hiding (pack, reverse, takeWhile, dropWhile) | |
11 | import Data.ByteString.Lazy | |
12 | hiding (pack, reverse, takeWhile, dropWhile) | |
12 | 13 | import Control.Concurrent |
13 | 14 | import Control.Monad.State |
14 | 15 | import ImperativeState hiding (name) |
15 | 16 | import Hex |
16 | 17 | import Data.Map.Strict |
17 | import Data.Maybe | |
18 | import Data.Maybe | |
18 | 19 | import System.Process hiding (cwd) |
19 | 20 | import Data.List |
20 | 21 | import Text.Blaze.Internal |
23 | 24 | import Control.DeepSeq |
24 | 25 | import System.Exit |
25 | 26 | |
26 | mytext ::String-> H.Html | |
27 | mytext :: String -> H.Html | |
27 | 28 | mytext = text . pack |
28 | 29 | |
29 | pageFrame :: H.Html-> H.Html | |
30 | pageFrame input = H.table H.! A.style "min-width:100%;border:0px;padding:0;border-spacing:0px 0px;" $ | |
31 | do H.tr H.! A.style "min-width:100%;border:0px;padding:0" $ | |
32 | do H.td H.! A.style "background-color:#444444" H.! A.colspan "3" $ | |
33 | do H.div H.! A.style "color:white;font-size:60;border:20px;font-family:Arial,Helvetica,sans-serif;" $ H.b $ | |
34 | do mytext "MediaWiki2LaTeX" | |
35 | H.tr H.! A.style "min-width:100%;border:0px;padding:0" $ | |
36 | do H.td H.! A.style "background-color:#222222" H.! A.colspan "3" $ H.div H.! A.style "padding: 5px;padding-left: 10px" $ H.div H.! A.style "padding:3px;display:inline;background-color:#595959;border-radius:3px" $ H.div H.! A.style "font-family: times, serif;display:inline;font-size:20;color:#cccccc" $ "Home" | |
37 | H.tr $ | |
38 | do H.td $ H.table H.! A.style "padding:20px" $ | |
39 | do H.tr $ infoBox "MediaWiki to LaTeX" "MediaWiki to LaTeX converts Wiki pages to LaTeX and PDF. It works with any website running MediaWiki, especially Wikipedia and Wikibooks. MediaWiki to LaTeX is purely written in the purely functional language Haskell. It was mainly devolved by Dirk Hünniger. The source code is freely available under the terms of the GNU General Public License. Binary releases for the most common operating systems are available for download. The Debian package is maintained by Georges Khaznadar." | |
40 | H.tr $ infoBox "Contact" $ | |
41 | do mytext "Dirk Hünniger" | |
42 | H.br | |
43 | mytext "Emil Schweitzer Str. S 10" | |
44 | H.br | |
45 | mytext "D-47506 Neukirchen Vluyn" | |
46 | H.br | |
47 | mytext "Germany" | |
48 | H.br | |
49 | mytext "Telephone ++49-2845-3799993" | |
50 | H.br | |
51 | H.a H.! A.href "mailto:dirk.hunniger@googlemail.com?Subject=MediaWiki2LaTeX" $ "dirk.hunniger@googlemail.com" | |
52 | input | |
53 | H.td $ H.table H.! A.style "padding:20px" $ | |
54 | do H.tr $ infoBox "Documentation and Links" $ | |
55 | do H.ul $ | |
56 | do H.li $ H.a H.! A.href "http://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf/manual" $ "Users Manual" | |
57 | H.li $ H.a H.! A.href "http://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf" $ "Project Wiki Page" | |
58 | H.li $ H.div H.! A.style "font-size:x-large" $ do | |
59 | H.b $ H.a H.! A.href "https://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf" $ "Download Full Version for Free" | |
60 | ||
61 | H.li $ H.a H.! A.href "http://sourceforge.net/p/wb2pdf/git/ci/master/tree/" $ "Git Sourcecode Repository" | |
62 | H.li $ H.a H.! A.href "http://de.wikibooks.org/wiki/Benutzer_Diskussion:Dirk_Huenniger/wb2pdf/Requests" $ "Bug Tracker" | |
63 | H.tr $ infoBox "Web Interface" $ | |
64 | do mytext "The web interface was artistically designed by Thomas Blume using " | |
65 | H.a H.! A.href "http://www.yaml.org/" $ "YAML" | |
66 | mytext ". It was technically implemented by Dirk Hünniger using the Blaze Html Framework in Haskell. Important ideas on the design of the user interface were contributed by Georges Khaznadar and Martin Wermers. The server this interface is running on is run by a private person, who has to cover the costs, so " | |
67 | H.b "please consider downloading the full version for free" | |
68 | mytext " and running at on your own hardware instead of using this server." | |
69 | ||
70 | ||
71 | ||
72 | infoBox :: String->H.Html->H.Html | |
73 | infoBox heading content = H.td $ H.div H.! A.style "padding:10px" $H.div H.! A.style "text-align:jusitfy;text-justify:inter-word;padding:20px;border-color:#aaaaaa;background-color:#f4f4f4;border-radius:5px;border-width:1px;border-style:solid;font-family:Arial,Helvetica,sans-serif" $ | |
74 | do H.div H.! A.style "font-size:30" $ H.b ( mytext heading) | |
75 | H.br | |
76 | H.div H.! A.style "text-align:jusitfy" $content | |
77 | ||
78 | ||
79 | ||
80 | data ProgressInfo = ProgressInfo { progress :: Double, filename:: Maybe String , startTime::Double, barValue::Double, lastStepTime::Double, lastRuntime::Double, extension::String, failed::Bool} | |
81 | ||
82 | ||
83 | instance NFData ProgressInfo | |
84 | where | |
85 | rnf a = a `seq` () | |
30 | pageFrame :: H.Html -> H.Html | |
31 | pageFrame input | |
32 | = H.table H.! | |
33 | A.style | |
34 | "min-width:100%;border:0px;padding:0;border-spacing:0px 0px;" | |
35 | $ | |
36 | do H.tr H.! A.style "min-width:100%;border:0px;padding:0" $ | |
37 | do H.td H.! A.style "background-color:#444444" H.! A.colspan "3" $ | |
38 | do H.div H.! | |
39 | A.style | |
40 | "color:white;font-size:60;border:20px;font-family:Arial,Helvetica,sans-serif;" | |
41 | $ H.b $ do mytext "MediaWiki2LaTeX" | |
42 | H.tr H.! A.style "min-width:100%;border:0px;padding:0" $ | |
43 | do H.td H.! A.style "background-color:#222222" H.! A.colspan "3" $ | |
44 | H.div H.! A.style "padding: 5px;padding-left: 10px" $ | |
45 | H.div H.! | |
46 | A.style | |
47 | "padding:3px;display:inline;background-color:#595959;border-radius:3px" | |
48 | $ | |
49 | H.div H.! | |
50 | A.style | |
51 | "font-family: times, serif;display:inline;font-size:20;color:#cccccc" | |
52 | $ "Home" | |
53 | H.tr $ | |
54 | do H.td $ | |
55 | H.table H.! A.style "padding:20px" $ | |
56 | do H.tr $ | |
57 | infoBox "MediaWiki to LaTeX" | |
58 | "MediaWiki to LaTeX converts Wiki pages to LaTeX and PDF. It works with any website running MediaWiki, especially Wikipedia and Wikibooks. MediaWiki to LaTeX is purely written in the purely functional language Haskell. It was mainly devolved by Dirk H\252nniger. The source code is freely available under the terms of the GNU General Public License. Binary releases for the most common operating systems are available for download. The Debian package is maintained by Georges Khaznadar." | |
59 | H.tr $ | |
60 | infoBox "Contact" $ | |
61 | do mytext "Dirk H\252nniger" | |
62 | H.br | |
63 | mytext "Emil Schweitzer Str. S 10" | |
64 | H.br | |
65 | mytext "D-47506 Neukirchen Vluyn" | |
66 | H.br | |
67 | mytext "Germany" | |
68 | H.br | |
69 | mytext "Telephone ++49-2845-3799993" | |
70 | H.br | |
71 | H.a H.! | |
72 | A.href | |
73 | "mailto:dirk.hunniger@googlemail.com?Subject=MediaWiki2LaTeX" | |
74 | $ "dirk.hunniger@googlemail.com" | |
75 | input | |
76 | H.td $ | |
77 | H.table H.! A.style "padding:20px" $ | |
78 | do H.tr $ | |
79 | infoBox "Documentation and Links" $ | |
80 | do H.ul $ | |
81 | do H.li $ | |
82 | H.a H.! | |
83 | A.href | |
84 | "http://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf/manual" | |
85 | $ "Users Manual" | |
86 | H.li $ | |
87 | H.a H.! | |
88 | A.href | |
89 | "http://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf" | |
90 | $ "Project Wiki Page" | |
91 | H.li $ | |
92 | H.div H.! A.style "font-size:x-large" $ | |
93 | do H.b $ | |
94 | H.a H.! | |
95 | A.href | |
96 | "https://de.wikibooks.org/wiki/Benutzer:Dirk_Huenniger/wb2pdf" | |
97 | $ "Download Full Version for Free" | |
98 | H.li $ | |
99 | H.a H.! | |
100 | A.href "http://sourceforge.net/p/wb2pdf/git/ci/master/tree/" | |
101 | $ "Git Sourcecode Repository" | |
102 | H.li $ | |
103 | H.a H.! | |
104 | A.href | |
105 | "http://de.wikibooks.org/wiki/Benutzer_Diskussion:Dirk_Huenniger/wb2pdf/Requests" | |
106 | $ "Bug Tracker" | |
107 | H.tr $ | |
108 | infoBox "Web Interface" $ | |
109 | do mytext | |
110 | "The web interface was artistically designed by Thomas Blume using " | |
111 | H.a H.! A.href "http://www.yaml.org/" $ "YAML" | |
112 | mytext | |
113 | ". It was technically implemented by Dirk H\252nniger using the Blaze Html Framework in Haskell. Important ideas on the design of the user interface were contributed by Georges Khaznadar and Martin Wermers. The server this interface is running on is run by a WMF, who have to cover the costs, so " | |
114 | H.b "please consider downloading the full version for free" | |
115 | mytext | |
116 | " and running at on your own hardware instead of using this server." | |
117 | ||
118 | infoBox :: String -> H.Html -> H.Html | |
119 | infoBox heading content | |
120 | = H.td $ | |
121 | H.div H.! A.style "padding:10px" $ | |
122 | H.div H.! | |
123 | A.style | |
124 | "text-align:jusitfy;text-justify:inter-word;padding:20px;border-color:#aaaaaa;background-color:#f4f4f4;border-radius:5px;border-width:1px;border-style:solid;font-family:Arial,Helvetica,sans-serif" | |
125 | $ | |
126 | do H.div H.! A.style "font-size:30" $ H.b (mytext heading) | |
127 | H.br | |
128 | H.div H.! A.style "text-align:jusitfy" $ content | |
129 | ||
130 | data ProgressInfo = ProgressInfo{progress :: Double, | |
131 | filename :: Maybe String, startTime :: Double, barValue :: Double, | |
132 | lastStepTime :: Double, lastRuntime :: Double, extension :: String, | |
133 | failed :: Bool} | |
134 | ||
135 | instance NFData ProgressInfo where | |
136 | rnf a = a `seq` () | |
86 | 137 | |
87 | 138 | {-DHUN| IO action to run the server DHUN-} |
88 | ||
139 | ||
89 | 140 | serve :: Int -> IO () |
90 | serve p = do a<-newMVar Data.Map.Strict.empty | |
91 | simpleHTTP nullConf{port = p, timeout = 100000} $ msum [dirs "progress" $ path $ \subject -> progressBar a subject, dirs "file" $ path $ \subject -> fileFrame a subject, dirs "fill" $ path $ \subject -> formPage a subject, formPage a ""] | |
92 | ||
93 | ||
94 | ||
141 | serve p | |
142 | = do a <- newMVar Data.Map.Strict.empty | |
143 | simpleHTTP nullConf{port = p, timeout = 100000} $ | |
144 | msum | |
145 | [dirs "progress" $ path $ \ subject -> progressBar a subject, | |
146 | dirs "file" $ path $ \ subject -> fileFrame a subject, | |
147 | dirs "fill" $ path $ \ subject -> formPage a subject, | |
148 | formPage a ""] | |
95 | 149 | |
96 | 150 | {-DHUN| template for the start page of the server DHUN-} |
97 | ||
151 | ||
98 | 152 | template :: Text -> H.Html -> Response |
99 | 153 | template title body |
100 | = toResponse $ H.docTypeHtml $ | |
101 | do H.head $ do H.meta H.! A.charset "utf-8" | |
102 | H.title (H.toHtml title) | |
154 | = toResponse $ | |
155 | H.docTypeHtml $ | |
156 | do H.head $ | |
157 | do H.meta H.! A.charset "utf-8" | |
158 | H.title (H.toHtml title) | |
103 | 159 | H.body $ do body |
104 | ||
160 | ||
105 | 161 | {-DHUN| takes an url to a wiki article and a filename for the temporary file to be created and return a shell command to run mediawiki2latex to compile a pdf document from the given url and write it to the given temporary filename DHUN-} |
106 | ||
162 | ||
107 | 163 | mainAction :: FullConfig -> IO String |
108 | 164 | mainAction oldcfg |
109 | 165 | = do cwd <- getCurrentDirectory |
110 | let cfg = oldcfg {mainPath = cwd} | |
166 | let cfg = oldcfg{mainPath = cwd} | |
111 | 167 | return (hex (show cfg)) |
112 | ||
168 | ||
113 | 169 | {-DHUN| main webpage of the server containing a from with an entry for an url to a wiki article with returns the pdf on submission of the form DHUN-} |
114 | ||
170 | ||
115 | 171 | gogo :: Eq b => [(a, b)] -> b -> b |
116 | gogo ((_,v):xs) vv = if vv==v then | |
117 | case xs of | |
118 | (h:_)->(snd h) | |
119 | _->v | |
120 | else | |
121 | gogo xs vv | |
172 | gogo ((_, v) : xs) vv | |
173 | = if vv == v then | |
174 | case xs of | |
175 | (h : _) -> (snd h) | |
176 | _ -> v | |
177 | else gogo xs vv | |
122 | 178 | gogo [] vv = vv |
123 | 179 | |
124 | progressBar :: MVar (Map Int ProgressInfo) -> String->ServerPart Response | |
180 | progressBar :: | |
181 | MVar (Map Int ProgressInfo) -> String -> ServerPart Response | |
125 | 182 | progressBar t sub |
126 | = do let theIndex=case (reads (Data.List.takeWhile (/= '.') sub)) :: [(Int,String)] of | |
127 | ((k,_):_)->k | |
128 | _ -> 1 | |
129 | m<-liftIO (takeMVar t) | |
130 | let uu = (fromMaybe progressInfoZero (Data.Map.Strict.lookup theIndex m)) | |
131 | let nextKnownRelativeProgressToBeReached = gogo mylist (progress uu) | |
132 | let lastReachedKnownRelativeProgress= progress uu | |
133 | let timeOfLastReachedKnownRelativeProgress= lastStepTime uu | |
134 | ttime <- liftIO getPOSIXTime | |
135 | let time= (realToFrac ttime) | |
136 | let expectedRuntime=if lastReachedKnownRelativeProgress<0.01 then 60.0 else (time-(startTime uu))/lastReachedKnownRelativeProgress | |
137 | let runtime=time-startTime uu | |
138 | let p=lastReachedKnownRelativeProgress+(1-exp(-(time-timeOfLastReachedKnownRelativeProgress)/(expectedRuntime*(nextKnownRelativeProgressToBeReached-lastReachedKnownRelativeProgress))))*(nextKnownRelativeProgressToBeReached-lastReachedKnownRelativeProgress) | |
139 | let oldProgressBarValue = barValue uu | |
140 | let progressBarValue=oldProgressBarValue+(max((p-oldProgressBarValue)*(runtime-(lastRuntime uu))/5.0) 0.0) | |
141 | let prog=if lastReachedKnownRelativeProgress==1.0 then 1000 else if failed uu then 0 else round(progressBarValue*1000.0)::Integer | |
142 | liftIO $ if not (member theIndex m) then putMVar t m else putMVar t $!! (Data.Map.Strict.insert theIndex uu{barValue=progressBarValue,lastRuntime=runtime} m ) | |
143 | case filename uu of | |
144 | Nothing -> do method GET | |
145 | ok $ | |
146 | template "Converting" $ pageFrame $ infoBox (if not (member theIndex m) then "Not enough resources availiable to process your request! Your request has been dropped! Please download the full version for free and run it on your own computer!" else (if not (failed uu) then"Conversion Running" else "Conversion Failed due to timeout or non zero exit code")) $ | |
147 | do H.meta H.! A.httpEquiv "refresh" H.! A.content "1" | |
148 | H.table $ | |
149 | do H.tr $ | |
150 | do H.td $ | |
151 | do H.progress H.! A.style wwidth H.! A.value (stringValue (show (prog))) H.! A.max "1000" $ "" | |
152 | Just _ -> do method GET | |
153 | ok $ | |
154 | template "Conversion Finished" $ pageFrame $ infoBox "Conversion Finished" $ | |
155 | do H.meta H.! A.httpEquiv "refresh" H.! A.content (stringValue ("0;url=/file/"++(show theIndex)++"."++(extension uu))) | |
156 | H.table $ | |
157 | do H.tr $ | |
158 | do H.td $ | |
159 | do H.progress H.! A.style wwidth H.! A.value (stringValue (show (1000::Integer))) H.! A.max "1000" $ "" | |
160 | ||
161 | ||
162 | fileFrame :: MVar (Map Int ProgressInfo) -> String->ServerPart Response | |
163 | fileFrame t sub | |
164 | = do let theIndex=case (reads (Data.List.takeWhile (/= '.') sub)) :: [(Int,String)] of | |
165 | ((k,_):_)->k | |
166 | _ -> 1 | |
167 | m<-liftIO (takeMVar t) | |
168 | liftIO $ putMVar t m | |
169 | let uu = (fromMaybe progressInfoZero (Data.Map.Strict.lookup theIndex m)) | |
170 | case filename uu of | |
171 | Just x-> do f <- serveFile (guessContentTypeM mimeTypes) x | |
172 | _ <- liftIO (forkIO (do threadDelay 2000000 | |
173 | removeFile x)) | |
174 | return f | |
175 | Nothing -> do method GET | |
176 | ok $ | |
177 | template "Conversion Failed" $ pageFrame $ infoBox "Conversion Failed" $ (mytext "We are sorry the converion failed, please contact our us") | |
178 | ||
183 | = do let theIndex | |
184 | = case | |
185 | (reads (Data.List.takeWhile (/= '.') sub)) :: [(Int, String)] of | |
186 | ((k, _) : _) -> k | |
187 | _ -> 1 | |
188 | m <- liftIO (takeMVar t) | |
189 | let uu | |
190 | = (fromMaybe progressInfoZero (Data.Map.Strict.lookup theIndex m)) | |
191 | let nextKnownRelativeProgressToBeReached | |
192 | = gogo mylist (progress uu) | |
193 | let lastReachedKnownRelativeProgress = progress uu | |
194 | let timeOfLastReachedKnownRelativeProgress = lastStepTime uu | |
195 | ttime <- liftIO getPOSIXTime | |
196 | let time = (realToFrac ttime) | |
197 | let expectedRuntime | |
198 | = if lastReachedKnownRelativeProgress < 1.0e-2 then 60.0 else | |
199 | (time - (startTime uu)) / lastReachedKnownRelativeProgress | |
200 | let runtime = time - startTime uu | |
201 | let p = lastReachedKnownRelativeProgress + | |
202 | (1 - | |
203 | exp | |
204 | (-(time - timeOfLastReachedKnownRelativeProgress) / | |
205 | (expectedRuntime * | |
206 | (nextKnownRelativeProgressToBeReached - | |
207 | lastReachedKnownRelativeProgress)))) | |
208 | * | |
209 | (nextKnownRelativeProgressToBeReached - | |
210 | lastReachedKnownRelativeProgress) | |
211 | let oldProgressBarValue = barValue uu | |
212 | let progressBarValue | |
213 | = oldProgressBarValue + | |
214 | (max | |
215 | ((p - oldProgressBarValue) * (runtime - (lastRuntime uu)) / 5.0) | |
216 | 0.0) | |
217 | let prog | |
218 | = if lastReachedKnownRelativeProgress == 1.0 then 1000 else | |
219 | if failed uu then 0 else | |
220 | round (progressBarValue * 1000.0) :: Integer | |
221 | liftIO $ | |
222 | if not (member theIndex m) then putMVar t m else | |
223 | putMVar t $!! | |
224 | (Data.Map.Strict.insert theIndex | |
225 | uu{barValue = progressBarValue, lastRuntime = runtime} | |
226 | m) | |
227 | case filename uu of | |
228 | Nothing -> do method GET | |
229 | ok $ | |
230 | template "Converting" $ | |
231 | pageFrame $ | |
232 | infoBox | |
233 | (if not (member theIndex m) then | |
234 | "Not enough resources availiable to process your request! Your request has been dropped! Please download the full version for free and run it on your own computer!" | |
235 | else | |
236 | (if not (failed uu) then "Conversion Running" else | |
237 | "Conversion Failed due to timeout or non zero exit code")) | |
238 | $ | |
239 | do H.meta H.! A.httpEquiv "refresh" H.! A.content "1" | |
240 | H.table $ | |
241 | do H.tr $ | |
242 | do H.td $ | |
243 | do H.progress H.! A.style wwidth H.! | |
244 | A.value (stringValue (show (prog))) | |
245 | H.! A.max "1000" | |
246 | $ "" | |
247 | Just _ -> do method GET | |
248 | ok $ | |
249 | template "Conversion Finished" $ | |
250 | pageFrame $ | |
251 | infoBox "Conversion Finished" $ | |
252 | do H.meta H.! A.httpEquiv "refresh" H.! | |
253 | A.content | |
254 | (stringValue | |
255 | ("0;url=/file/" ++ | |
256 | (show theIndex) ++ "." ++ (extension uu))) | |
257 | H.table $ | |
258 | do H.tr $ | |
259 | do H.td $ | |
260 | do H.progress H.! A.style wwidth H.! | |
261 | A.value (stringValue (show (1000 :: Integer))) | |
262 | H.! A.max "1000" | |
263 | $ "" | |
264 | ||
265 | fileFrame :: | |
266 | MVar (Map Int ProgressInfo) -> String -> ServerPart Response | |
267 | fileFrame t sub | |
268 | = do let theIndex | |
269 | = case | |
270 | (reads (Data.List.takeWhile (/= '.') sub)) :: [(Int, String)] of | |
271 | ((k, _) : _) -> k | |
272 | _ -> 1 | |
273 | m <- liftIO (takeMVar t) | |
274 | liftIO $ putMVar t m | |
275 | let uu | |
276 | = (fromMaybe progressInfoZero (Data.Map.Strict.lookup theIndex m)) | |
277 | case filename uu of | |
278 | Just x -> do f <- serveFile (guessContentTypeM mimeTypes) x | |
279 | _ <- liftIO | |
280 | (forkIO | |
281 | (do threadDelay 2000000 | |
282 | removeFile x)) | |
283 | return f | |
284 | Nothing -> do method GET | |
285 | ok $ | |
286 | template "Conversion Failed" $ | |
287 | pageFrame $ | |
288 | infoBox "Conversion Failed" $ | |
289 | (mytext "We are sorry the converion failed, please contact our us") | |
179 | 290 | |
180 | 291 | currentlyrunning :: Map Int ProgressInfo -> Bool |
181 | currentlyrunning m = not (Data.List.all (\x->(isJust (filename x))||(failed x)) (Data.Map.Strict.elems m)) | |
182 | ||
292 | currentlyrunning m | |
293 | = not | |
294 | (Data.List.all (\ x -> (isJust (filename x)) || (failed x)) | |
295 | (Data.Map.Strict.elems m)) | |
296 | ||
183 | 297 | wwidth2 :: [Char] |
184 | wwidth2="width:400px" | |
298 | wwidth2 = "width:400px" | |
299 | ||
185 | 300 | wwidth :: AttributeValue |
186 | wwidth=stringValue wwidth2 | |
187 | formPage :: MVar (Map Int ProgressInfo) -> String -> ServerPart Response | |
301 | wwidth = stringValue wwidth2 | |
302 | ||
303 | formPage :: | |
304 | MVar (Map Int ProgressInfo) -> String -> ServerPart Response | |
188 | 305 | formPage m s |
189 | 306 | = do decodeBody (defaultBodyPolicy "/tmp/" 0 1000000 1000000) |
190 | 307 | msum [viewForm, processForm] |
191 | where | |
192 | viewForm :: ServerPart Response | |
308 | where viewForm :: ServerPart Response | |
193 | 309 | viewForm |
194 | 310 | = do method GET |
195 | 311 | ok $ |
196 | template "MediaWiki2LaTeX" $ pageFrame $ infoBox "Create Your PDF" $ | |
197 | do mytext "To compile MediaWiki pages via LaTeX to PDF choose any URL from " | |
198 | H.a H.! A.href "http://en.wikipedia.org/" $ "Wikibooks" | |
199 | mytext " or any other website running MediaWiki. If you intent to compile a wikibook make sure you use the link to the printable version of the book." | |
200 | H.br | |
201 | H.div H.! A.style "font-size:20" $ H.b $ mytext "Send your request" | |
202 | H.form H.! A.action "/form" H.! A.enctype "multipart/form-data" H.! | |
203 | A.method "POST" | |
204 | $ H.div $ | |
205 | do | |
206 | H.table $ | |
207 | do H.tr $ | |
208 | do H.td "URL to the Wiki to be converted" | |
209 | H.td $ | |
210 | do H.input H.! A.style wwidth H.! A.type_ "text" H.! A.id "msg" H.! A.name "msg" H.! A.value (stringValue s) | |
211 | H.tr $ | |
212 | do H.td "Output Format" | |
213 | H.td $ do | |
214 | H.select H.! A.style wwidth H.! A.name "output" $ | |
215 | do H.option H.! A.value "pdf" $ "Compiled PDF" | |
216 | H.option H.! A.value "zip" $ "Source ZIP" | |
217 | H.option H.! A.value "epub" $ "EPUB File" | |
218 | H.option H.! A.value "odt" $ "ODT File (Word Processor)" | |
219 | H.tr $ | |
220 | do H.td "Template expansion" | |
221 | H.td $ do | |
222 | H.select H.! A.style wwidth H.! A.name "expansion" $ | |
223 | do H.option H.! A.value "Print" $ "Print" | |
224 | H.option H.! A.value "BookMode" $ "BookMode" | |
225 | H.option H.! A.value "MediaWiki" $ "Mediawiki" | |
226 | H.option H.! A.value "Normal" $ "Normal" | |
227 | H.tr $ | |
228 | do H.td "Paper" | |
229 | H.td $ do | |
230 | H.select H.! A.style wwidth H.! A.name "paper" $ | |
231 | do H.option H.! A.value "A4" $ "A4" | |
232 | H.option H.! A.value "A5" $ "A5" | |
233 | H.option H.! A.value "B5" $ "B5" | |
234 | H.option H.! A.value "letter" $ "letter" | |
235 | H.option H.! A.value "legal" $ "legal" | |
236 | H.option H.! A.value "executive" $ "executive" | |
237 | H.tr $ | |
238 | do H.td "Vector graphics" | |
239 | H.td $ do | |
240 | H.select H.! A.style wwidth H.! A.name "vector" $ | |
241 | do H.option H.! A.value "Rasterize" $ "Rasterize" | |
242 | H.option H.! A.value "Keep Vector Form" $ "Keep Vector Form" | |
243 | H.tr $ | |
244 | do H.td "" | |
245 | H.td $ | |
246 | do H.input H.! A.style (stringValue (wwidth2++";height:60px")) H.! A.type_ "submit" H.! A.value "Start!" | |
247 | H.div H.! A.style "text-decoration:underline" $ mytext "Please note:" | |
248 | H.br | |
249 | mytext "The LaTeX source code will be compiled several times to make sure all references are resolved. The whole process will usually take about one minute but can take up to an hour depending on the extend of your request." | |
250 | ||
251 | ||
252 | ||
312 | template "MediaWiki2LaTeX" $ | |
313 | pageFrame $ | |
314 | infoBox "Create Your PDF" $ | |
315 | do mytext | |
316 | "To compile MediaWiki pages via LaTeX to PDF choose any URL from " | |
317 | H.a H.! A.href "http://en.wikipedia.org/" $ "Wikibooks" | |
318 | mytext | |
319 | " or any other website running MediaWiki. If you intent to compile a wikibook make sure you use the link to the printable version of the book." | |
320 | H.br | |
321 | H.div H.! A.style "font-size:20" $ H.b $ mytext "Send your request" | |
322 | H.form H.! A.action "/form" H.! A.enctype "multipart/form-data" H.! | |
323 | A.method "POST" | |
324 | $ | |
325 | H.div $ | |
326 | do H.table $ | |
327 | do H.tr $ | |
328 | do H.td "URL to the Wiki to be converted" | |
329 | H.td $ | |
330 | do H.input H.! A.style wwidth H.! A.type_ "text" H.! | |
331 | A.id "msg" | |
332 | H.! A.name "msg" | |
333 | H.! A.value (stringValue s) | |
334 | H.tr $ | |
335 | do H.td "Output Format" | |
336 | H.td $ | |
337 | do H.select H.! A.style wwidth H.! A.name "output" $ | |
338 | do H.option H.! A.value "pdf" $ "Compiled PDF" | |
339 | H.option H.! A.value "zip" $ "Source ZIP" | |
340 | H.option H.! A.value "epub" $ "EPUB File" | |
341 | H.option H.! A.value "odt" $ | |
342 | "ODT File (Word Processor)" | |
343 | H.tr $ | |
344 | do H.td "Template expansion" | |
345 | H.td $ | |
346 | do H.select H.! A.style wwidth H.! A.name "expansion" $ | |
347 | do H.option H.! A.value "Print" $ "Print" | |
348 | H.option H.! A.value "BookMode" $ "BookMode" | |
349 | H.option H.! A.value "MediaWiki" $ "Mediawiki" | |
350 | H.option H.! A.value "Normal" $ "Normal" | |
351 | H.tr $ | |
352 | do H.td "Paper" | |
353 | H.td $ | |
354 | do H.select H.! A.style wwidth H.! A.name "paper" $ | |
355 | do H.option H.! A.value "A4" $ "A4" | |
356 | H.option H.! A.value "A5" $ "A5" | |
357 | H.option H.! A.value "B5" $ "B5" | |
358 | H.option H.! A.value "letter" $ "letter" | |
359 | H.option H.! A.value "legal" $ "legal" | |
360 | H.option H.! A.value "executive" $ "executive" | |
361 | H.tr $ | |
362 | do H.td "Vector graphics" | |
363 | H.td $ | |
364 | do H.select H.! A.style wwidth H.! A.name "vector" $ | |
365 | do H.option H.! A.value "Rasterize" $ "Rasterize" | |
366 | H.option H.! A.value "Keep Vector Form" $ | |
367 | "Keep Vector Form" | |
368 | H.tr $ | |
369 | do H.td "" | |
370 | H.td $ | |
371 | do H.input H.! | |
372 | A.style (stringValue (wwidth2 ++ ";height:60px")) | |
373 | H.! A.type_ "submit" | |
374 | H.! A.value "Start!" | |
375 | H.div H.! A.style "text-decoration:underline" $ | |
376 | mytext "Please note:" | |
377 | H.br | |
378 | do mytext "The LaTeX source code will be compiled several times to make sure all references are resolved. The whole process will usually take about one minute." | |
379 | H.br | |
380 | H.br | |
381 | H.div H.! A.style "font-size:large" $ H.b "There is a time limit of one hour (≈ 200 pages in PDF) on this server!" | |
382 | H.br | |
383 | mytext "Requests taking longer will be terminated and a \"Conversion Failed due to timeout or non zero exit code\" message will be displayed. There is no limit in the downloadable versions shown on the right." | |
384 | ||
253 | 385 | getRunmode "Print" = ImperativeState.HTML |
254 | getRunmode "MediaWiki" = ImperativeState.ExpandedTemplates | |
386 | getRunmode "MediaWiki" = ImperativeState.ExpandedTemplates | |
255 | 387 | getRunmode "Normal" = ImperativeState.StandardTemplates |
256 | 388 | getRunmode "BookMode" = ImperativeState.Book |
257 | 389 | getRunmode _ = ImperativeState.HTML |
258 | ||
390 | ||
259 | 391 | processForm :: ServerPart Response |
260 | 392 | processForm |
261 | 393 | = do msg <- lookBS "msg" |
264 | 396 | expansion <- lookBS "expansion" |
265 | 397 | output <- lookBS "output" |
266 | 398 | zzz <- liftIO $ |
267 | do tmpDir <- getTemporaryDirectory | |
268 | (name, handle) <- openTempFile tmpDir ("MediaWiki2LaTeX"++if (toString (toStrict output))== "zip" then "zip" else if (toString (toStrict output))== "epub" then "epub" else if (toString (toStrict output))== "odt" then "odt" else "pdf") | |
269 | hClose handle >> removeFile name | |
270 | act <- mainAction FullConfig{selfTest=Nothing,headers = Nothing, resolution = 300, | |
271 | outputFilename = name, inputUrl = (toString (toStrict msg)), | |
272 | runMode = if (isInfixOf ("Book:"::[Char]) (toString (toStrict msg))) then getRunmode ("BookMode"::[Char]) else getRunmode (toString (toStrict expansion)), paper = (toString (toStrict paperOpt)), vector = (toString (toStrict vectorOpt))=="Keep Vector Form", | |
273 | ImperativeState.copy = Nothing, mainPath = "", server = Nothing,outputType=if (toString (toStrict output)) == "zip" then ImperativeState.ZipArchive else if (toString (toStrict output)) == "epub" then ImperativeState.EPubFile else if (toString (toStrict output)) == "odt" then ImperativeState.OdtFile else ImperativeState.PlainPDF, compile=Nothing, imgctrb=Nothing} | |
274 | yy<-newEmptyMVar | |
275 | mm<-takeMVar m | |
276 | _<-if (not (currentlyrunning mm)) | |
277 | then | |
278 | do _<-forkIO $ do (i,o,e,h) <- runInteractiveCommand ("mediawiki2latex -x "++act) | |
279 | tt<-getPOSIXTime | |
280 | let ss = (if (toString (toStrict output))== "zip" then "zip" else if (toString (toStrict output))== "epub" then "epub" else if (toString (toStrict output))== "odt" then "odt" else "pdf") | |
281 | zz<-forkProgressDriver o m name ss | |
282 | putMVar yy zz | |
283 | ex h i e ((realToFrac tt)+3600.0) m zz ss | |
284 | return () | |
285 | else | |
286 | do putMVar yy (-1) | |
287 | return () | |
288 | putMVar m mm | |
289 | ||
290 | zzzz<-takeMVar yy | |
291 | return zzzz | |
399 | do tmpDir <- getTemporaryDirectory | |
400 | (name, handle) <- openTempFile tmpDir | |
401 | ("MediaWiki2LaTeX" ++ | |
402 | if (toString (toStrict output)) == "zip" then | |
403 | "zip" else | |
404 | if (toString (toStrict output)) == "epub" then | |
405 | "epub" else | |
406 | if (toString (toStrict output)) == "odt" then | |
407 | "odt" else "pdf") | |
408 | hClose handle >> removeFile name | |
409 | act <- mainAction | |
410 | FullConfig{selfTest = Nothing, headers = Nothing, | |
411 | resolution = 300, outputFilename = name, | |
412 | inputUrl = (toString (toStrict msg)), | |
413 | runMode = | |
414 | if | |
415 | (isInfixOf ("Book:" :: [Char]) | |
416 | (toString (toStrict msg))) | |
417 | then getRunmode ("BookMode" :: [Char]) else | |
418 | getRunmode (toString (toStrict expansion)), | |
419 | paper = (toString (toStrict paperOpt)), | |
420 | vector = | |
421 | (toString (toStrict vectorOpt)) == | |
422 | "Keep Vector Form", | |
423 | ImperativeState.copy = Nothing, mainPath = "", | |
424 | server = Nothing, | |
425 | outputType = | |
426 | if (toString (toStrict output)) == "zip" then | |
427 | ImperativeState.ZipArchive else | |
428 | if (toString (toStrict output)) == "epub" then | |
429 | ImperativeState.EPubFile else | |
430 | if (toString (toStrict output)) == "odt" then | |
431 | ImperativeState.OdtFile else | |
432 | ImperativeState.PlainPDF, | |
433 | compile = Nothing, imgctrb = Nothing} | |
434 | yy <- newEmptyMVar | |
435 | mm <- takeMVar m | |
436 | _ <- if (not (currentlyrunning mm)) then | |
437 | do _ <- forkIO $ | |
438 | do (i, o, e, h) <- runInteractiveCommand | |
439 | ("mediawiki2latex -x " ++ act) | |
440 | tt <- getPOSIXTime | |
441 | let ss | |
442 | = (if (toString (toStrict output)) == "zip" | |
443 | then "zip" else | |
444 | if (toString (toStrict output)) == "epub" | |
445 | then "epub" else | |
446 | if (toString (toStrict output)) == "odt" | |
447 | then "odt" else "pdf") | |
448 | zz <- forkProgressDriver o m name ss | |
449 | putMVar yy zz | |
450 | ex h i e ((realToFrac tt) + 3600.0) m zz ss | |
451 | return () | |
452 | else | |
453 | do putMVar yy (-1) | |
454 | return () | |
455 | putMVar m mm | |
456 | zzzz <- takeMVar yy | |
457 | return zzzz | |
292 | 458 | method POST |
293 | ok $ toResponse $ template "Redirect" $ | |
294 | do H.meta H.! A.httpEquiv "refresh" H.! A.content (stringValue ("0;url=/progress/"++(show zzz)++"."++(if (toString (toStrict output))== "zip" then "html" else "html"))) | |
295 | ||
296 | ex :: ProcessHandle -> Handle -> Handle -> Double -> MVar (Map Int ProgressInfo)-> Int -> String -> IO () | |
459 | ok $ | |
460 | toResponse $ | |
461 | template "Redirect" $ | |
462 | do H.meta H.! A.httpEquiv "refresh" H.! | |
463 | A.content | |
464 | (stringValue | |
465 | ("0;url=/progress/" ++ | |
466 | (show zzz) ++ | |
467 | "." ++ | |
468 | (if (toString (toStrict output)) == "zip" then "html" else | |
469 | "html"))) | |
470 | ||
471 | ex :: | |
472 | ProcessHandle -> | |
473 | Handle -> | |
474 | Handle -> | |
475 | Double -> MVar (Map Int ProgressInfo) -> Int -> String -> IO () | |
297 | 476 | ex h i e t m n s |
298 | = do tt<-getPOSIXTime | |
299 | threadDelay 100 | |
300 | if (realToFrac tt) > t | |
301 | then | |
302 | do System.IO.hPutStr i "\n" | |
303 | mm<-takeMVar m | |
304 | putMVar m (Data.Map.Strict.update (\x->Just x{failed=True}) n mm) | |
305 | else return () | |
306 | xi<-hIsOpen i | |
307 | if xi then hFlush i else return () | |
308 | xo<-hIsOpen e | |
309 | _<- if xo then System.IO.hGetContents e else return "" | |
310 | y<-getProcessExitCode h | |
311 | case y of | |
312 | Just ee -> if ExitSuccess == ee | |
313 | then do threadDelay 3000000 | |
314 | mm<-takeMVar m | |
315 | putMVar m (Data.Map.Strict.insertWith (\new old-> old{progress=progress new,lastStepTime=realToFrac tt}) n progressInfoZero{progress=1.0} mm) | |
316 | mmm<-takeMVar m | |
317 | putMVar m mmm | |
318 | case Data.Map.Strict.lookup n mmm of | |
319 | Just yy -> do _ <- takeMVar m | |
320 | putMVar m (Data.Map.Strict.insert n yy{filename=Just s} mmm) | |
321 | _ -> return () | |
322 | else | |
323 | do mm<-takeMVar m | |
324 | putMVar m (Data.Map.Strict.update (\x->Just x{failed=True}) n mm) | |
325 | ||
326 | _-> if (realToFrac tt) > t then return () else ex h i e t m n s | |
477 | = do tt <- getPOSIXTime | |
478 | threadDelay 100 | |
479 | if (realToFrac tt) > t then | |
480 | do System.IO.hPutStr i "\n" | |
481 | mm <- takeMVar m | |
482 | putMVar m | |
483 | (Data.Map.Strict.update (\ x -> Just x{failed = True}) n mm) | |
484 | else return () | |
485 | xi <- hIsOpen i | |
486 | if xi then hFlush i else return () | |
487 | xo <- hIsOpen e | |
488 | _ <- if xo then System.IO.hGetContents e else return "" | |
489 | y <- getProcessExitCode h | |
490 | case y of | |
491 | Just ee -> if ExitSuccess == ee then | |
492 | do threadDelay 3000000 | |
493 | mm <- takeMVar m | |
494 | putMVar m | |
495 | (Data.Map.Strict.insertWith | |
496 | (\ new old -> | |
497 | old{progress = progress new, lastStepTime = realToFrac tt}) | |
498 | n | |
499 | progressInfoZero{progress = 1.0} | |
500 | mm) | |
501 | mmm <- takeMVar m | |
502 | putMVar m mmm | |
503 | case Data.Map.Strict.lookup n mmm of | |
504 | Just yy -> do _ <- takeMVar m | |
505 | putMVar m | |
506 | (Data.Map.Strict.insert n yy{filename = Just s} mmm) | |
507 | _ -> return () | |
508 | else | |
509 | do mm <- takeMVar m | |
510 | putMVar m | |
511 | (Data.Map.Strict.update (\ x -> Just x{failed = True}) n mm) | |
512 | _ -> if (realToFrac tt) > t then return () else ex h i e t m n s | |
513 | ||
327 | 514 | mylist :: [([Char], Double)] |
328 | mylist=[("downloading article and contributor information", 0.02540431143798292),("parsing article text", 0.057625916241286344),("forking threads to download of images and contributor information on them", 0.058045207449988465),("precompiling table columns", 0.08075814224942594),("joining threads to download the images and contributor information on them", 0.37003149457779727),("preparing for PDF generation", 0.5479855803098518),("preparing images for LaTeX document", 0.637605216120732),("generating PDF file. LaTeX run 1 of 4", 0.6911489294291799),("generating PDF file. LaTeX run 2 of 4", 0.7673758195622185),("generating PDF file. LaTeX run 3 of 4", 0.8463397892914045),("generating PDF file. LaTeX run 4 of 4", 0.9231746180088297),("finished", 1.0)] | |
329 | ||
515 | mylist | |
516 | = [("downloading article and contributor information", | |
517 | 2.540431143798292e-2), | |
518 | ("parsing article text", 5.7625916241286344e-2), | |
519 | ("forking threads to download of images and contributor information on them", | |
520 | 5.8045207449988465e-2), | |
521 | ("precompiling table columns", 8.075814224942594e-2), | |
522 | ("joining threads to download the images and contributor information on them", | |
523 | 0.37003149457779727), | |
524 | ("preparing for PDF generation", 0.5479855803098518), | |
525 | ("preparing images for LaTeX document", 0.637605216120732), | |
526 | ("generating PDF file. LaTeX run 1 of 4", 0.6911489294291799), | |
527 | ("generating PDF file. LaTeX run 2 of 4", 0.7673758195622185), | |
528 | ("generating PDF file. LaTeX run 3 of 4", 0.8463397892914045), | |
529 | ("generating PDF file. LaTeX run 4 of 4", 0.9231746180088297), | |
530 | ("finished", 1.0)] | |
330 | 531 | |
331 | 532 | wwait :: Handle -> IO () |
332 | wwait h = do b<-hIsEOF h | |
333 | if b | |
334 | then | |
335 | do threadDelay 1000 | |
336 | wwait h | |
337 | else return () | |
338 | ||
339 | progressDriver::Int->Handle->MVar (Map Int ProgressInfo)->String->IO () | |
340 | progressDriver n o t s = do xo<-hIsOpen o | |
341 | threadDelay 100 | |
342 | tt <- getPOSIXTime | |
343 | l<- if xo then do wwait o | |
344 | hGetLine o | |
345 | else return "" | |
346 | case msum (Data.List.map (\(k,v)-> if isInfixOf k l then Just v else Nothing) mylist) of | |
347 | Just x -> do m<-takeMVar t | |
348 | putMVar t (Data.Map.Strict.insertWith (\new old-> old{progress=progress new,lastStepTime=realToFrac tt}) n progressInfoZero{progress=x} m) | |
349 | _ -> return () | |
350 | m<-readMVar t | |
351 | case Data.Map.Strict.lookup n m of | |
352 | Just yy | (progress yy)==1.0 -> do _<-takeMVar t | |
353 | putMVar t (Data.Map.Strict.insert n yy{filename=Just s} m) | |
354 | Just yy | (failed yy)->return () | |
355 | _ -> progressDriver n o t s | |
356 | progressInfoZero::ProgressInfo | |
357 | progressInfoZero = ProgressInfo {progress=0.0,filename=Nothing,startTime=0.0,failed=False,barValue=0.0,lastStepTime=0.0,lastRuntime=0.0,extension=""} | |
358 | ||
359 | forkProgressDriver::Handle->MVar (Map Int ProgressInfo)->String->String->IO Int | |
533 | wwait h | |
534 | = do b <- hIsEOF h | |
535 | if b then | |
536 | do threadDelay 1000 | |
537 | wwait h | |
538 | else return () | |
539 | ||
540 | progressDriver :: | |
541 | Int -> Handle -> MVar (Map Int ProgressInfo) -> String -> IO () | |
542 | progressDriver n o t s | |
543 | = do xo <- hIsOpen o | |
544 | threadDelay 100 | |
545 | tt <- getPOSIXTime | |
546 | l <- if xo then | |
547 | do wwait o | |
548 | hGetLine o | |
549 | else return "" | |
550 | case | |
551 | msum | |
552 | (Data.List.map | |
553 | (\ (k, v) -> if isInfixOf k l then Just v else Nothing) | |
554 | mylist) | |
555 | of | |
556 | Just x -> do m <- takeMVar t | |
557 | putMVar t | |
558 | (Data.Map.Strict.insertWith | |
559 | (\ new old -> | |
560 | old{progress = progress new, lastStepTime = realToFrac tt}) | |
561 | n | |
562 | progressInfoZero{progress = x} | |
563 | m) | |
564 | _ -> return () | |
565 | m <- readMVar t | |
566 | case Data.Map.Strict.lookup n m of | |
567 | Just yy | (progress yy) == 1.0 -> | |
568 | do _ <- takeMVar t | |
569 | putMVar t (Data.Map.Strict.insert n yy{filename = Just s} m) | |
570 | Just yy | (failed yy) -> return () | |
571 | _ -> progressDriver n o t s | |
572 | ||
573 | progressInfoZero :: ProgressInfo | |
574 | progressInfoZero | |
575 | = ProgressInfo{progress = 0.0, filename = Nothing, startTime = 0.0, | |
576 | failed = False, barValue = 0.0, lastStepTime = 0.0, | |
577 | lastRuntime = 0.0, extension = ""} | |
578 | ||
579 | forkProgressDriver :: | |
580 | Handle -> MVar (Map Int ProgressInfo) -> String -> String -> IO Int | |
360 | 581 | forkProgressDriver o t s ext |
361 | = do m<-takeMVar t | |
362 | tt <- getPOSIXTime | |
363 | let mm = case (keys m) of | |
364 | (x:xs)-> Data.List.maximum (x:xs) | |
365 | _ -> 0 | |
366 | putMVar t (Data.Map.Strict.insert (mm+1) ProgressInfo {progress=0.0,filename=Nothing,startTime=realToFrac tt,barValue=0.0,lastStepTime=0.0,lastRuntime=0.0,extension=ext,failed=False} m) | |
367 | _<-forkIO (progressDriver (mm+1) o t s) | |
368 | return (mm+1) | |
369 | ||
582 | = do m <- takeMVar t | |
583 | tt <- getPOSIXTime | |
584 | let mm | |
585 | = case (keys m) of | |
586 | (x : xs) -> Data.List.maximum (x : xs) | |
587 | _ -> 0 | |
588 | putMVar t | |
589 | (Data.Map.Strict.insert (mm + 1) | |
590 | ProgressInfo{progress = 0.0, filename = Nothing, | |
591 | startTime = realToFrac tt, barValue = 0.0, lastStepTime = 0.0, | |
592 | lastRuntime = 0.0, extension = ext, failed = False} | |
593 | m) | |
594 | _ <- forkIO (progressDriver (mm + 1) o t s) | |
595 | return (mm + 1) |
17 | 17 | import Licenses |
18 | 18 | import Tools |
19 | 19 | import ImperativeState |
20 | ||
20 | ||
21 | 21 | makeUrl2 :: String -> String -> [Char] |
22 | 22 | makeUrl2 theLemma theHost |
23 | 23 | = (unify . exportURL) |
24 | 24 | (URL{url_path = "w/index.php", |
25 | 25 | url_params = |
26 | [("title", (replace2 theLemma "%" "%25")), ("offset", ""), ("limit", "500000"), | |
27 | ("action", "history")], | |
26 | [("title", (replace2 theLemma "%" "%25")), ("offset", ""), | |
27 | ("limit", "500000"), ("action", "history")], | |
28 | 28 | url_type = |
29 | 29 | Absolute |
30 | 30 | (Host{protocol = HTTP True, host = theHost, port = Nothing})}) |
31 | ||
31 | ||
32 | 32 | makeUrl4 :: String -> [Char] |
33 | 33 | makeUrl4 uuu |
34 | 34 | = fromMaybe uuu |
38 | 38 | (unify . exportURL) |
39 | 39 | (URL{url_path = (url_path uu), |
40 | 40 | url_params = |
41 | [("title", (replace2 ti "%" "%25")), ("offset", ""), ("limit", "500000"), | |
42 | ("action", "history")], | |
41 | [("title", (replace2 ti "%" "%25")), ("offset", ""), | |
42 | ("limit", "500000"), ("action", "history")], | |
43 | 43 | url_type = url_type uu})) |
44 | ||
44 | ||
45 | 45 | makeUrl3 :: String -> String -> [Char] |
46 | 46 | makeUrl3 theLemma theHost |
47 | 47 | = (unify . exportURL) |
49 | 49 | url_type = |
50 | 50 | Absolute |
51 | 51 | (Host{protocol = HTTP True, host = theHost, port = Nothing})}) |
52 | ||
52 | ||
53 | 53 | deepGet2 :: [Char] -> [Anything a] -> [Anything a] |
54 | 54 | deepGet2 tag ll = concat $ map go ll |
55 | 55 | where go (Environment Tag (TagAttr t m) l) |
57 | 57 | [Environment Tag (TagAttr tag m) l] ++ (deepGet2 tag l) |
58 | 58 | go (Environment _ _ l) = (deepGet2 tag l) |
59 | 59 | go _ = [] |
60 | ||
60 | ||
61 | 61 | getLicense :: [Anything Char] -> Maybe [Char] |
62 | 62 | getLicense l = (go l) |
63 | where | |
64 | go :: [Anything Char] -> Maybe String | |
63 | where go :: [Anything Char] -> Maybe String | |
65 | 64 | go ll = msum (map (dg ll) licenses) |
66 | 65 | dg ll (x, c) |
67 | 66 | = case deepGet "a" "href" x ll of |
68 | 67 | (_ : _) -> Just c |
69 | 68 | _ -> Nothing |
70 | ||
69 | ||
71 | 70 | getAuthor :: [Anything Char] -> Maybe [Anything Char] |
72 | 71 | getAuthor x = listToMaybe (concat (map go (deepGet2 "tr" x))) |
73 | 72 | where go (Environment _ _ l) |
78 | 77 | _ -> [] |
79 | 78 | _ -> [] |
80 | 79 | go _ = [] |
81 | ||
80 | ||
82 | 81 | simpleContributors :: |
83 | 82 | [Char] -> |
84 | [Char] -> Maybe URL -> ImperativeState-> IO [(String, String, Int, Maybe String)] | |
83 | [Char] -> | |
84 | Maybe URL -> | |
85 | ImperativeState -> IO [(String, String, Int, Maybe String)] | |
85 | 86 | simpleContributors theLemma theHost uu st |
86 | 87 | = do let theUrl3 |
87 | 88 | = case uu of |
115 | 116 | let y = decodeString yy |
116 | 117 | let x = decodeString xx |
117 | 118 | let dd |
118 | = ((deepGet "a" "class" "new mw-userlink" (parseit minparsers x))++(deepGet "a" "class" "mw-userlink" (parseit minparsers x))) :: | |
119 | [Anything Char] | |
119 | = ((deepGet "a" "class" "new mw-userlink" (parseit minparsers x)) | |
120 | ++ (deepGet "a" "class" "mw-userlink" (parseit minparsers x))) | |
121 | :: [Anything Char] | |
120 | 122 | let ll = (filter pre (map go dd)) |
121 | 123 | let n = (nub ll) :: [(String, String)] |
122 | 124 | let out = map go2 (zip (map (count ll) n) n) |
123 | 125 | let ht = (parseit htmlminparsers y) |
124 | 126 | case (getAuthor ht) of |
125 | 127 | Just zz -> return |
126 | [(fst (treeToLaTeX3 zz initialState{urld = analyseNetloc (hostname . fullUrl $ st)}), "", 1 :: Int, getLicense ht)] | |
128 | [(fst | |
129 | (treeToLaTeX3 zz | |
130 | initialState{urld = analyseNetloc (hostname . fullUrl $ st)}), | |
131 | "", 1 :: Int, getLicense ht)] | |
127 | 132 | _ -> return out |
128 | ||
129 | where | |
130 | go :: Anything Char -> (String, String) | |
133 | where go :: Anything Char -> (String, String) | |
131 | 134 | go (Environment Tag (TagAttr _ m) l) |
132 | 135 | = ((shallowFlatten (deepFlatten l)), findWithDefault "" "href" m) |
133 | 136 | go _ = ("", "") |
134 | 137 | go2 (c, (a, h)) = (a, h, c, Nothing) |
135 | ||
138 | ||
136 | 139 | count :: (Eq a) => [a] -> a -> Int |
137 | 140 | count l s = length (filter (== s) l) |
138 | ||
141 | ||
139 | 142 | pre :: (String, String) -> Bool |
140 | 143 | pre s |
141 | 144 | = case (runParser ipaddr () "" (fst s)) of |
142 | 145 | Right _ -> False |
143 | 146 | Left _ -> True |
144 | ||
147 | ||
145 | 148 | intdigit :: Parser Int |
146 | 149 | intdigit |
147 | 150 | = do a <- digit |
148 | 151 | case reads [a] of |
149 | 152 | [(i, [])] -> return i |
150 | 153 | _ -> pzero |
151 | ||
154 | ||
152 | 155 | ipnum3 :: ParsecT String () Identity Int |
153 | 156 | ipnum3 |
154 | 157 | = do a <- intdigit |
155 | 158 | b <- intdigit |
156 | 159 | c <- intdigit |
157 | 160 | return (a * 100 + b * 10 + c) |
158 | ||
161 | ||
159 | 162 | ipnum2 :: ParsecT String () Identity Int |
160 | 163 | ipnum2 |
161 | 164 | = do a <- intdigit |
162 | 165 | b <- intdigit |
163 | 166 | return (a * 10 + b) |
164 | ||
167 | ||
165 | 168 | ipnum1 :: Parser Int |
166 | 169 | ipnum1 = do intdigit |
167 | ||
170 | ||
168 | 171 | ipnum :: ParsecT [Char] () Identity () |
169 | 172 | ipnum |
170 | 173 | = do n <- (try (ipnum3)) <|> (try (ipnum2)) <|> ipnum1 |
171 | 174 | if ((n <= 255) && (n >= 0)) then return () else pzero |
172 | ||
175 | ||
173 | 176 | ipaddr :: |
174 | 177 | Text.Parsec.Prim.ParsecT [Char] () Data.Functor.Identity.Identity |
175 | 178 | () |
176 | 179 | ipaddr = try (ipv4addr) <|> ipv6addr |
177 | ||
180 | ||
178 | 181 | ipv4addr :: |
179 | 182 | Text.Parsec.Prim.ParsecT [Char] () Data.Functor.Identity.Identity |
180 | 183 | () |
187 | 190 | _ <- char '.' |
188 | 191 | _ <- ipnum |
189 | 192 | return () |
190 | ||
193 | ||
191 | 194 | ipv6num :: |
192 | 195 | Text.Parsec.Prim.ParsecT [Char] () Data.Functor.Identity.Identity |
193 | 196 | () |
198 | 201 | _ <- try (hexDigit) <|> return '0' |
199 | 202 | _ <- try (hexDigit) <|> return '0' |
200 | 203 | return () |
201 | ||
204 | ||
202 | 205 | ipv6addr :: |
203 | 206 | Text.Parsec.Prim.ParsecT [Char] () Data.Functor.Identity.Identity |
204 | 207 | () |
79 | 79 | extract pathname |
80 | 80 | = do _ <- createDirectories pathname |
81 | 81 | writeFiles (pathname ++ "/document/headers/") headerFiles |
82 |
8 | 8 | import Data.Maybe |
9 | 9 | import Control.Monad |
10 | 10 | import MyState |
11 | ||
11 | ||
12 | 12 | {-DHUN| the width of a columns as float wrapped in a Just value of the maybe monad if it could be determined. Return the value Nothing of the maybe monad otherwise. The only parameter is of part of the parse tree that describe the opening part of the table cell element. DHUN-} |
13 | ||
13 | ||
14 | 14 | widthOfColumn :: [Anything Char] -> Maybe Float |
15 | 15 | widthOfColumn = msum . (map f) |
16 | 16 | where f (Environment Attribute (Attr (k, v)) _) |
20 | 20 | guard ('%' `elem` v) |
21 | 21 | return (1.0e-2 * z) |
22 | 22 | f _ = Nothing |
23 | ||
23 | ||
24 | 24 | columnMultiplicityForSimple :: [Anything Char] -> Int |
25 | 25 | columnMultiplicityForSimple x |
26 | 26 | = case columnMultiplicity x of |
27 | 27 | Just a -> a |
28 | 28 | _ -> -1 |
29 | ||
29 | ||
30 | 30 | raggedArrayOfWidths :: |
31 | 31 | [Anything Char] -> [Maybe Float] -> [[Maybe Float]] |
32 | 32 | raggedArrayOfWidths ((Environment TableRowSep _ _) : xs) temp |
46 | 46 | (replicate ((columnMultiplicityForSimple x) - 1) [Nothing])))) |
47 | 47 | raggedArrayOfWidths (_ : xs) temp = (raggedArrayOfWidths xs temp) |
48 | 48 | raggedArrayOfWidths [] temp = [temp] |
49 | ||
49 | ||
50 | 50 | numberOfColumns :: [Anything Char] -> Int |
51 | 51 | numberOfColumns a |
52 | 52 | = (maximum ([length x | x <- (raggedArrayOfWidths a [])])) |
53 | ||
53 | ||
54 | 54 | initialListofWidths :: [Anything Char] -> [Maybe Float] |
55 | 55 | initialListofWidths x = replicate (numberOfColumns x) Nothing |
56 | ||
56 | ||
57 | 57 | listMax :: [Maybe Float] -> [Maybe Float] -> [Maybe Float] |
58 | 58 | listMax (Just x : xs) (Just y : ys) |
59 | 59 | = Just (max x y) : listMax xs ys |
61 | 61 | listMax [] (y : ys) = y : listMax [] ys |
62 | 62 | listMax (x : xs) [] = x : listMax xs [] |
63 | 63 | listMax [] [] = [] |
64 | ||
64 | ||
65 | 65 | preliminaryWidths :: |
66 | 66 | [[Maybe Float]] -> [Maybe Float] -> [Maybe Float] |
67 | 67 | preliminaryWidths l k = foldl (listMax) k l |
68 | ||
68 | ||
69 | 69 | standardColumnWitdh :: [Anything Char] -> Maybe Float |
70 | 70 | standardColumnWitdh a |
71 | 71 | = if columns > columnsWithDefinedWidth then |
78 | 78 | columns = numberOfColumns a |
79 | 79 | columnsWithDefinedWidth = length (filter isJust l) |
80 | 80 | sumOfDefinedWidths = sum (map fromJust (filter isJust l)) |
81 | ||
81 | ||
82 | 82 | rawWidths :: [Anything Char] -> [Maybe Float] |
83 | 83 | rawWidths a |
84 | 84 | = (preliminaryWidths (raggedArrayOfWidths a []) |
85 | 85 | (initialListofWidths a)) |
86 | ||
86 | ||
87 | 87 | {-DHUN fallback function for the width of columns if the precompilation procedure for the width of columns is not available yet. So particularly when the precompilation is just running. Takes the parse tree representation of the table as first input parameter. Returns the list of widths of the columns. I am not documenting its subfunctions since the final width of the columns don't have anything to do with the width calculated here, still these preliminary widths are needed for the precompilation procedure DHUN-} |
88 | ||
88 | ||
89 | 89 | columnWidths :: [Anything Char] -> [Float] |
90 | 90 | columnWidths a = w |
91 | 91 | where l = rawWidths a |
95 | 95 | w = fromMaybe (concat (replicate m [f / mf])) $ |
96 | 96 | do ww <- standardColumnWitdh a |
97 | 97 | return [x * f | x <- map (fromMaybe ww) l] |
98 | ||
98 | ||
99 | 99 | {-DHUN| part of the correction calculation for the space between columns inside a table. Takes the number of columns as first input parameter returns a scaling factor DHUN-} |
100 | ||
100 | ||
101 | 101 | scalefactor :: (Fractional a, Ord a) => a -> a |
102 | 102 | scalefactor n | n <= 10 = 12.8 * (n) / 448.0 |
103 | 103 | scalefactor _ = 12.8 * (11.0) / 448.0 |
104 | ||
104 | ||
105 | 105 | {-DHUN| part of the correction calculation for the space between columns inside a table. Takes the number of columns as first input parameter returns a scaling factor DHUN-} |
106 | ||
106 | ||
107 | 107 | tableScale :: Int -> Float |
108 | 108 | tableScale nColumns = (1.0 / n) * (1.0 - (scalefactor n)) |
109 | 109 | where n = fromIntegral nColumns |
110 | ||
110 | ||
111 | 111 | {-DHUN| returns the latex environment name for a table environments. It the float passes as first parameter it 1.0 the result is longtable, otherwise it is tabular. This function is usually called with the width of the current cell in units of the line width as first parameter. Outside any table this value is 1.0 inside a table it is always smaller than 1.0. So this function will return tabular in case of a nested table, which is necessary since longtables can not be nested inside other longtables, but tabulars can be nested within longtables as well as tabulars. DHUN-} |
112 | ||
112 | ||
113 | 113 | tableEnvironment :: Float -> String |
114 | 114 | tableEnvironment 1.0 = "longtable" |
115 | 115 | tableEnvironment _ = "tabular" |
116 | ||
116 | ||
117 | 117 | innerTableSpecifier :: [Float] -> String -> String |
118 | 118 | innerTableSpecifier (f : xs) t |
119 | 119 | = ">{\\RaggedRight}p{" ++ |
120 | 120 | (printf "%0.5f" f) ++ |
121 | 121 | "\\linewidth}" ++ t ++ (innerTableSpecifier xs t) |
122 | 122 | innerTableSpecifier [] _ = [] |
123 | ||
123 | ||
124 | 124 | {-DHUN| Returns the table header which represents the width of the columns of a table in units of the line width. It takes a list of width as second parameter. It is understood that necessary correction for the width to compensate for the space needed by separations of columns have already been applied. The is the first boolean parameter is true rules will be drawn in the table, otherwise they won't. See also documentation of the wdth3 function in the module LatexRenderer. DHUN-} |
125 | ||
125 | ||
126 | 126 | tableSpecifier :: Bool -> [Float] -> String |
127 | 127 | tableSpecifier True f = '|' : (innerTableSpecifier f "|") |
128 | 128 | tableSpecifier False f = (innerTableSpecifier f "") |
129 | ||
129 | ||
130 | 130 | {-DHUN| Takes the multirowmap as first input parameter. See documentation on the function multiRowDictChangeStart in this module for details on the multirowmap. It returns true if there are currently no multirow cells active in the given multirowdict DHUN-} |
131 | ||
131 | ||
132 | 132 | myempty :: Map Int (Int, Int) -> Bool |
133 | 133 | myempty d = [x | x <- Map.toList d, (fst (snd x)) /= 0] == [] |
134 | ||
134 | ||
135 | 135 | {-DHUN| takes the string found in the header symbol of a table or the opening tag of the html table tag. That is the place where the attributes are, but only understood as string so without parsing the attributes as map, and returns a boolean. It this boolean is true the rules of the table need to be drawn, otherwise they must not be drawn DHUN-} |
136 | ||
136 | ||
137 | 137 | seperatingLinesRequested :: String -> Bool |
138 | 138 | seperatingLinesRequested s |
139 | 139 | = (isInfixOf2 "Prettytable" (map toLower s)) || |
140 | 140 | (isInfixOf2 "prettytable" (map toLower s)) || |
141 | 141 | (isInfixOf2 "wikitable" (map toLower s)) |
142 | ||
142 | ||
143 | 143 | {-DHUN| returns the latex symbol for a horizontal line on the last row of a table, that is a horizontal rule, if the first boolean parameter is true, otherwise the empty string is returned. This function is usually being called with the first parameter indicating whether or not rules should be drawn with the table DHUN-} |
144 | ||
144 | ||
145 | 145 | rowDelimiter :: Bool -> String |
146 | 146 | rowDelimiter True = "\\\\ \\hline" |
147 | 147 | rowDelimiter False = "" |
148 | ||
148 | ||
149 | 149 | {-DHUN| returns the latex symbol for a horizontal line in a table, that is a horizontal rule, if the first boolean parameter is true, otherwise the empty string is returned. This function is usually being called with the first parameter indicating whether or not rules should be drawn with the table DHUN-} |
150 | ||
150 | ||
151 | 151 | horizontalLine :: Bool -> String |
152 | 152 | horizontalLine True = " \\hline" |
153 | 153 | horizontalLine False = "" |
154 | ||
154 | ||
155 | 155 | {-DHUN| return the latex symbol for a partly drawn inner horizontal line in a table, that is a horizontal rule. It has to be drawn only partly since multirow cells intersect with it. The second parameter is the multirowmap (the documentation on the function multiRowDictChangeStart in this module for details). The third parameter is the total number of columns in the table. The first parameter is a and index that is incremented during the course of this function and has to be 1 when called this function from outside DHUN-} |
156 | ||
156 | ||
157 | 157 | makeCLines :: Int -> Map Int (Int, Int) -> Int -> [Char] |
158 | 158 | makeCLines m d t |
159 | 159 | = if m <= t then |
165 | 165 | where def |
166 | 166 | = "\\cline{" ++ |
167 | 167 | (show m) ++ "-" ++ (show m) ++ "}" ++ (makeCLines (m + 1) d t) |
168 | ||
168 | ||
169 | 169 | {-DHUN| return the latex symbol for an inner horizontal line in a table, that is a horizontal rule. If the first boolean parameter is true the rule is drawn otherwise it is not. If multirow cells interfere with this rule the rule is only drawn in parts as required. The second parameter is the multirowmap (the documentation on the function multiRowDictChangeStart in this module for details). The third parameter is the total number of columns in the table DHUN-} |
170 | ||
170 | ||
171 | 171 | innerHorizontalLine :: Bool -> Map Int (Int, Int) -> Int -> String |
172 | 172 | innerHorizontalLine b d m |
173 | 173 | = if b then |
174 | 174 | if myempty d then horizontalLine True else ' ' : makeCLines 1 d m |
175 | 175 | else "" |
176 | ||
176 | ||
177 | 177 | {-DHUN| the symbol in latex for separating columns. It is returned if the fist boolean parameter is true otherwise the empty string is returned. This function is usually called with the first parameter being true if the current column was not the first column of a row since the symbol is not needed before the start of the first column of a row. This is a contrast to html where the first cell of a row has its own td or th tag. The mediawiki markup notation is similar to html in this respect. DHUN-} |
178 | ||
178 | ||
179 | 179 | columnSeperator :: Bool -> String |
180 | 180 | columnSeperator True = "&" |
181 | 181 | columnSeperator False = "" |
182 | ||
182 | ||
183 | 183 | {-DHUN| takes the parse result of the attributes of a th tag or td tag or a corresponding header column separator or column separator as second parameter. It takes a key for this map of attributes as string as first parameter. If the key is present in the map, and the value found in the map at that key can be parsed as an Integer that integer is returned. If no value for the key could be found in the map or it could not be parsed as an integer the value Nothing of the Maybe monad is returned. DHUN-} |
184 | ||
184 | ||
185 | 185 | genMultiplicity :: String -> [Anything Char] -> Maybe Int |
186 | 186 | genMultiplicity s = msum . (map f) |
187 | 187 | where f (Environment Attribute (Attr (k, v)) _) |
190 | 190 | guard (k == s) |
191 | 191 | return z |
192 | 192 | f _ = Nothing |
193 | ||
193 | ||
194 | 194 | {-DHUN| takes the parse result of the attributes of a th tag or td tag or a corresponding header column separator or column separator as second parameter. It takes a key for this map of attributes as string as first parameter. It returns a result of the lookup of the key in the map (so the value). as string wrapped into the Maybe monad. If no value for the key could be found in the map it returns the value Nothing of the Maybe monad. DHUN-} |
195 | ||
195 | ||
196 | 196 | genLookup :: String -> [Anything Char] -> Maybe String |
197 | 197 | genLookup s = msum . (map f) |
198 | 198 | where f (Environment Attribute (Attr (k, v)) _) |
201 | 201 | guard (k == s) |
202 | 202 | return v |
203 | 203 | f _ = Nothing |
204 | ||
204 | ||
205 | 205 | {-DHUN| the column multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. The result is an integer wrapped into the maybe monad. The value Nothing of the Maybe monad is returned if the attribute colspan is not present (or did not have a value parseable as Integer) within the first parameter DHUN-} |
206 | ||
206 | ||
207 | 207 | columnMultiplicity :: [Anything Char] -> Maybe Int |
208 | 208 | columnMultiplicity = genMultiplicity "colspan" |
209 | ||
209 | ||
210 | 210 | {-DHUN| the row multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. The result is an integer wrapped into the maybe monad. The value Nothing of the Maybe monad is returned if the attribute rowspan is not present (or did not have a value parseable as Integer) within the first parameter DHUN-} |
211 | ||
211 | ||
212 | 212 | rowMultiplicity :: [Anything Char] -> Maybe Int |
213 | 213 | rowMultiplicity = genMultiplicity "rowspan" |
214 | ||
214 | ||
215 | 215 | {-DHUN| the column multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements.The result is an integer that default to zero DHUN-} |
216 | ||
216 | ||
217 | 217 | columnMultiplicityForCounting :: [Anything Char] -> Int |
218 | 218 | columnMultiplicityForCounting = (fromMaybe 1) . columnMultiplicity |
219 | ||
219 | ||
220 | 220 | {-DHUN| return the symbol for the start of a multicolumn cell in latex. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. It takes the list of the final widths of all columns of the table as second parameter. It takes to the column index of the current column as third parameter. The fourth parameter is a boolean if it is true rules will be drawn in the table otherwise they won't. The fifth parameter is the table state. That is the mutable state that exists during rendering of a table. DHUN-} |
221 | ||
221 | ||
222 | 222 | multiColumnStartSymbol :: |
223 | 223 | [Anything Char] -> [Float] -> Int -> Bool -> TableState -> String |
224 | 224 | multiColumnStartSymbol l f i t st |
231 | 231 | _ -> "l" |
232 | 232 | mylist nn |
233 | 233 | = [min 1.0 |
234 | (((1.0 - (scalefactor 1)) * (sum (take nn (drop (i - 1) f)))) | |
235 | / (1.0 - (scalefactor (fromIntegral nn))))] | |
236 | ||
234 | (((1.0 - (scalefactor 1)) * (sum (take nn (drop (i - 1) f)))) / | |
235 | (1.0 - (scalefactor (fromIntegral nn))))] | |
236 | ||
237 | 237 | {-DHUN| return the symbol for the end of a multicolumn cell in latex. The first boolean parameter tells if the cell is actually a multicolumn cell. If it is false the empty string is returned instead DHUN-} |
238 | ||
238 | ||
239 | 239 | multiColumnEndSymbol :: Bool -> String |
240 | 240 | multiColumnEndSymbol True = "}" |
241 | 241 | multiColumnEndSymbol False = "" |
242 | ||
242 | ||
243 | 243 | {-DHUN| return the symbol for the end of a multirow cell in latex. The first boolean parameter tells if the cell is actually a multirow cell. If it is false the empty string is returned instead DHUN-} |
244 | ||
244 | ||
245 | 245 | multiRowEndSymbol :: Bool -> String |
246 | 246 | multiRowEndSymbol True = "}" |
247 | 247 | multiRowEndSymbol False = "" |
248 | ||
248 | ||
249 | 249 | {-DHUN| This function takes a default value as first parameter. It takes a predicate as second parameter. It take a map from Int to a two tuple of Int as firth parameter. It takes a key for that map as fourth parameter. It takes a function mapping a two tuple of Int to the same type as the default value as third parameter. It tries to find a two tuple of Ints (that is value) in the map under the given key. If it finds one and the predicate returns true on the first element of that two tuple it returns the result of the function on the two tuple. In any other case it returns the default value DHUN-} |
250 | ||
250 | ||
251 | 251 | withDefault :: |
252 | 252 | t -> |
253 | 253 | (Int -> Bool) -> |
257 | 257 | do (a, b) <- Map.lookup i d |
258 | 258 | guard $ p a |
259 | 259 | return $ f a b |
260 | ||
260 | ||
261 | 261 | {-DHUN| this function return the vertical separator for column for the table header in latex. The only parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't DHUN-} |
262 | ||
262 | ||
263 | 263 | verticalSeperator :: Bool -> [Char] |
264 | 264 | verticalSeperator True = "|" |
265 | 265 | verticalSeperator False = "" |
266 | ||
266 | ||
267 | 267 | {-DHUN| the function returns the a string to be inserted into a latex document for multirows at a when a new column (that is a new cell) starts, thats when a column separator, or header column separator is encountered. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't DHUN-} |
268 | ||
268 | ||
269 | 269 | multiRowSymbol :: Int -> Map Int (Int, Int) -> Bool -> String |
270 | 270 | multiRowSymbol i d t |
271 | 271 | = withDefault "" (> 0) |
278 | 278 | (verticalSeperator t) ++ "}{}&" ++ (multiRowSymbol (i + b) d t)) |
279 | 279 | i |
280 | 280 | d |
281 | ||
281 | ||
282 | 282 | {-DHUN| the function returns the a string to be inserted into a latex document for multirows at a when a new row starts, thats when a row separator is encountered. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't DHUN-} |
283 | ||
283 | ||
284 | 284 | multiRowSymbolForRowSep :: |
285 | 285 | Int -> Map Int (Int, Int) -> Bool -> String |
286 | 286 | multiRowSymbolForRowSep i d t |
295 | 295 | "}{}" ++ (multiRowSymbolForRowSep (i + b) d) t) |
296 | 296 | i |
297 | 297 | d |
298 | ||
298 | ||
299 | 299 | {-DHUN| the function returns the a string to be inserted into a latex document for multirows at the end of the table. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't DHUN-} |
300 | ||
300 | ||
301 | 301 | multiRowSymbolForTableEnd :: |
302 | 302 | Int -> Map Int (Int, Int) -> Bool -> String |
303 | 303 | multiRowSymbolForTableEnd i d t |
312 | 312 | "}{}" ++ (multiRowSymbolForTableEnd (i + 1) d t)) |
313 | 313 | i |
314 | 314 | d |
315 | ||
315 | ||
316 | 316 | {-DHUN| in case of a multirow, that cell has to be skipped further down. So if I got a multirow in row 1 column 2 with a rowspan of 2 or more I need to expand row 2 column 1 by 1 . So if I passed row 2 column one I am not in row 2 column 2 since that is where the multirow cell resides, I am rather in row 2 cell 3. And if there are more multicolumns involved I am more possible even further right. So this function just tell me how many cells I have to skip. The first parameter is the index of the current column. The second parameter is the multirowdict. See also documentation on the function multiRowDictChangeStart in this module DHUN-} |
317 | ||
317 | ||
318 | 318 | multiRowCount :: Int -> Map Int (Int, Int) -> Int |
319 | 319 | multiRowCount i d |
320 | 320 | = withDefault 0 (/= 0) (\ _ b -> b + (multiRowCount (i + 1) d)) i d |
321 | ||
321 | ||
322 | 322 | {-DHUN| see documentation on multiRowDictChangeStart. This function take the index of the current column as first parameter. This function takes the multiRowDict as first parameter and returns the modified version of it. DHUN-} |
323 | ||
323 | ||
324 | 324 | multiRowDictChangeEnd :: |
325 | 325 | Int -> Map Int (Int, Int) -> Map Int (Int, Int) |
326 | 326 | multiRowDictChangeEnd i d |
329 | 329 | multiRowDictChangeEnd (i + 1) (Map.insert i (a - 1, b) d)) |
330 | 330 | i |
331 | 331 | d |
332 | ||
332 | ||
333 | 333 | {-DHUN| The multiRowDict is a facility for keeping track of cells spanning multiple rows. It is stored as mutable state in the type TableState in the parameter multiRowMap. It is passed to this function as second parameter. This function return an updated version of it. It is a map mapping and Int to a tuple whose both elements are also ints. It the key is the column index and the value is a pair (rowmultiplicity, columnmultiplicity). The rowmultiplicity is the number of rows the cell spans. This number is decrease every time a row ends. So it actually says how many rows the columns spans further down from the current column. The column multiplicity is the number of columns the cell spans. This function take the index of the current column as first parameter. This function takes the parse result of the opening part of the cell environment of the current cell as third input parameter. This function calculates only the changes in the multirowdict for the opening environment of the cell. You should not use this function but rather use multiRowDictChange since this also considers the effect by ending of cells DHUN-} |
334 | ||
334 | ||
335 | 335 | multiRowDictChangeStart :: |
336 | 336 | Int -> Map Int (Int, Int) -> [Anything Char] -> Map Int (Int, Int) |
337 | 337 | multiRowDictChangeStart i d l |
339 | 339 | do n <- rowMultiplicity l |
340 | 340 | return (Map.insert i ((n - 1), c) d) |
341 | 341 | where c = (columnMultiplicityForCounting l) |
342 | ||
342 | ||
343 | 343 | {-DHUN| calculate the full change to the multirowdict. See documentation on the function multiRowDictChangeStart in this module for more information on the multirowdict. The first parameter is the index of the current column. The second parameter is the current multirowdict. This function takes the parse result of the opening part of the cell environment of the current cell as third input parameter. This function returns the updated multirowdict. DHUN-} |
344 | ||
344 | ||
345 | 345 | multiRowDictChange :: |
346 | 346 | Int -> Map Int (Int, Int) -> [Anything Char] -> Map Int (Int, Int) |
347 | 347 | multiRowDictChange i d l |
348 | 348 | = multiRowDictChangeStart n (multiRowDictChangeEnd i d) l |
349 | 349 | where n = i + (multiRowCount i d) |
350 | ||
350 | ||
351 | 351 | {-DHUN| returns the latex symbol for the start of a multirow cell. That is a cell spanning multiple rows. The second parameter is activeColumn. This is an integer wrapped in the maybe monad. If it is has the value Just then the row is to be renderer in a special mode. This mode is needed to determine the width of the columns. In this special mode no line break occur and width of the paper is infinite, so that the width of the of each column is its respective natural width. So there is no limit on the width of the column. If the value is Nothing this means that the table is typeset in normal mode and the width is to be limited. The first parameter is the inner parse result of the row separator containing information on whether or not the cell is multirow DHUN-} |
352 | ||
352 | ||
353 | 353 | multiRowStartSymbol :: [Anything Char] -> Maybe Int -> String |
354 | 354 | multiRowStartSymbol l m |
355 | 355 | = fromMaybe "" $ |
358 | 358 | "\\multirow{" ++ |
359 | 359 | (show n) ++ |
360 | 360 | "}{" ++ (if isJust m then "*" else "\\linewidth") ++ "}{" |
361 | ||
361 | ||
362 | 362 | {-DHUN| a symbol to be added at the end of header cell in order to make its content bold. The only boolean parameter is use to indicate whether the cell currently under consideration is a header cell. Otherwise the empty string is returned. DHUN-} |
363 | ||
363 | ||
364 | 364 | headendsym :: Bool -> String |
365 | 365 | headendsym False = "" |
366 | 366 | headendsym True = "}" |
367 | ||
367 | ||
368 | 368 | {-DHUN| a symbol to be added at the start of header cell in order to make its content bold DHUN-} |
369 | ||
369 | ||
370 | 370 | headstartsym :: String |
371 | 371 | headstartsym = "{\\bfseries " |
372 | ||
372 | ||
373 | 373 | {-DHUN| the symbol to be inserted into the latex document, for the end of a row in a table. The first boolean parameter is should be true if the current table is not nested in an other one. The second boolean parameter should be true if the column was the last column of the header of the table. The header of the table is repeated by latex each time page wrapping inside the table occurs. See also documentation of the longtable latex package DHUN-} |
374 | ||
374 | ||
375 | 375 | rowendsymb :: Bool -> Bool -> String |
376 | 376 | rowendsymb True True = "\\endhead " |
377 | 377 | rowendsymb _ _ = "\\\\" |
7 | 7 | import System.IO.Strict |
8 | 8 | import Data.Time.Clock.POSIX |
9 | 9 | |
10 | pad :: Int -> String -> String | |
11 | pad n s = if length s < n then pad n (' ':s) else s | |
10 | {-DHUN| Pads a string with spaces. Adds space charactes to the beginning of a string until it has got a desired length. The first paramter is the desired length. The second paramter is the original string. The return value is the padded string. DHUN-} | |
12 | 11 | |
13 | 12 | |
13 | pad :: Int -> String -> String | |
14 | pad n s = if length s < n then pad n (' ' : s) else s | |
15 | ||
16 | {-DHUN| creates a list of line numbers as padded strings. The first parameter is the length of length that each linenumber (a string) should have. The second paramter is the minimum linenumber to start with (inclusive). The third parameter is the maximum linenumber to end with (inclusive). The return value is the list of linenumbers as strings. DHUN-} | |
17 | ||
14 | 18 | linenumbers :: Int -> Int -> Int -> [String] |
15 | linenumbers n mini maxi = if mini == maxi then [pad n (show mini)] else (pad n (show mini)):(linenumbers n (mini+1) maxi) | |
19 | linenumbers n mini maxi | |
20 | = if mini == maxi then [pad n (show mini)] else | |
21 | (pad n (show mini)) : (linenumbers n (mini + 1) maxi) | |
22 | ||
23 | {-DHUN| Prints out a given message together with the current unix timestamp. The first parameter is the message to be printed DHUN-} | |
16 | 24 | |
17 | 25 | myprint :: String -> IO () |
18 | 26 | myprint s |
21 | 29 | hFlush stdout |
22 | 30 | |
23 | 31 | {-DHUN| Write a unicode string to a utf8 encoded file. The first parameter is the filename, the second the contend to be written to the file. DHUN-} |
24 | ||
32 | ||
25 | 33 | writeFile :: FilePath -> String -> IO () |
26 | 34 | writeFile f s |
27 | 35 | = do h <- openFile f WriteMode |
28 | 36 | hSetEncoding h utf8 |
29 | 37 | hPutStr h s |
30 | 38 | hClose h |
31 | ||
39 | ||
32 | 40 | {-DHUN| read a utf8 encoded file fully as a unicode string. The first parameter is the filename. The return value is the content of the file wrapped in the IO monad. DHUN-} |
33 | ||
41 | ||
34 | 42 | readFile :: FilePath -> IO String |
35 | 43 | readFile f |
36 | 44 | = do h <- openFile f ReadMode |
37 | 45 | hSetEncoding h utf8 |
38 | 46 | z <- System.IO.Strict.hGetContents h |
39 | 47 | return z |
40 | ||
48 | ||
41 | 49 | {-DHUN| If the list is not empty it returns the list without the last item, otherwise the empty list- DHUN-} |
42 | ||
50 | ||
43 | 51 | nullinit :: [a] -> [a] |
44 | 52 | nullinit l = if null l then [] else init l |
45 | ||
53 | ||
46 | 54 | {-DHUN| Returns the list with the first element stripped wrapped in a Just of the Maybe monad. If the list is empty returns the value Nothing of the maybe monad. DHUN-} |
47 | ||
55 | ||
48 | 56 | maybeTail :: [a] -> Maybe [a] |
49 | 57 | maybeTail [] = Nothing |
50 | 58 | maybeTail (_ : xs) = Just xs |
51 | ||
59 | ||
52 | 60 | {-DHUN| Returns the first element of a list wrapped in a Just of the Maybe monad. If the list is empty returns the value Nothing of the Maybe monad, DHUN-} |
53 | ||
61 | ||
54 | 62 | maybeHead :: [a] -> Maybe a |
55 | 63 | maybeHead [] = Nothing |
56 | 64 | maybeHead (x : _) = Just x |
57 | ||
65 | ||
58 | 66 | {-DHUN| The first parameter is and item. the second parameter is a list. If the list contains the item it returns the list up to the first occurrence of the item with the item itself excluded, otherwise it returns the empty list. DHUN-} |
59 | ||
67 | ||
60 | 68 | headSplitEq :: (Eq a) => a -> [a] -> [a] |
61 | 69 | headSplitEq c s |
62 | 70 | = case splitOn [c] s of |
63 | 71 | g : _ -> g |
64 | 72 | [] -> [] |
65 | ||
73 | ||
66 | 74 | {-DHUN| Removes all white space characters trailing on the right hand side of a string DHUN-} |
67 | ||
75 | ||
68 | 76 | rtrim :: String -> String |
69 | 77 | rtrim = reverse . (dropWhile isSpace) . reverse |
70 | ||
78 | ||
71 | 79 | {-DHUN| The first parameter is an original item. The second parameter is a replacement item. The third parameter is a list. This function replaces all occurrences of the original item, with the replacement item in the list. DHUN-} |
72 | ||
80 | ||
73 | 81 | replace :: (Eq a) => a -> a -> [a] -> [a] |
74 | 82 | replace src target = map (\ x -> if x == src then target else x) |
75 | ||
83 | ||
76 | 84 | {-DHUN| The first parameter is an input list. The second parameter is an original list that may or may not or may several times be part of the input list. The third parameter is an a replacement list. This function replaces all occurrences of the original list in the input list, with the replacement list. DHUN-} |
77 | ||
85 | ||
78 | 86 | replace2 :: (Eq a) => [a] -> [a] -> [a] -> [a] |
79 | 87 | replace2 hay needle nail |
80 | 88 | | needle `isPrefixOf` hay = |
82 | 90 | replace2 (x : xs) needle nail |
83 | 91 | | otherwise = x : (replace2 xs needle nail) |
84 | 92 | replace2 [] _ _ = [] |
85 | ||
93 | ||
86 | 94 | {-DHUN| The first parameter is an input list. The second parameter is also a list, that might be contained in the input list. If this is the case this function returns true. Otherwise this function returns false DHUN-} |
87 | ||
95 | ||
88 | 96 | isInfixOf2 :: (Eq a) => [a] -> [a] -> Bool |
89 | 97 | isInfixOf2 needle haystack |
90 | 98 | = any (needle `isPrefixOf`) (tails haystack) |
91 | ||
99 | ||
92 | 100 | {-DHUN| Converts a single character in hex notation to an integer. The integer is wrapped in a mMybe monad. If an integer could be found it is wrapped in a Just of the Maybe monad. Otherwise the value Nothing of the Maybe monad is returned DHUN-} |
93 | ||
101 | ||
94 | 102 | unhexChar :: Char -> Maybe Integer |
95 | 103 | unhexChar c = lookup c hexTable |
96 | 104 | where hexTable |
97 | 105 | = zip ['0' .. '9'] [0 .. 9] ++ |
98 | 106 | zip ['a' .. 'f'] [10 .. 15] ++ zip ['A' .. 'F'] [10 .. 15] |
99 | ||
107 | ||
100 | 108 | {-DHUN| Converts a sequence of characters in hex notation to an integer. The integer is wrapped in a Maybe monad. If an integer could be found it is wrapped in a Just of the Maybe monad. Otherwise the value Nothing of the Maybe monad is returned DHUN-} |
101 | ||
109 | ||
102 | 110 | unhex :: String -> Maybe Integer |
103 | 111 | unhex = foldM f 0 |
104 | 112 | where f acc ch |
0 | {-# LANGUAGE StandaloneDeriving #-} | |
0 | 1 | {-DHUN| module for processing urls and downloading their content with repect to mediawiki DHUN-} |
1 | {-# LANGUAGE StandaloneDeriving #-} | |
2 | 2 | module UrlAnalyse |
3 | 3 | (getpage, analyse, analyseFull, unify, WikiUrl, getLemma, |
4 | 4 | FullWikiUrl, hostname, url, alternatives, lemma, wikiUrl, geturl, |
28 | 28 | deriving instance Read URL.Protocol |
29 | 29 | deriving instance Read URL.URLType |
30 | 30 | |
31 | {-DHUN| This represents the main url of a wiki page. So the url of the wiki page that should be converted to latex. It is a tuple wrapped into the maybe monad, to deal with case in which the url could not be parses. The first element of the tuple is just the main url parser with Network.URL The second element is a list of urls. These URLs are possible base urls for wiki pages. So en.wikipedia.org/wiki/Foobar has got the main url en.wikipedia.org/wiki/Foobar and one of the base urls us en.wikipedia.org/wiki/. Base urls are important since the wiki source related to the main url might include subpage in wiki notation that is [[JohnDow]]. The actual url to look up JohnDow is the baseurl plus JohnDow so that is en.wikipedia.org/wiki/JohnDow Since also images in the commons and similar things are possible, the are usually some baseurls to be looked at. This way the base URl have to be a list. DHUN-} | |
31 | {-DHUN| This represents the main url of a wiki page. So the url of the wiki page that should be converted to latex. It is a tuple wrapped into the maybe monad, to deal with case in which the url could not be parsed. The first element of the tuple is just the main url parsed with Network.URL The second element is a list of urls. These URLs are possible base urls for wiki pages. So en.wikipedia.org/wiki/Foobar has got the main url en.wikipedia.org/wiki/Foobar and one of the base urls us en.wikipedia.org/wiki/. Base urls are important since the wiki source related to the main url might include subpage in wiki notation that is [[JohnDow]]. The actual url to look up JohnDow is the baseurl plus JohnDow so that is en.wikipedia.org/wiki/JohnDow Since also images in the commons and similar things are possible, the are usually some baseurls to be looked at. This way the base URLs have to be a list. DHUN-} | |
32 | 32 | |
33 | 33 | type WikiUrl = Maybe (URL, [URL]) |
34 | 34 | |
37 | 37 | wikiUrl :: FullWikiUrl -> WikiUrl |
38 | 38 | wikiUrl fu = return (url fu, alternatives fu) |
39 | 39 | |
40 | {-DHUN| A type describing a reference to an article on a MediaWiki server. The entry url is the url under which the article is located. The entry alternatives is a list of baseurls of the wiki. See documentation on type WikiUrl for more information about baseurls. The entry hostname contains the hostname of the server. The entry lemma contains the lemma (that is the name of the article on the wiki) DHUN-} | |
41 | ||
42 | ||
40 | 43 | data FullWikiUrl = FullWikiUrl{url :: URL, alternatives :: [URL], |
41 | 44 | hostname :: String, lemma :: String} |
42 | 45 | deriving (Eq, Ord) |
43 | 46 | |
47 | ||
48 | {-DHUN| base instance of type FullWikiUrl, to be filled with useful data using the record syntax DHUN-} | |
49 | ||
44 | 50 | fullWikiUrlZero :: FullWikiUrl |
45 | 51 | fullWikiUrlZero |
46 | 52 | = FullWikiUrl{url = |
67 | 73 | "Special:Export"} |
68 | 74 | where p = (url_path u) |
69 | 75 | |
76 | ||
77 | {-DHUN| modify an URL to point to the special page on the wiki to expand the templates useful for command line option -m DHUN-} | |
78 | ||
79 | ||
70 | 80 | modpathForExpansion :: URL -> URL |
71 | 81 | modpathForExpansion u |
72 | 82 | = u{url_path = |
97 | 107 | statusExceptionHandler _ = (return (I.Response {responseBody=L.empty,responseStatus=T.Status {T.statusCode=200,T.statusMessage=BStr.empty}, responseVersion=V.http09,responseHeaders=[],responseCookieJar=I.CJ [],I.responseClose'=I.ResponseClose (return ())})) |
98 | 108 | |
99 | 109 | |
110 | {-DHUN| Loads the data stored under an URL from the web. Result will be a ByteString. Mainly useful for loading HTML for further processing, as well as binary image files. DHUN-} | |
111 | ||
112 | ||
100 | 113 | geturl2 :: String -> IO BStr.ByteString |
101 | 114 | geturl2 u |
102 | 115 | = if u=="" then return (BStr.pack []) else Control.Exception.catch |
115 | 128 | fun _ = return (BStr.pack []) |
116 | 129 | statusExceptionHandler :: SomeException -> IO (Network.HTTP.Conduit.Response L.ByteString) |
117 | 130 | statusExceptionHandler _ = (return (I.Response {responseBody=L.empty,responseStatus=T.Status {T.statusCode=200,T.statusMessage=BStr.empty}, responseVersion=V.http09,responseHeaders=[],responseCookieJar=I.CJ [],I.responseClose'=I.ResponseClose (return ())})) |
131 | ||
132 | {-DHUN| loads the wiki sourcecode strored under a lemma in on a server running mediawiki. The first parameter is the lemma to look up. The second parameter is the URL to the special:export page on the server. The return value is the source wikitext DHUN-} | |
133 | ||
118 | 134 | |
119 | 135 | geturl4 :: String -> String -> IO String |
120 | 136 | geturl4 s u |
137 | 153 | statusExceptionHandler _ = (return (I.Response {responseBody=L.empty,responseStatus=T.Status {T.statusCode=200,T.statusMessage=BStr.empty}, responseVersion=V.http09,responseHeaders=[],responseCookieJar=I.CJ [],I.responseClose'=I.ResponseClose (return ())})) |
138 | 154 | |
139 | 155 | |
140 | ||
156 | {-DHUN| loads the wikisource of a wiki article from a server running mediawiki, with all mediawiki templates expanded into wiki text. The first parameter is the url to special:expand templates page on the server. The second parameter is the wikitext source including the mediawiki templates to be expanded. The third parameter is the name of the lemma on the server. DHUN-} | |
141 | 157 | |
142 | 158 | |
143 | 159 | geturl3 :: String -> String -> String -> IO String |
183 | 199 | getTextContent2 z |
184 | 200 | = catchJust myfun (getTextContent z) (\ _ -> return Nothing) |
185 | 201 | |
202 | {-DHUN| this function extracts the expanded wiki source of out of the result String returned by the Special:ExpandTemplates function of mediawiki. This function returns its result wrapped in a maybe monad so it can return the maybe value Nothing in case of failure but does not break the flow of control. It is also wrapped in the IO monad since the xml parser used is bound to the IO monad DHUN-} | |
203 | ||
186 | 204 | getExpandedTextContent :: String -> IO (Maybe String) |
187 | 205 | getExpandedTextContent z |
188 | 206 | = do h <- runX |
193 | 211 | x <- return . toText $ h |
194 | 212 | return (seq x x) |
195 | 213 | |
214 | {-DHUN| this function extracts the expanded wiki source of out of the result String returned by the Special:ExpandTemplates function of mediawiki. This function returns its result wrapped in a maybe monad so it can return the maybe value Nothing in case of failure but does not break the flow of control. It is also wrapped in the IO monad since the xml parser used is bound to the IO monad. Possible IO errors are caught an rethrown as Nothing in the Maybe Monad DHUN-} | |
215 | ||
216 | ||
196 | 217 | getExpandedTextContent2 :: String -> IO (Maybe String) |
197 | 218 | getExpandedTextContent2 z |
198 | 219 | = catchJust myfun (getExpandedTextContent z) |
214 | 235 | (listToMaybe $ concat (map maybeToList lll)) >>= |
215 | 236 | (return . decodeString) |
216 | 237 | |
217 | ||
238 | {-DHUN| Loads a page from a wiki when mediawiki2latex is running with command line option --bookmode. The first parameter is the lemma to load from the wiki the second parameter is the WikiUrl to the server hosting the wiki DHUN-} | |
218 | 239 | |
219 | 240 | |
220 | 241 | getBookpage :: String -> WikiUrl -> IO (Maybe String) |
226 | 247 | (return) |
227 | 248 | where |
228 | 249 | go x =if (x==(UTF8Str.fromString [])) then Nothing else Just (UTF8Str.toString x) |
250 | ||
251 | ||
252 | {-DHUN| Loads the wikitext of an article form a mediawiki server when mediawiki2latex is running with the --mediawiki option. This function downloads the orignial wikitext source without expanding the templates. This is going to happen later by call to getExpandedPage. The first parmeter is lemma to load. The second paramerter is the WikiUrl to the server hosting the wiki. The return value is a pair. The first element of it is the wikitext source of the article. The second element of it is the URL under which the article was downloaded DHUN-} | |
253 | ||
229 | 254 | getpage2 :: String -> WikiUrl -> IO (Maybe (String, URL)) |
230 | 255 | getpage2 ss u |
231 | 256 | = do l <- mapM ((geturl4 ss) . unify . exportURL . modpath) (parses u) |
235 | 260 | where go (Just xx, uu) = [(decodeString xx, uu)] |
236 | 261 | go _ = [] |
237 | 262 | |
263 | {-DHUN| This function expands all templates in a wikitext source using MediaWiki. The first parameter is lemma to be processed. The second parameter is the wikitext source of the article stored under the lemma. The third parameter is url to Special:ExpandTemplates page on the mediawiki server. The return value is the wikitext source with all templates expanded by MediaWiki DHUN-} | |
264 | ||
238 | 265 | getExpandedPage :: String -> String -> URL -> IO (Maybe String) |
239 | 266 | getExpandedPage ss d u |
240 | 267 | = do l <- mapM |
301 | 328 | case (url_type u) of |
302 | 329 | Absolute h -> return (URL.host h) |
303 | 330 | _ -> mzero |
304 | ||
331 | ||
332 | {-DHUN| Parse an URL supplied as string in the first parameter into a FullWikiUrl which is returned. See documentation on the types WikiUrl and FullWikiUrl for more information DHUN-} | |
333 | ||
334 | ||
305 | 335 | analyseFull :: String -> Maybe FullWikiUrl |
306 | 336 | analyseFull theUrl |
307 | 337 | = do ana <- analyse theUrl |
45 | 45 | l = zip [0..] (map (\x->zip [0..] (map (bf.(strip "\n\r\t ")) ((drop 1) ((splitOn "|") x)))) (take 8 (drop 2 sp))) |
46 | 46 | |
47 | 47 | |
48 | ||
49 | ||
50 | ||
51 | ||
52 | ||
53 | ||
54 | ||
55 |
8 | 8 | import MyState |
9 | 9 | import Data.List |
10 | 10 | import Data.String.HT (trim) |
11 | ||
11 | ||
12 | 12 | {-DHUN| add latex label elements to the parse tree. chapter section subsection and so on will be referenced by these labels. It takes the initial UrlState as second parameter. And the parse tree to be processed as first parameter. It returns a tuple. The first element is them modified urlstate and the second is the modified parse tree with the labels added. The field sUrlState of UrlState contains the page name of the downloaded page currently being processed by the algorithm. the filed mUrlState of UrlState is the number of the currenty label. And ,UrlState is a mapping from combined page and chapter, section etc. names (like urls) to their label numbers. DHUN-} |
13 | ||
13 | ||
14 | 14 | makeLables :: |
15 | 15 | [Anything Char] -> UrlState -> (UrlState, [Anything Char]) |
16 | 16 | makeLables ll states |
17 | 17 | = let (f, s) = mapAccumL makeLablesForNode states ll in |
18 | 18 | (f, concat s) |
19 | where | |
20 | makeLablesForNode :: | |
19 | where makeLablesForNode :: | |
21 | 20 | UrlState -> Anything Char -> (UrlState, [Anything Char]) |
22 | 21 | makeLablesForNode st (Environment DhunUrl ss l) |
23 | 22 | = (st{iUrlState = (iUrlState st) + 1, sUrlState = yy, |
37 | 36 | = (fst zz, [Environment e s (snd $ zz)]) |
38 | 37 | where zz = makeLables l st |
39 | 38 | makeLablesForNode st x = (st, [x]) |
40 | ||
39 | ||
41 | 40 | {-DHUN| remove superfluous br html tags from the parse tree. Always run before converting the parse tree to latex DHUN-} |
42 | ||
41 | ||
43 | 42 | removeBr :: [Anything Char] -> [Anything Char] |
44 | 43 | removeBr ((C '\n') : ((Environment Tag (TagAttr "br" _) _) : xs)) |
45 | 44 | = (C '\n') : removeBr xs |
58 | 57 | = (Environment SpaceIndent x (removeBr l)) : removeBr xs |
59 | 58 | removeBr (x : xs) = x : removeBr xs |
60 | 59 | removeBr [] = [] |
61 | ||
60 | ||
62 | 61 | {-DHUN| checks if a given string is an image inclusion in wiki notation. In wiki notation an image is included by [[Image:FooBar.png]], but image may be replace by localized versions like Bild in German, so this function checks for those and return true if it seems to be an image DHUN-} |
63 | ||
62 | ||
64 | 63 | isImage :: String -> Bool |
65 | 64 | isImage x |
66 | 65 | = ([z | z <- map (++ ":") imgtags, z `isPrefixOf` (map toLower x)] |
67 | 66 | /= []) |
68 | ||
67 | ||
69 | 68 | {-DHUN| flattens a parse tree shallowly. that is take all characters on the surface level of the parse tree and combines them into a single string. It does not decent into substructures of the parse and so neglects all characters there and does not return those with the exception of character in SpaceIndent environments directly attached to the surface level DHUN-} |
70 | ||
69 | ||
71 | 70 | shallowFlatten :: [Anything Char] -> String |
72 | 71 | shallowFlatten ((C a) : xs) = a : (shallowFlatten xs) |
72 | shallowFlatten ((Environment HtmlChar (Str "amp") a) : xs) | |
73 | = '&' : (shallowFlatten xs) | |
74 | shallowFlatten ((Environment HtmlChar (Str "lt") a) : xs) | |
75 | = '<' : (shallowFlatten xs) | |
76 | shallowFlatten ((Environment HtmlChar (Str "gt") a) : xs) | |
77 | = '>' : (shallowFlatten xs) | |
73 | 78 | shallowFlatten ((Environment SpaceIndent _ l) : xs) |
74 | 79 | = '\n' : ((shallowFlatten l) ++ (shallowFlatten xs)) |
75 | 80 | shallowFlatten (_ : xs) = shallowFlatten xs |
76 | 81 | shallowFlatten [] = [] |
77 | ||
82 | ||
78 | 83 | {-DHUN| A link in wiki notation is given by [foorbar.com a caption]. This function returns the location the link points to so foobar.com as String. It takes the parse tree representation of the link as input parameter DHUN-} |
79 | ||
84 | ||
80 | 85 | linkLocation :: [Anything Char] -> String |
81 | 86 | linkLocation l |
82 | 87 | = case yy of |
89 | 94 | [] -> "" |
90 | 95 | (g : _) -> g) |
91 | 96 | |
92 | ||
93 | 97 | normalizeExtensionHtml :: String -> String |
94 | 98 | normalizeExtensionHtml ('s' : ('v' : ('g' : _))) = "png" |
95 | 99 | normalizeExtensionHtml ('j' : ('p' : ('e' : ('g' : _)))) = "jpg" |
102 | 106 | normalizeExtensionHtml (x : xs) = x : (normalizeExtension xs) |
103 | 107 | normalizeExtensionHtml [] = [] |
104 | 108 | |
105 | ||
106 | 109 | {-DHUN| changes the extension for a filename given in the wiki source to the extension to be used in the latex document. For example gif documents are converted to png documents. So this function converts the string 'gif' to the string 'png' DHUN-} |
107 | ||
110 | ||
108 | 111 | normalizeExtension :: String -> String |
109 | 112 | normalizeExtension ('s' : ('v' : ('g' : _))) = "\\SVGExtension" |
110 | 113 | normalizeExtension ('j' : ('p' : ('e' : ('g' : _)))) = "jpg" |
116 | 119 | normalizeExtension (' ' : xs) = normalizeExtension xs |
117 | 120 | normalizeExtension (x : xs) = x : (normalizeExtension xs) |
118 | 121 | normalizeExtension [] = [] |
119 | ||
122 | ||
120 | 123 | {-DHUN| changes the extension for a filename given in the wiki source to the extension to be used in as filename when storing the image in the latex tree. DHUN-} |
121 | ||
124 | ||
122 | 125 | normalizeExtension2 :: String -> String |
123 | 126 | normalizeExtension2 ('s' : ('v' : ('g' : _))) = "svg" |
124 | 127 | normalizeExtension2 ('j' : ('p' : ('e' : ('g' : _)))) = "jpg" |
130 | 133 | normalizeExtension2 (' ' : xs) = normalizeExtension xs |
131 | 134 | normalizeExtension2 (x : xs) = x : (normalizeExtension xs) |
132 | 135 | normalizeExtension2 [] = [] |
133 | ||
136 | ||
134 | 137 | {-DHUN| returns the extension of a filename. DHUN-} |
135 | ||
138 | ||
136 | 139 | fileNameToExtension :: String -> String |
137 | 140 | fileNameToExtension s = last (splitOn "." (map toLower s)) |
138 | ||
141 | ||
139 | 142 | {-DHUN| a predicate that can be run on an element of a parse tree that returns true if the element is a wikilink. A wikilink is denoted as [[Foobar]] in the wiki notation, an links to an other mediawiki page on the same or a different wiki DHUN-} |
140 | ||
143 | ||
141 | 144 | isWikiLink :: (Anything Char) -> Bool |
142 | 145 | isWikiLink (Environment Wikilink _ []) = False |
143 | 146 | isWikiLink (Environment Wikilink _ _) = True |
144 | 147 | isWikiLink _ = False |
145 | ||
148 | ||
146 | 149 | {-DHUN| changes Math elements on the surface level of a parse tree to bigmath elements. Those will be rendered as equation environments. Normal math is usually only display be the dollar math environment in latex. DHUN-} |
147 | ||
150 | ||
148 | 151 | shallowEnlargeMath :: [Anything Char] -> [Anything Char] |
149 | 152 | shallowEnlargeMath ((Environment Math s l) : xs) |
150 | 153 | = (Environment BigMath s l) : shallowEnlargeMath xs |
151 | 154 | shallowEnlargeMath (x : xs) = x : shallowEnlargeMath xs |
152 | 155 | shallowEnlargeMath [] = [] |
153 | ||
156 | ||
154 | 157 | {-DHUN| this function modified chapter headings. It is used when converting a chapter heading from the parse tree to latex. In the wiki often the page name is used as chapter heading and thus only the parts after the slashes and colons are to be taken into account. Also underscores have to be replaced by spaces DHUN-} |
155 | ||
158 | ||
156 | 159 | chapterTransform :: String -> String |
157 | 160 | chapterTransform s |
158 | 161 | = replace '_' ' ' (last (splitOn ":" (last (splitOn "/" s)))) |
159 | ||
162 | ||
160 | 163 | {-DHUN| returns the separator to separate items in enumeration, itemizations and so on. Currently this is always \\item{} but this may change depending on which latex package is used to display enumerations and so on. Takes the char for this type of enumeration etc. in wiki notation. That is a hash for enumeration and a asterisk for itemization and so on DHUN-} |
161 | ||
164 | ||
162 | 165 | itemSeperator :: Char -> String |
163 | 166 | itemSeperator c = itemSeperator2 [c] |
164 | ||
167 | ||
165 | 168 | {-DHUN| see documentation on itemSeperator. The only difference is that this function takes a string containing a single character instead of the single character itself DHUN-} |
166 | ||
169 | ||
167 | 170 | itemSeperator2 :: String -> String |
168 | 171 | itemSeperator2 "#" = "\\item{}" |
169 | 172 | itemSeperator2 ":" = "\\item{}" |
170 | 173 | itemSeperator2 ";" = "\\item{}" |
171 | 174 | itemSeperator2 "*" = "\\item{}" |
172 | 175 | itemSeperator2 _ = "\\item{}" |
173 | ||
176 | ||
174 | 177 | {-DHUN| returns the name of a latex environment for an itemization enumeration etc.. The first parameter is a string in wiki notation and declare which type environment should be used. The second parameter is a float giving the width of the current cell in units of the line width when inside a table and is 1.0 if currently not inside any table. DHUN-} |
175 | ||
178 | ||
176 | 179 | itemEnvironmentName :: String -> Float -> String |
177 | 180 | itemEnvironmentName "#" _ = "myenumerate" |
178 | 181 | itemEnvironmentName ":" _ = "myquote" |
179 | 182 | itemEnvironmentName ";" _ = "mydescription" |
180 | 183 | itemEnvironmentName "*" _ = "myitemize" |
181 | 184 | itemEnvironmentName _ _ = "list" |
182 | ||
185 | ||
183 | 186 | {-DHUN| returns additional parameter for the opening of a latex environment for an itemization enumeration etc. The second parameter is a float giving the width of the current cell in units of the line width when inside a table and is 1.0 if currently not inside any table. DHUN-} |
184 | ||
187 | ||
185 | 188 | itemEnvironmentParameters :: String -> Float -> String |
186 | 189 | itemEnvironmentParameters "#" _ = "" |
187 | 190 | itemEnvironmentParameters ":" _ = "" |
188 | 191 | itemEnvironmentParameters ";" _ = "" |
189 | 192 | itemEnvironmentParameters "*" _ = "" |
190 | 193 | itemEnvironmentParameters _ _ = "{\\labelitemi}{\\leftmargin=1em}" |
191 | ||
194 | ||
192 | 195 | {-DHUN| do multple replacements in a row. The first argument is haystack. The second one a list of pair of a needle and a corresponding nail. The haystack with each needle replaced by a nail is returned DHUN-} |
193 | ||
196 | ||
194 | 197 | multireplace :: (Eq a) => [a] -> [([a], [a])] -> [a] |
195 | 198 | multireplace haystack ((needle, nail) : xs) |
196 | 199 | = multireplace (replace2 haystack needle nail) xs |
197 | 200 | multireplace haystack [] = haystack |
198 | ||
201 | ||
199 | 202 | {-DHUN| converts a mathematical expression (that is something within a math tag in the wiki) to an latex expression. Essentially the wiki is using latex. But it allows for some extra features that are take care of in this transformation DHUN-} |
200 | ||
203 | ||
201 | 204 | mathTransform :: [Anything Char] -> String |
202 | 205 | mathTransform x |
203 | 206 | = multireplace (replace '\n' ' ' (shallowFlatten x)) replist |
204 | ||
207 | ||
205 | 208 | {-DHUN| list of replacements to be applied to contents of math tags in wiki notation for use in the latex equation environment DHUN-} |
206 | ||
209 | ||
207 | 210 | replist :: [([Char], [Char])] |
208 | 211 | replist |
209 | 212 | = [("\\or", "\\vee{}"), ("%", "\\%"), ("\\and", "\\wedge{}"), |
211 | 214 | ("\\end{align}", "\\end{aligned}"), ("\\\\%", "\\%"), |
212 | 215 | ("\\part ", "\\partial "), ("\\part{", "\\partial{"), ("\\;", ""), |
213 | 216 | ("\\|", "\\Vert"), ("\\!", ""), ("\\part\\", "\\partial\\")] |
214 | ||
217 | ||
215 | 218 | {-DHUN| helper function for line breaking in source code and preformatted block. Not to be called from outside this module. Converts character to parse tree entities, to be process by breakLinesHelper3 DHUN-} |
216 | ||
219 | ||
217 | 220 | breakLinesHelper4 :: [Anything Char] -> [Anything Char] |
218 | 221 | breakLinesHelper4 ((C '\n') : xs) |
219 | 222 | = (Environment Tag (TagAttr "br" Map.empty) []) : |
222 | 225 | breakLinesHelper4 ((C ' ') : xs) = Quad : breakLinesHelper4 xs |
223 | 226 | breakLinesHelper4 (x : xs) = x : breakLinesHelper4 xs |
224 | 227 | breakLinesHelper4 [] = [] |
225 | ||
228 | ||
226 | 229 | {-DHUN| the width of a tab character in spaces DHUN-} |
227 | ||
230 | ||
228 | 231 | tabwidth :: Int |
229 | 232 | tabwidth = 4 |
230 | ||
233 | ||
231 | 234 | {-DHUN| helper function for line breaking in source code and preformatted block. Not to be called from outside this module. Inserts br tags where line breaks are needed, the first parameter is an integer which represents the current column in the text, it should be zero when this function is called externally. The second parameter is an integer an represents the maximum length of the line in characters. The third input parameter is the code block to which the line breaks should be added in parse tree notation. The function returns the code block with added br tags for the line breaks in parse tree notation DHUN-} |
232 | ||
235 | ||
233 | 236 | breakLinesHelper3 :: |
234 | 237 | Int -> Int -> [Anything Char] -> [Anything Char] |
235 | 238 | breakLinesHelper3 _ m ((Environment Tag (TagAttr "br" y) []) : xs) |
257 | 260 | = (xx /= Quad) && |
258 | 261 | (xx /= (Environment Tag (TagAttr "br" Map.empty) [])) |
259 | 262 | breakLinesHelper3 _ _ [] = [] |
260 | ||
263 | ||
261 | 264 | {-DHUN| Breaks lines in source code and preformatted block. Inserts br tags where line breaks are needed. The first parameter is an integer an represents the maximum length of the line in characters. The second input parameter is the code block to which the line breaks should be added in parse tree notation. The function returns the code block with added br tags for the line breaks in parse tree notation DHUN-} |
262 | ||
265 | ||
263 | 266 | breakLines3 :: Int -> [Anything Char] -> [Anything Char] |
264 | 267 | breakLines3 m s |
265 | 268 | = rebreak (breakLinesHelper3 0 m (breakLinesHelper4 s)) |
266 | ||
269 | ||
267 | 270 | {-DHUN| Adds quads in between double br line breaks, needed since double \\newline is not allowed in latex DHUN-} |
268 | ||
271 | ||
269 | 272 | rebreak :: [Anything Char] -> [Anything Char] |
270 | 273 | rebreak |
271 | 274 | ((Environment Tag (TagAttr "br" a) l) : |
274 | 277 | Quad : (rebreak ((Environment Tag (TagAttr "br" a2) l2) : xs)) |
275 | 278 | rebreak (x : xs) = x : (rebreak xs) |
276 | 279 | rebreak [] = [] |
277 | ||
280 | ||
278 | 281 | {-DHUN| Replaces several parse tree item representations of white space characters with the corresponding whitespace characters themselves in parse tree notation. DHUN-} |
279 | ||
282 | ||
280 | 283 | renormalize :: Anything Char -> Anything Char |
281 | 284 | renormalize (Environment Tag (TagAttr "br" _) []) = C '\n' |
282 | 285 | renormalize Quad = C ' ' |
11 | 11 | import Data.Char |
12 | 12 | import Data.List.Split |
13 | 13 | import Control.Monad |
14 | ||
14 | ||
15 | 15 | {-DHUN| takes a list of language prefixes and sister project prefixes and return a the first one or two elements of that list as tuple of Strings wrapped into maybe monads, returns nothing at the appropriate elements to the tuple if the list does not have enough elements. The first element of the tuple is populated first DHUN-} |
16 | ||
16 | ||
17 | 17 | getprefixes :: [String] -> (Maybe String, Maybe String) |
18 | 18 | getprefixes ss |
19 | 19 | = case ss of |
22 | 22 | case xs of |
23 | 23 | [] -> Nothing |
24 | 24 | (y : _) -> Just $ filter (not . isSpace) (map toLower y)) |
25 | ||
25 | ||
26 | 26 | {-DHUN| in the wiki source a heading is given by == foo bar == where the number of space gives the level of the heading. In latex this is done with string like 'section. This function translates from wiki notation to latex notation DHUN-} |
27 | ||
27 | ||
28 | 28 | getsec :: String -> String |
29 | 29 | getsec "=" = "chapter" |
30 | 30 | getsec "==" = "section" |
33 | 33 | getsec "=====" = "paragraph" |
34 | 34 | getsec "======" = "subparagraph" |
35 | 35 | getsec _ = "subparagraph" |
36 | ||
36 | ||
37 | 37 | {-DHUN| in the wiki source a heading is given by == foo bar == where the number of space gives the level of the heading. In latex this is done with string like 'section'. Some of with need additional commands after the actual heading command. Those are returned by this function DHUN-} |
38 | ||
38 | ||
39 | 39 | getsecpost :: String -> String |
40 | 40 | getsecpost "=" = "\n\\myminitoc\n" |
41 | 41 | getsecpost "==" = "" |
44 | 44 | getsecpost "=====" = "{$\\text{ }$}\\newline" |
45 | 45 | getsecpost "======" = "{$\\text{ }$}\\newline" |
46 | 46 | getsecpost _ = "{$\\text{ }$}\\newline" |
47 | ||
47 | ||
48 | 48 | {-DHUN| a predicate that returns true if the string is a valid image size in wiki notation. In the wiki an image is include like [[Image:FoorBar.png|300px]] where 300px means that the image should be displayed at a width of 300 pixel. This function takes strings and looks whether it looks like an integer number flowed by 'px' and is thus a valid width definition for an image in wiki notation DHUN-} |
49 | ||
49 | ||
50 | 50 | isImageSize :: String -> Bool |
51 | 51 | isImageSize x |
52 | 52 | = if (isSuffixOf "px" x) then |
53 | 53 | if (reads (take ((length x) - 2) x)) == ([] :: [(Float, [Char])]) |
54 | 54 | then False else True |
55 | 55 | else False |
56 | ||
56 | ||
57 | 57 | {-DHUN| this predicate returns true if the string seems to be a caption of an image in wiki notation. And image give like [[Image:JohnDow.png|foo bar]] where 'foo bar' is the caption of the image. So this function just tests whether the string looks like one of the reserved keyword of mediawiki or like an image size definition (see function isImageSize) and return true if none of that fits DHUN-} |
58 | ||
58 | ||
59 | 59 | isCaption :: String -> Bool |
60 | 60 | isCaption x |
61 | 61 | = if isImageSize x then False else |
62 | 62 | if x `elem` ["thumb", "right", "left", "center"] then False else |
63 | 63 | True |
64 | ||
64 | ||
65 | 65 | {-DHUN| escapes all charters of a string for use a a link with the hyperref package in latex DHUN-} |
66 | ||
66 | ||
67 | 67 | escapelink :: String -> String |
68 | 68 | escapelink s = concat (map chartransforlink s) |
69 | ||
69 | ||
70 | 70 | {-DHUN| analyzes a host name. If it belongs to a wikimedia project it returns an UrlInfo value otherwise it returns a WikiBaseUrl value. In any case it is the type WikiUrlData. This information is needed when building writing down links to subpages in latex or downloading images from the wiki. DHUN-} |
71 | ||
71 | ||
72 | 72 | analyseNetloc :: String -> WikiUrlData |
73 | 73 | analyseNetloc nl = myurldata |
74 | 74 | where langm |
94 | 94 | splits = splitOn "." nl |
95 | 95 | (prefix, prefix2) = getprefixes splits |
96 | 96 | wikiset = Set.fromList . Map.elems . Map.fromList $ multilangwikis |
97 | ||
97 | ||
98 | 98 | {-DHUN| splits a wikilink in caption and link target and returns the target. So a wkilink is given like [[FooBar|John Dow]]. This link links to the lemma FooBar and is displayed with the caption 'John Dow'. So this function returns FooBar for that link. DHUN-} |
99 | ||
99 | ||
100 | 100 | localWikiLinkLocation :: String -> String |
101 | 101 | localWikiLinkLocation s = headSplitEq '|' s |
102 | ||
102 | ||
103 | 103 | {-DHUN| This function takes the contend of a wikilink in wiki markup notation as first input parameter in parse tree notation. It takes the state of the Latex Renderer as second input parameter. It returns the absolute url of the given wikilink as string DHUN-} |
104 | ||
104 | ||
105 | 105 | wikiLinkLocationesc :: [Anything Char] -> MyState -> String |
106 | 106 | wikiLinkLocationesc l st |
107 | 107 | = getUrlFromWikiLinkInfoesc . |
108 | 108 | getWikiLinkInfo (shallowFlatten l) . urld |
109 | 109 | $ st |
110 | ||
110 | ||
111 | 111 | {-DHUN| converts a WikiLinkInfo value pointing to a page on a wiki to the full url of that page as string. It is escaped for the use with the hyperref latex package DHUN-} |
112 | ||
112 | ||
113 | 113 | getUrlFromWikiLinkInfoesc :: WikiLinkInfo -> String |
114 | 114 | getUrlFromWikiLinkInfoesc i |
115 | 115 | = case (urldata i) of |
119 | 119 | (wikitype x) ++ ".org/wiki/" ++ (escapelink (urlEncode (page i))) |
120 | 120 | BaseUrl (WikiBaseUrl x) -> "https://" ++ |
121 | 121 | x ++ "/wiki/" ++ (escapelink (urlEncode (page i))) |
122 | ||
122 | ||
123 | 123 | {-DHUN| converts a WikiLinkInfo value pointing to a page on a wiki to the full url of that page as string. It is not is escaped in any way, so ready for use in a webbrowers DHUN-} |
124 | ||
124 | ||
125 | 125 | getUrlFromWikiLinkInfo :: WikiLinkInfo -> String |
126 | 126 | getUrlFromWikiLinkInfo i |
127 | 127 | = case (urldata i) of |
130 | 130 | "." ++ (wikitype x) ++ ".org/wiki/" ++ (urlEncode (page i)) |
131 | 131 | BaseUrl (WikiBaseUrl x) -> "https://" ++ |
132 | 132 | x ++ "/wiki/" ++ (urlEncode (page i)) |
133 | ||
133 | ||
134 | 134 | {-DHUN| converts a WikiUrlData value pointing to a wiki and a string (like a part of a url) pointing to a page relative to that wiki and returns the full url of that page as string DHUN-} |
135 | ||
135 | ||
136 | 136 | wikiUrlDataToString :: WikiUrlData -> String -> String |
137 | 137 | wikiUrlDataToString w i |
138 | 138 | = case w of |
139 | 139 | UrlInfo x -> "https://" ++ |
140 | 140 | (language x) ++ "." ++ (wikitype x) ++ ".org" ++ i |
141 | 141 | BaseUrl (WikiBaseUrl x) -> "https://" ++ x ++ i |
142 | ||
142 | ||
143 | 143 | {-DHUN| This function takes the contend of a wikilink in wiki markup notation as first input parameter. So for the wiki link (mediawiki markup notation) [[en:JohnDow|John Dow]] this is 'en:JohnDow|John Dow' (the single quotes were added to make the string distinguishable from the text and are not part of the passed parameter). It takes the WikiUrlData value describing the wiki the page currently being processed belongs to as second parameter. It returns a WikiLinkInfo value describing the target of the link given as first parameter when evaluated with respect to the second parameter DHUN-} |
144 | ||
144 | ||
145 | 145 | getWikiLinkInfo :: String -> WikiUrlData -> WikiLinkInfo |
146 | 146 | getWikiLinkInfo s i = WikiLinkInfo{urldata = udata, page = pagen} |
147 | 147 | where udata |
14 | 14 | import System.Info |
15 | 15 | import Compiler (runCompile) |
16 | 16 | import Tools (replace2) |
17 | ||
17 | ||
18 | 18 | {-DHUN| Data structure to repesent a single option on the command line. DHUN-} |
19 | ||
19 | ||
20 | 20 | data Flag = Verbose |
21 | 21 | | Vector |
22 | 22 | | Version |
39 | 39 | | Odt |
40 | 40 | | Server String |
41 | 41 | deriving (Show, Eq) |
42 | ||
42 | ||
43 | 43 | {-DHUN| String constant on for the version command line option. DHUN-} |
44 | ||
44 | ||
45 | 45 | versionOption :: String |
46 | 46 | versionOption = "version" |
47 | 47 | |
50 | 50 | featuredOption :: String |
51 | 51 | featuredOption = "featured" |
52 | 52 | |
53 | ||
54 | 53 | {-DHUN| String constant on for the resolution command line option. DHUN-} |
55 | ||
54 | ||
56 | 55 | resolutionOption :: String |
57 | 56 | resolutionOption = "resolution" |
58 | ||
57 | ||
59 | 58 | {-DHUN| String constant on for the output command line option. DHUN-} |
60 | ||
59 | ||
61 | 60 | output :: String |
62 | 61 | output = "output" |
63 | 62 | |
64 | 63 | {-DHUN| String constant on for the zip command line option. DHUN-} |
65 | ||
64 | ||
66 | 65 | zip :: String |
67 | 66 | zip = "zip" |
68 | 67 | |
69 | ||
70 | 68 | {-DHUN| String constant on for the hex command line option. DHUN-} |
71 | ||
69 | ||
72 | 70 | hexen :: String |
73 | 71 | hexen = "hex" |
74 | ||
72 | ||
75 | 73 | {-DHUN| String constant on for the templates command line option. DHUN-} |
76 | ||
74 | ||
77 | 75 | templates :: String |
78 | 76 | templates = "templates" |
79 | ||
77 | ||
80 | 78 | {-DHUN| String constant on for the headers command line option. DHUN-} |
81 | ||
79 | ||
82 | 80 | headers :: String |
83 | 81 | headers = "headers" |
84 | ||
82 | ||
85 | 83 | {-DHUN| String constant on for the url command line option. DHUN-} |
86 | ||
84 | ||
87 | 85 | url :: String |
88 | 86 | url = "url" |
89 | ||
87 | ||
90 | 88 | {-DHUN| String constant on for the medaiwiki command line option. DHUN-} |
91 | ||
89 | ||
92 | 90 | mediawiki :: String |
93 | 91 | mediawiki = "mediawiki" |
94 | ||
92 | ||
95 | 93 | {-DHUN| String constant on for the book-namespace command line option. DHUN-} |
96 | ||
94 | ||
97 | 95 | bookmode :: String |
98 | 96 | bookmode = "bookmode" |
99 | 97 | |
100 | 98 | {-DHUN| String constant on for the html command line option. DHUN-} |
101 | ||
99 | ||
102 | 100 | html :: String |
103 | 101 | html = "html" |
104 | ||
102 | ||
105 | 103 | {-DHUN| String constant on for the paper command line option. DHUN-} |
106 | ||
104 | ||
107 | 105 | paperOption :: String |
108 | 106 | paperOption = "paper" |
109 | ||
107 | ||
110 | 108 | {-DHUN| String constant on for the internal command line option. DHUN-} |
111 | ||
109 | ||
112 | 110 | internal :: String |
113 | 111 | internal = "internal" |
114 | ||
112 | ||
115 | 113 | {-DHUN| String constant on for the vector command line option. DHUN-} |
116 | ||
114 | ||
117 | 115 | vectorOption :: String |
118 | 116 | vectorOption = "vector" |
119 | ||
117 | ||
120 | 118 | {-DHUN| String constant on for the copy command line option. DHUN-} |
121 | ||
119 | ||
122 | 120 | copyOption :: String |
123 | 121 | copyOption = "copy" |
124 | ||
122 | ||
125 | 123 | {-DHUN| String constant on for the server command line option. DHUN-} |
126 | ||
124 | ||
127 | 125 | serverOption :: String |
128 | 126 | serverOption = "server" |
129 | 127 | |
130 | 128 | {-DHUN| String constant on for the epub command line option. DHUN-} |
131 | ||
129 | ||
132 | 130 | epubOption :: String |
133 | 131 | epubOption = "epub" |
134 | 132 | |
135 | 133 | {-DHUN| String constant on for the odt command line option. DHUN-} |
136 | ||
134 | ||
137 | 135 | odtOption :: String |
138 | 136 | odtOption = "odt" |
139 | 137 | |
140 | ||
141 | 138 | {-DHUN| Datastructure describing all possible command line options DHUN-} |
142 | ||
139 | ||
143 | 140 | options :: [OptDescr Flag] |
144 | 141 | options |
145 | 142 | = [Option ['V', '?', 'v'] [versionOption, "help"] (NoArg Version) |
167 | 164 | "use book-namespace mode for expansion", |
168 | 165 | Option ['z'] [Main.zip] (NoArg Main.Zip) |
169 | 166 | "output zip archive of latex source", |
170 | Option ['b'] [epubOption] (NoArg Main.EPub) | |
171 | "output epub file", | |
172 | Option ['d'] [odtOption] (NoArg Main.Odt) | |
173 | "output odt file", | |
167 | Option ['b'] [epubOption] (NoArg Main.EPub) "output epub file", | |
168 | Option ['d'] [odtOption] (NoArg Main.Odt) "output odt file", | |
174 | 169 | Option ['g'] [vectorOption] (NoArg Main.Vector) |
175 | 170 | "keep vector graphics in vector form", |
176 | 171 | Option ['i'] [internal] (NoArg Main.InternalTemplates) |
179 | 174 | "use user supplied latex headers", |
180 | 175 | Option ['c'] [copyOption] (ReqArg Main.Copy "DIRECTORY") |
181 | 176 | "copy LaTeX tree to DIRECTORY"] |
182 | ||
177 | ||
183 | 178 | {-DHUN| parsed the options given on the command line via the getopt library DHUN-} |
184 | ||
179 | ||
185 | 180 | compilerOpts :: [String] -> IO ([Flag], [String]) |
186 | 181 | compilerOpts argv |
187 | 182 | = case getOpt Permute options argv of |
188 | 183 | (o, n, []) -> return (o, n) |
189 | 184 | (_, _, errs) -> ioError |
190 | 185 | (userError (concat errs ++ usageInfo header options)) |
191 | ||
186 | ||
192 | 187 | {-DHUN| header string for the usage help DHUN-} |
193 | ||
188 | ||
194 | 189 | header :: String |
195 | 190 | header = "Usage: mediawiki2latex [OPTION...]" |
196 | ||
191 | ||
197 | 192 | {-DHUN| header string giving the current version string of mediawiki2latex DHUN-} |
198 | ||
193 | ||
199 | 194 | versionHeader :: String |
200 | 195 | versionHeader |
201 | = "mediawiki2latex version 7.32\n" ++ (usageInfo header options) | |
202 | ||
196 | = "mediawiki2latex version 7.33\n" ++ (usageInfo header options) | |
197 | ||
203 | 198 | {-DHUN| print the version string of mediawiki2latex. Takes the output of the compilerOpts function as input. Prints the version string if no options were given or the version command was given does noting otherwise DHUN-} |
204 | ||
199 | ||
205 | 200 | printVersion :: (Eq a) => ([Flag], [a]) -> IO () |
206 | 201 | printVersion o |
207 | 202 | = if (Version `elem` (fst o)) || o == ([], []) then |
208 | 203 | putStrLn versionHeader >> exitSuccess else return () |
209 | ||
204 | ||
210 | 205 | {-DHUN| checks whether the given option exists exactly once in the given (getopt parsed) command line. Takes a predicate (returning a Maybe type) as first input parameter. Takes the decription string for the option under consideration as second parameter. Takes the (getopt parsed) command line as third input parameter. Return an either monad giving Right together with the value of the option in cases the option exists exacatly once in the command line, gives Left with an Error values otherwise DHUN-} |
211 | ||
206 | ||
212 | 207 | exactlyOne :: (a -> Maybe b) -> String -> [a] -> Either MyError b |
213 | 208 | exactlyOne predicate s o |
214 | 209 | = case filter isJust (map predicate o) of |
215 | 210 | ((Just x) : []) -> Right x |
216 | 211 | _ -> Left (NotExcatlyOneError s) |
217 | ||
212 | ||
218 | 213 | {-DHUN| checks whether the given option exists at most once in the given (getopt parsed) command line. Takes a predicate (returning a Maybe type) as first input parameter. Takes the decription string for the option under consideration as second parameter. Takes the (getopt parsed) command line as third input parameter. Return an either monad giving Right together with the value of the option in cases the option exists at most once once in the command line, gives Left with an Error values otherwise DHUN-} |
219 | ||
214 | ||
220 | 215 | atMostOne :: |
221 | 216 | (a1 -> Maybe a) -> String -> [a1] -> Either MyError (Maybe a) |
222 | 217 | atMostOne predicate s o |
224 | 219 | (x : []) -> Right x |
225 | 220 | ([]) -> Right Nothing |
226 | 221 | _ -> Left (NotAtMostOneError s) |
227 | ||
222 | ||
228 | 223 | {-DHUN| predicate for the resolution option. see atMostOne and exactlyOne functions for details DHUN-} |
229 | ||
224 | ||
230 | 225 | resolutionPredicate :: Flag -> Maybe String |
231 | 226 | resolutionPredicate (Resolution x) = Just x |
232 | 227 | resolutionPredicate _ = Nothing |
233 | ||
228 | ||
234 | 229 | {-DHUN| predicate for the copy option. see atMostOne and exactlyOne functions for details DHUN-} |
235 | ||
230 | ||
236 | 231 | copyPredicate :: Flag -> Maybe String |
237 | 232 | copyPredicate (Copy x) = Just x |
238 | 233 | copyPredicate _ = Nothing |
239 | ||
234 | ||
240 | 235 | {-DHUN| predicate for the output option. see atMostOne and exactlyOne functions for details DHUN-} |
241 | ||
236 | ||
242 | 237 | outputPredicate :: Flag -> Maybe String |
243 | 238 | outputPredicate (Output x) = Just x |
244 | 239 | outputPredicate _ = Nothing |
245 | ||
240 | ||
246 | 241 | {-DHUN| predicate for the input option. see atMostOne and exactlyOne functions for details DHUN-} |
247 | ||
242 | ||
248 | 243 | inputPredicate :: Flag -> Maybe String |
249 | 244 | inputPredicate (Input x) = Just x |
250 | 245 | inputPredicate _ = Nothing |
251 | ||
246 | ||
252 | 247 | {-DHUN| predicate for the templates option. see atMostOne and exactlyOne functions for details DHUN-} |
253 | ||
248 | ||
254 | 249 | templatesPredicate :: Flag -> Maybe String |
255 | 250 | templatesPredicate (Templates x) = Just x |
256 | 251 | templatesPredicate _ = Nothing |
257 | ||
252 | ||
258 | 253 | {-DHUN| predicate for the headers option. see atMostOne and exactlyOne functions for details DHUN-} |
259 | ||
254 | ||
260 | 255 | headersPredicate :: Flag -> Maybe String |
261 | 256 | headersPredicate (Headers x) = Just x |
262 | 257 | headersPredicate _ = Nothing |
263 | ||
258 | ||
264 | 259 | {-DHUN| predicate for the hex option. see atMostOne and exactlyOne functions for details DHUN-} |
265 | ||
260 | ||
266 | 261 | hexPredicate :: Flag -> Maybe String |
267 | 262 | hexPredicate (Hex x) = Just x |
268 | 263 | hexPredicate _ = Nothing |
269 | ||
264 | ||
270 | 265 | {-DHUN| predicate for the server option. see atMostOne and exactlyOne functions for details DHUN-} |
271 | ||
266 | ||
272 | 267 | serverPredicate :: Flag -> Maybe String |
273 | 268 | serverPredicate (Server x) = Just x |
274 | 269 | serverPredicate _ = Nothing |
275 | ||
276 | 270 | |
277 | 271 | featuredPredicate :: Flag -> Maybe String |
278 | 272 | featuredPredicate (Featured x) = Just x |
279 | 273 | featuredPredicate _ = Nothing |
280 | 274 | |
281 | 275 | {-DHUN| predicate for the paper option. see atMostOne and exactlyOne functions for details DHUN-} |
282 | ||
276 | ||
283 | 277 | paperPredicate :: Flag -> Maybe String |
284 | 278 | paperPredicate (Paper x) = Just x |
285 | 279 | paperPredicate _ = Nothing |
286 | ||
280 | ||
287 | 281 | {-DHUN| default images resolution. All images with a right resolution will be dithered to this resolution unless is is overridden with the resolution command line option DHUN-} |
288 | ||
282 | ||
289 | 283 | defaultResolution :: Integer |
290 | 284 | defaultResolution = 300 |
291 | ||
285 | ||
292 | 286 | {-DHUN| the default paper format DHUN-} |
293 | ||
287 | ||
294 | 288 | defaultPaper :: String |
295 | 289 | defaultPaper = "A4" |
296 | ||
290 | ||
297 | 291 | {-DHUN| function to count the number of given command lines options which are part of a certain set of possible command line options. Takes a Maybe value representing whether the command line option is present and returns 1 on Just and 0 otherwise DHUN-} |
298 | ||
292 | ||
299 | 293 | maybeToInt :: (Num a) => Maybe t -> a |
300 | 294 | maybeToInt (Just _) = 1 |
301 | 295 | maybeToInt _ = 0 |
302 | ||
296 | ||
303 | 297 | {-DHUN| function to count the number of given command lines option which are part of a certain set of possible command line options. Takes a Bool values representing whether the command line option is present and returns 1 on True and 0 otherwise DHUN-} |
304 | ||
298 | ||
305 | 299 | boolToInt :: (Num a) => Bool -> a |
306 | 300 | boolToInt True = 1 |
307 | 301 | boolToInt _ = 0 |
308 | ||
302 | ||
309 | 303 | {-DHUN| Caculates a configuration information for the run of program from the options given on the command line. It takes the pathname of the current working directory as first input parameter. It takes the (getopt parsed) command line as second input parameter.It returns an Either Monad. In case the command line made sence the Right type containg the configuation is returned otherwise the Left values with a Error Values desribing the problem is returned DHUN-} |
310 | ||
304 | ||
311 | 305 | checkOpts :: FilePath -> [Flag] -> Either MyError FullConfig |
312 | 306 | checkOpts cwd o |
313 | 307 | = do serverVal <- atMostOne serverPredicate serverOption o |
314 | 308 | featuredVal <- atMostOne featuredPredicate featuredOption o |
315 | 309 | case featuredVal of |
316 | Just x -> case (reads ("("++ (replace2 x ":" ",")++")"))::[((Integer,Integer),String)] of | |
317 | [((s,e), _)] | (s<=e) -> return | |
318 | FullConfig{ImperativeState.headers = Nothing, | |
319 | resolution = 300, outputFilename = "", | |
320 | inputUrl = "", runMode = ImperativeState.HTML, | |
321 | paper = "A4", vector = False, | |
322 | ImperativeState.copy = Nothing, mainPath = "", | |
323 | server = Nothing, selfTest = Just (s,e), outputType=PlainPDF, compile=Nothing, imgctrb=Nothing} | |
324 | ||
310 | Just x -> case | |
311 | (reads ("(" ++ (replace2 x ":" ",") ++ ")")) :: | |
312 | [((Integer, Integer), String)] | |
313 | of | |
314 | [((s, e), _)] | (s <= e) -> | |
315 | return | |
316 | FullConfig{ImperativeState.headers = Nothing, | |
317 | resolution = 300, outputFilename = "", | |
318 | inputUrl = "", runMode = ImperativeState.HTML, | |
319 | paper = "A4", vector = False, | |
320 | ImperativeState.copy = Nothing, mainPath = "", | |
321 | server = Nothing, selfTest = Just (s, e), | |
322 | outputType = PlainPDF, compile = Nothing, | |
323 | imgctrb = Nothing} | |
325 | 324 | _ -> Left (NotIntegerPairError featuredOption) |
326 | _ -> case serverVal of | |
327 | Just x -> case reads x of | |
328 | [(z, _)] -> return | |
329 | FullConfig{ImperativeState.headers = Nothing, | |
330 | resolution = 300, outputFilename = "", | |
331 | inputUrl = "", runMode = ImperativeState.HTML, | |
332 | paper = "A4", vector = False, | |
333 | ImperativeState.copy = Nothing, mainPath = "", | |
334 | server = Just z, outputType=PlainPDF, selfTest=Nothing, compile=Nothing, imgctrb= Nothing} | |
335 | _ -> Left (NotIntegerError serverOption) | |
336 | _ -> do hexVal <- atMostOne hexPredicate hexen o | |
337 | case hexVal of | |
338 | Just x -> do return ((read . unhex) x) | |
339 | _ -> do resolutionOpt <- atMostOne resolutionPredicate | |
340 | resolutionOption | |
341 | o | |
342 | resolutionVal <- case resolutionOpt of | |
343 | (Just x) -> case reads x of | |
344 | [(z, _)] -> Right z | |
345 | _ -> Left | |
346 | (NotIntegerError | |
347 | resolutionOption) | |
348 | _ -> Right defaultResolution | |
349 | outputVal <- exactlyOne outputPredicate output o | |
350 | inputVal <- exactlyOne inputPredicate url o | |
351 | templatesVal <- atMostOne templatesPredicate templates o | |
352 | headersVal <- atMostOne headersPredicate templates o | |
353 | copyVal <- atMostOne copyPredicate copyOption o | |
354 | paperOpt <- atMostOne paperPredicate paperOption o | |
355 | paperVal <- case paperOpt of | |
356 | Just x -> if | |
357 | x `elem` | |
358 | ["A4", "A5", "B5", "letter", "legal", | |
359 | "executive"] | |
360 | then Right x else Left PaperError | |
361 | _ -> Right defaultPaper | |
362 | let mediaWikiVal = (MediaWiki `elem` o) | |
363 | let bookModeVal = (BookMode `elem` o) | |
364 | let htmlVal = (Main.HTML `elem` o) | |
365 | let zipVal = (Main.Zip `elem` o) | |
366 | let epubVal = (Main.EPub `elem` o) | |
367 | let odtVal = (Main.Odt `elem` o) | |
368 | let temVal = (Main.InternalTemplates `elem` o) | |
369 | let vectorVal = (Main.Vector `elem` o) | |
370 | let mysum | |
371 | = (boolToInt temVal) + (boolToInt mediaWikiVal) + | |
372 | (boolToInt htmlVal) | |
373 | + (maybeToInt templatesVal) + (boolToInt bookModeVal) | |
374 | if mysum > 1 then Left ToManyOptionsError else Right () | |
375 | if ((boolToInt zipVal) + (boolToInt epubVal) + (boolToInt odtVal)) > (1 :: Integer) then Left ToManyOutputOptionsError else Right () | |
376 | runModeVal <- if mysum == (1 :: Integer) then | |
377 | case templatesVal of | |
378 | Just xx -> Right (UserTemplateFile xx) | |
379 | _ -> if mediaWikiVal then Right ExpandedTemplates | |
380 | else | |
381 | if htmlVal then Right ImperativeState.HTML | |
382 | else if bookModeVal then Right ImperativeState.Book | |
383 | else Right (StandardTemplates) | |
384 | else Right ImperativeState.HTML | |
385 | return | |
386 | (FullConfig{ImperativeState.headers = | |
387 | headersVal >>= (return . (cwd </>)), | |
388 | resolution = resolutionVal, selfTest=Nothing,outputFilename = outputVal, | |
389 | inputUrl = inputVal, runMode = runModeVal, | |
390 | paper = paperVal, vector = vectorVal, | |
391 | copy = copyVal >>= (return . (cwd </>)), | |
392 | mainPath = cwd ++ (if os == "linux" then "" else "\\"), | |
393 | server = Nothing, outputType=if zipVal then ZipArchive else if epubVal then EPubFile else if odtVal then OdtFile else PlainPDF,compile=Nothing,imgctrb=Nothing}) | |
394 | ||
325 | _ -> case serverVal of | |
326 | Just x -> case reads x of | |
327 | [(z, _)] -> return | |
328 | FullConfig{ImperativeState.headers = Nothing, | |
329 | resolution = 300, outputFilename = "", | |
330 | inputUrl = "", | |
331 | runMode = ImperativeState.HTML, | |
332 | paper = "A4", vector = False, | |
333 | ImperativeState.copy = Nothing, | |
334 | mainPath = "", server = Just z, | |
335 | outputType = PlainPDF, | |
336 | selfTest = Nothing, compile = Nothing, | |
337 | imgctrb = Nothing} | |
338 | _ -> Left (NotIntegerError serverOption) | |
339 | _ -> do hexVal <- atMostOne hexPredicate hexen o | |
340 | case hexVal of | |
341 | Just x -> do return ((read . unhex) x) | |
342 | _ -> do resolutionOpt <- atMostOne resolutionPredicate | |
343 | resolutionOption | |
344 | o | |
345 | resolutionVal <- case resolutionOpt of | |
346 | (Just x) -> case reads x of | |
347 | [(z, _)] -> Right z | |
348 | _ -> Left | |
349 | (NotIntegerError | |
350 | resolutionOption) | |
351 | _ -> Right defaultResolution | |
352 | outputVal <- exactlyOne outputPredicate output o | |
353 | inputVal <- exactlyOne inputPredicate url o | |
354 | templatesVal <- atMostOne templatesPredicate templates o | |
355 | headersVal <- atMostOne headersPredicate templates o | |
356 | copyVal <- atMostOne copyPredicate copyOption o | |
357 | paperOpt <- atMostOne paperPredicate paperOption o | |
358 | paperVal <- case paperOpt of | |
359 | Just x -> if | |
360 | x `elem` | |
361 | ["A4", "A5", "B5", "letter", | |
362 | "legal", "executive"] | |
363 | then Right x else | |
364 | Left PaperError | |
365 | _ -> Right defaultPaper | |
366 | let mediaWikiVal = (MediaWiki `elem` o) | |
367 | let bookModeVal = (BookMode `elem` o) | |
368 | let htmlVal = (Main.HTML `elem` o) | |
369 | let zipVal = (Main.Zip `elem` o) | |
370 | let epubVal = (Main.EPub `elem` o) | |
371 | let odtVal = (Main.Odt `elem` o) | |
372 | let temVal = (Main.InternalTemplates `elem` o) | |
373 | let vectorVal = (Main.Vector `elem` o) | |
374 | let mysum | |
375 | = (boolToInt temVal) + (boolToInt mediaWikiVal) + | |
376 | (boolToInt htmlVal) | |
377 | + (maybeToInt templatesVal) | |
378 | + (boolToInt bookModeVal) | |
379 | if mysum > 1 then Left ToManyOptionsError else Right () | |
380 | if | |
381 | ((boolToInt zipVal) + (boolToInt epubVal) + | |
382 | (boolToInt odtVal)) | |
383 | > (1 :: Integer) | |
384 | then Left ToManyOutputOptionsError else Right () | |
385 | runModeVal <- if mysum == (1 :: Integer) then | |
386 | case templatesVal of | |
387 | Just xx -> Right (UserTemplateFile xx) | |
388 | _ -> if mediaWikiVal then | |
389 | Right ExpandedTemplates else | |
390 | if htmlVal then | |
391 | Right ImperativeState.HTML else | |
392 | if bookModeVal then | |
393 | Right ImperativeState.Book | |
394 | else | |
395 | Right (StandardTemplates) | |
396 | else Right ImperativeState.HTML | |
397 | return | |
398 | (FullConfig{ImperativeState.headers = | |
399 | headersVal >>= (return . (cwd </>)), | |
400 | resolution = resolutionVal, | |
401 | selfTest = Nothing, | |
402 | outputFilename = outputVal, | |
403 | inputUrl = inputVal, runMode = runModeVal, | |
404 | paper = paperVal, vector = vectorVal, | |
405 | copy = copyVal >>= (return . (cwd </>)), | |
406 | mainPath = | |
407 | cwd ++ (if os == "linux" then "" else "\\"), | |
408 | server = Nothing, | |
409 | outputType = | |
410 | if zipVal then ZipArchive else | |
411 | if epubVal then EPubFile else | |
412 | if odtVal then OdtFile else PlainPDF, | |
413 | compile = Nothing, imgctrb = Nothing}) | |
414 | ||
395 | 415 | {-DHUN| main entry point of mediawiki2latex DHUN-} |
396 | ||
416 | ||
397 | 417 | main :: IO () |
398 | 418 | main |
399 | 419 | = do a <- getArgs |
403 | 423 | cwd <- getCurrentDirectory |
404 | 424 | case (checkOpts cwd (fst o)) of |
405 | 425 | Right x -> case (compile x) of |
406 | Just dir -> do _ <- (runStateT (runExceptT (runCompile dir)) stz) | |
407 | return () | |
408 | _-> case (imgctrb x) of | |
409 | Just dir -> do _ <- (runStateT (runExceptT (runCtrb dir)) stz) | |
410 | return () | |
411 | _-> case (server x) of | |
412 | Just zz -> serve zz | |
413 | _ -> do print x | |
414 | (xx, _) <- (runStateT (runExceptT (All.all x)) | |
415 | stz) | |
416 | case xx of | |
417 | Left n -> print n | |
418 | _ -> return () | |
426 | Just dir -> do _ <- (runStateT (runExceptT (runCompile dir)) stz) | |
427 | return () | |
428 | _ -> case (imgctrb x) of | |
429 | Just dir -> do _ <- (runStateT (runExceptT (runCtrb dir)) stz) | |
430 | return () | |
431 | _ -> case (server x) of | |
432 | Just zz -> serve zz | |
433 | _ -> do print x | |
434 | (xx, _) <- (runStateT (runExceptT (All.all x)) | |
435 | stz) | |
436 | case xx of | |
437 | Left n -> print n | |
438 | _ -> return () | |
419 | 439 | Left y -> print y |
420 | 440 | return () |
3 | 3 | import Language.Haskell.Exts.Pretty |
4 | 4 | import Data.Maybe |
5 | 5 | import System.Environment |
6 | ||
6 | ||
7 | 7 | {-DHUN| save function to access an list by index. If an element at that index exists the element is returned wrapped in a Just of the maybe monad. Otherwise the value Nothing of the Maybe monad is returned DHUN-} |
8 | 8 | ind l n = if length l > n then Just $ l !! n else Nothing |
9 | ||
9 | ||
10 | 10 | {-DHUN| Takes a String as an single input parameter. It has to be the source code of module written in Haskell. It return a parse tree of that module DHUN-} |
11 | ||
12 | parseModuleFromFile :: String -> Module | |
13 | 11 | parseModuleFromFile inp = fromParseResult $ parseFileContents inp |
14 | ||
12 | ||
15 | 13 | {-DHUN| the main function, see documentation at the head of this module DHUN-} |
16 | ||
14 | ||
17 | 15 | main :: IO () |
18 | 16 | main |
19 | 17 | = do args <- getArgs |