Codebase list coq / upstream/8.4_beta+dfsg lib / xml_parser.ml
upstream/8.4_beta+dfsg

Tree @upstream/8.4_beta+dfsg (Download .tar.gz)

xml_parser.ml @upstream/8.4_beta+dfsgraw · history · blame

(*
 * Xml Light, an small Xml parser/printer with DTD support.
 * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
 * Copyright (C) 2003 Jacques Garrigue
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *)

open Printf

type xml = 
        | Element of (string * (string * string) list * xml list)
        | PCData of string

type error_pos = {
        eline : int;
        eline_start : int;
        emin : int;
        emax : int;
}

type error_msg =
        | UnterminatedComment
        | UnterminatedString
        | UnterminatedEntity
        | IdentExpected
        | CloseExpected
        | NodeExpected
        | AttributeNameExpected
        | AttributeValueExpected
        | EndOfTagExpected of string
        | EOFExpected

type error = error_msg * error_pos

exception Error of error

exception File_not_found of string

type t = {
	mutable check_eof : bool;
	mutable concat_pcdata : bool;
}

type source = 
	| SFile of string
	| SChannel of in_channel
	| SString of string
	| SLexbuf of Lexing.lexbuf

type state = {
	source : Lexing.lexbuf;
	stack : Xml_lexer.token Stack.t;
	xparser : t;
}

exception Internal_error of error_msg
exception NoMoreData

let xml_error = ref (fun _ -> assert false)
let file_not_found = ref (fun _ -> assert false)

let is_blank s =
  let len = String.length s in
  let break = ref true in
  let i = ref 0 in
  while !break && !i < len do
    let c = s.[!i] in
    (* no '\r' because we replaced them in the lexer *)
    if c = ' ' || c = '\n' || c = '\t' then incr i
    else break := false
  done;
  !i = len

let _raises e f =
	xml_error := e;
	file_not_found := f

let make () =
	{
		check_eof = true;
		concat_pcdata = true;
	}

let check_eof p v = p.check_eof <- v
let concat_pcdata p v = p.concat_pcdata <- v

let pop s =
	try
		Stack.pop s.stack
	with
		Stack.Empty ->
			Xml_lexer.token s.source

let push t s =
	Stack.push t s.stack

let canonicalize l =
  let has_elt = List.exists (function Element _ -> true | _ -> false) l in
  if has_elt then List.filter (function PCData s -> not (is_blank s) | _ -> true) l
  else l

let rec read_node s =
	match pop s with
	| Xml_lexer.PCData s -> PCData s
	| Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
	| Xml_lexer.Tag (tag, attr, false) ->
          let elements = read_elems ~tag s in
          Element (tag, attr, canonicalize elements)
	| t ->
		push t s;
		raise NoMoreData
and
	read_elems ?tag s =
		let elems = ref [] in
		(try
                  while true do
                    let node = read_node s in
                    match node, !elems with
                    | PCData c , (PCData c2) :: q ->
                      elems := PCData (c2 ^ c) :: q
                    | _, l ->
                      elems := node :: l
                  done
		with
			NoMoreData -> ());
		match pop s with
		| Xml_lexer.Endtag s when Some s = tag -> List.rev !elems
		| Xml_lexer.Eof when tag = None -> List.rev !elems
		| t ->
			match tag with
			| None -> raise (Internal_error EOFExpected)
			| Some s -> raise (Internal_error (EndOfTagExpected s))

let rec read_xml s =
  let node = read_node s in
  match node with
  | Element _ -> node
  | PCData c ->
    if is_blank c then read_xml s
    else raise (Xml_lexer.Error Xml_lexer.ENodeExpected)

let convert = function
	| Xml_lexer.EUnterminatedComment -> UnterminatedComment
	| Xml_lexer.EUnterminatedString -> UnterminatedString
	| Xml_lexer.EIdentExpected -> IdentExpected
	| Xml_lexer.ECloseExpected -> CloseExpected
	| Xml_lexer.ENodeExpected -> NodeExpected
	| Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
	| Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
	| Xml_lexer.EUnterminatedEntity -> 	UnterminatedEntity

let do_parse xparser source =
	try
		Xml_lexer.init source;
		let s = { source = source; xparser = xparser; stack = Stack.create(); } in
		let x = read_xml s in
		if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
		Xml_lexer.close source;
		x
	with
		| NoMoreData ->
			Xml_lexer.close source;
			raise (!xml_error NodeExpected source)
		| Internal_error e ->
			Xml_lexer.close source;
			raise (!xml_error e source)
		| Xml_lexer.Error e ->
			Xml_lexer.close source;
			raise (!xml_error (convert e) source)

let parse p = function
	| SChannel ch -> do_parse p (Lexing.from_channel ch)
	| SString str -> do_parse p (Lexing.from_string str)
	| SLexbuf lex -> do_parse p lex
	| SFile fname ->
		let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
		try
			let x = do_parse p (Lexing.from_channel ch) in
			close_in ch;
			x
		with
			e ->
				close_in ch;
				raise e


let error_msg = function
        | UnterminatedComment -> "Unterminated comment"
        | UnterminatedString -> "Unterminated string"
        | UnterminatedEntity -> "Unterminated entity"
        | IdentExpected -> "Ident expected"
        | CloseExpected -> "Element close expected"
        | NodeExpected -> "Xml node expected"
        | AttributeNameExpected -> "Attribute name expected"
        | AttributeValueExpected -> "Attribute value expected"
        | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
        | EOFExpected -> "End of file expected"

let error (msg,pos) =
        if pos.emin = pos.emax then
                sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
        else
                sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
        
let line e = e.eline

let range e = 
        e.emin - e.eline_start , e.emax - e.eline_start

let abs_range e =
        e.emin , e.emax

let pos source =
        let line, lstart, min, max = Xml_lexer.pos source in
        {
                eline = line;
                eline_start = lstart;
                emin = min;
                emax = max;
        }

let () = _raises (fun x p -> 
        (* local cast : Xml.error_msg -> error_msg *)
        Error (x, pos p))
        (fun f -> File_not_found f)