This commit is contained in:
		
							
								
								
									
										280
									
								
								src/glxml.gleam
									
									
									
									
									
								
							
							
						
						
									
										280
									
								
								src/glxml.gleam
									
									
									
									
									
								
							@@ -7,7 +7,12 @@ import gleam/result
 | 
				
			|||||||
import gleam/string
 | 
					import gleam/string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub type Declaration {
 | 
					pub type Declaration {
 | 
				
			||||||
  Declaration(versioninfo: String, encoding: String, standalone: Bool)
 | 
					  XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
 | 
				
			||||||
 | 
					  GEntityDecl
 | 
				
			||||||
 | 
					  PEntityDecl(name: String, decl: String)
 | 
				
			||||||
 | 
					  ElementDecl
 | 
				
			||||||
 | 
					  AttListDecl
 | 
				
			||||||
 | 
					  NotationDecl
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub type Entity {
 | 
					pub type Entity {
 | 
				
			||||||
@@ -16,8 +21,17 @@ pub type Entity {
 | 
				
			|||||||
  PublicExternalEntity(literal: String, pubidliteral: String)
 | 
					  PublicExternalEntity(literal: String, pubidliteral: String)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// External identifier that may follow the name in a DOCTYPE declaration.
					pub type ExternalID {
					  /// Built for the `SYSTEM` keyword form: carries only the system literal.
					  SystemID(system_literal: String)
					  /// Built for the `PUBLIC` keyword form: carries the system literal and the
					  /// public identifier literal. Note the field order: system first, public second.
					  PublicID(system_literal: String, public_literal: String)
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub type DocType {
 | 
					pub type DocType {
 | 
				
			||||||
  DocType(name: String, entities: dict.Dict(String, Entity))
 | 
					  DocType(
 | 
				
			||||||
 | 
					    name: String,
 | 
				
			||||||
 | 
					    external_id: Option(ExternalID),
 | 
				
			||||||
 | 
					    entities: dict.Dict(String, Entity),
 | 
				
			||||||
 | 
					  )
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub type Document {
 | 
					pub type Document {
 | 
				
			||||||
@@ -387,7 +401,7 @@ fn process_reference(
 | 
				
			|||||||
  doctype: Option(DocType),
 | 
					  doctype: Option(DocType),
 | 
				
			||||||
) -> Result(String, Nil) {
 | 
					) -> Result(String, Nil) {
 | 
				
			||||||
  case doctype {
 | 
					  case doctype {
 | 
				
			||||||
    Some(DocType(_, entities)) -> {
 | 
					    Some(DocType(_, _, entities)) -> {
 | 
				
			||||||
      get_reference(entities, ref)
 | 
					      get_reference(entities, ref)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    None -> {
 | 
					    None -> {
 | 
				
			||||||
@@ -431,13 +445,249 @@ fn do_parse_name(doc: String, name: String) -> Result(#(String, String), Nil) {
 | 
				
			|||||||
fn parse_prolog(
 | 
					fn parse_prolog(
 | 
				
			||||||
  doc: String,
 | 
					  doc: String,
 | 
				
			||||||
) -> Result(#(Declaration, Option(DocType), String), Nil) {
 | 
					) -> Result(#(Declaration, Option(DocType), String), Nil) {
 | 
				
			||||||
  let #(decl, doc) = case parse_decl(doc) {
 | 
					  let #(decl, doc) =
 | 
				
			||||||
    Ok(#(decl, doc)) -> #(decl, doc)
 | 
					    parse_decl(doc) |> result.unwrap(#(XMLDecl("1.0", "UTF-8", False), doc))
 | 
				
			||||||
    _ -> #(Declaration("1.0", "UTF-8", False), doc)
 | 
					
 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  let doc = parse_misc(doc)
 | 
					  let doc = parse_misc(doc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Ok(#(decl, None, doc))
 | 
					  let #(doctype, doc) =
 | 
				
			||||||
 | 
					    parse_doctype(doc)
 | 
				
			||||||
 | 
					    |> result.map(fn(d) { #(Some(d.0), d.1) })
 | 
				
			||||||
 | 
					    |> result.unwrap(#(None, doc))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Ok(#(decl, doctype, doc))
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Parses a `<!DOCTYPE ...>` declaration located at the very start of `doc`.
					///
					/// On success returns the parsed `DocType` together with the input that
					/// follows the closing `>`. Returns `Error(Nil)` when `doc` does not start
					/// with `<!DOCTYPE`, when no name can be parsed, or when the declaration is
					/// not terminated by `>` after the optional parts.
					fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
					  case doc {
					    "<!DOCTYPE" <> rest -> {
					      use #(name, rest) <- result.try(rest |> trim_space |> parse_name)

					      // The external ID is optional: on failure continue from the same input.
					      let #(external_id, rest) = case parse_external_id(rest) {
					        Ok(found) -> found
					        Error(_) -> #(None, rest)
					      }

					      // The internal subset is optional too. Its declarations are parsed only
					      // to advance past them; they are not stored in the DocType yet.
					      let trimmed = trim_space(rest)
					      let #(_int_subset, rest) = case parse_int_subset(trimmed) {
					        Ok(found) -> found
					        Error(_) -> #([], trimmed)
					      }

					      case rest {
					        // NOTE: `entities` is always an empty dict at this point.
					        ">" <> after -> Ok(#(DocType(name, external_id, dict.new()), after))
					        _ -> Error(Nil)
					      }
					    }
					    _ -> Error(Nil)
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Parses a bracketed internal subset: `[` ... `]`.
					///
					/// Leading space is removed with `trim_space` first. The content between the
					/// brackets is delegated to `do_parse_int_subset`, which must leave the
					/// closing `]` at the head of the remaining input. Returns the collected
					/// declarations and the input after `]`, or `Error(Nil)` if either bracket
					/// is missing or the content fails to parse.
					fn parse_int_subset(doc: String) -> Result(#(List(Declaration), String), Nil) {
					  case trim_space(doc) {
					    "[" <> body -> {
					      use parsed <- result.try(do_parse_int_subset(body, []))
					      case parsed {
					        #(decls, "]" <> rest) -> Ok(#(decls, rest))
					        _ -> Error(Nil)
					      }
					    }
					    _ -> Error(Nil)
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Walks the content of an internal subset (the part between `[` and `]`).
					///
					/// - `doc`: remaining input; leading space is removed with `trim_space`.
					/// - `decl_list`: declarations known so far, used to resolve `%name;`
					///   parameter-entity references.
					///
					/// A `%name;` reference is replaced by the text of the matching
					/// `PEntityDecl` and parsing continues on the expanded input; an unknown
					/// name or a missing `;` yields `Error(Nil)`.
					///
					/// When the input reaches `]` the function stops and returns `decl_list`
					/// with the `]` still at the head of the remaining input, so the caller
					/// can consume it. Any other content returns `Error(Nil)` instead of
					/// panicking (the previous fallback was `todo`, which crashes whenever it
					/// is evaluated).
					fn do_parse_int_subset(
					  doc: String,
					  decl_list: List(Declaration),
					) -> Result(#(List(Declaration), String), Nil) {
					  let doc = trim_space(doc)
					  case doc {
					    "%" <> tail -> {
					      use #(name, doc) <- result.try(parse_name(tail))
					      case doc {
					        ";" <> tail ->
					          case get_entity_replacement(name, decl_list) {
					            // NOTE(review): expansion is re-parsed with no recursion guard; a
					            // self-referencing entity would loop once declarations are collected.
					            Some(decl) -> do_parse_int_subset(decl <> tail, decl_list)
					            None -> Error(Nil)
					          }
					        _ -> Error(Nil)
					      }
					    }
					    // End of the subset: leave `]` for the caller to match.
					    "]" <> _ -> Ok(#(decl_list, doc))
					    // TODO: markup declarations (entity, element, attlist, notation),
					    // comments and processing instructions are not parsed yet.
					    _ -> Error(Nil)
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Looks up the replacement text of the parameter entity named `entity`.
					///
					/// Scans `decl_list` front to back and returns `Some(text)` for the first
					/// `PEntityDecl` whose name equals `entity`; every other declaration is
					/// skipped. Returns `None` when no such declaration exists.
					fn get_entity_replacement(
					  entity: String,
					  decl_list: List(Declaration),
					) -> Option(String) {
					  case decl_list {
					    [] -> None
					    [PEntityDecl(name, replacement), ..] if name == entity -> Some(replacement)
					    [_, ..remaining] -> get_entity_replacement(entity, remaining)
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Parses an external identifier: `SYSTEM <literal>` or
					/// `PUBLIC <public literal> <system literal>`.
					///
					/// Space before the keyword and before each literal is removed with
					/// `trim_space`. On success the result is always `Some(...)` paired with
					/// the input following the last literal. Returns `Error(Nil)` when neither
					/// keyword is present or when a literal fails to parse.
					fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
					  case trim_space(doc) {
					    "SYSTEM" <> rest ->
					      rest
					      |> trim_space
					      |> parse_system_literal(None, "")
					      |> result.map(fn(parsed) {
					        #(Some(SystemID(system_literal: parsed.0)), parsed.1)
					      })
					    "PUBLIC" <> rest -> {
					      use #(public_literal, rest) <- result.try(
					        rest |> trim_space |> parse_public_literal(None, ""),
					      )
					      use #(system_literal, rest) <- result.try(
					        rest |> trim_space |> parse_system_literal(None, ""),
					      )
					      let id =
					        PublicID(
					          system_literal: system_literal,
					          public_literal: public_literal,
					        )
					      Ok(#(Some(id), rest))
					    }
					    _ -> Error(Nil)
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Parses a quoted public identifier literal.
					///
					/// - `doc`: remaining input.
					/// - `quote`: `None` before the opening quote has been seen, otherwise
					///   `Some(q)` with the quote character that opened the literal.
					/// - `literal`: characters accumulated so far (reset to `""` when the
					///   opening quote is consumed).
					///
					/// Returns the literal's content (without quotes) and the input after the
					/// closing quote. Returns `Error(Nil)` when the input does not start with
					/// `"` or `'`, when it ends before the closing quote, or when a character
					/// outside the accepted set is met. An apostrophe is accepted as content
					/// only inside a double-quoted literal.
					fn parse_public_literal(
					  doc: String,
					  quote: Option(String),
					  literal: String,
					) -> Result(#(String, String), Nil) {
					  case quote {
					    // Nothing opened yet: the next character must be a quote.
					    None ->
					      case doc {
					        "\"" as q <> tail | "'" as q <> tail ->
					          parse_public_literal(tail, Some(q), "")
					        _ -> Error(Nil)
					      }
					    Some(open) ->
					      case doc {
					        "" -> Error(Nil)
					        // A quote closes the literal only if it matches the opening one.
					        "\"" <> tail if open == "\"" -> Ok(#(literal, tail))
					        "'" <> tail if open == "'" -> Ok(#(literal, tail))
					        "'" as char <> tail if open == "\"" ->
					          parse_public_literal(tail, quote, literal <> char)
					        " " as char <> tail
					        | "\r" as char <> tail
					        | "\n" as char <> tail
					        | "0" as char <> tail
					        | "1" as char <> tail
					        | "2" as char <> tail
					        | "3" as char <> tail
					        | "4" as char <> tail
					        | "5" as char <> tail
					        | "6" as char <> tail
					        | "7" as char <> tail
					        | "8" as char <> tail
					        | "9" as char <> tail
					        | "a" as char <> tail
					        | "b" as char <> tail
					        | "c" as char <> tail
					        | "d" as char <> tail
					        | "e" as char <> tail
					        | "f" as char <> tail
					        | "g" as char <> tail
					        | "h" as char <> tail
					        | "i" as char <> tail
					        | "j" as char <> tail
					        | "k" as char <> tail
					        | "l" as char <> tail
					        | "m" as char <> tail
					        | "n" as char <> tail
					        | "o" as char <> tail
					        | "p" as char <> tail
					        | "q" as char <> tail
					        | "r" as char <> tail
					        | "s" as char <> tail
					        | "t" as char <> tail
					        | "u" as char <> tail
					        | "v" as char <> tail
					        | "w" as char <> tail
					        | "x" as char <> tail
					        | "y" as char <> tail
					        | "z" as char <> tail
					        | "A" as char <> tail
					        | "B" as char <> tail
					        | "C" as char <> tail
					        | "D" as char <> tail
					        | "E" as char <> tail
					        | "F" as char <> tail
					        | "G" as char <> tail
					        | "H" as char <> tail
					        | "I" as char <> tail
					        | "J" as char <> tail
					        | "K" as char <> tail
					        | "L" as char <> tail
					        | "M" as char <> tail
					        | "N" as char <> tail
					        | "O" as char <> tail
					        | "P" as char <> tail
					        | "Q" as char <> tail
					        | "R" as char <> tail
					        | "S" as char <> tail
					        | "T" as char <> tail
					        | "U" as char <> tail
					        | "V" as char <> tail
					        | "W" as char <> tail
					        | "X" as char <> tail
					        | "Y" as char <> tail
					        | "Z" as char <> tail
					        | "-" as char <> tail
					        | "(" as char <> tail
					        | ")" as char <> tail
					        | "+" as char <> tail
					        | "," as char <> tail
					        | "." as char <> tail
					        | "/" as char <> tail
					        | ":" as char <> tail
					        | "=" as char <> tail
					        | "?" as char <> tail
					        | ";" as char <> tail
					        | "!" as char <> tail
					        | "*" as char <> tail
					        | "#" as char <> tail
					        | "@" as char <> tail
					        | "$" as char <> tail
					        | "_" as char <> tail
					        | "%" as char <> tail
					        -> parse_public_literal(tail, quote, literal <> char)
					        // Anything else (including a non-matching quote) is rejected.
					        _ -> Error(Nil)
					      }
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Parses a quoted system literal.
					///
					/// - `doc`: remaining input.
					/// - `quote`: `None` before the opening quote has been seen, otherwise
					///   `Some(q)` with the quote character that opened the literal.
					/// - `literal`: characters accumulated so far (reset to `""` when the
					///   opening quote is consumed).
					///
					/// Every grapheme up to the matching closing quote is accepted. Returns the
					/// content (without quotes) and the input after the closing quote, or
					/// `Error(Nil)` when the input does not start with `"` or `'` or ends
					/// before the literal is closed.
					fn parse_system_literal(
					  doc: String,
					  quote: Option(String),
					  literal: String,
					) -> Result(#(String, String), Nil) {
					  case quote {
					    // Nothing opened yet: the next character must be a quote.
					    None ->
					      case doc {
					        "\"" as q <> tail | "'" as q <> tail ->
					          parse_system_literal(tail, Some(q), "")
					        _ -> Error(Nil)
					      }
					    Some(open) ->
					      case doc {
					        "" -> Error(Nil)
					        "\"" <> tail if open == "\"" -> Ok(#(literal, tail))
					        "'" <> tail if open == "'" -> Ok(#(literal, tail))
					        _ ->
					          case string.pop_grapheme(doc) {
					            Ok(#(char, tail)) ->
					              parse_system_literal(tail, quote, literal <> char)
					            // Unreachable: the empty string was handled above.
					            Error(_) -> Error(Nil)
					          }
					      }
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn parse_misc(doc: String) -> String {
 | 
					fn parse_misc(doc: String) -> String {
 | 
				
			||||||
@@ -463,18 +713,14 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
 | 
				
			|||||||
  case doc {
 | 
					  case doc {
 | 
				
			||||||
    "<?xml" <> tail -> {
 | 
					    "<?xml" <> tail -> {
 | 
				
			||||||
      use #(versioninfo, doc) <- result.try(parse_versioninfo(tail))
 | 
					      use #(versioninfo, doc) <- result.try(parse_versioninfo(tail))
 | 
				
			||||||
      let #(encoding, doc) = case parse_encodingdecl(doc) {
 | 
					      let #(encoding, doc) =
 | 
				
			||||||
        Ok(e) -> e
 | 
					        parse_encodingdecl(doc) |> result.unwrap(#("", doc))
 | 
				
			||||||
        Error(_) -> #("", doc)
 | 
					      let #(standalone, doc) =
 | 
				
			||||||
      }
 | 
					        parse_standalone(doc) |> result.unwrap(#(False, doc))
 | 
				
			||||||
      let #(standalone, doc) = case parse_standalone(doc) {
 | 
					 | 
				
			||||||
        Ok(e) -> e
 | 
					 | 
				
			||||||
        Error(_) -> #(False, doc)
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      case trim_space(doc) {
 | 
					      case trim_space(doc) {
 | 
				
			||||||
        "?>" <> tail ->
 | 
					        "?>" <> tail ->
 | 
				
			||||||
          Ok(#(Declaration(versioninfo:, encoding:, standalone:), tail))
 | 
					          Ok(#(XMLDecl(versioninfo:, encoding:, standalone:), tail))
 | 
				
			||||||
        _ -> Error(Nil)
 | 
					        _ -> Error(Nil)
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user