858 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			Gleam
		
	
	
	
	
	
			
		
		
	
	
			858 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			Gleam
		
	
	
	
	
	
import gleam/bool
 | 
						|
import gleam/dict
 | 
						|
import gleam/int
 | 
						|
import gleam/list
 | 
						|
import gleam/option.{type Option, None, Some}
 | 
						|
import gleam/result
 | 
						|
import gleam/string
 | 
						|
 | 
						|
/// The XML declaration (`<?xml ... ?>`) of a document.
///
/// `versioninfo` is the version number text (for example "1.0"),
/// `encoding` is the declared encoding name, and `standalone` is the value
/// of the standalone flag.
pub type Declaration {
  Declaration(
    versioninfo: String,
    encoding: String,
    standalone: Bool,
  )
}
 | 
						|
 | 
						|
/// An entity that a reference such as `&name;` can resolve to.
pub type Entity {
  /// An entity whose replacement text is stored inline.
  InternalEntity(value: String)
  /// An external entity identified by a system literal only.
  SystemExternalEntity(literal: String)
  /// An external entity identified by a system literal and a public id.
  PublicExternalEntity(
    literal: String,
    pubidliteral: String,
  )
}
 | 
						|
 | 
						|
/// A document type: its name plus the entities it makes available,
/// keyed by entity name.
pub type DocType {
  DocType(
    name: String,
    entities: dict.Dict(String, Entity),
  )
}
 | 
						|
 | 
						|
/// A parsed XML document: the declaration, an optional document type and
/// an optional root element (`None` when nothing follows the prolog).
pub type Document {
  Document(decl: Declaration, doctype: Option(DocType), element: Option(Element))
}
 | 
						|
 | 
						|
/// A single `name="value"` pair taken from a start tag or an empty tag.
pub type Attribute {
  Attribute(
    name: String,
    value: String,
  )
}
 | 
						|
 | 
						|
/// A node of the parsed document tree.
pub type Element {
  /// A self-closing tag such as `<a/>`.
  EmptyElem(name: String, attrs: List(Attribute))
  /// A start tag, its child nodes and the matching end tag.
  Element(
    name: String,
    attrs: List(Attribute),
    elements: List(Element),
  )
  /// Character data found between markup.
  Text(content: String)
  /// The text between `<!--` and `-->`.
  Comment(content: String)
  /// The text between `<![CDATA[` and `]]>`.
  CData(content: String)
  /// A processing instruction: its target name and its content.
  PI(name: String, content: String)
  /// A single whitespace character outside the root element.
  Whitespace
}
 | 
						|
 | 
						|
/// Parses a small sample document and prints the result with `echo`.
pub fn main() {
  let sample =
    "<?xml version=\"1.1\" encoding='UTF-8'?>\r\n   <!-- hello-world -->   \n<b><a attr='ha &#38;ha' battr='baba' ref='&'/><!-- ma comment --><![CDATA[Testing&&<haha>]]><?test asuhashd ?></b>"

  echo parse_document(sample)
}
 | 
						|
 | 
						|
/// The five predefined XML entities (`lt`, `gt`, `amp`, `apos`, `quot`).
///
/// Each replacement text is stored as a character reference rather than
/// as the literal character. `parse_reference` pushes the replacement text
/// of a named entity back onto the input and scans it again, so a literal
/// `<`, `&`, `'` or `"` would be re-read as markup or as an attribute
/// delimiter. A character reference survives that second scan and yields
/// the intended character as data.
pub fn default_entities() -> dict.Dict(String, Entity) {
  dict.from_list([
    #("lt", InternalEntity("&#60;")),
    #("gt", InternalEntity("&#62;")),
    #("amp", InternalEntity("&#38;")),
    #("apos", InternalEntity("&#39;")),
    #("quot", InternalEntity("&#34;")),
  ])
}
 | 
						|
 | 
						|
/// Parses a whole document: prolog, an optional root element, then any
/// trailing comments, whitespace and processing instructions.
///
/// Returns a `Document` without a root element when nothing follows the
/// prolog, and `Error(Nil)` when input is left over after the trailing
/// misc items.
fn parse_document(doc: String) -> Result(Document, Nil) {
  use #(decl, doctype, rest) <- result.try(parse_prolog(doc))

  case rest {
    "" -> Ok(Document(decl:, doctype:, element: None))
    _ -> {
      use #(root, rest) <- result.try(parse_element(rest, doctype))

      // Only misc items may follow the root element.
      case parse_misc(rest) {
        "" -> Ok(Document(decl:, doctype:, element: Some(root)))
        _ -> Error(Nil)
      }
    }
  }
}
 | 
						|
 | 
						|
/// Parses one element, trying the self-closing form first and falling
/// back to the start-tag / content / end-tag form.
fn parse_element(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
  case parse_empty_elem(doc, doctype) {
    Ok(parsed) -> Ok(parsed)
    Error(_) -> parse_tagged_elem(doc, doctype)
  }
}
 | 
						|
 | 
						|
/// Parses a self-closing tag `<name attr='v' ... />`.
///
/// Returns the `EmptyElem` and the input after `/>`, or `Error(Nil)` when
/// the input is not a self-closing tag.
fn parse_empty_elem(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
  case doc {
    "<" <> after_bracket -> {
      use #(name, rest) <- result.try(parse_name(after_bracket))
      use #(attrs, rest) <- result.try(parse_attributes(rest, doctype, []))

      // Whitespace is allowed between the last attribute and `/>`.
      case trim_space(rest) {
        "/>" <> rest -> Ok(#(EmptyElem(name, attrs), rest))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Parses `<name attrs>content</name>`.
///
/// Fails with `Error(Nil)` when any part is missing or when the end tag
/// name differs from the start tag name.
fn parse_tagged_elem(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
  case doc {
    "<" <> after_bracket -> {
      use #(name, rest) <- result.try(parse_name(after_bracket))
      use #(attrs, rest) <- result.try(parse_attributes(rest, doctype, []))

      // End of the start tag.
      use body <- result.try(case trim_space(rest) {
        ">" <> body -> Ok(body)
        _ -> Error(Nil)
      })
      use #(children, rest) <- result.try(parse_content(body, doctype, []))

      // Start of the end tag.
      use after_slash <- result.try(case rest {
        "</" <> after_slash -> Ok(after_slash)
        _ -> Error(Nil)
      })
      use #(close_name, rest) <- result.try(parse_name(after_slash))

      case trim_space(rest), close_name == name {
        ">" <> rest, True -> Ok(#(Element(name, attrs, children), rest))
        _, _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Parses the content of an element: character data interleaved with
/// child elements, comments, CDATA sections and processing instructions.
///
/// `content` accumulates the nodes found so far in reverse order. The
/// function returns them in document order together with the remaining
/// input, which starts at the first thing that is not content (normally
/// the end tag).
fn parse_content(
  doc: String,
  doctype: Option(DocType),
  content: List(Element),
) -> Result(#(List(Element), String), Nil) {
  use #(chardata, doc) <- result.try(parse_chardata(doc, doctype, ""))

  // Shadow the accumulator so every later branch sees the text that was
  // just read; recursing with the old list would drop any text that
  // precedes a child node.
  let content = case chardata {
    "" -> content
    _ -> [Text(chardata), ..content]
  }

  case doc {
    "<" <> _ -> {
      let markup =
        try_parsers(
          [parse_element(_, doctype), parse_comment, parse_cdata, parse_pi],
          doc,
        )

      case markup {
        Ok(#(element, doc)) -> parse_content(doc, doctype, [element, ..content])
        // Not content markup (for example an end tag): stop here.
        Error(_) -> Ok(#(list.reverse(content), doc))
      }
    }
    _ -> Ok(#(list.reverse(content), doc))
  }
}
 | 
						|
 | 
						|
/// Parses a processing instruction `<?target content?>`.
///
/// A target that equals "xml" in any letter case is rejected.
fn parse_pi(doc: String) -> Result(#(Element, String), Nil) {
  case doc {
    "<?" <> after_open -> {
      use #(target, rest) <- result.try(parse_name(after_open))

      case string.lowercase(target) {
        "xml" -> Error(Nil)
        _ -> {
          use #(content, rest) <- result.try(
            parse_pi_content(trim_space(rest), ""),
          )

          case rest {
            "?>" <> rest -> Ok(#(PI(target, content), rest))
            _ -> Error(Nil)
          }
        }
      }
    }
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Collects processing-instruction content into `pi` until `?>` is next.
///
/// The `?>` itself is left in the returned input. Running out of input
/// is an error; a character rejected by `parse_char` stops the scan and
/// returns what was collected so far.
fn parse_pi_content(doc: String, pi: String) -> Result(#(String, String), Nil) {
  case doc {
    "" -> Error(Nil)
    "?>" <> _ -> Ok(#(pi, doc))
    _ ->
      case parse_char(doc) {
        Error(_) -> Ok(#(pi, doc))
        Ok(#(char, rest)) -> parse_pi_content(rest, pi <> char)
      }
  }
}
 | 
						|
 | 
						|
/// Parses a `<![CDATA[ ... ]]>` section into a `CData` node.
fn parse_cdata(doc: String) -> Result(#(Element, String), Nil) {
  case doc {
    "<![CDATA[" <> body ->
      do_parse_cdata(body, "")
      |> result.map(fn(parsed) { #(CData(parsed.0), parsed.1) })
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Accumulates CDATA text into `cdata` until the closing `]]>`.
///
/// Returns the text and the input after `]]>`. Fails when the input ends
/// first or when `parse_char` rejects a character.
fn do_parse_cdata(doc: String, cdata: String) -> Result(#(String, String), Nil) {
  case doc {
    "" -> Error(Nil)
    "]]>" <> rest -> Ok(#(cdata, rest))
    _ ->
      case parse_char(doc) {
        Error(_) -> Error(Nil)
        Ok(#(char, rest)) -> do_parse_cdata(rest, cdata <> char)
      }
  }
}
 | 
						|
 | 
						|
/// Reads character data up to the next `<` or the end of the input,
/// resolving references along the way.
///
/// `chardata` is the text accumulated so far. Returns the collected text
/// and the remaining input. Fails when `]]>` appears in the text or when
/// a reference cannot be parsed.
fn parse_chardata(
  doc: String,
  doctype: Option(DocType),
  chardata: String,
) -> Result(#(String, String), Nil) {
  case doc {
    "]]>" <> _ -> Error(Nil)
    "<" <> _ -> Ok(#(chardata, doc))
    "&" <> _ -> {
      use #(refval, doc) <- result.try(parse_reference(doc, doctype))

      parse_chardata(doc, doctype, chardata <> refval)
    }

    // End of input: hand back everything collected so far instead of
    // discarding it.
    "" -> Ok(#(chardata, ""))
    _ -> {
      // `doc` is not empty here, so popping a grapheme cannot fail.
      let assert Ok(#(char, tail)) = string.pop_grapheme(doc)
      parse_chardata(tail, doctype, chardata <> char)
    }
  }
}
 | 
						|
 | 
						|
/// Parses zero or more attributes.
///
/// `attrs` holds the attributes found so far, newest first. Parsing stops
/// at the first position where `parse_attribute` fails; the attributes
/// are then returned in source order together with the input as it was
/// before that failed attempt.
fn parse_attributes(
  doc: String,
  doctype: Option(DocType),
  attrs: List(Attribute),
) -> Result(#(List(Attribute), String), Nil) {
  case parse_attribute(doc, doctype) {
    Error(_) -> Ok(#(list.reverse(attrs), doc))
    Ok(#(attribute, rest)) ->
      parse_attributes(rest, doctype, [attribute, ..attrs])
  }
}
 | 
						|
 | 
						|
/// Parses one attribute: optional leading whitespace, a name, `=` and a
/// value quoted with either `"` or `'`.
///
/// Whitespace is accepted on both sides of `=`, as the XML grammar allows
/// (`Eq ::= S? '=' S?`). Returns the attribute and the input after the
/// closing quote.
fn parse_attribute(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Attribute, String), Nil) {
  let doc = trim_space(doc)
  use #(name, doc) <- result.try(parse_name(doc))

  case trim_space(doc) {
    "=" <> after_eq ->
      case trim_space(after_eq) {
        "\"" <> body -> {
          let #(value, rest) = parse_attribute_value(body, doctype, "\"")
          case rest {
            "\"" <> rest -> Ok(#(Attribute(name, value), rest))
            _ -> Error(Nil)
          }
        }
        "'" <> body -> {
          let #(value, rest) = parse_attribute_value(body, doctype, "'")
          case rest {
            "'" <> rest -> Ok(#(Attribute(name, value), rest))
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}

/// Reads the body of an attribute value up to, but not including, the
/// first character that cannot be part of it: `<`, the `quote` delimiter,
/// or an `&` that does not start a parsable reference.
fn parse_attribute_value(
  doc: String,
  doctype: Option(DocType),
  quote: String,
) -> #(String, String) {
  let plain_char = fn(input) {
    case string.pop_grapheme(input) {
      Ok(#(char, _)) if char == "<" || char == "&" || char == quote ->
        Error(Nil)
      Ok(#(char, rest)) -> Ok(#(char, rest))
      Error(_) -> Error(Nil)
    }
  }

  parse_multiple_optional(
    doc,
    try_parsers([plain_char, parse_reference(_, doctype)], _),
    "",
  )
}
 | 
						|
 | 
						|
/// Parses a reference at the start of `doc`.
///
/// - `&#xHEX;` and `&#DEC;` yield the referenced character and the input
///   after the `;`.
/// - `&name;` yields an empty string and pushes the entity's replacement
///   text back onto the front of the remaining input, so the caller scans
///   it again.
fn parse_reference(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(String, String), Nil) {
  // Shared by both numeric forms: digits, `;`, then code point to string.
  let char_reference = fn(input, digit_parser, base) {
    use #(digits, rest) <- result.try(parse_multiple(input, digit_parser))

    case rest {
      ";" <> rest -> {
        use value <- result.try(int.base_parse(digits, base))
        use codepoint <- result.try(string.utf_codepoint(value))
        Ok(#(string.from_utf_codepoints([codepoint]), rest))
      }
      _ -> Error(Nil)
    }
  }

  case doc {
    "&#x" <> rest -> char_reference(rest, parse_hex_digit, 16)
    "&#" <> rest -> char_reference(rest, parse_digit, 10)
    "&" <> rest -> {
      use #(name, rest) <- result.try(parse_name(rest))

      case rest {
        ";" <> rest -> {
          use value <- result.try(process_reference(name, doctype))
          Ok(#("", value <> rest))
        }
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Resolves the entity name `ref` against the document type's entities,
/// or against the predefined entities when there is no document type.
fn process_reference(
  ref: String,
  doctype: Option(DocType),
) -> Result(String, Nil) {
  let entities = case doctype {
    None -> default_entities()
    Some(DocType(_, declared)) -> declared
  }

  get_reference(entities, ref)
}
 | 
						|
 | 
						|
/// Looks up `ref` in `entities` and returns its replacement text.
///
/// External entities are not resolved and produce `Error(Nil)`. A name
/// missing from `entities` is retried against the predefined entities,
/// unless `entities` already is that table.
fn get_reference(
  entities: dict.Dict(String, Entity),
  ref: String,
) -> Result(String, Nil) {
  case dict.get(entities, ref) {
    Ok(InternalEntity(replacement)) -> Ok(replacement)
    Ok(SystemExternalEntity(_)) | Ok(PublicExternalEntity(_, _)) -> Error(Nil)
    Error(_) -> {
      let defaults = default_entities()
      use <- bool.guard(when: entities == defaults, return: Error(Nil))
      get_reference(defaults, ref)
    }
  }
}
 | 
						|
 | 
						|
/// Parses an XML name: one name-start character followed by any number
/// of name characters. Returns the name and the remaining input.
fn parse_name(doc: String) -> Result(#(String, String), Nil) {
  use #(first, rest) <- result.try(parse_name_start_char(doc))
  do_parse_name(rest, first)
}
 | 
						|
 | 
						|
/// Extends `name` with name characters taken from the front of `doc`
/// until one is not accepted. Always succeeds.
fn do_parse_name(doc: String, name: String) -> Result(#(String, String), Nil) {
  case parse_name_char(doc) {
    Error(_) -> Ok(#(name, doc))
    Ok(#(next, rest)) -> do_parse_name(rest, name <> next)
  }
}
 | 
						|
 | 
						|
/// Parses the prolog: an optional XML declaration followed by misc items
/// (comments, whitespace, processing instructions).
///
/// When no declaration can be parsed, version "1.0", encoding "UTF-8" and
/// standalone `False` are assumed. No document type is parsed, so the
/// second tuple element is always `None`.
fn parse_prolog(
  doc: String,
) -> Result(#(Declaration, Option(DocType), String), Nil) {
  let fallback = #(Declaration("1.0", "UTF-8", False), doc)
  let #(decl, rest) = result.unwrap(parse_decl(doc), fallback)

  Ok(#(decl, None, parse_misc(rest)))
}
 | 
						|
 | 
						|
/// Skips any run of comments, whitespace characters and processing
/// instructions, returning the input that follows them.
fn parse_misc(doc: String) -> String {
  // Adapts `parse_space` to the element-returning parser shape.
  let whitespace = fn(input) {
    case parse_space(input) {
      Ok(#(_, rest)) -> Ok(#(Whitespace, rest))
      Error(_) -> Error(Nil)
    }
  }

  case try_parsers([parse_comment, whitespace, parse_pi], doc) {
    Ok(#(_skipped, rest)) -> parse_misc(rest)
    Error(_) -> doc
  }
}
 | 
						|
 | 
						|
/// Parses the XML declaration `<?xml version=... encoding=... standalone=... ?>`.
///
/// The version is required. A missing encoding becomes "" and a missing
/// standalone flag becomes `False`.
fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "<?xml" <> after_target -> {
      use #(versioninfo, rest) <- result.try(parse_versioninfo(after_target))

      let #(encoding, rest) =
        result.unwrap(parse_encodingdecl(rest), #("", rest))
      let #(standalone, rest) =
        result.unwrap(parse_standalone(rest), #(False, rest))

      case trim_space(rest) {
        "?>" <> rest ->
          Ok(#(Declaration(versioninfo:, encoding:, standalone:), rest))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Parses `version=<quoted version>` after optional leading whitespace
/// and returns the version number text.
fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
  case trim_space(doc) {
    "version=" <> value -> parse_version(value)
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Parses a quoted version number starting with `1.`, in either single
/// or double quotes. The closing quote must match the opening one.
fn parse_version(doc: String) -> Result(#(String, String), Nil) {
  // Work out which quote opened the literal.
  let opened = case doc {
    "\"1." <> rest -> Ok(#("\"", rest))
    "'1." <> rest -> Ok(#("'", rest))
    _ -> Error(Nil)
  }
  use #(quote, rest) <- result.try(opened)
  use #(version, rest) <- result.try(do_parse_version(rest, "1."))

  case quote, rest {
    "\"", "\"" <> rest -> Ok(#(version, rest))
    "'", "'" <> rest -> Ok(#(version, rest))
    _, _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Appends decimal digits from the front of `doc` to `version`.
///
/// Callers seed `version` with the "1." prefix. At least one digit must
/// follow it (`VersionNum ::= '1.' [0-9]+`), so the result is an error
/// when the accumulated text is empty or still ends in "." once the
/// digits run out.
fn do_parse_version(
  doc: String,
  version: String,
) -> Result(#(String, String), Nil) {
  case parse_digit(doc) {
    Ok(#(digit, doc)) -> do_parse_version(doc, version <> digit)
    Error(_) ->
      case version == "" || string.ends_with(version, ".") {
        True -> Error(Nil)
        False -> Ok(#(version, doc))
      }
  }
}
 | 
						|
 | 
						|
/// Parses `encoding=<quoted name>` after optional leading whitespace and
/// returns the encoding name. The closing quote must match the opening one.
fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
  case trim_space(doc) {
    "encoding=\"" <> value -> {
      use #(encoding, rest) <- result.try(parse_encoding(value))
      case rest {
        "\"" <> rest -> Ok(#(encoding, rest))
        _ -> Error(Nil)
      }
    }
    "encoding='" <> value -> {
      use #(encoding, rest) <- result.try(parse_encoding(value))
      case rest {
        "'" <> rest -> Ok(#(encoding, rest))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Parses an encoding name: an ASCII letter followed by any number of
/// ASCII letters, digits, `.`, `_` or `-`.
fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
  use #(first, rest) <- result.try(parse_alpha(doc))

  let punctuation = fn(input) {
    case input {
      "." <> tail -> Ok(#(".", tail))
      "_" <> tail -> Ok(#("_", tail))
      "-" <> tail -> Ok(#("-", tail))
      _ -> Error(Nil)
    }
  }
  let name_char = try_parsers([parse_alpha, parse_digit, punctuation], _)

  Ok(parse_multiple_optional(rest, name_char, first))
}
 | 
						|
 | 
						|
/// Parses `standalone="yes"` or `standalone="no"` (single or double
/// quotes) after optional leading whitespace.
///
/// Returns `True` for "yes" and `False` for "no".
fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
  case trim_space(doc) {
    "standalone=\"yes\"" <> tail | "standalone='yes'" <> tail ->
      Ok(#(True, tail))
    "standalone=\"no\"" <> tail | "standalone='no'" <> tail ->
      Ok(#(False, tail))
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Takes one ASCII decimal digit from the front of `doc`.
fn parse_digit(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    "0" <> rest -> Ok(#("0", rest))
    "1" <> rest -> Ok(#("1", rest))
    "2" <> rest -> Ok(#("2", rest))
    "3" <> rest -> Ok(#("3", rest))
    "4" <> rest -> Ok(#("4", rest))
    "5" <> rest -> Ok(#("5", rest))
    "6" <> rest -> Ok(#("6", rest))
    "7" <> rest -> Ok(#("7", rest))
    "8" <> rest -> Ok(#("8", rest))
    "9" <> rest -> Ok(#("9", rest))
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Takes one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) from the front
/// of `str`.
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
  case str {
    // Decimal digits.
    "0" as hex <> rest | "1" as hex <> rest | "2" as hex <> rest
    | "3" as hex <> rest | "4" as hex <> rest | "5" as hex <> rest
    | "6" as hex <> rest | "7" as hex <> rest | "8" as hex <> rest
    | "9" as hex <> rest
    // Lowercase letters.
    | "a" as hex <> rest | "b" as hex <> rest | "c" as hex <> rest
    | "d" as hex <> rest | "e" as hex <> rest | "f" as hex <> rest
    // Uppercase letters.
    | "A" as hex <> rest | "B" as hex <> rest | "C" as hex <> rest
    | "D" as hex <> rest | "E" as hex <> rest | "F" as hex <> rest
    -> Ok(#(hex, rest))
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Takes one ASCII letter (`a-z` or `A-Z`) from the front of `doc`.
fn parse_alpha(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    // Lowercase.
    "a" as letter <> rest | "b" as letter <> rest | "c" as letter <> rest
    | "d" as letter <> rest | "e" as letter <> rest | "f" as letter <> rest
    | "g" as letter <> rest | "h" as letter <> rest | "i" as letter <> rest
    | "j" as letter <> rest | "k" as letter <> rest | "l" as letter <> rest
    | "m" as letter <> rest | "n" as letter <> rest | "o" as letter <> rest
    | "p" as letter <> rest | "q" as letter <> rest | "r" as letter <> rest
    | "s" as letter <> rest | "t" as letter <> rest | "u" as letter <> rest
    | "v" as letter <> rest | "w" as letter <> rest | "x" as letter <> rest
    | "y" as letter <> rest | "z" as letter <> rest
    // Uppercase.
    | "A" as letter <> rest | "B" as letter <> rest | "C" as letter <> rest
    | "D" as letter <> rest | "E" as letter <> rest | "F" as letter <> rest
    | "G" as letter <> rest | "H" as letter <> rest | "I" as letter <> rest
    | "J" as letter <> rest | "K" as letter <> rest | "L" as letter <> rest
    | "M" as letter <> rest | "N" as letter <> rest | "O" as letter <> rest
    | "P" as letter <> rest | "Q" as letter <> rest | "R" as letter <> rest
    | "S" as letter <> rest | "T" as letter <> rest | "U" as letter <> rest
    | "V" as letter <> rest | "W" as letter <> rest | "X" as letter <> rest
    | "Y" as letter <> rest | "Z" as letter <> rest
    -> Ok(#(letter, rest))
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Parses a comment `<!-- text -->` into a `Comment` node.
fn parse_comment(doc: String) -> Result(#(Element, String), Nil) {
  case doc {
    "<!--" <> body ->
      case do_parse_comment(body) {
        #(text, "-->" <> rest) -> Ok(#(Comment(text), rest))
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Reads comment text: any characters other than `-`, plus single dashes
/// that are directly followed by a non-dash character. Stops in front of
/// the first `--`, or wherever `parse_char` rejects a character.
fn do_parse_comment(doc: String) -> #(String, String) {
  // A lone dash followed by one ordinary character.
  let dash_then_char = fn(input) {
    case input {
      "-" <> after_dash -> {
        use #(char, rest) <- result.try(parse_char_except_dash(after_dash))
        Ok(#("-" <> char, rest))
      }
      _ -> Error(Nil)
    }
  }
  let comment_piece = try_parsers([parse_char_except_dash, dash_then_char], _)

  parse_multiple_optional(doc, comment_piece, "")
}
 | 
						|
 | 
						|
/// Like `parse_char`, but fails when the input starts with `-`.
fn parse_char_except_dash(doc: String) -> Result(#(String, String), Nil) {
  case string.starts_with(doc, "-") {
    True -> Error(Nil)
    False -> parse_char(doc)
  }
}
 | 
						|
 | 
						|
/// Takes one grapheme from the front of `doc` if every code point in it
/// is a legal XML character (tab, LF, CR, U+0020-U+D7FF, U+E000-U+FFFD or
/// U+10000-U+10FFFF).
///
/// A grapheme may consist of several code points (for example a letter
/// followed by a combining mark, or "\r\n"), so each one is checked
/// rather than asserting that there is exactly one.
fn parse_char(doc: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(doc) {
    Ok(#(char, tail)) -> {
      let legal =
        char
        |> string.to_utf_codepoints
        |> list.all(fn(codepoint) {
          case string.utf_codepoint_to_int(codepoint) {
            i if i == 0x9 || i == 0xA || i == 0xD -> True
            i if i >= 0x20 && i <= 0xD7FF -> True
            i if i >= 0xE000 && i <= 0xFFFD -> True
            i if i >= 0x10000 && i <= 0x10FFFF -> True
            _ -> False
          }
        })

      case legal {
        True -> Ok(#(char, tail))
        False -> Error(Nil)
      }
    }
    Error(_) -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Takes one grapheme from the front of `doc` if it can begin an XML name.
///
/// The first code point of the grapheme must be a name-start character
/// (`:`, `_`, an ASCII letter, or one of the listed Unicode ranges). A
/// grapheme may carry further code points such as combining marks; each
/// of those must be accepted by `parse_name_char`. Checking them this way
/// replaces an assertion that crashed on multi-code-point graphemes.
fn parse_name_start_char(doc: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(doc) {
    Ok(#(char, tail)) ->
      case string.to_utf_codepoints(char) {
        [first, ..extras] -> {
          let starts_name = case string.utf_codepoint_to_int(first) {
            i if i == 0x3A || i == 0x5F -> True
            i if i >= 0x41 && i <= 0x5A -> True
            i if i >= 0x61 && i <= 0x7A -> True
            i if i >= 0xC0 && i <= 0xD6 -> True
            i if i >= 0xD8 && i <= 0xF6 -> True
            i if i >= 0xF8 && i <= 0x2FF -> True
            i if i >= 0x370 && i <= 0x37D -> True
            i if i >= 0x37F && i <= 0x1FFF -> True
            i if i >= 0x200C && i <= 0x200D -> True
            i if i >= 0x2070 && i <= 0x218F -> True
            i if i >= 0x2C00 && i <= 0x2FEF -> True
            i if i >= 0x3000 && i <= 0xD7FF -> True
            i if i >= 0xF900 && i <= 0xFDCF -> True
            i if i >= 0xFDF0 && i <= 0xFFFD -> True
            i if i >= 0x10000 && i <= 0xEFFFF -> True
            _ -> False
          }

          // Each trailing code point is checked on its own as a
          // one-code-point string.
          let extras_ok =
            list.all(extras, fn(codepoint) {
              [codepoint]
              |> string.from_utf_codepoints
              |> parse_name_char
              |> result.is_ok
            })

          case starts_name && extras_ok {
            True -> Ok(#(char, tail))
            False -> Error(Nil)
          }
        }
        [] -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Takes one grapheme from the front of `doc` if every code point in it
/// may appear inside an XML name (`:`, `_`, `-`, `.`, ASCII letters and
/// digits, U+00B7, or one of the listed Unicode ranges).
///
/// A grapheme may consist of several code points (for example a letter
/// followed by a combining mark), so each one is checked rather than
/// asserting that there is exactly one.
fn parse_name_char(doc: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(doc) {
    Ok(#(char, tail)) -> {
      let allowed =
        char
        |> string.to_utf_codepoints
        |> list.all(fn(codepoint) {
          case string.utf_codepoint_to_int(codepoint) {
            // ':', '_', '-', '.'
            i if i == 0x3A || i == 0x5F || i == 0x2D || i == 0x2E -> True
            i if i >= 0x30 && i <= 0x39 -> True
            i if i == 0xB7 -> True
            i if i >= 0x41 && i <= 0x5A -> True
            i if i >= 0x61 && i <= 0x7A -> True
            i if i >= 0xC0 && i <= 0xD6 -> True
            i if i >= 0xD8 && i <= 0xF6 -> True
            i if i >= 0xF8 && i <= 0x37D -> True
            i if i >= 0x37F && i <= 0x1FFF -> True
            i if i >= 0x200C && i <= 0x200D -> True
            i if i >= 0x203F && i <= 0x2040 -> True
            i if i >= 0x2070 && i <= 0x218F -> True
            i if i >= 0x2C00 && i <= 0x2FEF -> True
            i if i >= 0x3000 && i <= 0xD7FF -> True
            i if i >= 0xF900 && i <= 0xFDCF -> True
            i if i >= 0xFDF0 && i <= 0xFFFD -> True
            i if i >= 0x10000 && i <= 0xEFFFF -> True
            _ -> False
          }
        })

      case allowed {
        True -> Ok(#(char, tail))
        False -> Error(Nil)
      }
    }
    Error(_) -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Drops every leading space, tab, line feed and carriage return from
/// `doc`.
fn trim_space(doc: String) -> String {
  case doc {
    " " <> rest | "\t" <> rest | "\n" <> rest | "\r" <> rest -> trim_space(rest)
    _ -> doc
  }
}
 | 
						|
 | 
						|
/// Takes one whitespace character (space, tab, line feed or carriage
/// return) from the front of `doc`.
fn parse_space(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    " " <> rest -> Ok(#(" ", rest))
    "\t" <> rest -> Ok(#("\t", rest))
    "\n" <> rest -> Ok(#("\n", rest))
    "\r" <> rest -> Ok(#("\r", rest))
    _ -> Error(Nil)
  }
}
 | 
						|
 | 
						|
/// Runs each parser in order against the same input and returns the
/// first successful result, or `Error(Nil)` when every parser fails or
/// the list is empty.
fn try_parsers(
  over parsers: List(fn(String) -> Result(#(a, String), Nil)),
  against input: String,
) -> Result(#(a, String), Nil) {
  list.find_map(parsers, fn(parser) { parser(input) })
}
 | 
						|
 | 
						|
/// Applies `to_run` repeatedly and concatenates what it yields.
///
/// Fails with `Error(Nil)` when the concatenated result is empty.
pub fn parse_multiple(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
) -> Result(#(String, String), Nil) {
  let #(parsed, rest) = parse_multiple_optional(str, to_run, "")

  case parsed {
    "" -> Error(Nil)
    _ -> Ok(#(parsed, rest))
  }
}
 | 
						|
 | 
						|
/// Applies `to_run` repeatedly, appending each yielded piece to `ret`,
/// until the input is empty or `to_run` fails.
///
/// Returns the accumulated text and the input left at the stopping
/// point. The parser is never called on an empty input.
fn parse_multiple_optional(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
  acc ret: String,
) -> #(String, String) {
  case str == "" {
    True -> #(ret, str)
    False ->
      case to_run(str) {
        Error(_) -> #(ret, str)
        Ok(#(piece, rest)) -> parse_multiple_optional(rest, to_run, ret <> piece)
      }
  }
}
 |