// 858 lines · 22 KiB · Gleam
import gleam/bool
|
|
import gleam/dict
|
|
import gleam/int
|
|
import gleam/list
|
|
import gleam/option.{type Option, None, Some}
|
|
import gleam/result
|
|
import gleam/string
|
|
|
|
/// The XML declaration (`<?xml ... ?>`) of a document.
pub type Declaration {
  Declaration(
    // Version number read from the `version=` pseudo-attribute.
    versioninfo: String,
    // Encoding name read from the `encoding=` pseudo-attribute.
    encoding: String,
    // Flag read from the `standalone=` pseudo-attribute.
    standalone: Bool,
  )
}
|
|
|
|
/// An entity that can be the target of an `&name;` reference.
pub type Entity {
  /// Entity whose replacement text is stored inline.
  InternalEntity(value: String)
  /// External entity identified by a system literal only.
  SystemExternalEntity(literal: String)
  /// External entity identified by a system literal and a public identifier.
  PublicExternalEntity(literal: String, pubidliteral: String)
}
|
|
|
|
/// A document type declaration: its name plus the entities it declares,
/// keyed by entity name.
pub type DocType {
  DocType(
    name: String,
    entities: dict.Dict(String, Entity),
  )
}
|
|
|
|
/// A parsed document.
///
/// `doctype` and `element` are optional: `parse_document` yields
/// `element: None` when nothing is left after the prolog.
pub type Document {
  Document(decl: Declaration, doctype: Option(DocType), element: Option(Element))
}
|
|
|
|
/// A single `name="value"` pair taken from a start tag or empty-element tag.
pub type Attribute {
  Attribute(
    name: String,
    value: String,
  )
}
|
|
|
|
/// A node of the parsed document tree.
pub type Element {
  /// Self-closing tag such as `<a x='1'/>`.
  EmptyElem(name: String, attrs: List(Attribute))
  /// Start tag / end tag pair together with everything between them.
  Element(
    name: String,
    attrs: List(Attribute),
    elements: List(Element),
  )
  /// Character data found between markup.
  Text(content: String)
  /// Body of a `<!-- ... -->` comment.
  Comment(content: String)
  /// Body of a `<![CDATA[ ... ]]>` section.
  CData(content: String)
  /// Processing instruction: target name and the text after it.
  PI(name: String, content: String)
  /// Placeholder produced by `parse_misc` for a whitespace character.
  Whitespace
}
|
|
|
|
/// Demo entry point: runs `parse_document` on a hard-coded sample and prints
/// the result with `echo`.
pub fn main() {
  let sample =
    "<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<b><a attr='ha &#38;ha' battr='baba' ref='&'/><!-- ma comment --><![CDATA[Testing&&<haha>]]><?test asuhashd ?></b>"

  sample
  |> parse_document
  |> echo
}
|
|
|
|
/// Returns the five entities every XML document may reference without
/// declaring them: `lt`, `gt`, `amp`, `apos` and `quot`.
///
/// Each replacement text is a character reference rather than the raw
/// character. `parse_reference` pushes the replacement text of a named
/// entity back onto the input and scans it again, so a raw `<`, `&` or quote
/// would be re-read as markup or as an attribute delimiter. The character
/// reference form survives that second scan and finally yields the literal
/// character.
pub fn default_entities() -> dict.Dict(String, Entity) {
  dict.from_list([
    #("lt", InternalEntity("&#60;")),
    #("gt", InternalEntity("&#62;")),
    #("amp", InternalEntity("&#38;")),
    #("apos", InternalEntity("&#39;")),
    #("quot", InternalEntity("&#34;")),
  ])
}
|
|
|
|
/// Parses a whole document: prolog, then (if any input remains) one root
/// element followed by trailing comments, whitespace and processing
/// instructions.
///
/// Returns `Error(Nil)` when the prolog or root element fails to parse, or
/// when unparsed input remains after the trailing misc items.
fn parse_document(doc: String) -> Result(Document, Nil) {
  use #(decl, doctype, after_prolog) <- result.try(parse_prolog(doc))

  case after_prolog {
    // Nothing after the prolog: a document without a root element.
    "" -> Ok(Document(decl, doctype, None))
    _ -> {
      use #(root, after_root) <- result.try(parse_element(after_prolog, doctype))
      case parse_misc(after_root) {
        "" -> Ok(Document(decl, doctype, Some(root)))
        _ -> Error(Nil)
      }
    }
  }
}
|
|
|
|
/// Parses one element, trying the self-closing form first and falling back
/// to the start-tag / content / end-tag form.
fn parse_element(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
  case parse_empty_elem(doc, doctype) {
    Ok(parsed) -> Ok(parsed)
    Error(_) -> parse_tagged_elem(doc, doctype)
  }
}
|
|
|
|
/// Parses a self-closing tag: `<`, a name, zero or more attributes, optional
/// whitespace, then `/>`.
///
/// On success returns the `EmptyElem` and the input following `/>`.
fn parse_empty_elem(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
  use after_lt <- result.try(case doc {
    "<" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(name, after_name) <- result.try(parse_name(after_lt))
  use #(attrs, after_attrs) <- result.try(
    parse_attributes(after_name, doctype, []),
  )

  case trim_space(after_attrs) {
    "/>" <> rest -> Ok(#(EmptyElem(name, attrs), rest))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Parses an element written as start tag, content, end tag.
///
/// The end tag name must equal the start tag name; any mismatch or missing
/// delimiter yields `Error(Nil)`. On success returns the `Element` and the
/// input following the end tag's `>`.
fn parse_tagged_elem(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
  // Start tag: "<" Name Attribute* S? ">"
  use after_lt <- result.try(case doc {
    "<" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(name, after_name) <- result.try(parse_name(after_lt))
  use #(attrs, after_attrs) <- result.try(
    parse_attributes(after_name, doctype, []),
  )
  use body <- result.try(case trim_space(after_attrs) {
    ">" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })

  // Content up to the first thing that is not a child node.
  use #(children, after_body) <- result.try(parse_content(body, doctype, []))

  // End tag: "</" Name S? ">"
  use closing <- result.try(case after_body {
    "</" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(close_name, after_close_name) <- result.try(parse_name(closing))

  case trim_space(after_close_name) {
    ">" <> rest if name == close_name ->
      Ok(#(Element(name, attrs, children), rest))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Parses the content of an element: runs of character data interleaved with
/// child elements, comments, CDATA sections and processing instructions.
///
/// `content` is the accumulator of nodes parsed so far, newest first. The
/// function stops at the first `<` that none of the child parsers accept
/// (normally the enclosing end tag) and returns the nodes in document order
/// together with the unconsumed input.
fn parse_content(
  doc: String,
  doctype: Option(DocType),
  content: List(Element),
) -> Result(#(List(Element), String), Nil) {
  use #(chardata, doc) <- result.try(parse_chardata(doc, doctype, ""))

  // Shadow the accumulator so that every later use, including the recursive
  // call, sees the text node. Recursing with the old list silently dropped
  // any text that came directly before a child node.
  let content = case chardata {
    "" -> content
    _ -> [Text(chardata), ..content]
  }

  case doc {
    "<" <> _ -> {
      let child =
        try_parsers(
          [parse_element(_, doctype), parse_comment, parse_cdata, parse_pi],
          doc,
        )
      case child {
        Ok(#(element, rest)) ->
          parse_content(rest, doctype, [element, ..content])
        // Not a child node (e.g. an end tag): leave it for the caller.
        Error(_) -> Ok(#(list.reverse(content), doc))
      }
    }
    _ -> Ok(#(list.reverse(content), doc))
  }
}
|
|
|
|
/// Parses a processing instruction `<?target content?>`.
///
/// A target equal to "xml" in any letter case is rejected. Whitespace between
/// the target and the content is discarded.
fn parse_pi(doc: String) -> Result(#(Element, String), Nil) {
  use after_open <- result.try(case doc {
    "<?" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(target, after_target) <- result.try(parse_name(after_open))
  use <- bool.guard(when: string.lowercase(target) == "xml", return: Error(Nil))
  use #(content, at_close) <- result.try(
    parse_pi_content(trim_space(after_target), ""),
  )

  case at_close {
    "?>" <> rest -> Ok(#(PI(target, content), rest))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Accumulates processing-instruction text into `pi` until the input starts
/// with `?>` (which is left unconsumed).
///
/// Running out of input is an error. A character rejected by `parse_char`
/// ends the scan with what has been collected so far.
fn parse_pi_content(doc: String, pi: String) -> Result(#(String, String), Nil) {
  case doc {
    "" -> Error(Nil)
    "?>" <> _ -> Ok(#(pi, doc))
    _ ->
      case parse_char(doc) {
        Error(_) -> Ok(#(pi, doc))
        Ok(#(next, rest)) -> parse_pi_content(rest, pi <> next)
      }
  }
}
|
|
|
|
/// Parses a `<![CDATA[ ... ]]>` section into a `CData` node.
fn parse_cdata(doc: String) -> Result(#(Element, String), Nil) {
  case doc {
    "<![CDATA[" <> body -> {
      use #(text, rest) <- result.map(do_parse_cdata(body, ""))
      #(CData(text), rest)
    }
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Collects CDATA text into `cdata` until `]]>`, which is consumed.
///
/// Fails if the input ends first or `parse_char` rejects a character.
fn do_parse_cdata(doc: String, cdata: String) -> Result(#(String, String), Nil) {
  case doc {
    "" -> Error(Nil)
    "]]>" <> rest -> Ok(#(cdata, rest))
    _ ->
      case parse_char(doc) {
        Error(_) -> Error(Nil)
        Ok(#(next, rest)) -> do_parse_cdata(rest, cdata <> next)
      }
  }
}
|
|
|
|
/// Collects character data into `chardata` until the next `<` or the end of
/// the input, expanding `&...;` references on the way.
///
/// The literal sequence `]]>` is rejected. On success returns the collected
/// text and the unconsumed input (which starts with `<` or is empty).
fn parse_chardata(
  doc: String,
  doctype: Option(DocType),
  chardata: String,
) -> Result(#(String, String), Nil) {
  case doc {
    "]]>" <> _ -> Error(Nil)
    "<" <> _ -> Ok(#(chardata, doc))
    // End of input: hand back what was collected instead of discarding it.
    "" -> Ok(#(chardata, ""))
    "&" <> _ ->
      case parse_reference(doc, doctype) {
        Ok(#(refval, rest)) -> parse_chardata(rest, doctype, chardata <> refval)
        Error(_) -> Error(Nil)
      }
    _ ->
      case string.pop_grapheme(doc) {
        Ok(#(char, rest)) -> parse_chardata(rest, doctype, chardata <> char)
        // Unreachable for non-empty input; kept so the match is total
        // without resorting to `let assert`.
        Error(_) -> Ok(#(chardata, doc))
      }
  }
}
|
|
|
|
/// Parses as many attributes as possible, accumulating them in `attrs`
/// (newest first) and returning them in source order.
///
/// Never fails: the first attribute that does not parse ends the list and
/// the input is returned from that point.
fn parse_attributes(
  doc: String,
  doctype: Option(DocType),
  attrs: List(Attribute),
) -> Result(#(List(Attribute), String), Nil) {
  case parse_attribute(doc, doctype) {
    Error(_) -> Ok(#(list.reverse(attrs), doc))
    Ok(#(attribute, rest)) ->
      parse_attributes(rest, doctype, [attribute, ..attrs])
  }
}
|
|
|
|
/// Parses one attribute: optional leading whitespace, a name, `=` (optionally
/// surrounded by whitespace, as the XML `Eq` production allows) and a value
/// quoted with either `"` or `'`.
///
/// On success returns the `Attribute` and the input after the closing quote.
fn parse_attribute(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Attribute, String), Nil) {
  use #(name, doc) <- result.try(parse_name(trim_space(doc)))

  case trim_space(doc) {
    "=" <> after_eq ->
      case trim_space(after_eq) {
        "\"" <> body -> parse_quoted_attr_value(name, body, "\"", doctype)
        "'" <> body -> parse_quoted_attr_value(name, body, "'", doctype)
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}

/// Parses the body of an attribute value up to and including the closing
/// `quote`, which must be `"\""` or `"'"`.
fn parse_quoted_attr_value(
  name: String,
  doc: String,
  quote: String,
  doctype: Option(DocType),
) -> Result(#(Attribute, String), Nil) {
  // Any character except "<", "&" and the active quote is taken literally.
  let plain_char = fn(input) {
    case string.pop_grapheme(input) {
      Ok(#(char, _)) if char == "<" || char == "&" || char == quote ->
        Error(Nil)
      Ok(#(char, rest)) -> Ok(#(char, rest))
      Error(_) -> Error(Nil)
    }
  }

  let #(value, rest) =
    parse_multiple_optional(
      doc,
      try_parsers([plain_char, parse_reference(_, doctype)], _),
      "",
    )

  // The value must be terminated by the same quote that opened it.
  case quote, rest {
    "\"", "\"" <> tail | "'", "'" <> tail -> Ok(#(Attribute(name, value), tail))
    _, _ -> Error(Nil)
  }
}
|
|
|
|
/// Parses a reference at the start of `doc`.
///
/// * `&#x...;` and `&#...;` are decoded to the character they name and
///   returned as the parsed string.
/// * `&name;` is looked up with `process_reference`; its replacement text is
///   put back in front of the remaining input and the parsed string is `""`,
///   so the caller scans the replacement text next.
fn parse_reference(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(String, String), Nil) {
  case doc {
    "&#x" <> digits -> parse_char_reference(digits, parse_hex_digit, 16)
    "&#" <> digits -> parse_char_reference(digits, parse_digit, 10)
    "&" <> after_amp -> {
      use #(name, after_name) <- result.try(parse_name(after_amp))
      case after_name {
        ";" <> rest -> {
          use replacement <- result.map(process_reference(name, doctype))
          #("", replacement <> rest)
        }
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}

/// Reads one or more digits accepted by `digit_parser`, requires a `;`, and
/// converts the number (in the given `base`) to a one-character string.
fn parse_char_reference(
  doc: String,
  digit_parser: fn(String) -> Result(#(String, String), Nil),
  base: Int,
) -> Result(#(String, String), Nil) {
  use #(digits, after_digits) <- result.try(parse_multiple(doc, digit_parser))

  case after_digits {
    ";" <> rest -> {
      use value <- result.try(int.base_parse(digits, base))
      use codepoint <- result.try(string.utf_codepoint(value))
      Ok(#(string.from_utf_codepoints([codepoint]), rest))
    }
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Resolves the entity named `ref`, searching the entities of `doctype` when
/// one is present and the default entities otherwise.
fn process_reference(
  ref: String,
  doctype: Option(DocType),
) -> Result(String, Nil) {
  let entities = case doctype {
    None -> default_entities()
    Some(DocType(_, declared)) -> declared
  }

  get_reference(entities, ref)
}
|
|
|
|
/// Looks up `ref` in `entities` and returns its replacement text.
///
/// External entities always yield `Error(Nil)`. A name missing from
/// `entities` is retried once against the default entities, unless
/// `entities` already is that table.
fn get_reference(
  entities: dict.Dict(String, Entity),
  ref: String,
) -> Result(String, Nil) {
  case dict.get(entities, ref) {
    Ok(InternalEntity(replacement)) -> Ok(replacement)
    Ok(SystemExternalEntity(_)) | Ok(PublicExternalEntity(_, _)) -> Error(Nil)
    Error(_) -> {
      let builtin = default_entities()
      use <- bool.guard(when: entities == builtin, return: Error(Nil))
      get_reference(builtin, ref)
    }
  }
}
|
|
|
|
/// Parses an XML name: one name-start character followed by any number of
/// name characters.
fn parse_name(doc: String) -> Result(#(String, String), Nil) {
  use #(first, rest) <- result.try(parse_name_start_char(doc))
  do_parse_name(rest, first)
}
|
|
|
|
/// Extends `name` with name characters for as long as they match; always
/// succeeds with the name built so far and the remaining input.
fn do_parse_name(doc: String, name: String) -> Result(#(String, String), Nil) {
  case parse_name_char(doc) {
    Error(_) -> Ok(#(name, doc))
    Ok(#(next, rest)) -> do_parse_name(rest, name <> next)
  }
}
|
|
|
|
/// Parses the prolog: an optional XML declaration followed by comments,
/// whitespace and processing instructions.
///
/// When no declaration parses, version "1.0", encoding "UTF-8" and
/// standalone `False` are used. The doctype slot is always `None` here; no
/// DOCTYPE parser is invoked.
fn parse_prolog(
  doc: String,
) -> Result(#(Declaration, Option(DocType), String), Nil) {
  let fallback = #(Declaration("1.0", "UTF-8", False), doc)
  let #(decl, after_decl) = result.unwrap(parse_decl(doc), fallback)

  Ok(#(decl, None, parse_misc(after_decl)))
}
|
|
|
|
/// Skips any run of comments, whitespace characters and processing
/// instructions and returns the input that follows them.
fn parse_misc(doc: String) -> String {
  // Adapts `parse_space` to the element-returning shape `try_parsers` needs.
  let whitespace = fn(input) {
    use #(_, rest) <- result.map(parse_space(input))
    #(Whitespace, rest)
  }

  case try_parsers([parse_comment, whitespace, parse_pi], doc) {
    Error(Nil) -> doc
    Ok(#(_skipped, rest)) -> parse_misc(rest)
  }
}
|
|
|
|
/// Parses the XML declaration `<?xml version=... [encoding=...]
/// [standalone=...] ?>`.
///
/// The version is mandatory. A missing encoding is recorded as `""` and a
/// missing standalone flag as `False`.
fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
  use after_target <- result.try(case doc {
    "<?xml" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(versioninfo, after_version) <- result.try(
    parse_versioninfo(after_target),
  )

  let #(encoding, after_encoding) =
    result.unwrap(parse_encodingdecl(after_version), #("", after_version))
  let #(standalone, after_standalone) =
    result.unwrap(parse_standalone(after_encoding), #(False, after_encoding))

  case trim_space(after_standalone) {
    "?>" <> rest ->
      Ok(#(
        Declaration(
          versioninfo: versioninfo,
          encoding: encoding,
          standalone: standalone,
        ),
        rest,
      ))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Parses `version=` followed by a quoted version number, after skipping
/// leading whitespace.
fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
  case trim_space(doc) {
    "version=" <> quoted -> parse_version(quoted)
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Parses a quoted version number of the form `1.` followed by one or more
/// digits, e.g. `"1.0"` or `'1.1'`, and returns it without the quotes.
///
/// The closing quote must match the opening one.
fn parse_version(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    "\"1." <> tail -> {
      // Start from an empty accumulator so that `do_parse_version` rejects a
      // version with no digits after "1.".
      use #(minor, rest) <- result.try(do_parse_version(tail, ""))
      case rest {
        "\"" <> rest -> Ok(#("1." <> minor, rest))
        _ -> Error(Nil)
      }
    }
    "'1." <> tail -> {
      use #(minor, rest) <- result.try(do_parse_version(tail, ""))
      case rest {
        "'" <> rest -> Ok(#("1." <> minor, rest))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Appends decimal digits to `version` for as long as they match.
///
/// Fails only when no digit is available and `version` is still empty.
fn do_parse_version(
  doc: String,
  version: String,
) -> Result(#(String, String), Nil) {
  case parse_digit(doc), version {
    Ok(#(digit, rest)), _ -> do_parse_version(rest, version <> digit)
    Error(_), "" -> Error(Nil)
    Error(_), _ -> Ok(#(version, doc))
  }
}
|
|
|
|
/// Parses `encoding=` followed by an encoding name in matching single or
/// double quotes, after skipping leading whitespace.
fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
  case trim_space(doc) {
    "encoding=\"" <> value -> {
      use #(encoding, after_name) <- result.try(parse_encoding(value))
      case after_name {
        "\"" <> rest -> Ok(#(encoding, rest))
        _ -> Error(Nil)
      }
    }
    "encoding='" <> value -> {
      use #(encoding, after_name) <- result.try(parse_encoding(value))
      case after_name {
        "'" <> rest -> Ok(#(encoding, rest))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Parses an encoding name: an ASCII letter followed by any mix of ASCII
/// letters, digits, `.`, `_` and `-`.
fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
  use #(first, rest) <- result.map(parse_alpha(doc))

  let punctuation = fn(input) {
    case input {
      "." <> tail -> Ok(#(".", tail))
      "_" <> tail -> Ok(#("_", tail))
      "-" <> tail -> Ok(#("-", tail))
      _ -> Error(Nil)
    }
  }

  parse_multiple_optional(
    rest,
    try_parsers([parse_alpha, parse_digit, punctuation], _),
    first,
  )
}
|
|
|
|
/// Parses the `standalone` pseudo-attribute of the XML declaration, after
/// skipping leading whitespace.
///
/// Returns `True` for `yes` and `False` for `no`; either quote style is
/// accepted. Anything else is `Error(Nil)`.
fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
  case trim_space(doc) {
    "standalone=\"yes\"" <> tail | "standalone='yes'" <> tail ->
      Ok(#(True, tail))
    // "no" must map to False; it previously reported True like "yes".
    "standalone=\"no\"" <> tail | "standalone='no'" <> tail ->
      Ok(#(False, tail))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Consumes one ASCII decimal digit from the front of `doc`.
fn parse_digit(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    "0" as d <> rest | "1" as d <> rest | "2" as d <> rest
    | "3" as d <> rest | "4" as d <> rest | "5" as d <> rest
    | "6" as d <> rest | "7" as d <> rest | "8" as d <> rest
    | "9" as d <> rest
    -> Ok(#(d, rest))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Consumes one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) from the front
/// of `str`.
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
  case str {
    // Decimal digits
    "0" as d <> rest | "1" as d <> rest | "2" as d <> rest
    | "3" as d <> rest | "4" as d <> rest | "5" as d <> rest
    | "6" as d <> rest | "7" as d <> rest | "8" as d <> rest
    | "9" as d <> rest
    -> Ok(#(d, rest))
    // Lower-case letters
    "a" as d <> rest | "b" as d <> rest | "c" as d <> rest
    | "d" as d <> rest | "e" as d <> rest | "f" as d <> rest
    -> Ok(#(d, rest))
    // Upper-case letters
    "A" as d <> rest | "B" as d <> rest | "C" as d <> rest
    | "D" as d <> rest | "E" as d <> rest | "F" as d <> rest
    -> Ok(#(d, rest))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Consumes one ASCII letter (`a-z` or `A-Z`) from the front of `doc`.
fn parse_alpha(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    // Lower case
    "a" as c <> rest | "b" as c <> rest | "c" as c <> rest | "d" as c <> rest
    | "e" as c <> rest | "f" as c <> rest | "g" as c <> rest
    | "h" as c <> rest | "i" as c <> rest | "j" as c <> rest
    | "k" as c <> rest | "l" as c <> rest | "m" as c <> rest
    | "n" as c <> rest | "o" as c <> rest | "p" as c <> rest
    | "q" as c <> rest | "r" as c <> rest | "s" as c <> rest
    | "t" as c <> rest | "u" as c <> rest | "v" as c <> rest
    | "w" as c <> rest | "x" as c <> rest | "y" as c <> rest
    | "z" as c <> rest
    -> Ok(#(c, rest))
    // Upper case
    "A" as c <> rest | "B" as c <> rest | "C" as c <> rest | "D" as c <> rest
    | "E" as c <> rest | "F" as c <> rest | "G" as c <> rest
    | "H" as c <> rest | "I" as c <> rest | "J" as c <> rest
    | "K" as c <> rest | "L" as c <> rest | "M" as c <> rest
    | "N" as c <> rest | "O" as c <> rest | "P" as c <> rest
    | "Q" as c <> rest | "R" as c <> rest | "S" as c <> rest
    | "T" as c <> rest | "U" as c <> rest | "V" as c <> rest
    | "W" as c <> rest | "X" as c <> rest | "Y" as c <> rest
    | "Z" as c <> rest
    -> Ok(#(c, rest))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Parses a `<!-- ... -->` comment into a `Comment` node.
fn parse_comment(doc: String) -> Result(#(Element, String), Nil) {
  use body <- result.try(case doc {
    "<!--" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })

  case do_parse_comment(body) {
    #(text, "-->" <> rest) -> Ok(#(Comment(text), rest))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Collects comment text: any character other than `-`, or a single `-`
/// immediately followed by such a character. Stops at the first position
/// where neither applies (for example at `--`) and returns the collected
/// text with the remaining input.
fn do_parse_comment(doc: String) -> #(String, String) {
  // A lone dash is allowed only when the character after it is not a dash.
  let dash_then_char = fn(input) {
    case input {
      "-" <> after_dash -> {
        use #(next, rest) <- result.map(parse_char_except_dash(after_dash))
        #("-" <> next, rest)
      }
      _ -> Error(Nil)
    }
  }

  parse_multiple_optional(
    to_parse: doc,
    with: try_parsers([parse_char_except_dash, dash_then_char], _),
    acc: "",
  )
}
|
|
|
|
/// Like `parse_char`, but fails when the input starts with `-`.
fn parse_char_except_dash(doc: String) -> Result(#(String, String), Nil) {
  let starts_with_dash = case doc {
    "-" <> _ -> True
    _ -> False
  }
  use <- bool.guard(when: starts_with_dash, return: Error(Nil))
  parse_char(doc)
}
|
|
|
|
/// Consumes one grapheme cluster from the front of `doc`, provided every code
/// point in it is allowed by the XML `Char` production.
///
/// A cluster may contain several code points (for example `"\r\n"`, a letter
/// plus a combining mark, or an emoji sequence), so each one is checked
/// instead of asserting that there is exactly one.
fn parse_char(doc: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(doc) {
    Ok(#(grapheme, tail)) -> {
      let all_legal =
        grapheme
        |> string.to_utf_codepoints
        |> list.all(fn(codepoint) {
          is_xml_char(string.utf_codepoint_to_int(codepoint))
        })
      case all_legal {
        True -> Ok(#(grapheme, tail))
        False -> Error(Nil)
      }
    }
    Error(_) -> Error(Nil)
  }
}

/// True when `code` is in the XML `Char` production: tab, line feed, carriage
/// return, or one of the three permitted ranges.
fn is_xml_char(code: Int) -> Bool {
  code == 0x9
  || code == 0xA
  || code == 0xD
  || { code >= 0x20 && code <= 0xD7FF }
  || { code >= 0xE000 && code <= 0xFFFD }
  || { code >= 0x10000 && code <= 0x10FFFF }
}
|
|
|
|
/// Consumes one code point from the front of `doc` if it is allowed by the
/// XML `NameStartChar` production.
///
/// The input is split on grapheme clusters first, but names are defined in
/// terms of code points: when a cluster holds several code points (a letter
/// followed by a combining mark, say) only the first one is consumed and the
/// others are put back in front of the remaining input. Asserting a single
/// code point per cluster used to crash on such input.
fn parse_name_start_char(doc: String) -> Result(#(String, String), Nil) {
  use #(grapheme, tail) <- result.try(string.pop_grapheme(doc))

  case string.to_utf_codepoints(grapheme) {
    [first, ..others] ->
      case is_name_start_code(string.utf_codepoint_to_int(first)) {
        True ->
          Ok(#(
            string.from_utf_codepoints([first]),
            string.from_utf_codepoints(others) <> tail,
          ))
        False -> Error(Nil)
      }
    [] -> Error(Nil)
  }
}

/// True when `code` is in the XML `NameStartChar` production.
fn is_name_start_code(code: Int) -> Bool {
  // ":" and "_"
  code == 0x3A
  || code == 0x5F
  || { code >= 0x41 && code <= 0x5A }
  || { code >= 0x61 && code <= 0x7A }
  || { code >= 0xC0 && code <= 0xD6 }
  || { code >= 0xD8 && code <= 0xF6 }
  || { code >= 0xF8 && code <= 0x2FF }
  || { code >= 0x370 && code <= 0x37D }
  || { code >= 0x37F && code <= 0x1FFF }
  || { code >= 0x200C && code <= 0x200D }
  || { code >= 0x2070 && code <= 0x218F }
  || { code >= 0x2C00 && code <= 0x2FEF }
  // The range starts at U+3001; U+3000 (ideographic space) is not a name
  // character.
  || { code >= 0x3001 && code <= 0xD7FF }
  || { code >= 0xF900 && code <= 0xFDCF }
  || { code >= 0xFDF0 && code <= 0xFFFD }
  || { code >= 0x10000 && code <= 0xEFFFF }
}
|
|
|
|
/// Consumes one code point from the front of `doc` if it is allowed by the
/// XML `NameChar` production.
///
/// The input is split on grapheme clusters first, but names are defined in
/// terms of code points: when a cluster holds several code points only the
/// first one is consumed and the others are put back in front of the
/// remaining input. Asserting a single code point per cluster used to crash
/// on such input.
fn parse_name_char(doc: String) -> Result(#(String, String), Nil) {
  use #(grapheme, tail) <- result.try(string.pop_grapheme(doc))

  case string.to_utf_codepoints(grapheme) {
    [first, ..others] ->
      case is_name_code(string.utf_codepoint_to_int(first)) {
        True ->
          Ok(#(
            string.from_utf_codepoints([first]),
            string.from_utf_codepoints(others) <> tail,
          ))
        False -> Error(Nil)
      }
    [] -> Error(Nil)
  }
}

/// True when `code` is in the XML `NameChar` production.
fn is_name_code(code: Int) -> Bool {
  // "-", ".", ":", "_" and the middle dot
  code == 0x2D
  || code == 0x2E
  || code == 0x3A
  || code == 0x5F
  || code == 0xB7
  || { code >= 0x30 && code <= 0x39 }
  || { code >= 0x41 && code <= 0x5A }
  || { code >= 0x61 && code <= 0x7A }
  || { code >= 0xC0 && code <= 0xD6 }
  || { code >= 0xD8 && code <= 0xF6 }
  // Covers F8-2FF, the combining marks 300-36F, and 370-37D.
  || { code >= 0xF8 && code <= 0x37D }
  || { code >= 0x37F && code <= 0x1FFF }
  || { code >= 0x200C && code <= 0x200D }
  || { code >= 0x203F && code <= 0x2040 }
  || { code >= 0x2070 && code <= 0x218F }
  || { code >= 0x2C00 && code <= 0x2FEF }
  // The range starts at U+3001; U+3000 (ideographic space) is not a name
  // character.
  || { code >= 0x3001 && code <= 0xD7FF }
  || { code >= 0xF900 && code <= 0xFDCF }
  || { code >= 0xFDF0 && code <= 0xFFFD }
  || { code >= 0x10000 && code <= 0xEFFFF }
}
|
|
|
|
/// Drops every leading whitespace character recognised by `parse_space`.
fn trim_space(doc: String) -> String {
  case parse_space(doc) {
    Error(_) -> doc
    Ok(#(_, rest)) -> trim_space(rest)
  }
}
|
|
|
|
/// Consumes a single whitespace character (space, tab, line feed or carriage
/// return) from the front of `doc`.
fn parse_space(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    " " <> rest -> Ok(#(" ", rest))
    "\t" <> rest -> Ok(#("\t", rest))
    "\n" <> rest -> Ok(#("\n", rest))
    "\r" <> rest -> Ok(#("\r", rest))
    _ -> Error(Nil)
  }
}
|
|
|
|
/// Runs each parser in order on the same input and returns the first
/// success; `Error(Nil)` when the list is empty or every parser fails.
fn try_parsers(
  over parsers: List(fn(String) -> Result(#(a, String), Nil)),
  against static_data: String,
) -> Result(#(a, String), Nil) {
  list.find_map(in: parsers, with: fn(parser) { parser(static_data) })
}
|
|
|
|
/// Applies `to_run` repeatedly and concatenates what it returns.
///
/// Fails when the concatenated result is empty.
pub fn parse_multiple(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
) -> Result(#(String, String), Nil) {
  let #(matched, rest) = parse_multiple_optional(str, to_run, "")

  case matched {
    "" -> Error(Nil)
    _ -> Ok(#(matched, rest))
  }
}
|
|
|
|
/// Applies `to_run` repeatedly, appending each parsed piece to `ret`, until
/// the input is empty or `to_run` fails. Returns the accumulated string and
/// the unconsumed input; never fails.
fn parse_multiple_optional(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
  acc ret: String,
) -> #(String, String) {
  case str {
    // The parser is not invoked on empty input.
    "" -> #(ret, "")
    _ ->
      case to_run(str) {
        Error(_) -> #(ret, str)
        Ok(#(piece, rest)) ->
          parse_multiple_optional(rest, to_run, ret <> piece)
      }
  }
}
|