feat: Further element parsing
Some checks failed
test / test (push) Has been cancelled

This commit is contained in:
2025-10-09 18:18:16 +01:00
parent 0ea34f11f3
commit 8e0e6f988a

View File

@@ -35,11 +35,12 @@ pub type Attribute {
/// A node of the parsed document tree.
///
/// - `EmptyElem` holds a tag name and its attributes and has no children.
/// - `Element` holds a tag name, its attributes and a list of child nodes.
/// - `Text` holds a run of character data as a plain string.
pub type Element { pub type Element {
EmptyElem(name: String, attrs: List(Attribute)) EmptyElem(name: String, attrs: List(Attribute))
Element(name: String, attrs: List(Attribute), elements: List(Element)) Element(name: String, attrs: List(Attribute), elements: List(Element))
Text(content: String)
} }
/// Entry point: runs `parse_document` on a hard-coded sample document and
/// pipes the result into `echo`.
pub fn main() { pub fn main() {
parse_document( parse_document(
"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<a attr='ha&#x20;&#38;#38;ha' battr='baba' ref='&amp;'/>", "<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<b><a attr='ha&#x20;&#38;#38;ha' battr='baba' ref='&amp;'/></b>",
) )
|> echo |> echo
} }
@@ -70,7 +71,10 @@ fn parse_element(
doc: String, doc: String,
doctype: Option(DocType), doctype: Option(DocType),
) -> Result(#(Element, String), Nil) { ) -> Result(#(Element, String), Nil) {
try_parsers([parse_empty_elem(_, doctype)], doc) try_parsers(
[parse_empty_elem(_, doctype), parse_tagged_elem(_, doctype)],
doc,
)
} }
fn parse_empty_elem( fn parse_empty_elem(
@@ -91,6 +95,89 @@ fn parse_empty_elem(
} }
} }
/// Parses an element written as a start tag, content and a matching end tag,
/// e.g. `<b attr='x'>...</b>`.
///
/// Steps, each of which must succeed in order:
///   1. a literal `<`, then a name via `parse_name`;
///   2. attributes via `parse_attributes`;
///   3. after `trim_space`, a literal `>`;
///   4. the element content via `parse_content`;
///   5. a literal `</`, a second name via `parse_name`, and after
///      `trim_space`, a literal `>`;
///   6. the end-tag name must equal the start-tag name.
///
/// Returns `Ok(#(Element(name, attrs, content), rest))` where `rest` is the
/// input following the end tag's `>`, or `Error(Nil)` if any step fails.
fn parse_tagged_elem(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
  // Start tag: "<" Name Attributes S? ">"
  use after_open <- result.try(case doc {
    "<" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(name, after_name) <- result.try(parse_name(after_open))
  use #(attrs, after_attrs) <- result.try(
    parse_attributes(after_name, doctype, []),
  )
  use body <- result.try(case trim_space(after_attrs) {
    ">" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })

  // Everything between the start tag and the end tag.
  use #(content, after_content) <- result.try(
    parse_content(body, doctype, []),
  )

  // End tag: "</" Name S? ">"
  use closing <- result.try(case after_content {
    "</" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(close_name, after_close_name) <- result.try(parse_name(closing))
  case trim_space(after_close_name) {
    ">" <> remainder -> {
      // The end tag has to name the same element as the start tag.
      use <- bool.guard(when: name != close_name, return: Error(Nil))
      Ok(#(Element(name, attrs, content), remainder))
    }
    _ -> Error(Nil)
  }
}
/// Parses element content: alternating runs of character data and child
/// elements, stopping at the first `<` that does not start a child element
/// (or at any other position where `parse_chardata` stops).
///
/// `content` is the accumulator of nodes parsed so far, newest first;
/// callers start it at `[]`. The returned list is in document order.
///
/// Returns `Ok(#(nodes, rest))`, or `Error(Nil)` if `parse_chardata` fails.
fn parse_content(
  doc: String,
  doctype: Option(DocType),
  content: List(Element),
) -> Result(#(List(Element), String), Nil) {
  use #(chardata, doc) <- result.try(parse_chardata(doc, doctype, ""))
  // An empty run of character data does not produce a `Text` node.
  let new_content = case chardata {
    "" -> content
    _ -> [Text(chardata), ..content]
  }
  case doc {
    "<" <> _ -> {
      case try_parsers([parse_element(_, doctype)], doc) {
        // Continue from `new_content`, not `content`: the text that
        // precedes this child element must stay in the result.
        Ok(#(element, doc)) ->
          parse_content(doc, doctype, [element, ..new_content])
        // Not a child element (e.g. the parent's end tag): stop here and
        // leave the input untouched for the caller.
        Error(_) -> Ok(#(list.reverse(new_content), doc))
      }
    }
    _ -> Ok(#(list.reverse(new_content), doc))
  }
}
/// Consumes character data from the front of `doc`, appending it to the
/// accumulator `chardata` (callers start it at `""`).
///
/// - Stops before a `<` and returns the accumulated text with the unconsumed
///   input, which still begins with `<`.
/// - Expands a reference starting with `&` via `parse_reference` and appends
///   its value.
/// - Fails with `Error(Nil)` on a literal `]]>` (the XML `CharData`
///   production excludes that sequence) or when `parse_reference` fails.
/// - At end of input returns everything accumulated so far with an empty
///   remainder.
fn parse_chardata(
  doc: String,
  doctype: Option(DocType),
  chardata: String,
) -> Result(#(String, String), Nil) {
  case doc {
    "]]>" <> _ -> Error(Nil)
    "<" <> _ -> Ok(#(chardata, doc))
    "&" <> _ -> {
      use #(refval, doc) <- result.try(parse_reference(doc, doctype))
      parse_chardata(doc, doctype, chardata <> refval)
    }
    _ ->
      case string.pop_grapheme(doc) {
        Ok(#(char, tail)) -> parse_chardata(tail, doctype, chardata <> char)
        // `pop_grapheme` only fails on the empty string, i.e. end of input.
        // Return the accumulated text instead of discarding it.
        Error(Nil) -> Ok(#(chardata, ""))
      }
  }
}
fn parse_attributes( fn parse_attributes(
doc: String, doc: String,
doctype: Option(DocType), doctype: Option(DocType),