diff --git a/src/glxml.gleam b/src/glxml.gleam
index 082a923..b1ec66e 100644
--- a/src/glxml.gleam
+++ b/src/glxml.gleam
@@ -35,11 +35,12 @@ pub type Attribute {
 pub type Element {
   EmptyElem(name: String, attrs: List(Attribute))
   Element(name: String, attrs: List(Attribute), elements: List(Element))
+  Text(content: String)
 }
 
 pub fn main() {
   parse_document(
-    "\r\n \n",
+    "\r\n \n",
   )
   |> echo
 }
@@ -70,7 +71,10 @@ fn parse_element(
   doc: String,
   doctype: Option(DocType),
 ) -> Result(#(Element, String), Nil) {
-  try_parsers([parse_empty_elem(_, doctype)], doc)
+  try_parsers(
+    [parse_empty_elem(_, doctype), parse_tagged_elem(_, doctype)],
+    doc,
+  )
 }
 
 fn parse_empty_elem(
@@ -91,6 +95,113 @@ fn parse_empty_elem(
   }
 }
 
+/// Parses an element written with separate start and end tags:
+/// `<name attrs>content</name>`.
+///
+/// Returns the parsed `Element` together with the input remaining after the
+/// end tag's `>`, or `Error(Nil)` when the input does not have that shape or
+/// the end-tag name differs from the start-tag name.
+fn parse_tagged_elem(
+  doc: String,
+  doctype: Option(DocType),
+) -> Result(#(Element, String), Nil) {
+  case doc {
+    "<" <> tail -> {
+      use #(name, doc) <- result.try(parse_name(tail))
+      use #(attrs, doc) <- result.try(parse_attributes(doc, doctype, []))
+      let doc = trim_space(doc)
+      case doc {
+        ">" <> tail -> {
+          use #(content, doc) <- result.try(parse_content(tail, doctype, []))
+
+          case doc {
+            "</" <> tail -> {
+              use #(close_name, doc) <- result.try(parse_name(tail))
+              let doc = trim_space(doc)
+              case doc {
+                ">" <> tail -> {
+                  // The end tag must repeat the start tag's name.
+                  use <- bool.guard(
+                    when: name != close_name,
+                    return: Error(Nil),
+                  )
+
+                  Ok(#(Element(name, attrs, content), tail))
+                }
+                _ -> Error(Nil)
+              }
+            }
+            _ -> Error(Nil)
+          }
+        }
+        _ -> Error(Nil)
+      }
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Parses element content: runs of character data interleaved with child
+/// elements.
+///
+/// `content` accumulates the items parsed so far in reverse order; pass `[]`
+/// on the first call. Parsing stops at the first `<` where parsing a child
+/// element fails (such as an end tag) or at the end of input, and the items
+/// are returned in document order together with the unconsumed input.
+fn parse_content(
+  doc: String,
+  doctype: Option(DocType),
+  content: List(Element),
+) -> Result(#(List(Element), String), Nil) {
+  use #(chardata, doc) <- result.try(parse_chardata(doc, doctype, ""))
+  let new_content = case chardata {
+    "" -> content
+    _ -> [Text(chardata), ..content]
+  }
+
+  case doc {
+    "<" <> _ -> {
+      case try_parsers([parse_element(_, doctype)], doc) {
+        // Build on new_content so text preceding the child element is kept.
+        Ok(#(element, doc)) ->
+          parse_content(doc, doctype, [element, ..new_content])
+        Error(_) -> Ok(#(list.reverse(new_content), doc))
+      }
+    }
+    _ -> Ok(#(list.reverse(new_content), doc))
+  }
+}
+
+/// Accumulates character data up to the next `<` or the end of input,
+/// appending to `chardata`.
+///
+/// `&` is handed to `parse_reference`; the value it returns is appended and
+/// scanning continues after it. A `]]>` sequence is rejected with
+/// `Error(Nil)`. Returns the accumulated text and the unconsumed input.
+fn parse_chardata(
+  doc: String,
+  doctype: Option(DocType),
+  chardata: String,
+) -> Result(#(String, String), Nil) {
+  case doc {
+    "]]>" <> _ -> Error(Nil)
+    "<" <> _ -> Ok(#(chardata, doc))
+    "&" <> _ -> {
+      use #(refval, doc) <- result.try(parse_reference(doc, doctype))
+
+      parse_chardata(doc, doctype, chardata <> refval)
+    }
+
+    // End of input: return the text collected so far.
+    "" -> Ok(#(chardata, ""))
+    _ -> {
+      // doc is non-empty here, so popping a grapheme cannot fail.
+      let assert Ok(#(char, tail)) = string.pop_grapheme(doc)
+      parse_chardata(tail, doctype, chardata <> char)
+    }
+  }
+}
+
 fn parse_attributes(
   doc: String,
   doctype: Option(DocType),