feat: Started adding references
Some checks failed
test / test (push) Has been cancelled

This commit is contained in:
2025-10-09 14:56:04 +01:00
parent b6e36923b3
commit 616173c5c0

View File

@@ -39,16 +39,16 @@ pub type Element {
pub fn main() { pub fn main() {
parse_document( parse_document(
"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<a attr='ha&#x20;ha' battr='baba' ref='&ref;'/>", "<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<a attr='ha&#x20;&#38;#38;ha' battr='baba' ref='&amp;'/>",
) )
|> echo |> echo
} }
pub fn default_entities() -> dict.Dict(String, Entity) { pub fn default_entities() -> dict.Dict(String, Entity) {
dict.from_list([ dict.from_list([
#("lt", InternalEntity("&#38;#60;")), #("lt", InternalEntity("&#60;")),
#("gt", InternalEntity("#62;")), #("gt", InternalEntity("&#62;")),
#("amp", InternalEntity("&#38;#38;")), #("amp", InternalEntity("&#38;")),
#("apos", InternalEntity("&#39;")), #("apos", InternalEntity("&#39;")),
#("quot", InternalEntity("&#34;")), #("quot", InternalEntity("&#34;")),
]) ])
@@ -60,7 +60,7 @@ fn parse_document(doc: String) -> Result(Document, Nil) {
use #(element, doc) <- result.try(parse_element(doc, doctype)) use #(element, doc) <- result.try(parse_element(doc, doctype))
let doc = parse_misc(doc) let doc = parse_misc(doc)
case doc |> echo { case doc {
"" -> Ok(Document(decl, doctype, Some(element))) "" -> Ok(Document(decl, doctype, Some(element)))
_ -> Error(Nil) _ -> Error(Nil)
} }
@@ -77,7 +77,7 @@ fn parse_empty_elem(
doc: String, doc: String,
doctype: Option(DocType), doctype: Option(DocType),
) -> Result(#(Element, String), Nil) { ) -> Result(#(Element, String), Nil) {
case doc |> echo { case doc {
"<" <> tail -> { "<" <> tail -> {
use #(name, doc) <- result.try(parse_name(tail)) use #(name, doc) <- result.try(parse_name(tail))
use #(attrs, doc) <- result.try(parse_attributes(doc, doctype, [])) use #(attrs, doc) <- result.try(parse_attributes(doc, doctype, []))
@@ -184,19 +184,19 @@ fn parse_reference(
";" <> tail -> { ";" <> tail -> {
use value <- result.try(int.base_parse(digits, 16)) use value <- result.try(int.base_parse(digits, 16))
use codepoint <- result.try(string.utf_codepoint(value)) use codepoint <- result.try(string.utf_codepoint(value))
Ok(#("", string.from_utf_codepoints([codepoint]) <> tail)) Ok(#(string.from_utf_codepoints([codepoint]), tail))
} }
_ -> Error(Nil) _ -> Error(Nil)
} }
} }
_ -> { _ -> {
use #(digits, doc) <- result.try(parse_multiple(tail, parse_digit)) use #(digits, doc) <- result.try(parse_multiple(tail, parse_digit))
case doc { case doc {
";" <> tail -> { ";" <> tail -> {
use value <- result.try(int.base_parse(digits, 10)) use value <- result.try(int.base_parse(digits, 10))
use codepoint <- result.try(string.utf_codepoint(value)) use codepoint <- result.try(string.utf_codepoint(value))
Ok(#("", string.from_utf_codepoints([codepoint]) <> tail))
Ok(#(string.from_utf_codepoints([codepoint]), tail))
} }
_ -> Error(Nil) _ -> Error(Nil)
} }
@@ -207,7 +207,10 @@ fn parse_reference(
use #(name, doc) <- result.try(parse_name(tail)) use #(name, doc) <- result.try(parse_name(tail))
case doc { case doc {
";" <> tail -> Ok(#(char <> name <> ";", tail)) ";" <> tail -> {
use value <- result.try(process_reference(name, doctype))
Ok(#("", value <> tail))
}
_ -> Error(Nil) _ -> Error(Nil)
} }
} }
@@ -215,6 +218,36 @@ fn parse_reference(
} }
} }
/// Resolves the named entity reference `ref` to its replacement text.
///
/// The entity table comes from the document's doctype when one is present;
/// otherwise `default_entities()` is used. The lookup itself is delegated to
/// `get_reference`, and its result is returned unchanged.
fn process_reference(
  ref: String,
  doctype: Option(DocType),
) -> Result(String, Nil) {
  // Choose the table first so there is one lookup call for both cases.
  let table = case doctype {
    Some(DocType(_, declared)) -> declared
    None -> default_entities()
  }
  get_reference(table, ref)
}
/// Looks up the entity named `ref` in `entities` and returns its value.
///
/// - An internal entity yields `Ok` with its stored value.
/// - A public or system external entity yields `Error(Nil)`.
/// - A missing name is retried against `default_entities()`, unless
///   `entities` already equals that table, in which case the result is
///   `Error(Nil)`.
fn get_reference(
  entities: dict.Dict(String, Entity),
  ref: String,
) -> Result(String, Nil) {
  case dict.get(entities, ref) {
    Ok(entity) ->
      case entity {
        InternalEntity(replacement) -> Ok(replacement)
        PublicExternalEntity(_, _) -> Error(Nil)
        SystemExternalEntity(_) -> Error(Nil)
      }
    Error(_) -> {
      // Only build the default table once a miss has actually happened.
      let defaults = default_entities()
      case entities == defaults {
        // Not yet searching the defaults: fall back to them.
        False -> get_reference(defaults, ref)
        // Already searching the defaults: the name is unknown.
        True -> Error(Nil)
      }
    }
  }
}
fn parse_name(doc: String) -> Result(#(String, String), Nil) { fn parse_name(doc: String) -> Result(#(String, String), Nil) {
case parse_name_start_char(doc) { case parse_name_start_char(doc) {
Ok(#(char, tail)) -> { Ok(#(char, tail)) -> {