diff --git a/src/glxml.gleam b/src/glxml.gleam
index 1e5c0fb..da1d18d 100644
--- a/src/glxml.gleam
+++ b/src/glxml.gleam
@@ -1,5 +1,6 @@
import gleam/bool
import gleam/dict
+import gleam/int
import gleam/list
import gleam/option.{type Option, None, Some}
import gleam/result
@@ -10,7 +11,9 @@ pub type Declaration {
}
pub type Entity { // An entity definition; `default_entities` builds a name -> Entity dict of these.
- Entity
+ InternalEntity(value: String) // Carries the entity's text inline as `value`.
+ SystemExternalEntity(literal: String) // NOTE(review): presumably the XML system literal (SYSTEM "...") - confirm.
+ PublicExternalEntity(literal: String, pubidliteral: String) // NOTE(review): presumably system literal plus public identifier (PUBLIC "..." "...") - confirm.
}
pub type DocType {
@@ -36,11 +39,21 @@ pub type Element {
pub fn main() { // Demo entry point: runs parse_document on a hard-coded string and echoes the outcome.
parse_document(
- "\r\n \n",
+ "\r\n \n",
) // Yields Result(Document, Nil), per parse_document's signature.
|> echo // `echo` prints the piped value; handy for eyeballing the parse result.
}
+/// Builds the table of the five entity names XML predefines (`lt`, `gt`,
+/// `amp`, `apos`, `quot`).
+///
+/// Each name maps to an `InternalEntity` whose `value` is the literal
+/// character the name stands for.
+pub fn default_entities() -> dict.Dict(String, Entity) {
+  dict.from_list([
+    #("lt", InternalEntity("<")),
+    // Store the literal character, matching the other entries.
+    #("gt", InternalEntity(">")),
+    #("amp", InternalEntity("&")),
+    #("apos", InternalEntity("'")),
+    // The double quote has to be escaped inside a Gleam string literal.
+    #("quot", InternalEntity("\"")),
+  ])
+}
+
fn parse_document(doc: String) -> Result(Document, Nil) {
use #(decl, doctype, doc) <- result.try(parse_prolog(doc))
use <- bool.guard(when: doc == "", return: Ok(Document(decl, doctype, None)))
@@ -162,7 +175,35 @@ fn parse_reference(
doctype: Option(DocType),
) -> Result(#(String, String), Nil) {
case doc {
- "&" as char <> tail | "%" as char <> tail -> {
+ "" <> tail -> {
+ case tail {
+ "x" <> tail -> {
+ use #(digits, doc) <- result.try(parse_multiple(tail, parse_hex_digit))
+
+ case doc {
+ ";" <> tail -> {
+ use value <- result.try(int.base_parse(digits, 16))
+ use codepoint <- result.try(string.utf_codepoint(value))
+ Ok(#("", string.from_utf_codepoints([codepoint]) <> tail))
+ }
+ _ -> Error(Nil)
+ }
+ }
+ _ -> {
+ use #(digits, doc) <- result.try(parse_multiple(tail, parse_digit))
+
+ case doc {
+ ";" <> tail -> {
+ use value <- result.try(int.base_parse(digits, 10))
+ use codepoint <- result.try(string.utf_codepoint(value))
+ Ok(#("", string.from_utf_codepoints([codepoint]) <> tail))
+ }
+ _ -> Error(Nil)
+ }
+ }
+ }
+ }
+ "&" as char <> tail -> {
use #(name, doc) <- result.try(parse_name(tail))
case doc {
@@ -269,7 +310,7 @@ fn do_parse_version(
doc: String,
version: String,
) -> Result(#(String, String), Nil) {
- case do_parse_digit(doc) {
+ case parse_digit(doc) {
Ok(#(digit, doc)) -> do_parse_version(doc, version <> digit)
Error(_) if version == "" -> Error(Nil)
Error(_) -> Ok(#(version, doc))
@@ -302,14 +343,14 @@ fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
}
fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
- case do_parse_alpha(doc) {
+ case parse_alpha(doc) {
Ok(#(char, doc)) -> {
Ok(parse_multiple_optional(
doc,
try_parsers(
[
- do_parse_alpha,
- do_parse_digit,
+ parse_alpha,
+ parse_digit,
fn(doc) {
case doc {
"." as char <> tail | "_" as char <> tail | "-" as char <> tail ->
@@ -336,7 +377,7 @@ fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
}
}
-fn do_parse_digit(doc: String) -> Result(#(String, String), Nil) {
+fn parse_digit(doc: String) -> Result(#(String, String), Nil) {
case doc {
"0" as digit <> tail
| "1" as digit <> tail
@@ -352,7 +393,36 @@ fn do_parse_digit(doc: String) -> Result(#(String, String), Nil) {
}
}
-fn do_parse_alpha(doc: String) -> Result(#(String, String), Nil) {
+/// Consumes a single leading hexadecimal digit (`0`-`9`, `A`-`F`, `a`-`f`)
+/// from `str`.
+///
+/// Returns `Ok(#(digit, rest))` holding the matched character and the
+/// remaining input, or `Error(Nil)` when `str` is empty or begins with any
+/// other character.
+pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
+  case str {
+    // Decimal digits.
+    "0" as hex <> rest
+    | "1" as hex <> rest
+    | "2" as hex <> rest
+    | "3" as hex <> rest
+    | "4" as hex <> rest
+    | "5" as hex <> rest
+    | "6" as hex <> rest
+    | "7" as hex <> rest
+    | "8" as hex <> rest
+    | "9" as hex <> rest -> Ok(#(hex, rest))
+
+    // Upper-case hex letters.
+    "A" as hex <> rest
+    | "B" as hex <> rest
+    | "C" as hex <> rest
+    | "D" as hex <> rest
+    | "E" as hex <> rest
+    | "F" as hex <> rest -> Ok(#(hex, rest))
+
+    // Lower-case hex letters.
+    "a" as hex <> rest
+    | "b" as hex <> rest
+    | "c" as hex <> rest
+    | "d" as hex <> rest
+    | "e" as hex <> rest
+    | "f" as hex <> rest -> Ok(#(hex, rest))
+
+    // Empty input or a non-hex leading character.
+    _ -> Error(Nil)
+  }
+}
+
+fn parse_alpha(doc: String) -> Result(#(String, String), Nil) {
case doc {
"a" as char <> tail
| "b" as char <> tail