This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import gleam/bool
|
import gleam/bool
|
||||||
import gleam/dict
|
import gleam/dict
|
||||||
|
import gleam/int
|
||||||
import gleam/list
|
import gleam/list
|
||||||
import gleam/option.{type Option, None, Some}
|
import gleam/option.{type Option, None, Some}
|
||||||
import gleam/result
|
import gleam/result
|
||||||
@@ -10,7 +11,9 @@ pub type Declaration {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub type Entity {
|
pub type Entity {
|
||||||
Entity
|
InternalEntity(value: String)
|
||||||
|
SystemExternalEntity(literal: String)
|
||||||
|
PublicExternalEntity(literal: String, pubidliteral: String)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type DocType {
|
pub type DocType {
|
||||||
@@ -36,11 +39,21 @@ pub type Element {
|
|||||||
|
|
||||||
pub fn main() {
|
pub fn main() {
|
||||||
parse_document(
|
parse_document(
|
||||||
"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<a attr='haha' battr='baba' ref='&ref;'/>",
|
"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<a attr='ha ha' battr='baba' ref='&ref;'/>",
|
||||||
)
|
)
|
||||||
|> echo
|
|> echo
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn default_entities() -> dict.Dict(String, Entity) {
|
||||||
|
dict.from_list([
|
||||||
|
#("lt", InternalEntity("&#60;")),
|
||||||
|
#("gt", InternalEntity("&#62;")),
|
||||||
|
#("amp", InternalEntity("&#38;")),
|
||||||
|
#("apos", InternalEntity("'")),
|
||||||
|
#("quot", InternalEntity("\"")),
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_document(doc: String) -> Result(Document, Nil) {
|
fn parse_document(doc: String) -> Result(Document, Nil) {
|
||||||
use #(decl, doctype, doc) <- result.try(parse_prolog(doc))
|
use #(decl, doctype, doc) <- result.try(parse_prolog(doc))
|
||||||
use <- bool.guard(when: doc == "", return: Ok(Document(decl, doctype, None)))
|
use <- bool.guard(when: doc == "", return: Ok(Document(decl, doctype, None)))
|
||||||
@@ -162,7 +175,35 @@ fn parse_reference(
|
|||||||
doctype: Option(DocType),
|
doctype: Option(DocType),
|
||||||
) -> Result(#(String, String), Nil) {
|
) -> Result(#(String, String), Nil) {
|
||||||
case doc {
|
case doc {
|
||||||
"&" as char <> tail | "%" as char <> tail -> {
|
"&#" <> tail -> {
|
||||||
|
case tail {
|
||||||
|
"x" <> tail -> {
|
||||||
|
use #(digits, doc) <- result.try(parse_multiple(tail, parse_hex_digit))
|
||||||
|
|
||||||
|
case doc {
|
||||||
|
";" <> tail -> {
|
||||||
|
use value <- result.try(int.base_parse(digits, 16))
|
||||||
|
use codepoint <- result.try(string.utf_codepoint(value))
|
||||||
|
Ok(#("", string.from_utf_codepoints([codepoint]) <> tail))
|
||||||
|
}
|
||||||
|
_ -> Error(Nil)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ -> {
|
||||||
|
use #(digits, doc) <- result.try(parse_multiple(tail, parse_digit))
|
||||||
|
|
||||||
|
case doc {
|
||||||
|
";" <> tail -> {
|
||||||
|
use value <- result.try(int.base_parse(digits, 10))
|
||||||
|
use codepoint <- result.try(string.utf_codepoint(value))
|
||||||
|
Ok(#("", string.from_utf_codepoints([codepoint]) <> tail))
|
||||||
|
}
|
||||||
|
_ -> Error(Nil)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"&" as char <> tail -> {
|
||||||
use #(name, doc) <- result.try(parse_name(tail))
|
use #(name, doc) <- result.try(parse_name(tail))
|
||||||
|
|
||||||
case doc {
|
case doc {
|
||||||
@@ -269,7 +310,7 @@ fn do_parse_version(
|
|||||||
doc: String,
|
doc: String,
|
||||||
version: String,
|
version: String,
|
||||||
) -> Result(#(String, String), Nil) {
|
) -> Result(#(String, String), Nil) {
|
||||||
case do_parse_digit(doc) {
|
case parse_digit(doc) {
|
||||||
Ok(#(digit, doc)) -> do_parse_version(doc, version <> digit)
|
Ok(#(digit, doc)) -> do_parse_version(doc, version <> digit)
|
||||||
Error(_) if version == "" -> Error(Nil)
|
Error(_) if version == "" -> Error(Nil)
|
||||||
Error(_) -> Ok(#(version, doc))
|
Error(_) -> Ok(#(version, doc))
|
||||||
@@ -302,14 +343,14 @@ fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
|
fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
|
||||||
case do_parse_alpha(doc) {
|
case parse_alpha(doc) {
|
||||||
Ok(#(char, doc)) -> {
|
Ok(#(char, doc)) -> {
|
||||||
Ok(parse_multiple_optional(
|
Ok(parse_multiple_optional(
|
||||||
doc,
|
doc,
|
||||||
try_parsers(
|
try_parsers(
|
||||||
[
|
[
|
||||||
do_parse_alpha,
|
parse_alpha,
|
||||||
do_parse_digit,
|
parse_digit,
|
||||||
fn(doc) {
|
fn(doc) {
|
||||||
case doc {
|
case doc {
|
||||||
"." as char <> tail | "_" as char <> tail | "-" as char <> tail ->
|
"." as char <> tail | "_" as char <> tail | "-" as char <> tail ->
|
||||||
@@ -336,7 +377,7 @@ fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn do_parse_digit(doc: String) -> Result(#(String, String), Nil) {
|
fn parse_digit(doc: String) -> Result(#(String, String), Nil) {
|
||||||
case doc {
|
case doc {
|
||||||
"0" as digit <> tail
|
"0" as digit <> tail
|
||||||
| "1" as digit <> tail
|
| "1" as digit <> tail
|
||||||
@@ -352,7 +393,36 @@ fn do_parse_digit(doc: String) -> Result(#(String, String), Nil) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn do_parse_alpha(doc: String) -> Result(#(String, String), Nil) {
|
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
|
||||||
|
case str {
|
||||||
|
"0" as digit <> tail
|
||||||
|
| "1" as digit <> tail
|
||||||
|
| "2" as digit <> tail
|
||||||
|
| "3" as digit <> tail
|
||||||
|
| "4" as digit <> tail
|
||||||
|
| "5" as digit <> tail
|
||||||
|
| "6" as digit <> tail
|
||||||
|
| "7" as digit <> tail
|
||||||
|
| "8" as digit <> tail
|
||||||
|
| "9" as digit <> tail
|
||||||
|
| "a" as digit <> tail
|
||||||
|
| "b" as digit <> tail
|
||||||
|
| "c" as digit <> tail
|
||||||
|
| "d" as digit <> tail
|
||||||
|
| "e" as digit <> tail
|
||||||
|
| "f" as digit <> tail
|
||||||
|
| "A" as digit <> tail
|
||||||
|
| "B" as digit <> tail
|
||||||
|
| "C" as digit <> tail
|
||||||
|
| "D" as digit <> tail
|
||||||
|
| "E" as digit <> tail
|
||||||
|
| "F" as digit <> tail -> Ok(#(digit, tail))
|
||||||
|
|
||||||
|
_ -> Error(Nil)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_alpha(doc: String) -> Result(#(String, String), Nil) {
|
||||||
case doc {
|
case doc {
|
||||||
"a" as char <> tail
|
"a" as char <> tail
|
||||||
| "b" as char <> tail
|
| "b" as char <> tail
|
||||||
|
|||||||
Reference in New Issue
Block a user