feat: Work on declarations
Some checks failed
test / test (push) Has been cancelled

This commit is contained in:
2025-10-10 21:27:44 +01:00
parent 1e5227f60c
commit 336286c7cf

View File

@@ -7,7 +7,12 @@ import gleam/result
import gleam/string import gleam/string
pub type Declaration { pub type Declaration {
Declaration(versioninfo: String, encoding: String, standalone: Bool) XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
GEntityDecl
PEntityDecl(name: String, decl: String)
ElementDecl
AttListDecl
NotationDecl
} }
pub type Entity { pub type Entity {
@@ -16,8 +21,17 @@ pub type Entity {
PublicExternalEntity(literal: String, pubidliteral: String) PublicExternalEntity(literal: String, pubidliteral: String)
} }
/// An external identifier attached to a document type declaration.
/// Values of this type are built by `parse_external_id`.
pub type ExternalID {
/// Built from the `SYSTEM` keyword followed by a single quoted literal.
SystemID(system_literal: String)
/// Built from the `PUBLIC` keyword followed by a public literal and then a
/// system literal. Note that the field order here (system first) is the
/// reverse of the order in which the two literals appear in the input.
PublicID(system_literal: String, public_literal: String)
}
pub type DocType { pub type DocType {
DocType(name: String, entities: dict.Dict(String, Entity)) DocType(
name: String,
external_id: Option(ExternalID),
entities: dict.Dict(String, Entity),
)
} }
pub type Document { pub type Document {
@@ -387,7 +401,7 @@ fn process_reference(
doctype: Option(DocType), doctype: Option(DocType),
) -> Result(String, Nil) { ) -> Result(String, Nil) {
case doctype { case doctype {
Some(DocType(_, entities)) -> { Some(DocType(_, _, entities)) -> {
get_reference(entities, ref) get_reference(entities, ref)
} }
None -> { None -> {
@@ -431,13 +445,249 @@ fn do_parse_name(doc: String, name: String) -> Result(#(String, String), Nil) {
fn parse_prolog( fn parse_prolog(
doc: String, doc: String,
) -> Result(#(Declaration, Option(DocType), String), Nil) { ) -> Result(#(Declaration, Option(DocType), String), Nil) {
let #(decl, doc) = case parse_decl(doc) { let #(decl, doc) =
Ok(#(decl, doc)) -> #(decl, doc) parse_decl(doc) |> result.unwrap(#(XMLDecl("1.0", "UTF-8", False), doc))
_ -> #(Declaration("1.0", "UTF-8", False), doc)
}
let doc = parse_misc(doc) let doc = parse_misc(doc)
Ok(#(decl, None, doc)) let #(doctype, doc) =
parse_doctype(doc)
|> result.map(fn(d) { #(Some(d.0), d.1) })
|> result.unwrap(#(None, doc))
Ok(#(decl, doctype, doc))
}
/// Parses a document type declaration (`<!DOCTYPE ... >`) at the start of
/// `doc`.
///
/// Returns the parsed `DocType` and the input following the closing `>`.
/// Returns `Error(Nil)` when `doc` does not start with `<!DOCTYPE`, when
/// `parse_name` fails, or when the declaration is not closed by `>`.
///
/// Both the external ID and the internal subset are optional: when either
/// sub-parser fails, parsing continues from the same position without it.
fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
  case doc {
    "<!DOCTYPE" <> tail -> {
      let doc = trim_space(tail)
      use #(name, doc) <- result.try(parse_name(doc))
      let #(external_id, doc) =
        parse_external_id(doc) |> result.unwrap(#(None, doc))
      let doc = trim_space(doc)
      // TODO: the internal subset is parsed but its declarations are not yet
      // turned into entities, so `entities` below is always empty.
      let #(_int_subset, doc) =
        parse_int_subset(doc) |> result.unwrap(#([], doc))
      // The grammar allows optional whitespace between the internal subset's
      // closing `]` and the final `>`:
      //   doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
      //                   ('[' intSubset ']' S?)? '>'
      case trim_space(doc) {
        ">" <> tail -> Ok(#(DocType(name, external_id, dict.new()), tail))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Parses a bracketed internal subset (`[ ... ]`) at the start of `doc`,
/// after skipping whatever `trim_space` removes.
///
/// On success returns the declarations collected by `do_parse_int_subset`
/// and the input following the closing `]`. Returns `Error(Nil)` when the
/// input does not open with `[`, when the inner parser fails, or when the
/// inner parser stops somewhere other than at a `]`.
fn parse_int_subset(doc: String) -> Result(#(List(Declaration), String), Nil) {
  case trim_space(doc) {
    "[" <> body ->
      case do_parse_int_subset(body, []) {
        // The inner parser must stop exactly at the closing bracket.
        Ok(#(declarations, "]" <> remaining)) -> Ok(#(declarations, remaining))
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}
/// Walks the contents of an internal subset, accumulating declarations.
///
/// `doc` is the input just after the opening `[` (or after a previously
/// consumed item); `decl_list` holds the declarations seen so far.
///
/// Returns the declarations together with the remaining input, which still
/// begins with the closing `]` so that the caller can consume it. Returns
/// `Error(Nil)` for a malformed or undeclared parameter-entity reference,
/// for input that ends before `]`, and for any construct that is not yet
/// supported.
fn do_parse_int_subset(
  doc: String,
  decl_list: List(Declaration),
) -> Result(#(List(Declaration), String), Nil) {
  let doc = trim_space(doc)
  case doc {
    // End of the subset: leave the `]` in place for the caller to match.
    "]" <> _ -> Ok(#(decl_list, doc))
    // Parameter-entity reference: `%name;`
    "%" <> tail -> {
      use #(name, doc) <- result.try(parse_name(tail))
      case doc {
        ";" <> tail -> {
          case get_entity_replacement(name, decl_list) {
            Some(decl) -> {
              // XML 1.0 section 4.4.8: replacement text included in the DTD
              // is padded with one leading and one trailing space so that it
              // cannot fuse with neighbouring tokens.
              do_parse_int_subset(" " <> decl <> " " <> tail, decl_list)
            }
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    }
    // TODO: parse markup declarations (entity, element, attribute-list and
    // notation declarations), processing instructions and comments. Until
    // then report a parse error rather than crashing on unsupported input.
    _ -> Error(Nil)
  }
}
/// Looks up the replacement text of the parameter entity named `entity`.
///
/// Scans `decl_list` in order and returns the `decl` of the first
/// `PEntityDecl` whose name equals `entity`, or `None` when there is no such
/// declaration. Other kinds of declaration are ignored.
fn get_entity_replacement(
  entity: String,
  decl_list: List(Declaration),
) -> Option(String) {
  let lookup =
    list.find_map(decl_list, fn(candidate) {
      case candidate {
        PEntityDecl(name, replacement) if name == entity -> Ok(replacement)
        _ -> Error(Nil)
      }
    })
  case lookup {
    Ok(replacement) -> Some(replacement)
    Error(_) -> None
  }
}
/// Parses an external identifier at the start of `doc`, after skipping
/// whatever `trim_space` removes.
///
/// Two forms are recognised:
/// - `SYSTEM` followed by a system literal, giving a `SystemID`;
/// - `PUBLIC` followed by a public literal and then a system literal,
///   giving a `PublicID`.
///
/// On success the identifier is always wrapped in `Some` and paired with the
/// remaining input. Returns `Error(Nil)` when neither keyword is present or
/// when a required literal fails to parse.
fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
  case trim_space(doc) {
    "SYSTEM" <> rest ->
      case parse_system_literal(trim_space(rest), None, "") {
        Ok(#(system_literal, remaining)) ->
          Ok(#(Some(SystemID(system_literal:)), remaining))
        Error(Nil) -> Error(Nil)
      }
    "PUBLIC" <> rest ->
      case parse_public_literal(trim_space(rest), None, "") {
        Error(Nil) -> Error(Nil)
        Ok(#(public_literal, after_public)) ->
          case parse_system_literal(trim_space(after_public), None, "") {
            Error(Nil) -> Error(Nil)
            Ok(#(system_literal, remaining)) ->
              Ok(#(Some(PublicID(system_literal:, public_literal:)), remaining))
          }
      }
    _ -> Error(Nil)
  }
}
/// Parses a quoted public identifier literal.
///
/// Call with `quote` set to `None` and `doc` positioned at the opening quote
/// (`"` or `'`). The function then recurses with `quote` holding the opening
/// delimiter and `literal` accumulating the characters read so far (the
/// accumulator is reset to `""` when the opening quote is consumed).
///
/// Returns the literal's contents and the input after the closing quote.
/// Returns `Error(Nil)` when no opening quote is present, when the input
/// ends before the closing quote, or when a character outside the permitted
/// set is encountered. An apostrophe is accepted as content only inside a
/// double-quoted literal; a double quote is never accepted as content.
fn parse_public_literal(
  doc: String,
  quote: Option(String),
  literal: String,
) -> Result(#(String, String), Nil) {
  case quote {
    // Not inside the literal yet: the next character must open it.
    None ->
      case doc {
        "\"" <> rest -> parse_public_literal(rest, Some("\""), "")
        "'" <> rest -> parse_public_literal(rest, Some("'"), "")
        _ -> Error(Nil)
      }
    Some(delimiter) ->
      case doc {
        // Closing delimiter matching the opening one.
        "\"" <> rest if delimiter == "\"" -> Ok(#(literal, rest))
        "'" <> rest if delimiter == "'" -> Ok(#(literal, rest))
        // Apostrophe as ordinary content of a double-quoted literal.
        "'" <> rest if delimiter == "\"" ->
          parse_public_literal(rest, quote, literal <> "'")
        _ ->
          case pop_pubid_char(doc) {
            Ok(#(char, rest)) ->
              parse_public_literal(rest, quote, literal <> char)
            Error(Nil) -> Error(Nil)
          }
      }
  }
}

/// Splits one permitted public-identifier character (quotes excluded) off the
/// front of `doc`, or returns `Error(Nil)` when `doc` is empty or starts with
/// any other character.
fn pop_pubid_char(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    " " as char <> rest
    | "\r" as char <> rest
    | "\n" as char <> rest
    | "0" as char <> rest
    | "1" as char <> rest
    | "2" as char <> rest
    | "3" as char <> rest
    | "4" as char <> rest
    | "5" as char <> rest
    | "6" as char <> rest
    | "7" as char <> rest
    | "8" as char <> rest
    | "9" as char <> rest
    | "a" as char <> rest
    | "b" as char <> rest
    | "c" as char <> rest
    | "d" as char <> rest
    | "e" as char <> rest
    | "f" as char <> rest
    | "g" as char <> rest
    | "h" as char <> rest
    | "i" as char <> rest
    | "j" as char <> rest
    | "k" as char <> rest
    | "l" as char <> rest
    | "m" as char <> rest
    | "n" as char <> rest
    | "o" as char <> rest
    | "p" as char <> rest
    | "q" as char <> rest
    | "r" as char <> rest
    | "s" as char <> rest
    | "t" as char <> rest
    | "u" as char <> rest
    | "v" as char <> rest
    | "w" as char <> rest
    | "x" as char <> rest
    | "y" as char <> rest
    | "z" as char <> rest
    | "A" as char <> rest
    | "B" as char <> rest
    | "C" as char <> rest
    | "D" as char <> rest
    | "E" as char <> rest
    | "F" as char <> rest
    | "G" as char <> rest
    | "H" as char <> rest
    | "I" as char <> rest
    | "J" as char <> rest
    | "K" as char <> rest
    | "L" as char <> rest
    | "M" as char <> rest
    | "N" as char <> rest
    | "O" as char <> rest
    | "P" as char <> rest
    | "Q" as char <> rest
    | "R" as char <> rest
    | "S" as char <> rest
    | "T" as char <> rest
    | "U" as char <> rest
    | "V" as char <> rest
    | "W" as char <> rest
    | "X" as char <> rest
    | "Y" as char <> rest
    | "Z" as char <> rest
    | "-" as char <> rest
    | "(" as char <> rest
    | ")" as char <> rest
    | "+" as char <> rest
    | "," as char <> rest
    | "." as char <> rest
    | "/" as char <> rest
    | ":" as char <> rest
    | "=" as char <> rest
    | "?" as char <> rest
    | ";" as char <> rest
    | "!" as char <> rest
    | "*" as char <> rest
    | "#" as char <> rest
    | "@" as char <> rest
    | "$" as char <> rest
    | "_" as char <> rest
    | "%" as char <> rest
    -> Ok(#(char, rest))
    _ -> Error(Nil)
  }
}
/// Parses a quoted system literal.
///
/// Call with `quote` set to `None` and `doc` positioned at the opening quote
/// (`"` or `'`). The function then recurses with `quote` holding the opening
/// delimiter and `literal` accumulating the text read so far (the
/// accumulator is reset to `""` when the opening quote is consumed).
///
/// Any content is accepted up to the first occurrence of the opening
/// delimiter. Returns the literal's contents and the input after the closing
/// quote, or `Error(Nil)` when no opening quote is present or the input ends
/// before the closing quote.
fn parse_system_literal(
  doc: String,
  quote: Option(String),
  literal: String,
) -> Result(#(String, String), Nil) {
  case quote {
    // Not inside the literal yet: the next character must open it.
    None ->
      case doc {
        "\"" <> rest -> parse_system_literal(rest, Some("\""), "")
        "'" <> rest -> parse_system_literal(rest, Some("'"), "")
        _ -> Error(Nil)
      }
    Some(delimiter) ->
      case doc {
        "\"" <> rest if delimiter == "\"" -> Ok(#(literal, rest))
        "'" <> rest if delimiter == "'" -> Ok(#(literal, rest))
        _ ->
          // Consume one grapheme of content; running out of input before the
          // closing quote is an error.
          case string.pop_grapheme(doc) {
            Ok(#(grapheme, rest)) ->
              parse_system_literal(rest, quote, literal <> grapheme)
            Error(_) -> Error(Nil)
          }
      }
  }
}
fn parse_misc(doc: String) -> String { fn parse_misc(doc: String) -> String {
@@ -463,18 +713,14 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
case doc { case doc {
"<?xml" <> tail -> { "<?xml" <> tail -> {
use #(versioninfo, doc) <- result.try(parse_versioninfo(tail)) use #(versioninfo, doc) <- result.try(parse_versioninfo(tail))
let #(encoding, doc) = case parse_encodingdecl(doc) { let #(encoding, doc) =
Ok(e) -> e parse_encodingdecl(doc) |> result.unwrap(#("", doc))
Error(_) -> #("", doc) let #(standalone, doc) =
} parse_standalone(doc) |> result.unwrap(#(False, doc))
let #(standalone, doc) = case parse_standalone(doc) {
Ok(e) -> e
Error(_) -> #(False, doc)
}
case trim_space(doc) { case trim_space(doc) {
"?>" <> tail -> "?>" <> tail ->
Ok(#(Declaration(versioninfo:, encoding:, standalone:), tail)) Ok(#(XMLDecl(versioninfo:, encoding:, standalone:), tail))
_ -> Error(Nil) _ -> Error(Nil)
} }
} }