feat: Start work on !ELEMENT
Some checks failed
test / test (push) Has been cancelled

This commit is contained in:
2025-10-15 22:54:21 +01:00
parent 464ad513b6
commit 15c6062d04

View File

@@ -6,15 +6,35 @@ import gleam/option.{type Option, None, Some}
import gleam/result import gleam/result
import gleam/string import gleam/string
/// Content specification stored in an `ElementDecl`.
pub type Content {
/// Produced by `parse_elementdecl` for the `EMPTY` keyword.
Empty
/// Produced by `parse_elementdecl` for the `ANY` keyword.
Any
/// Mixed content. `parse_mixed` stores the literal "#PCDATA" first,
/// followed by the element names listed after it (if any).
Mixed(content: List(String))
/// Wraps a single content particle.
/// NOTE(review): presumably the result of `parse_children` — confirm once
/// that parser is finished.
Choice(content: ContentParticle)
}
pub type Declaration { pub type Declaration {
XMLDecl(versioninfo: String, encoding: String, standalone: Bool) XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
GEntityDecl GEntityDecl
PEntityDecl(name: String, decl: String) PEntityDecl(name: String, decl: String)
ElementDecl ElementDecl(name: String, content: Content)
AttListDecl AttListDecl
NotationDecl NotationDecl
} }
/// Occurrence indicator attached to a content particle.
pub type Optional {
/// Default used when a particle carries no suffix.
One
/// Set when the particle is followed by `+`.
OneOrMore
/// Set when the particle is followed by `*`.
ZeroOrMore
/// Set when the particle is followed by `?`.
ZeroOrOne
}
/// One node of an element content model. Every variant carries the
/// occurrence indicator (`optional`) that applies to the node.
pub type ContentParticle {
/// A single element name, as parsed by `parse_name` inside `parse_cp`.
ElParticle(name: String, optional: Optional)
/// Alternatives separated by `|`, in source order.
ChoiceParticle(choices: List(ContentParticle), optional: Optional)
/// Items separated by `,`, in source order.
SeqParticle(seq: List(ContentParticle), optional: Optional)
}
pub type Entity { pub type Entity {
InternalEntity(value: String) InternalEntity(value: String)
SystemExternalEntity(literal: String) SystemExternalEntity(literal: String)
@@ -58,14 +78,15 @@ pub type Element {
pub fn main() { pub fn main() {
parse_document( parse_document(
"<?xml version=\"1.1\"?>\r\n<!DOCTYPE el [\r\n<!ELEMENT div1 (head, (p | list | note)*, div2*)>\r\n]>",
//"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<b blah:test='1'><a attr='ha&#x20;&#38;#38;ha' battr='baba' ref='&amp;'/><!-- ma comment --><![CDATA[Testing&&<haha>]]><?test asuhashd ?></b>", //"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<b blah:test='1'><a attr='ha&#x20;&#38;#38;ha' battr='baba' ref='&amp;'/><!-- ma comment --><![CDATA[Testing&&<haha>]]><?test asuhashd ?></b>",
// "<doc> // "<doc>
"<doc> // "<doc>
<A a=\"asdf>'&#34;> // <A a=\"asdf>'&#34;>
asdf // asdf
?>%\"/> // ?>%\"/>
<A a='\"\">&#39;&#34;'/> // <A a='\"\">&#39;&#34;'/>
</doc>", // </doc>",
) )
|> echo |> echo
} }
@@ -284,7 +305,7 @@ fn parse_attribute(
doc: String, doc: String,
doctype: Option(DocType), doctype: Option(DocType),
) -> Result(#(Attribute, String), Nil) { ) -> Result(#(Attribute, String), Nil) {
let doc = trim_space(doc) use doc <- result.try(trim_mandatory_space(doc))
use #(name, doc) <- result.try(parse_name(doc)) use #(name, doc) <- result.try(parse_name(doc))
case doc { case doc {
"=" <> tail -> { "=" <> tail -> {
@@ -461,7 +482,7 @@ fn parse_prolog(
fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) { fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
case doc { case doc {
"<!DOCTYPE" <> tail -> { "<!DOCTYPE" <> tail -> {
let doc = trim_space(tail) use doc <- result.try(trim_mandatory_space(tail))
use #(name, doc) <- result.try(parse_name(doc)) use #(name, doc) <- result.try(parse_name(doc))
let #(external_id, doc) = let #(external_id, doc) =
@@ -518,11 +539,241 @@ fn do_parse_int_subset(
} }
} }
_ -> { _ -> {
todo case
try_parsers(
[
parse_elementdecl,
],
doc,
)
{
Ok(#(decl, doc)) -> {
do_parse_int_subset(doc, [decl, ..decl_list])
}
Error(_) -> Ok(#(list.reverse(decl_list), doc))
}
} }
} }
} }
/// Parses an element type declaration of the form
/// `'<!ELEMENT' S Name S contentspec S? '>'`.
///
/// `contentspec` is one of the keywords `EMPTY` / `ANY`, a mixed-content
/// model, or a children model. On success returns the `ElementDecl` and the
/// input that follows the closing `>`. Returns `Error(Nil)` when the input
/// does not start with `<!ELEMENT` or any part of the declaration is
/// malformed.
fn parse_elementdecl(doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "<!ELEMENT" <> tail -> {
      use doc <- result.try(trim_mandatory_space(tail))
      use #(name, doc) <- result.try(parse_name(doc))
      use doc <- result.try(trim_mandatory_space(doc))
      // Parse only the contentspec here; the closing `>` is shared by every
      // alternative and is handled once below.
      use #(decl, doc) <- result.try(case doc {
        "EMPTY" <> tail -> Ok(#(ElementDecl(name, Empty), tail))
        "ANY" <> tail -> Ok(#(ElementDecl(name, Any), tail))
        "(" <> _ ->
          try_parsers([parse_mixed(name, _), parse_children(name, _)], doc)
        _ -> Error(Nil)
      })
      // The declaration is terminated by `>` (optionally preceded by
      // whitespace), not by `)`.
      case trim_space(doc) {
        ">" <> tail -> Ok(#(decl, tail))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Parses a children content model: a choice or sequence group, optionally
/// followed by an occurrence indicator (`?`, `*` or `+`).
///
/// `name` is the element name the resulting `ElementDecl` is built for.
/// Returns the declaration and the remaining input, or `Error(Nil)` when
/// neither `parse_choice` nor `parse_seq` accepts the input.
fn parse_children(
  name: String,
  doc: String,
) -> Result(#(Declaration, String), Nil) {
  use #(children, doc) <- result.try(try_parsers([parse_choice, parse_seq], doc))
  // An occurrence indicator directly after the closing parenthesis applies to
  // the whole group.
  let #(children, doc) = case doc {
    "?" <> tail -> #(set_optional(children, ZeroOrOne), tail)
    "*" <> tail -> #(set_optional(children, ZeroOrMore), tail)
    "+" <> tail -> #(set_optional(children, OneOrMore), tail)
    _ -> #(children, doc)
  }
  // `Choice` is the only `Content` variant that carries a `ContentParticle`,
  // so it wraps both choice and sequence groups.
  Ok(#(ElementDecl(name, Choice(children)), doc))
}
/// Parses a parenthesised choice group: `(` cp `|` cp ... `)`.
///
/// Fails unless the group holds at least two alternatives and is closed by
/// `)`. Returns the `ChoiceParticle` and the input after the `)`.
fn parse_choice(doc: String) -> Result(#(ContentParticle, String), Nil) {
  case doc {
    "(" <> rest -> {
      use #(first, rest) <- result.try(parse_cp(trim_space(rest)))
      use #(particle, rest) <- result.try(do_parse_choice(rest, [first]))
      case particle, trim_space(rest) {
        // Fewer than two alternatives is not a choice.
        ChoiceParticle([], _), _ | ChoiceParticle([_], _), _ -> Error(Nil)
        _, ")" <> after -> Ok(#(particle, after))
        _, _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Collects `|`-separated content particles.
///
/// `acc` holds the particles gathered so far in reverse order. When no
/// further `|` follows, returns a `ChoiceParticle` with the particles in
/// source order together with the whitespace-trimmed remaining input.
fn do_parse_choice(
  doc: String,
  acc: List(ContentParticle),
) -> Result(#(ContentParticle, String), Nil) {
  case trim_space(doc) {
    "|" <> rest ->
      rest
      |> trim_space
      |> parse_cp
      |> result.try(fn(parsed) {
        let #(cp, rest) = parsed
        do_parse_choice(rest, [cp, ..acc])
      })
    rest -> Ok(#(ChoiceParticle(list.reverse(acc), One), rest))
  }
}
/// Parses a parenthesised sequence group: `(` cp `,` cp ... `)`.
///
/// A single particle in parentheses is accepted as a one-element sequence.
/// Returns the `SeqParticle` and the input after the closing `)`.
fn parse_seq(doc: String) -> Result(#(ContentParticle, String), Nil) {
  case doc {
    "(" <> rest -> {
      use #(first, rest) <- result.try(parse_cp(trim_space(rest)))
      // If collecting the remaining items fails, fall back to a sequence
      // holding only the first particle, positioned right after it.
      let #(particle, rest) =
        do_parse_seq(rest, [first])
        |> result.unwrap(#(SeqParticle([first], One), rest))
      case trim_space(rest) {
        ")" <> after -> Ok(#(particle, after))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Collects `,`-separated content particles.
///
/// `acc` holds the particles gathered so far in reverse order. When no
/// further `,` follows, returns a `SeqParticle` with the particles in source
/// order together with the whitespace-trimmed remaining input.
fn do_parse_seq(
  doc: String,
  acc: List(ContentParticle),
) -> Result(#(ContentParticle, String), Nil) {
  case trim_space(doc) {
    "," <> after_comma -> {
      let next = parse_cp(trim_space(after_comma))
      use #(cp, remaining) <- result.try(next)
      do_parse_seq(remaining, [cp, ..acc])
    }
    remaining -> Ok(#(SeqParticle(list.reverse(acc), One), remaining))
  }
}
/// Parses one content particle: an element name, a choice group or a
/// sequence group, optionally followed by `?`, `*` or `+`.
///
/// Returns the particle (with its occurrence indicator applied) and the
/// remaining input.
fn parse_cp(doc: String) -> Result(#(ContentParticle, String), Nil) {
  // A bare name becomes an element particle that occurs exactly once.
  let parse_el = fn(input) {
    parse_name(input)
    |> result.map(fn(named) {
      let #(name, rest) = named
      #(ElParticle(name, One), rest)
    })
  }
  use #(particle, rest) <- result.try(
    try_parsers([parse_el, parse_choice, parse_seq], doc),
  )
  let with_suffix = fn(optional, rest) {
    Ok(#(set_optional(particle, optional), rest))
  }
  case rest {
    "?" <> rest -> with_suffix(ZeroOrOne, rest)
    "*" <> rest -> with_suffix(ZeroOrMore, rest)
    "+" <> rest -> with_suffix(OneOrMore, rest)
    _ -> Ok(#(particle, rest))
  }
}
/// Returns a copy of `el` whose occurrence indicator is replaced by
/// `optional`; every other field is kept.
fn set_optional(el: ContentParticle, optional: Optional) -> ContentParticle {
  case el {
    ElParticle(name:, ..) -> ElParticle(name, optional)
    SeqParticle(seq:, ..) -> SeqParticle(seq, optional)
    ChoiceParticle(choices:, ..) -> ChoiceParticle(choices, optional)
  }
}
/// Parses a mixed-content model:
///
///   '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
/// | '(' S? '#PCDATA' S? ')'
///
/// `name` is the element name the resulting `ElementDecl` is built for. The
/// stored list starts with "#PCDATA" followed by the listed element names.
/// Returns the declaration and the input after the closing delimiter, or
/// `Error(Nil)` when the input is not a mixed-content model.
fn parse_mixed(name: String, doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "(" <> tail ->
      case trim_space(tail) {
        "#PCDATA" <> tail -> {
          use #(els, doc) <- result.try(parse_mixed_elements(tail))
          let decl = ElementDecl(name, Mixed(["#PCDATA", ..els]))
          case els, trim_space(doc) {
            // `)*` closes either form; it is matched first so the `*` is
            // never left behind in the remaining input.
            _, ")*" <> tail -> Ok(#(decl, tail))
            // A bare `)` is only allowed when no element names were listed.
            [], ")" <> tail -> Ok(#(decl, tail))
            _, _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}
/// Collects the element names of a mixed-content model, each introduced by
/// `|` with optional surrounding whitespace.
///
/// Always returns `Ok`: the names found (possibly none) and the input left
/// at the point where no further `| Name` could be parsed.
fn parse_mixed_elements(doc: String) -> Result(#(List(String), String), Nil) {
  let parse_alternative = fn(input) {
    case trim_space(input) {
      "|" <> rest -> parse_name(trim_space(rest))
      _ -> Error(Nil)
    }
  }
  Ok(parse_multiple_to_list(doc, parse_alternative, []))
}
fn get_entity_replacement( fn get_entity_replacement(
entity: String, entity: String,
decl_list: List(Declaration), decl_list: List(Declaration),
@@ -537,10 +788,10 @@ fn get_entity_replacement(
} }
fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) { fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
let doc = trim_space(doc) use doc <- result.try(trim_mandatory_space(doc))
case doc { case doc {
"SYSTEM" <> tail -> { "SYSTEM" <> tail -> {
let doc = trim_space(tail) use doc <- result.try(trim_mandatory_space(tail))
use #(system_literal, doc) <- result.try(parse_system_literal( use #(system_literal, doc) <- result.try(parse_system_literal(
doc, doc,
None, None,
@@ -549,13 +800,13 @@ fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil)
Ok(#(Some(SystemID(system_literal:)), doc)) Ok(#(Some(SystemID(system_literal:)), doc))
} }
"PUBLIC" <> tail -> { "PUBLIC" <> tail -> {
let doc = trim_space(tail) use doc <- result.try(trim_mandatory_space(tail))
use #(public_literal, doc) <- result.try(parse_public_literal( use #(public_literal, doc) <- result.try(parse_public_literal(
doc, doc,
None, None,
"", "",
)) ))
let doc = trim_space(doc) use doc <- result.try(trim_mandatory_space(doc))
use #(system_literal, doc) <- result.try(parse_system_literal( use #(system_literal, doc) <- result.try(parse_system_literal(
doc, doc,
None, None,
@@ -695,11 +946,11 @@ fn parse_misc(doc: String) -> String {
try_parsers( try_parsers(
[ [
parse_comment, parse_comment,
parse_pi,
fn(doc) { fn(doc) {
parse_space(doc) parse_space(doc)
|> result.map(fn(sp) { #(Whitespace, sp.1) }) |> result.map(fn(sp) { #(Whitespace, sp.1) })
}, },
parse_pi,
], ],
doc, doc,
) )
@@ -729,8 +980,8 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
} }
fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) { fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
case trim_space(doc) { case trim_mandatory_space(doc) {
"version=" <> tail -> { Ok("version=" <> tail) -> {
use #(version, doc) <- result.try(parse_version(tail)) use #(version, doc) <- result.try(parse_version(tail))
Ok(#(version, doc)) Ok(#(version, doc))
} }
@@ -770,8 +1021,8 @@ fn do_parse_version(
} }
fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) { fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
case trim_space(doc) { case trim_mandatory_space(doc) {
"encoding=" <> tail -> { Ok("encoding=" <> tail) -> {
case tail { case tail {
"\"" <> tail -> { "\"" <> tail -> {
use #(encoding, doc) <- result.try(parse_encoding(tail)) use #(encoding, doc) <- result.try(parse_encoding(tail))
@@ -821,10 +1072,11 @@ fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
} }
/// Parses the standalone document declaration of the XML declaration.
///
/// Requires leading whitespace, then `standalone=` followed by `yes` or `no`
/// in single or double quotes. Returns `True` for `yes`, `False` for `no`,
/// plus the remaining input; anything else is `Error(Nil)`.
fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
  case trim_mandatory_space(doc) {
    Ok("standalone=\"yes\"" <> tail) | Ok("standalone='yes'" <> tail) ->
      Ok(#(True, tail))
    // "no" must yield False; it previously returned True like "yes".
    Ok("standalone=\"no\"" <> tail) | Ok("standalone='no'" <> tail) ->
      Ok(#(False, tail))
    _ -> Error(Nil)
  }
}
@@ -1053,6 +1305,18 @@ fn parse_name_char(doc: String) -> Result(#(String, String), Nil) {
} }
} }
/// Strips leading whitespace that must be present.
///
/// Returns the input after the whitespace run, or `Error(Nil)` when the
/// input does not start with at least one whitespace match.
fn trim_mandatory_space(doc: String) -> Result(String, Nil) {
  doc |> do_trim_mandatory_space(True)
}
/// Worker for `trim_mandatory_space`.
///
/// `first` is `True` until one `parse_space` match has been consumed; a
/// failure while it is still `True` means the mandatory whitespace is absent.
fn do_trim_mandatory_space(doc: String, first: Bool) -> Result(String, Nil) {
  case parse_space(doc) {
    Ok(#(_, rest)) -> do_trim_mandatory_space(rest, False)
    Error(_) ->
      case first {
        True -> Error(Nil)
        False -> Ok(doc)
      }
  }
}
fn trim_space(doc: String) -> String { fn trim_space(doc: String) -> String {
case parse_space(doc) { case parse_space(doc) {
Ok(#(_, doc)) -> trim_space(doc) Ok(#(_, doc)) -> trim_space(doc)
@@ -1094,6 +1358,21 @@ fn parse_multiple(
} }
} }
/// Applies `with` repeatedly to the input, collecting each parsed string.
///
/// Stops at the first failure or when the input is empty. Returns the
/// collected strings in parse order (after any items already in `acc`) and
/// the input that was left unparsed.
fn parse_multiple_to_list(
  to_parse input: String,
  with parser: fn(String) -> Result(#(String, String), Nil),
  acc collected: List(String),
) -> #(List(String), String) {
  // Empty input is treated like a failed parse: nothing more to collect.
  let step = case input {
    "" -> Error(Nil)
    _ -> parser(input)
  }
  case step {
    Ok(#(item, remaining)) ->
      parse_multiple_to_list(remaining, parser, [item, ..collected])
    Error(_) -> #(list.reverse(collected), input)
  }
}
fn parse_multiple_optional( fn parse_multiple_optional(
to_parse str: String, to_parse str: String,
with to_run: fn(String) -> Result(#(String, String), Nil), with to_run: fn(String) -> Result(#(String, String), Nil),