feat: Start work on !ELEMENT
Some checks failed
test / test (push) Has been cancelled

This commit is contained in:
2025-10-15 22:54:21 +01:00
parent 464ad513b6
commit 15c6062d04

View File

@@ -6,15 +6,35 @@ import gleam/option.{type Option, None, Some}
import gleam/result
import gleam/string
/// Content model carried by an `ElementDecl`.
pub type Content {
/// Produced for the `EMPTY` keyword in an element declaration.
Empty
/// Produced for the `ANY` keyword in an element declaration.
Any
/// Mixed content: the list starts with the literal "#PCDATA", followed by
/// the element names listed after it.
Mixed(content: List(String))
/// Wraps a content particle tree (element names, choices and sequences).
/// NOTE(review): presumably the children content model — confirm against
/// the code that constructs it.
Choice(content: ContentParticle)
}
/// A declaration recognised by the parser.
///
/// Constructors without fields carry no parsed data.
pub type Declaration {
  XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
  GEntityDecl
  PEntityDecl(name: String, decl: String)
  /// An `<!ELEMENT ...>` declaration: the declared element name and its
  /// content model. This is the only `ElementDecl` constructor; a second,
  /// field-less constructor of the same name cannot coexist with it.
  ElementDecl(name: String, content: Content)
  AttListDecl
  NotationDecl
}
/// Occurrence indicator attached to a content particle.
pub type Optional {
/// No suffix: the default given to freshly parsed particles.
One
/// Set when the particle is followed by `+`.
OneOrMore
/// Set when the particle is followed by `*`.
ZeroOrMore
/// Set when the particle is followed by `?`.
ZeroOrOne
}
/// One node of a parsed content model, each carrying its occurrence indicator.
pub type ContentParticle {
/// A single element name.
ElParticle(name: String, optional: Optional)
/// Alternatives separated by `|` inside parentheses, in source order.
ChoiceParticle(choices: List(ContentParticle), optional: Optional)
/// Items separated by `,` inside parentheses, in source order.
SeqParticle(seq: List(ContentParticle), optional: Optional)
}
pub type Entity {
InternalEntity(value: String)
SystemExternalEntity(literal: String)
@@ -58,14 +78,15 @@ pub type Element {
/// Development entry point: runs `parse_document` on a hard-coded sample
/// document and echoes the result.
// NOTE(review): the argument list below contains more than one sample
// literal (some commented out); confirm which one is meant to be live.
pub fn main() {
parse_document(
"<?xml version=\"1.1\"?>\r\n<!DOCTYPE el [\r\n<!ELEMENT div1 (head, (p | list | note)*, div2*)>\r\n]>",
//"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<b blah:test='1'><a attr='ha&#x20;&#38;#38;ha' battr='baba' ref='&amp;'/><!-- ma comment --><![CDATA[Testing&&<haha>]]><?test asuhashd ?></b>",
// "<doc>
"<doc>
<A a=\"asdf>'&#34;>
asdf
?>%\"/>
<A a='\"\">&#39;&#34;'/>
</doc>",
// "<doc>
// <A a=\"asdf>'&#34;>
// asdf
// ?>%\"/>
// <A a='\"\">&#39;&#34;'/>
// </doc>",
)
|> echo
}
@@ -284,7 +305,7 @@ fn parse_attribute(
doc: String,
doctype: Option(DocType),
) -> Result(#(Attribute, String), Nil) {
let doc = trim_space(doc)
use doc <- result.try(trim_mandatory_space(doc))
use #(name, doc) <- result.try(parse_name(doc))
case doc {
"=" <> tail -> {
@@ -461,7 +482,7 @@ fn parse_prolog(
fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
case doc {
"<!DOCTYPE" <> tail -> {
let doc = trim_space(tail)
use doc <- result.try(trim_mandatory_space(tail))
use #(name, doc) <- result.try(parse_name(doc))
let #(external_id, doc) =
@@ -518,10 +539,240 @@ fn do_parse_int_subset(
}
}
_ -> {
case
try_parsers(
[
parse_elementdecl,
],
doc,
)
{
Ok(#(decl, doc)) -> {
do_parse_int_subset(doc, [decl, ..decl_list])
}
Error(_) -> Ok(#(list.reverse(decl_list), doc))
}
}
}
}
/// Parses an element type declaration:
/// `'<!ELEMENT' S Name S contentspec S? '>'`.
///
/// `contentspec` is the keyword `EMPTY`, the keyword `ANY`, or a
/// parenthesised model handled by `parse_mixed` / `parse_children`.
///
/// Returns the `ElementDecl` together with the input following the closing
/// `>`, or `Error(Nil)` when the input does not match.
fn parse_elementdecl(doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "<!ELEMENT" <> tail -> {
      use doc <- result.try(trim_mandatory_space(tail))
      use #(name, doc) <- result.try(parse_name(doc))
      use doc <- result.try(trim_mandatory_space(doc))
      use #(decl, doc) <- result.try(case doc {
        "EMPTY" <> tail -> Ok(#(ElementDecl(name, Empty), tail))
        "ANY" <> tail -> Ok(#(ElementDecl(name, Any), tail))
        "(" <> _ ->
          try_parsers([parse_mixed(name, _), parse_children(name, _)], doc)
        _ -> Error(Nil)
      })
      // Every form of the declaration is terminated by optional whitespace
      // and `>` (not `)`), so the check is shared by all branches.
      case trim_space(doc) {
        ">" <> tail -> Ok(#(decl, tail))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Parses the children content model of an element declaration:
/// `(choice | seq) ('?' | '*' | '+')?`.
///
/// `name` is the element name already parsed by the caller. On success the
/// particle tree is wrapped in `Choice`, the `Content` constructor that
/// carries a `ContentParticle`, and returned inside an `ElementDecl` together
/// with the remaining input. The declaration's closing `>` is not consumed
/// here.
fn parse_children(
  name: String,
  doc: String,
) -> Result(#(Declaration, String), Nil) {
  use #(children, doc) <- result.try(try_parsers([parse_choice, parse_seq], doc))
  // An occurrence suffix directly after the closing parenthesis applies to
  // the whole group.
  let #(children, doc) = case doc {
    "?" <> tail -> #(set_optional(children, ZeroOrOne), tail)
    "*" <> tail -> #(set_optional(children, ZeroOrMore), tail)
    "+" <> tail -> #(set_optional(children, OneOrMore), tail)
    _ -> #(children, doc)
  }
  Ok(#(ElementDecl(name, Choice(children)), doc))
}
/// Parses a parenthesised choice group: `'(' cp ('|' cp)+ ')'` with optional
/// whitespace around the separators.
///
/// Fails unless at least two alternatives were collected. Returns the
/// resulting particle and the input after the closing `)`.
fn parse_choice(doc: String) -> Result(#(ContentParticle, String), Nil) {
  case doc {
    "(" <> rest -> {
      use #(first, rest) <- result.try(parse_cp(trim_space(rest)))
      use #(particle, rest) <- result.try(do_parse_choice(rest, [first]))
      case particle, trim_space(rest) {
        // A choice needs two or more alternatives.
        ChoiceParticle([], _), _ | ChoiceParticle([_], _), _ -> Error(Nil)
        _, ")" <> remaining -> Ok(#(particle, remaining))
        _, _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Collects further `'|' cp` alternatives onto `acc` (kept in reverse order).
///
/// When no `|` follows, returns a `ChoiceParticle` with the alternatives in
/// source order and occurrence `One`, plus the whitespace-trimmed remainder.
/// Fails if a `|` is not followed by a content particle.
fn do_parse_choice(
  doc: String,
  acc: List(ContentParticle),
) -> Result(#(ContentParticle, String), Nil) {
  case trim_space(doc) {
    "|" <> rest -> {
      use #(next, rest) <- result.try(parse_cp(trim_space(rest)))
      do_parse_choice(rest, [next, ..acc])
    }
    remaining -> Ok(#(ChoiceParticle(list.reverse(acc), One), remaining))
  }
}
/// Parses a parenthesised sequence group: `'(' cp (',' cp)* ')'` with
/// optional whitespace around the separators.
///
/// Returns the resulting `SeqParticle` and the input after the closing `)`.
fn parse_seq(doc: String) -> Result(#(ContentParticle, String), Nil) {
  case doc {
    "(" <> rest -> {
      use #(first, rest) <- result.try(parse_cp(trim_space(rest)))
      // If collecting further items fails, fall back to a one-item sequence
      // and let the `)` check below decide.
      let #(particle, rest) =
        do_parse_seq(rest, [first])
        |> result.unwrap(#(SeqParticle([first], One), rest))
      case trim_space(rest) {
        ")" <> remaining -> Ok(#(particle, remaining))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Collects further `',' cp` items onto `acc` (kept in reverse order).
///
/// When no `,` follows, returns a `SeqParticle` with the items in source
/// order and occurrence `One`, plus the whitespace-trimmed remainder.
/// Fails if a `,` is not followed by a content particle.
fn do_parse_seq(
  doc: String,
  acc: List(ContentParticle),
) -> Result(#(ContentParticle, String), Nil) {
  case trim_space(doc) {
    "," <> rest -> {
      use #(next, rest) <- result.try(parse_cp(trim_space(rest)))
      do_parse_seq(rest, [next, ..acc])
    }
    remaining -> Ok(#(SeqParticle(list.reverse(acc), One), remaining))
  }
}
/// Parses one content particle: an element name, a choice group or a
/// sequence group (tried in that order), optionally followed by `?`, `*`
/// or `+`.
///
/// Returns the particle with its occurrence indicator applied and the
/// remaining input.
fn parse_cp(doc: String) -> Result(#(ContentParticle, String), Nil) {
  let parse_element = fn(input) {
    parse_name(input)
    |> result.map(fn(parsed) {
      let #(name, rest) = parsed
      #(ElParticle(name, One), rest)
    })
  }
  use #(particle, rest) <- result.try(try_parsers(
    [parse_element, parse_choice, parse_seq],
    doc,
  ))
  Ok(case rest {
    "?" <> tail -> #(set_optional(particle, ZeroOrOne), tail)
    "*" <> tail -> #(set_optional(particle, ZeroOrMore), tail)
    "+" <> tail -> #(set_optional(particle, OneOrMore), tail)
    _ -> #(particle, rest)
  })
}
/// Returns a copy of `el` whose occurrence indicator is replaced by
/// `optional`; the particle's payload is left untouched.
fn set_optional(el: ContentParticle, optional: Optional) -> ContentParticle {
  case el {
    ElParticle(name, _) -> ElParticle(name, optional)
    ChoiceParticle(alternatives, _) -> ChoiceParticle(alternatives, optional)
    SeqParticle(items, _) -> SeqParticle(items, optional)
  }
}
/// Parses the mixed content model of an element declaration:
///
///   '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
/// | '(' S? '#PCDATA' S? ')'
///
/// `name` is the element name already parsed by the caller. On success
/// returns an `ElementDecl` whose `Mixed` list starts with "#PCDATA" followed
/// by the listed element names, plus the input after the closing token. The
/// declaration's closing `>` is not consumed here.
fn parse_mixed(name: String, doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "(" <> tail ->
      case trim_space(tail) {
        "#PCDATA" <> tail -> {
          use #(els, doc) <- result.try(parse_mixed_elements(tail))
          let decl = ElementDecl(name, Mixed(["#PCDATA", ..els]))
          case els, trim_space(doc) {
            // `)*` is valid with or without element names; it is matched
            // first so the `*` is consumed rather than left in the input.
            _, ")*" <> tail -> Ok(#(decl, tail))
            // A bare `)` is only allowed when no element names were listed.
            [], ")" <> tail -> Ok(#(decl, tail))
            _, _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}
/// Collects the element names of a mixed content model: zero or more
/// `'|' Name` items with optional whitespace around the `|`.
///
/// Always succeeds; returns the names in source order and the input at the
/// point where no further `'|' Name` item could be parsed.
fn parse_mixed_elements(doc: String) -> Result(#(List(String), String), Nil) {
  let parse_alternative = fn(input) {
    case trim_space(input) {
      "|" <> rest -> parse_name(trim_space(rest))
      _ -> Error(Nil)
    }
  }
  parse_multiple_to_list(doc, parse_alternative, [])
  |> Ok
}
fn get_entity_replacement(
entity: String,
@@ -537,10 +788,10 @@ fn get_entity_replacement(
}
fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
let doc = trim_space(doc)
use doc <- result.try(trim_mandatory_space(doc))
case doc {
"SYSTEM" <> tail -> {
let doc = trim_space(tail)
use doc <- result.try(trim_mandatory_space(tail))
use #(system_literal, doc) <- result.try(parse_system_literal(
doc,
None,
@@ -549,13 +800,13 @@ fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil)
Ok(#(Some(SystemID(system_literal:)), doc))
}
"PUBLIC" <> tail -> {
let doc = trim_space(tail)
use doc <- result.try(trim_mandatory_space(tail))
use #(public_literal, doc) <- result.try(parse_public_literal(
doc,
None,
"",
))
let doc = trim_space(doc)
use doc <- result.try(trim_mandatory_space(doc))
use #(system_literal, doc) <- result.try(parse_system_literal(
doc,
None,
@@ -695,11 +946,11 @@ fn parse_misc(doc: String) -> String {
try_parsers(
[
parse_comment,
parse_pi,
fn(doc) {
parse_space(doc)
|> result.map(fn(sp) { #(Whitespace, sp.1) })
},
parse_pi,
],
doc,
)
@@ -729,8 +980,8 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
}
fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
case trim_space(doc) {
"version=" <> tail -> {
case trim_mandatory_space(doc) {
Ok("version=" <> tail) -> {
use #(version, doc) <- result.try(parse_version(tail))
Ok(#(version, doc))
}
@@ -770,8 +1021,8 @@ fn do_parse_version(
}
fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
case trim_space(doc) {
"encoding=" <> tail -> {
case trim_mandatory_space(doc) {
Ok("encoding=" <> tail) -> {
case tail {
"\"" <> tail -> {
use #(encoding, doc) <- result.try(parse_encoding(tail))
@@ -821,10 +1072,11 @@ fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
}
/// Parses the standalone declaration of the XML declaration: mandatory
/// leading whitespace followed by `standalone="yes"` / `standalone='yes'`
/// or `standalone="no"` / `standalone='no'`.
///
/// Returns `True` for `yes` and `False` for `no`, together with the
/// remaining input; anything else yields `Error(Nil)`.
fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
  case trim_mandatory_space(doc) {
    Ok("standalone=\"yes\"" <> tail) | Ok("standalone='yes'" <> tail) ->
      Ok(#(True, tail))
    // `no` must map to False; it previously produced True like `yes`.
    Ok("standalone=\"no\"" <> tail) | Ok("standalone='no'" <> tail) ->
      Ok(#(False, tail))
    _ -> Error(Nil)
  }
}
@@ -1053,6 +1305,18 @@ fn parse_name_char(doc: String) -> Result(#(String, String), Nil) {
}
}
/// Strips leading whitespace that must be present: fails with `Error(Nil)`
/// when `parse_space` cannot match at the start of `doc`, otherwise returns
/// the input after all leading whitespace.
fn trim_mandatory_space(doc: String) -> Result(String, Nil) {
  doc |> do_trim_mandatory_space(True)
}
/// Worker for `trim_mandatory_space`.
///
/// `first` is `True` until one `parse_space` match has been consumed. Once
/// `parse_space` stops matching, the result is an error if nothing was
/// consumed and the remaining input otherwise.
fn do_trim_mandatory_space(doc: String, first: Bool) -> Result(String, Nil) {
  case parse_space(doc) {
    Ok(#(_, rest)) -> do_trim_mandatory_space(rest, False)
    Error(_) ->
      case first {
        True -> Error(Nil)
        False -> Ok(doc)
      }
  }
}
fn trim_space(doc: String) -> String {
case parse_space(doc) {
Ok(#(_, doc)) -> trim_space(doc)
@@ -1094,6 +1358,21 @@ fn parse_multiple(
}
}
/// Applies `to_run` repeatedly to the input, collecting each parsed string.
///
/// Stops when the input is empty (without calling `to_run`) or when `to_run`
/// fails. Returns the collected items in parse order, preceded by any items
/// already in `ret`, and the input left at the stopping point. `ret` is
/// expected in reverse order, newest first.
fn parse_multiple_to_list(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
  acc ret: List(String),
) -> #(List(String), String) {
  // Empty input is treated like a failed parse so both stop conditions share
  // one exit path.
  let step = case str {
    "" -> Error(Nil)
    _ -> to_run(str)
  }
  case step {
    Ok(#(item, rest)) -> parse_multiple_to_list(rest, to_run, [item, ..ret])
    Error(_) -> #(list.reverse(ret), str)
  }
}
fn parse_multiple_optional(
to_parse str: String,
with to_run: fn(String) -> Result(#(String, String), Nil),