From 15c6062d0477ee4dc2aa70880389f6d031ffc84b Mon Sep 17 00:00:00 2001 From: Gareth Pendleton Date: Wed, 15 Oct 2025 22:54:21 +0100 Subject: [PATCH] feat: Start work on !ELEMENT --- src/glxml.gleam | 325 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 302 insertions(+), 23 deletions(-) diff --git a/src/glxml.gleam b/src/glxml.gleam index dbed3b1..7b270eb 100644 --- a/src/glxml.gleam +++ b/src/glxml.gleam @@ -6,15 +6,35 @@ import gleam/option.{type Option, None, Some} import gleam/result import gleam/string +pub type Content { + Empty + Any + Mixed(content: List(String)) + Choice(content: ContentParticle) +} + pub type Declaration { XMLDecl(versioninfo: String, encoding: String, standalone: Bool) GEntityDecl PEntityDecl(name: String, decl: String) - ElementDecl + ElementDecl(name: String, content: Content) AttListDecl NotationDecl } +pub type Optional { + One + OneOrMore + ZeroOrMore + ZeroOrOne +} + +pub type ContentParticle { + ElParticle(name: String, optional: Optional) + ChoiceParticle(choices: List(ContentParticle), optional: Optional) + SeqParticle(seq: List(ContentParticle), optional: Optional) +} + pub type Entity { InternalEntity(value: String) SystemExternalEntity(literal: String) @@ -58,14 +78,15 @@ pub type Element { pub fn main() { parse_document( + "\r\n\r\n]>", //"\r\n \n]]>", - // " - " -'"> -asdf - ?>%\"/> - -", + // " + // " + // '"> + // asdf + // ?>%\"/> + // + // ", ) |> echo } @@ -284,7 +305,7 @@ fn parse_attribute( doc: String, doctype: Option(DocType), ) -> Result(#(Attribute, String), Nil) { - let doc = trim_space(doc) + use doc <- result.try(trim_mandatory_space(doc)) use #(name, doc) <- result.try(parse_name(doc)) case doc { "=" <> tail -> { @@ -461,7 +482,7 @@ fn parse_prolog( fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) { case doc { " tail -> { - let doc = trim_space(tail) + use doc <- result.try(trim_mandatory_space(tail)) use #(name, doc) <- result.try(parse_name(doc)) let #(external_id, doc) = @@ -518,11 +539,241 @@ fn do_parse_int_subset( } } _ -> { - todo + case + try_parsers( + [ + parse_elementdecl, + ], + doc, + ) + { + Ok(#(decl, doc)) -> { + do_parse_int_subset(doc, [decl, ..decl_list]) + } + Error(_) -> Ok(#(list.reverse(decl_list), doc)) + } } } } +fn parse_elementdecl(doc: String) -> Result(#(Declaration, String), Nil) { + case doc { + " tail -> { + use doc <- result.try(trim_mandatory_space(tail)) + use #(name, doc) <- result.try(parse_name(doc)) + use doc <- result.try(trim_mandatory_space(doc)) + case doc { + "EMPTY" <> tail -> { + let doc = trim_space(tail) + case doc { + ")" <> tail -> { + Ok(#(ElementDecl(name, Empty), tail)) + } + _ -> Error(Nil) + } + } + "ANY" <> tail -> { + let doc = trim_space(tail) + case doc { + ")" <> tail -> { + Ok(#(ElementDecl(name, Any), tail)) + } + _ -> Error(Nil) + } + } + "(" <> _ -> { + try_parsers([parse_mixed(name, _), parse_children(name, _)], doc) + } + _ -> Error(Nil) + } + } + _ -> Error(Nil) + } +} + +fn parse_children( + name: String, + doc: String, +) -> Result(#(Declaration, String), Nil) { + use #(children, doc) <- result.try(try_parsers([parse_choice, parse_seq], doc)) + + let #(children, doc) = case doc { + "?" <> tail -> { + #(set_optional(children, ZeroOrOne), tail) + } + "*" <> tail -> { + #(set_optional(children, ZeroOrMore), tail) + } + "+" <> tail -> { + #(set_optional(children, OneOrMore), tail) + } + _ -> { + #(children, doc) + } + } + children |> echo + doc |> echo + todo +} + +fn parse_choice(doc: String) -> Result(#(ContentParticle, String), Nil) { + case doc { + "(" <> tail -> { + let doc = trim_space(tail) + use #(cp, doc) <- result.try(parse_cp(doc)) + use #(cps, doc) <- result.try(case do_parse_choice(doc, [cp]) { + Ok(#(ChoiceParticle([], _), _)) + | Ok(#(ChoiceParticle([_], _), _)) + | Error(_) -> Error(Nil) + Ok(#(cps, doc)) -> Ok(#(cps, doc)) + }) + let doc = trim_space(doc) + case doc { + ")" <> tail -> Ok(#(cps, tail)) + _ -> Error(Nil) + } + } + _ -> Error(Nil) + } +} + +fn do_parse_choice( + doc: String, + acc: List(ContentParticle), +) -> Result(#(ContentParticle, String), Nil) { + let doc = trim_space(doc) + case doc { + "|" <> tail -> { + let doc = trim_space(tail) + use #(cp, doc) <- result.try(parse_cp(doc)) + do_parse_choice(doc, [cp, ..acc]) + } + _ -> Ok(#(ChoiceParticle(list.reverse(acc), One), doc)) + } +} + +fn parse_seq(doc: String) -> Result(#(ContentParticle, String), Nil) { + case doc { + "(" <> tail -> { + let doc = trim_space(tail) + use #(cp, doc) <- result.try(parse_cp(doc)) + let #(cps, doc) = case do_parse_seq(doc, [cp]) { + Ok(#(cps, doc)) -> #(cps, doc) + Error(_) -> #(SeqParticle([cp], One), doc) + } + let doc = trim_space(doc) + case doc { + ")" <> tail -> Ok(#(cps, tail)) + _ -> Error(Nil) + } + } + _ -> Error(Nil) + } +} + +fn do_parse_seq( + doc: String, + acc: List(ContentParticle), +) -> Result(#(ContentParticle, String), Nil) { + let doc = trim_space(doc) + case doc { + "," <> tail -> { + let doc = trim_space(tail) + use #(cp, doc) <- result.try(parse_cp(doc)) + do_parse_seq(doc, [cp, ..acc]) + } + _ -> Ok(#(SeqParticle(list.reverse(acc), One), doc)) + } +} + +fn parse_cp(doc: String) -> Result(#(ContentParticle, String), Nil) { + use #(el, doc) <- result.try(try_parsers( + [ + fn(doc) { + use #(name, doc) <- result.try(parse_name(doc)) + Ok(#(ElParticle(name, One), doc)) + }, + parse_choice, + parse_seq, + ], + doc, + )) + + case doc { + "?" <> tail -> { + Ok(#(set_optional(el, ZeroOrOne), tail)) + } + "*" <> tail -> { + Ok(#(set_optional(el, ZeroOrMore), tail)) + } + "+" <> tail -> { + Ok(#(set_optional(el, OneOrMore), tail)) + } + _ -> { + Ok(#(el, doc)) + } + } +} + +fn set_optional(el: ContentParticle, optional: Optional) -> ContentParticle { + case el { + ChoiceParticle(choices, _) -> ChoiceParticle(choices:, optional:) + ElParticle(name, _) -> ElParticle(name:, optional:) + SeqParticle(seq, _) -> SeqParticle(seq:, optional:) + } +} + +fn parse_mixed(name: String, doc: String) -> Result(#(Declaration, String), Nil) { + case doc { + "(" <> tail -> { + let doc = trim_space(tail) + case doc { + "#PCDATA" <> tail -> { + use #(els, doc) <- result.try(parse_mixed_elements(tail)) + let doc = trim_space(doc) + case els { + [] -> { + case doc { + ")*" <> tail -> + Ok(#(ElementDecl(name, Mixed(["#PCDATA"])), tail)) + _ -> Error(Nil) + } + } + _ -> { + case doc { + ")" <> tail -> + Ok(#(ElementDecl(name, Mixed(["#PCDATA", ..els])), tail)) + _ -> Error(Nil) + } + } + } + } + _ -> Error(Nil) + } + } + _ -> Error(Nil) + } +} + +fn parse_mixed_elements(doc: String) -> Result(#(List(String), String), Nil) { + Ok( + parse_multiple_to_list( + doc, + fn(doc) { + let doc = trim_space(doc) + case doc { + "|" <> tail -> { + let doc = trim_space(tail) + parse_name(doc) + } + _ -> Error(Nil) + } + }, + [], + ), + ) +} + fn get_entity_replacement( entity: String, decl_list: List(Declaration), @@ -537,10 +788,10 @@ fn get_entity_replacement( } fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) { - let doc = trim_space(doc) + use doc <- result.try(trim_mandatory_space(doc)) case doc { "SYSTEM" <> tail -> { - let doc = trim_space(tail) + use doc <- result.try(trim_mandatory_space(tail)) use #(system_literal, doc) <- result.try(parse_system_literal( doc, None, @@ -549,13 +800,13 @@ fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) Ok(#(Some(SystemID(system_literal:)), doc)) } "PUBLIC" <> tail -> { - let doc = trim_space(tail) + use doc <- result.try(trim_mandatory_space(tail)) use #(public_literal, doc) <- result.try(parse_public_literal( doc, None, "", )) - let doc = trim_space(doc) + use doc <- result.try(trim_mandatory_space(doc)) use #(system_literal, doc) <- result.try(parse_system_literal( doc, None, @@ -695,11 +946,11 @@ fn parse_misc(doc: String) -> String { try_parsers( [ parse_comment, + parse_pi, fn(doc) { parse_space(doc) |> result.map(fn(sp) { #(Whitespace, sp.1) }) }, - parse_pi, ], doc, ) @@ -729,8 +980,8 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) { } fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) { - case trim_space(doc) { - "version=" <> tail -> { + case trim_mandatory_space(doc) { + Ok("version=" <> tail) -> { use #(version, doc) <- result.try(parse_version(tail)) Ok(#(version, doc)) } @@ -770,8 +1021,8 @@ fn do_parse_version( } fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) { - case trim_space(doc) { - "encoding=" <> tail -> { + case trim_mandatory_space(doc) { + Ok("encoding=" <> tail) -> { case tail { "\"" <> tail -> { use #(encoding, doc) <- result.try(parse_encoding(tail)) @@ -821,10 +1072,11 @@ fn parse_encoding(doc: String) -> Result(#(String, String), Nil) { } fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) { - case trim_space(doc) { - "standalone=\"yes\"" <> tail | "standalone='yes'" <> tail -> + case trim_mandatory_space(doc) { + Ok("standalone=\"yes\"" <> tail) | Ok("standalone='yes'" <> tail) -> + Ok(#(True, tail)) + Ok("standalone=\"no\"" <> tail) | Ok("standalone='no'" <> tail) -> Ok(#(True, tail)) - "standalone=\"no\"" <> tail | "standalone='no'" <> tail -> Ok(#(True, tail)) _ -> Error(Nil) } } @@ -1053,6 +1305,18 @@ fn parse_name_char(doc: String) -> Result(#(String, String), Nil) { } } +fn trim_mandatory_space(doc: String) -> Result(String, Nil) { + do_trim_mandatory_space(doc, True) +} + +fn do_trim_mandatory_space(doc: String, first: Bool) -> Result(String, Nil) { + case parse_space(doc), first { + Ok(#(_, doc)), _ -> do_trim_mandatory_space(doc, False) + Error(_), True -> Error(Nil) + Error(_), False -> Ok(doc) + } +} + fn trim_space(doc: String) -> String { case parse_space(doc) { Ok(#(_, doc)) -> trim_space(doc) @@ -1094,6 +1358,21 @@ fn parse_multiple( } } +fn parse_multiple_to_list( + to_parse str: String, + with to_run: fn(String) -> Result(#(String, String), Nil), + acc ret: List(String), +) -> #(List(String), String) { + case str { + "" -> #(list.reverse(ret), str) + _ -> + case to_run(str) { + Ok(#(r, rest)) -> parse_multiple_to_list(rest, to_run, [r, ..ret]) + Error(_) -> #(list.reverse(ret), str) + } + } +} + fn parse_multiple_optional( to_parse str: String, with to_run: fn(String) -> Result(#(String, String), Nil),