diff --git a/src/glxml.gleam b/src/glxml.gleam
index dbed3b1..7b270eb 100644
--- a/src/glxml.gleam
+++ b/src/glxml.gleam
@@ -6,15 +6,35 @@ import gleam/option.{type Option, None, Some}
import gleam/result
import gleam/string
+/// Content model carried by an `ElementDecl`.
+pub type Content {
+ /// Produced by `parse_elementdecl` for the `EMPTY` keyword.
+ Empty
+ /// Produced by `parse_elementdecl` for the `ANY` keyword.
+ Any
+ /// Mixed content as built by `parse_mixed`: the literal "#PCDATA" first,
+ /// followed by the element names listed after it.
+ Mixed(content: List(String))
+ /// Wraps a single content particle.
+ /// NOTE(review): presumably meant for element-only ("children") content;
+ /// nothing in the visible code constructs this variant yet - confirm.
+ Choice(content: ContentParticle)
+}
+
+/// A declaration produced by the prolog / internal-subset parsers; values of
+/// this type are accumulated into the `decl_list` handled by
+/// `do_parse_int_subset`.
pub type Declaration {
XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
GEntityDecl
PEntityDecl(name: String, decl: String)
- ElementDecl
+ /// Element type declaration: the declared element name together with its
+ /// content model, as returned by `parse_elementdecl`.
+ ElementDecl(name: String, content: Content)
AttListDecl
NotationDecl
}
+/// Occurrence indicator attached to a content particle.
+pub type Optional {
+ /// No suffix: the default every particle is constructed with.
+ One
+ /// Set when the particle is followed by `+`.
+ OneOrMore
+ /// Set when the particle is followed by `*`.
+ ZeroOrMore
+ /// Set when the particle is followed by `?`.
+ ZeroOrOne
+}
+
+/// One node of an element content model, each carrying its own occurrence
+/// indicator.
+pub type ContentParticle {
+ /// A single element name (built by `parse_cp`).
+ ElParticle(name: String, optional: Optional)
+ /// Alternatives separated by `|` (built by `do_parse_choice`).
+ ChoiceParticle(choices: List(ContentParticle), optional: Optional)
+ /// Items separated by `,` (built by `do_parse_seq`).
+ SeqParticle(seq: List(ContentParticle), optional: Optional)
+}
+
pub type Entity {
InternalEntity(value: String)
SystemExternalEntity(literal: String)
@@ -58,14 +78,15 @@ pub type Element {
+/// Ad-hoc entry point: feeds a hard-coded sample document to
+/// `parse_document` and echoes whatever it returns. Earlier sample inputs are
+/// kept below as commented-out lines.
pub fn main() {
parse_document(
+ "\r\n\r\n]>",
//"\r\n \n]]>",
- // "
- "
-'">
-asdf
- ?>%\"/>
-
-",
+ // "
+ // "
+ // '">
+ // asdf
+ // ?>%\"/>
+ //
+ // ",
)
|> echo
}
@@ -284,7 +305,7 @@ fn parse_attribute(
doc: String,
doctype: Option(DocType),
) -> Result(#(Attribute, String), Nil) {
- let doc = trim_space(doc)
+ use doc <- result.try(trim_mandatory_space(doc))
use #(name, doc) <- result.try(parse_name(doc))
case doc {
"=" <> tail -> {
@@ -461,7 +482,7 @@ fn parse_prolog(
fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
case doc {
" tail -> {
- let doc = trim_space(tail)
+ use doc <- result.try(trim_mandatory_space(tail))
use #(name, doc) <- result.try(parse_name(doc))
let #(external_id, doc) =
@@ -518,11 +539,241 @@ fn do_parse_int_subset(
}
}
_ -> {
- todo
+ case
+ try_parsers(
+ [
+ parse_elementdecl,
+ ],
+ doc,
+ )
+ {
+ Ok(#(decl, doc)) -> {
+ do_parse_int_subset(doc, [decl, ..decl_list])
+ }
+ Error(_) -> Ok(#(list.reverse(decl_list), doc))
+ }
}
}
}
+/// Parses one element type declaration,
+/// `'<!ELEMENT' S Name S contentspec S? '>'`.
+///
+/// `contentspec` is the keyword `EMPTY`, the keyword `ANY`, or a
+/// parenthesised group that is handed to `parse_mixed` and then
+/// `parse_children`.
+///
+/// Returns the resulting `ElementDecl` paired with the input that follows the
+/// closing `>`, or `Error(Nil)` when `doc` does not start with a well-formed
+/// declaration.
+fn parse_elementdecl(doc: String) -> Result(#(Declaration, String), Nil) {
+  case doc {
+    "<!ELEMENT" <> tail -> {
+      use doc <- result.try(trim_mandatory_space(tail))
+      use #(name, doc) <- result.try(parse_name(doc))
+      use doc <- result.try(trim_mandatory_space(doc))
+      use #(decl, doc) <- result.try(case doc {
+        "EMPTY" <> tail -> Ok(#(ElementDecl(name, Empty), tail))
+        "ANY" <> tail -> Ok(#(ElementDecl(name, Any), tail))
+        "(" <> _ ->
+          try_parsers([parse_mixed(name, _), parse_children(name, _)], doc)
+        _ -> Error(Nil)
+      })
+      // Every content spec is followed by optional whitespace and `>`.
+      // The terminator is checked once here so that all three forms consume
+      // it (the keyword forms previously looked for `)` instead, and the
+      // parenthesised forms left the `>` in the input).
+      case trim_space(doc) {
+        ">" <> tail -> Ok(#(decl, tail))
+        _ -> Error(Nil)
+      }
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Parses element-only content for the element called `name`: a choice or
+/// sequence group, optionally followed by `?`, `*` or `+`.
+///
+/// Returns `ElementDecl(name, Choice(group))` and the input after the group
+/// (and its occurrence suffix, if any). `Choice` is used as the wrapper for
+/// both choice and sequence groups because it is the only `Content` variant
+/// that holds a `ContentParticle`. Fails with `Error(Nil)` when neither
+/// `parse_choice` nor `parse_seq` accepts `doc`.
+fn parse_children(
+  name: String,
+  doc: String,
+) -> Result(#(Declaration, String), Nil) {
+  use #(children, doc) <- result.try(try_parsers([parse_choice, parse_seq], doc))
+
+  // The suffix must follow the closing parenthesis directly; without one the
+  // group keeps the occurrence it was built with.
+  let #(children, doc) = case doc {
+    "?" <> tail -> #(set_optional(children, ZeroOrOne), tail)
+    "*" <> tail -> #(set_optional(children, ZeroOrMore), tail)
+    "+" <> tail -> #(set_optional(children, OneOrMore), tail)
+    _ -> #(children, doc)
+  }
+
+  Ok(#(ElementDecl(name, Choice(children)), doc))
+}
+
+/// Parses a parenthesised choice group: `(`, a content particle, further
+/// particles each introduced by `|`, then `)`. Whitespace around the
+/// particles and separators is skipped.
+///
+/// A group with fewer than two alternatives is rejected, as is a missing
+/// closing parenthesis. On success the `ChoiceParticle` is returned together
+/// with the input after `)`; an occurrence suffix is not consumed here.
+fn parse_choice(doc: String) -> Result(#(ContentParticle, String), Nil) {
+  case doc {
+    "(" <> after_paren -> {
+      use #(first, rest) <- result.try(parse_cp(trim_space(after_paren)))
+      use #(group, rest) <- result.try(do_parse_choice(rest, [first]))
+      case group, trim_space(rest) {
+        // Zero or one alternative is not a choice.
+        ChoiceParticle([], _), _ | ChoiceParticle([_], _), _ -> Error(Nil)
+        _, ")" <> remaining -> Ok(#(group, remaining))
+        _, _ -> Error(Nil)
+      }
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Recursive worker for `parse_choice`.
+///
+/// `acc` holds the alternatives parsed so far, newest first. While the
+/// input (after skipping whitespace) starts with `|`, one more content
+/// particle is parsed and pushed onto `acc`; a particle that fails to parse
+/// aborts with that error. When no `|` follows, the collected alternatives
+/// are returned in source order as a `ChoiceParticle` with occurrence `One`,
+/// paired with the whitespace-trimmed remainder.
+fn do_parse_choice(
+  doc: String,
+  acc: List(ContentParticle),
+) -> Result(#(ContentParticle, String), Nil) {
+  case trim_space(doc) {
+    "|" <> after_bar -> {
+      use #(alternative, rest) <- result.try(parse_cp(trim_space(after_bar)))
+      do_parse_choice(rest, [alternative, ..acc])
+    }
+    rest -> Ok(#(ChoiceParticle(list.reverse(acc), One), rest))
+  }
+}
+
+/// Parses a parenthesised sequence group: `(`, a content particle, any
+/// number of further particles each introduced by `,`, then `)`.
+///
+/// A single-item group is accepted. On success the `SeqParticle` is returned
+/// together with the input after `)`; an occurrence suffix is not consumed
+/// here.
+fn parse_seq(doc: String) -> Result(#(ContentParticle, String), Nil) {
+  case doc {
+    "(" <> after_paren -> {
+      use #(first, rest) <- result.try(parse_cp(trim_space(after_paren)))
+      // If collecting the remaining items fails, fall back to a one-item
+      // sequence positioned right after the first particle.
+      let #(group, rest) =
+        do_parse_seq(rest, [first])
+        |> result.unwrap(#(SeqParticle([first], One), rest))
+      case trim_space(rest) {
+        ")" <> remaining -> Ok(#(group, remaining))
+        _ -> Error(Nil)
+      }
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Recursive worker for `parse_seq`.
+///
+/// `acc` holds the items parsed so far, newest first. While the input
+/// (after skipping whitespace) starts with `,`, one more content particle is
+/// parsed and pushed onto `acc`; a particle that fails to parse aborts with
+/// that error. When no `,` follows, the collected items are returned in
+/// source order as a `SeqParticle` with occurrence `One`, paired with the
+/// whitespace-trimmed remainder.
+fn do_parse_seq(
+  doc: String,
+  acc: List(ContentParticle),
+) -> Result(#(ContentParticle, String), Nil) {
+  case trim_space(doc) {
+    "," <> after_comma -> {
+      use #(item, rest) <- result.try(parse_cp(trim_space(after_comma)))
+      do_parse_seq(rest, [item, ..acc])
+    }
+    rest -> Ok(#(SeqParticle(list.reverse(acc), One), rest))
+  }
+}
+
+/// Parses one content particle: an element name, a choice group or a
+/// sequence group (tried in that order), optionally followed directly by an
+/// occurrence suffix `?`, `*` or `+`.
+///
+/// Returns the particle with the matching occurrence applied, paired with
+/// the remaining input. Without a suffix the particle is returned unchanged.
+fn parse_cp(doc: String) -> Result(#(ContentParticle, String), Nil) {
+  let parse_element = fn(input) {
+    parse_name(input)
+    |> result.map(fn(parsed) { #(ElParticle(parsed.0, One), parsed.1) })
+  }
+  use #(particle, doc) <- result.try(
+    try_parsers([parse_element, parse_choice, parse_seq], doc),
+  )
+
+  let with_occurrence = fn(occurrence, rest) {
+    Ok(#(set_optional(particle, occurrence), rest))
+  }
+  case doc {
+    "?" <> rest -> with_occurrence(ZeroOrOne, rest)
+    "*" <> rest -> with_occurrence(ZeroOrMore, rest)
+    "+" <> rest -> with_occurrence(OneOrMore, rest)
+    _ -> Ok(#(particle, doc))
+  }
+}
+
+/// Returns a copy of `el` whose occurrence indicator is replaced by
+/// `optional`; the variant and its payload are left as they were.
+fn set_optional(el: ContentParticle, optional: Optional) -> ContentParticle {
+  case el {
+    ElParticle(name, _) -> ElParticle(name: name, optional: optional)
+    SeqParticle(seq, _) -> SeqParticle(seq: seq, optional: optional)
+    ChoiceParticle(choices, _) ->
+      ChoiceParticle(choices: choices, optional: optional)
+  }
+}
+
+/// Parses a mixed-content model for the element called `name`:
+///
+///   '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
+///   | '(' S? '#PCDATA' S? ')'
+///
+/// When element names are listed the group must be closed with `)*`; a bare
+/// `#PCDATA` group may be closed with either `)*` or `)`. (The two cases
+/// were previously swapped, rejecting `(#PCDATA)` and accepting a name list
+/// without its `*`.)
+///
+/// Returns `ElementDecl(name, Mixed(["#PCDATA", ..names]))` paired with the
+/// input after the terminator, or `Error(Nil)` if `doc` is not a mixed
+/// content model.
+fn parse_mixed(name: String, doc: String) -> Result(#(Declaration, String), Nil) {
+  case doc {
+    "(" <> tail ->
+      case trim_space(tail) {
+        "#PCDATA" <> tail -> {
+          use #(els, doc) <- result.try(parse_mixed_elements(tail))
+          let decl = ElementDecl(name, Mixed(["#PCDATA", ..els]))
+          case els, trim_space(doc) {
+            // `)*` is tested first so the `*` is never left behind.
+            _, ")*" <> tail -> Ok(#(decl, tail))
+            // A plain `)` is only valid when no element names were listed.
+            [], ")" <> tail -> Ok(#(decl, tail))
+            _, _ -> Error(Nil)
+          }
+        }
+        _ -> Error(Nil)
+      }
+    _ -> Error(Nil)
+  }
+}
+
+/// Collects the element names that follow `#PCDATA` in a mixed-content
+/// model. Each name must be introduced by `|`, with whitespace allowed on
+/// both sides of the bar.
+///
+/// Always returns `Ok`: the names found (possibly none) in source order,
+/// paired with the input at the point where no further `| Name` matched.
+fn parse_mixed_elements(doc: String) -> Result(#(List(String), String), Nil) {
+  let parse_alternative = fn(input) {
+    case trim_space(input) {
+      "|" <> after_bar -> parse_name(trim_space(after_bar))
+      _ -> Error(Nil)
+    }
+  }
+
+  doc
+  |> parse_multiple_to_list(parse_alternative, [])
+  |> Ok
+}
+
fn get_entity_replacement(
entity: String,
decl_list: List(Declaration),
@@ -537,10 +788,10 @@ fn get_entity_replacement(
}
fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
- let doc = trim_space(doc)
+ use doc <- result.try(trim_mandatory_space(doc))
case doc {
"SYSTEM" <> tail -> {
- let doc = trim_space(tail)
+ use doc <- result.try(trim_mandatory_space(tail))
use #(system_literal, doc) <- result.try(parse_system_literal(
doc,
None,
@@ -549,13 +800,13 @@ fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil)
Ok(#(Some(SystemID(system_literal:)), doc))
}
"PUBLIC" <> tail -> {
- let doc = trim_space(tail)
+ use doc <- result.try(trim_mandatory_space(tail))
use #(public_literal, doc) <- result.try(parse_public_literal(
doc,
None,
"",
))
- let doc = trim_space(doc)
+ use doc <- result.try(trim_mandatory_space(doc))
use #(system_literal, doc) <- result.try(parse_system_literal(
doc,
None,
@@ -695,11 +946,11 @@ fn parse_misc(doc: String) -> String {
try_parsers(
[
parse_comment,
+ parse_pi,
fn(doc) {
parse_space(doc)
|> result.map(fn(sp) { #(Whitespace, sp.1) })
},
- parse_pi,
],
doc,
)
@@ -729,8 +980,8 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
}
fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
- case trim_space(doc) {
- "version=" <> tail -> {
+ case trim_mandatory_space(doc) {
+ Ok("version=" <> tail) -> {
use #(version, doc) <- result.try(parse_version(tail))
Ok(#(version, doc))
}
@@ -770,8 +1021,8 @@ fn do_parse_version(
}
fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
- case trim_space(doc) {
- "encoding=" <> tail -> {
+ case trim_mandatory_space(doc) {
+ Ok("encoding=" <> tail) -> {
case tail {
"\"" <> tail -> {
use #(encoding, doc) <- result.try(parse_encoding(tail))
@@ -821,10 +1072,11 @@ fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
}
+/// Parses the standalone declaration of the XML declaration: mandatory
+/// leading whitespace followed by `standalone="yes"` or `standalone="no"`
+/// (single or double quotes).
+///
+/// Returns `True` for "yes" and `False` for "no", paired with the remaining
+/// input; anything else yields `Error(Nil)`.
fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
- case trim_space(doc) {
- "standalone=\"yes\"" <> tail | "standalone='yes'" <> tail ->
+ case trim_mandatory_space(doc) {
+ Ok("standalone=\"yes\"" <> tail) | Ok("standalone='yes'" <> tail) ->
+ Ok(#(True, tail))
+ Ok("standalone=\"no\"" <> tail) | Ok("standalone='no'" <> tail) ->
- Ok(#(True, tail))
+ // "no" must map to False; it used to report True like the "yes" branch.
+ Ok(#(False, tail))
- "standalone=\"no\"" <> tail | "standalone='no'" <> tail -> Ok(#(True, tail))
_ -> Error(Nil)
}
}
@@ -1053,6 +1305,18 @@ fn parse_name_char(doc: String) -> Result(#(String, String), Nil) {
}
}
+/// Strips leading whitespace that is required to be present.
+///
+/// Returns `Error(Nil)` when `doc` does not begin with something
+/// `parse_space` accepts; otherwise returns the input with all leading
+/// whitespace removed.
+fn trim_mandatory_space(doc: String) -> Result(String, Nil) {
+  doc |> do_trim_mandatory_space(True)
+}
+
+/// Recursive worker for `trim_mandatory_space`.
+///
+/// `first` is `True` only while nothing has been consumed yet. Each
+/// successful `parse_space` call recurses on its remainder with `first` set
+/// to `False`. When `parse_space` fails, the result is `Error(Nil)` if no
+/// whitespace was consumed at all, and `Ok` with the current input otherwise.
+fn do_trim_mandatory_space(doc: String, first: Bool) -> Result(String, Nil) {
+  case parse_space(doc) {
+    Ok(#(_, rest)) -> do_trim_mandatory_space(rest, False)
+    Error(_) if first -> Error(Nil)
+    Error(_) -> Ok(doc)
+  }
+}
+
fn trim_space(doc: String) -> String {
case parse_space(doc) {
Ok(#(_, doc)) -> trim_space(doc)
@@ -1094,6 +1358,21 @@ fn parse_multiple(
}
}
+/// Applies the parser `with` to the input repeatedly, collecting every
+/// parsed item.
+///
+/// Stops when the input is empty (the parser is not called on "") or when
+/// the parser fails. Returns the items in the order they were parsed,
+/// prefixed by whatever `acc` already held (oldest last in `acc`), together
+/// with the unconsumed input. Never fails.
+fn parse_multiple_to_list(
+  to_parse input: String,
+  with parser: fn(String) -> Result(#(String, String), Nil),
+  acc collected: List(String),
+) -> #(List(String), String) {
+  let finish = fn() { #(list.reverse(collected), input) }
+  case input {
+    "" -> finish()
+    _ ->
+      case parser(input) {
+        Error(_) -> finish()
+        Ok(#(item, remaining)) ->
+          parse_multiple_to_list(
+            to_parse: remaining,
+            with: parser,
+            acc: [item, ..collected],
+          )
+      }
+  }
+}
+
fn parse_multiple_optional(
to_parse str: String,
with to_run: fn(String) -> Result(#(String, String), Nil),