This commit is contained in:
321
src/glxml.gleam
321
src/glxml.gleam
@@ -6,15 +6,35 @@ import gleam/option.{type Option, None, Some}
|
||||
import gleam/result
|
||||
import gleam/string
|
||||
|
||||
/// The content model attached to an element type declaration.
pub type Content {
  /// Produced for the `EMPTY` keyword.
  Empty
  /// Produced for the `ANY` keyword.
  Any
  /// Mixed content: a list of names; the mixed-content parser always puts
  /// `"#PCDATA"` first, followed by any permitted element names.
  Mixed(content: List(String))
  /// A content model expressed as a content particle.
  Choice(content: ContentParticle)
}
|
||||
|
||||
/// A declaration recognised by the parser.
///
/// Several variants carry no payload yet; only the XML declaration, the
/// parameter entity declaration and the element declaration hold data.
pub type Declaration {
  /// `<?xml ...?>` with its version, encoding and standalone flag.
  XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
  GEntityDecl
  PEntityDecl(name: String, decl: String)
  /// `<!ELEMENT name content>`: the element name and its content model.
  ElementDecl(name: String, content: Content)
  AttListDecl
  NotationDecl
}
|
||||
|
||||
/// Occurrence indicator attached to a content particle.
pub type Optional {
  /// No suffix: the value the particle parsers assign by default.
  One
  /// `+` suffix.
  OneOrMore
  /// `*` suffix.
  ZeroOrMore
  /// `?` suffix.
  ZeroOrOne
}
|
||||
|
||||
/// One node of an element content model, together with its occurrence
/// indicator.
pub type ContentParticle {
  /// A single element name.
  ElParticle(name: String, optional: Optional)
  /// Alternatives separated by `|`.
  ChoiceParticle(choices: List(ContentParticle), optional: Optional)
  /// Particles separated by `,`.
  SeqParticle(seq: List(ContentParticle), optional: Optional)
}
|
||||
|
||||
pub type Entity {
|
||||
InternalEntity(value: String)
|
||||
SystemExternalEntity(literal: String)
|
||||
@@ -58,14 +78,15 @@ pub type Element {
|
||||
|
||||
/// Development entry point: parses a hard-coded document and prints the
/// result with `echo`.
///
/// The commented-out literals are alternative fixtures kept for manual
/// testing; only the first string is passed to `parse_document`.
pub fn main() {
  parse_document(
    "<?xml version=\"1.1\"?>\r\n<!DOCTYPE el [\r\n<!ELEMENT div1 (head, (p | list | note)*, div2*)>\r\n]>",
    //"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<b blah:test='1'><a attr='ha &#38;ha' battr='baba' ref='&'/><!-- ma comment --><![CDATA[Testing&&<haha>]]><?test asuhashd ?></b>",
    // "<doc>
    // <A a=\"asdf>'">
    // asdf
    // ?>%\"/>
    // <A a='\"\">'"'/>
    // </doc>",
  )
  |> echo
}
|
||||
@@ -284,7 +305,7 @@ fn parse_attribute(
|
||||
doc: String,
|
||||
doctype: Option(DocType),
|
||||
) -> Result(#(Attribute, String), Nil) {
|
||||
let doc = trim_space(doc)
|
||||
use doc <- result.try(trim_mandatory_space(doc))
|
||||
use #(name, doc) <- result.try(parse_name(doc))
|
||||
case doc {
|
||||
"=" <> tail -> {
|
||||
@@ -461,7 +482,7 @@ fn parse_prolog(
|
||||
fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
|
||||
case doc {
|
||||
"<!DOCTYPE" <> tail -> {
|
||||
let doc = trim_space(tail)
|
||||
use doc <- result.try(trim_mandatory_space(tail))
|
||||
use #(name, doc) <- result.try(parse_name(doc))
|
||||
|
||||
let #(external_id, doc) =
|
||||
@@ -518,10 +539,240 @@ fn do_parse_int_subset(
|
||||
}
|
||||
}
|
||||
_ -> {
|
||||
case
|
||||
try_parsers(
|
||||
[
|
||||
parse_elementdecl,
|
||||
],
|
||||
doc,
|
||||
)
|
||||
{
|
||||
Ok(#(decl, doc)) -> {
|
||||
do_parse_int_subset(doc, [decl, ..decl_list])
|
||||
}
|
||||
Error(_) -> Ok(#(list.reverse(decl_list), doc))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses an element type declaration:
/// `'<!ELEMENT' S Name S contentspec S? '>'`.
///
/// `contentspec` is `EMPTY`, `ANY`, or a parenthesised mixed/children
/// model. Returns the `ElementDecl` and the input following the closing
/// `>`, or `Error(Nil)` when `doc` does not start with a well-formed
/// element declaration.
fn parse_elementdecl(doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "<!ELEMENT" <> tail -> {
      use doc <- result.try(trim_mandatory_space(tail))
      use #(name, doc) <- result.try(parse_name(doc))
      use doc <- result.try(trim_mandatory_space(doc))
      use #(decl, doc) <- result.try(case doc {
        "EMPTY" <> tail -> Ok(#(ElementDecl(name, Empty), tail))
        "ANY" <> tail -> Ok(#(ElementDecl(name, Any), tail))
        "(" <> _ ->
          try_parsers([parse_mixed(name, _), parse_children(name, _)], doc)
        _ -> Error(Nil)
      })
      // Every form of the declaration is closed by optional whitespace and
      // `>` (not `)`), so the terminator is consumed once, here.
      case trim_space(doc) {
        ">" <> tail -> Ok(#(decl, tail))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Parses the children form of an element content model: a parenthesised
/// choice or sequence, optionally followed by `?`, `*` or `+`.
///
/// On success returns an `ElementDecl` for `name` whose content wraps the
/// parsed particle, together with the unconsumed input.
fn parse_children(
  name: String,
  doc: String,
) -> Result(#(Declaration, String), Nil) {
  use #(children, doc) <- result.try(try_parsers([parse_choice, parse_seq], doc))

  // The occurrence suffix applies to the whole parenthesised group.
  let #(children, doc) = case doc {
    "?" <> tail -> #(set_optional(children, ZeroOrOne), tail)
    "*" <> tail -> #(set_optional(children, ZeroOrMore), tail)
    "+" <> tail -> #(set_optional(children, OneOrMore), tail)
    _ -> #(children, doc)
  }

  // `Choice` is the only `Content` variant that carries a content particle.
  Ok(#(ElementDecl(name, Choice(children)), doc))
}
|
||||
|
||||
/// Parses a parenthesised choice group such as `(a | b | c)`.
///
/// Fails unless the group holds at least two alternatives and is closed
/// by `)`. The returned particle carries the occurrence `One`.
fn parse_choice(doc: String) -> Result(#(ContentParticle, String), Nil) {
  case doc {
    "(" <> after_paren -> {
      use #(first, rest) <- result.try(parse_cp(trim_space(after_paren)))
      use #(choice, rest) <- result.try(do_parse_choice(rest, [first]))
      case choice {
        // Zero or one alternative is not a choice.
        ChoiceParticle([], _) | ChoiceParticle([_], _) -> Error(Nil)
        _ ->
          case trim_space(rest) {
            ")" <> remaining -> Ok(#(choice, remaining))
            _ -> Error(Nil)
          }
      }
    }
    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Collects the remaining `| cp` alternatives of a choice group.
///
/// `acc` holds the alternatives parsed so far, most recent first. When no
/// further `|` follows, the alternatives are returned in source order as a
/// `ChoiceParticle` with occurrence `One`, along with the whitespace-trimmed
/// remaining input.
fn do_parse_choice(
  doc: String,
  acc: List(ContentParticle),
) -> Result(#(ContentParticle, String), Nil) {
  case trim_space(doc) {
    "|" <> after_bar -> {
      use #(alternative, rest) <- result.try(parse_cp(trim_space(after_bar)))
      do_parse_choice(rest, [alternative, ..acc])
    }
    trimmed -> Ok(#(ChoiceParticle(list.reverse(acc), One), trimmed))
  }
}
|
||||
|
||||
/// Parses a parenthesised sequence group such as `(a, b, c)`; a single
/// particle in parentheses is accepted as a one-element sequence.
///
/// The group must be closed by `)`. The returned particle carries the
/// occurrence `One`.
fn parse_seq(doc: String) -> Result(#(ContentParticle, String), Nil) {
  case doc {
    "(" <> after_paren -> {
      use #(first, rest) <- result.try(parse_cp(trim_space(after_paren)))
      // If collecting further items fails, fall back to a sequence holding
      // only the first particle and continue from just after it.
      let #(sequence, rest) =
        do_parse_seq(rest, [first])
        |> result.unwrap(#(SeqParticle([first], One), rest))
      case trim_space(rest) {
        ")" <> remaining -> Ok(#(sequence, remaining))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Collects the remaining `, cp` items of a sequence group.
///
/// `acc` holds the particles parsed so far, most recent first. When no
/// further `,` follows, the particles are returned in source order as a
/// `SeqParticle` with occurrence `One`, along with the whitespace-trimmed
/// remaining input.
fn do_parse_seq(
  doc: String,
  acc: List(ContentParticle),
) -> Result(#(ContentParticle, String), Nil) {
  case trim_space(doc) {
    "," <> after_comma -> {
      use #(item, rest) <- result.try(parse_cp(trim_space(after_comma)))
      do_parse_seq(rest, [item, ..acc])
    }
    trimmed -> Ok(#(SeqParticle(list.reverse(acc), One), trimmed))
  }
}
|
||||
|
||||
/// Parses one content particle: an element name, a choice group or a
/// sequence group (tried in that order), followed by an optional `?`, `*`
/// or `+` occurrence suffix.
fn parse_cp(doc: String) -> Result(#(ContentParticle, String), Nil) {
  // A bare name becomes an element particle occurring exactly once.
  let parse_element = fn(input) {
    parse_name(input)
    |> result.map(fn(parsed) { #(ElParticle(parsed.0, One), parsed.1) })
  }

  use #(particle, rest) <- result.try(
    try_parsers([parse_element, parse_choice, parse_seq], doc),
  )

  Ok(case rest {
    "?" <> tail -> #(set_optional(particle, ZeroOrOne), tail)
    "*" <> tail -> #(set_optional(particle, ZeroOrMore), tail)
    "+" <> tail -> #(set_optional(particle, OneOrMore), tail)
    // No suffix: keep the particle exactly as parsed.
    _ -> #(particle, rest)
  })
}
|
||||
|
||||
/// Returns a copy of `el` whose occurrence indicator is replaced by
/// `optional`; every other field is carried over unchanged.
fn set_optional(el: ContentParticle, optional: Optional) -> ContentParticle {
  case el {
    ElParticle(name: element_name, ..) ->
      ElParticle(name: element_name, optional: optional)
    ChoiceParticle(choices: alternatives, ..) ->
      ChoiceParticle(choices: alternatives, optional: optional)
    SeqParticle(seq: items, ..) -> SeqParticle(seq: items, optional: optional)
  }
}
|
||||
|
||||
/// Parses a mixed content model for the element `name`:
///
///   '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
/// | '(' S? '#PCDATA' S? ')'
///
/// On success the declaration's content is `Mixed` with `"#PCDATA"` first,
/// followed by the listed element names in source order.
fn parse_mixed(name: String, doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "(" <> tail ->
      case trim_space(tail) {
        "#PCDATA" <> tail -> {
          use #(els, doc) <- result.try(parse_mixed_elements(tail))
          case els, trim_space(doc) {
            // `)*` closes either form, with or without element names.
            _, ")*" <> tail ->
              Ok(#(ElementDecl(name, Mixed(["#PCDATA", ..els])), tail))
            // A bare `)` is only allowed when no element names were listed.
            [], ")" <> tail ->
              Ok(#(ElementDecl(name, Mixed(["#PCDATA"])), tail))
            _, _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Collects the `| Name` alternatives that may follow `#PCDATA` in a mixed
/// content model.
///
/// Always returns `Ok`: the names found (possibly none) in source order,
/// plus the input starting at the first position where no further
/// `| Name` could be parsed.
fn parse_mixed_elements(doc: String) -> Result(#(List(String), String), Nil) {
  let parse_alternative = fn(input) {
    case trim_space(input) {
      "|" <> after_bar -> parse_name(trim_space(after_bar))
      _ -> Error(Nil)
    }
  }

  Ok(parse_multiple_to_list(to_parse: doc, with: parse_alternative, acc: []))
}
|
||||
|
||||
fn get_entity_replacement(
|
||||
entity: String,
|
||||
@@ -537,10 +788,10 @@ fn get_entity_replacement(
|
||||
}
|
||||
|
||||
fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
|
||||
let doc = trim_space(doc)
|
||||
use doc <- result.try(trim_mandatory_space(doc))
|
||||
case doc {
|
||||
"SYSTEM" <> tail -> {
|
||||
let doc = trim_space(tail)
|
||||
use doc <- result.try(trim_mandatory_space(tail))
|
||||
use #(system_literal, doc) <- result.try(parse_system_literal(
|
||||
doc,
|
||||
None,
|
||||
@@ -549,13 +800,13 @@ fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil)
|
||||
Ok(#(Some(SystemID(system_literal:)), doc))
|
||||
}
|
||||
"PUBLIC" <> tail -> {
|
||||
let doc = trim_space(tail)
|
||||
use doc <- result.try(trim_mandatory_space(tail))
|
||||
use #(public_literal, doc) <- result.try(parse_public_literal(
|
||||
doc,
|
||||
None,
|
||||
"",
|
||||
))
|
||||
let doc = trim_space(doc)
|
||||
use doc <- result.try(trim_mandatory_space(doc))
|
||||
use #(system_literal, doc) <- result.try(parse_system_literal(
|
||||
doc,
|
||||
None,
|
||||
@@ -695,11 +946,11 @@ fn parse_misc(doc: String) -> String {
|
||||
try_parsers(
|
||||
[
|
||||
parse_comment,
|
||||
parse_pi,
|
||||
fn(doc) {
|
||||
parse_space(doc)
|
||||
|> result.map(fn(sp) { #(Whitespace, sp.1) })
|
||||
},
|
||||
parse_pi,
|
||||
],
|
||||
doc,
|
||||
)
|
||||
@@ -729,8 +980,8 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
|
||||
}
|
||||
|
||||
fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
|
||||
case trim_space(doc) {
|
||||
"version=" <> tail -> {
|
||||
case trim_mandatory_space(doc) {
|
||||
Ok("version=" <> tail) -> {
|
||||
use #(version, doc) <- result.try(parse_version(tail))
|
||||
Ok(#(version, doc))
|
||||
}
|
||||
@@ -770,8 +1021,8 @@ fn do_parse_version(
|
||||
}
|
||||
|
||||
fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
|
||||
case trim_space(doc) {
|
||||
"encoding=" <> tail -> {
|
||||
case trim_mandatory_space(doc) {
|
||||
Ok("encoding=" <> tail) -> {
|
||||
case tail {
|
||||
"\"" <> tail -> {
|
||||
use #(encoding, doc) <- result.try(parse_encoding(tail))
|
||||
@@ -821,10 +1072,11 @@ fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
|
||||
}
|
||||
|
||||
/// Parses the standalone document declaration of an XML declaration.
///
/// Requires at least one leading whitespace character, then
/// `standalone="yes"` / `standalone='yes'` (giving `True`) or
/// `standalone="no"` / `standalone='no'` (giving `False`). Returns the
/// flag and the remaining input, or `Error(Nil)` if neither form matches.
fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
  case trim_mandatory_space(doc) {
    Ok("standalone=\"yes\"" <> tail) | Ok("standalone='yes'" <> tail) ->
      Ok(#(True, tail))
    // "no" must map to False; it previously returned True as well.
    Ok("standalone=\"no\"" <> tail) | Ok("standalone='no'" <> tail) ->
      Ok(#(False, tail))
    _ -> Error(Nil)
  }
}
|
||||
@@ -1053,6 +1305,18 @@ fn parse_name_char(doc: String) -> Result(#(String, String), Nil) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Strips leading whitespace from `doc`, failing with `Error(Nil)` when
/// `doc` does not begin with at least one whitespace match.
fn trim_mandatory_space(doc: String) -> Result(String, Nil) {
  doc |> do_trim_mandatory_space(True)
}
|
||||
|
||||
/// Worker for `trim_mandatory_space`.
///
/// Repeatedly consumes whitespace with `parse_space`. `first` is `True`
/// only while nothing has been consumed yet; if `parse_space` fails in
/// that state the result is `Error(Nil)`, otherwise the remaining input is
/// returned.
fn do_trim_mandatory_space(doc: String, first: Bool) -> Result(String, Nil) {
  case parse_space(doc) {
    Ok(#(_, rest)) -> do_trim_mandatory_space(rest, False)
    Error(_) ->
      case first {
        True -> Error(Nil)
        False -> Ok(doc)
      }
  }
}
|
||||
|
||||
fn trim_space(doc: String) -> String {
|
||||
case parse_space(doc) {
|
||||
Ok(#(_, doc)) -> trim_space(doc)
|
||||
@@ -1094,6 +1358,21 @@ fn parse_multiple(
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies `with` repeatedly to the input, collecting each parsed value.
///
/// Stops when the input is empty or the parser returns an error. Returns
/// the collected values in parse order (anything already in `acc` comes
/// first, reversed) together with the unconsumed input.
fn parse_multiple_to_list(
  to_parse input: String,
  with parser: fn(String) -> Result(#(String, String), Nil),
  acc parsed: List(String),
) -> #(List(String), String) {
  // Built lazily so the list is reversed only when parsing stops.
  let finish = fn() { #(list.reverse(parsed), input) }

  case input {
    "" -> finish()
    _ ->
      case parser(input) {
        Error(_) -> finish()
        Ok(#(item, remaining)) ->
          parse_multiple_to_list(remaining, parser, [item, ..parsed])
      }
  }
}
|
||||
|
||||
fn parse_multiple_optional(
|
||||
to_parse str: String,
|
||||
with to_run: fn(String) -> Result(#(String, String), Nil),
|
||||
|
||||
Reference in New Issue
Block a user