Compare commits

...

10 Commits

Author SHA1 Message Date
15c6062d04 feat: Start work on !ELEMENT
Some checks failed
test / test (push) Has been cancelled
2025-10-15 22:54:21 +01:00
464ad513b6 test: Add xml tests
Some checks failed
test / test (push) Has been cancelled
2025-10-10 21:28:18 +01:00
336286c7cf feat: Work on declarations
Some checks failed
test / test (push) Has been cancelled
2025-10-10 21:27:44 +01:00
1e5227f60c fix: Initial chardata was being thrown away
Some checks failed
test / test (push) Has been cancelled
2025-10-09 22:20:18 +01:00
489590bdac feat: Added PI Parsing
Some checks failed
test / test (push) Has been cancelled
2025-10-09 19:20:02 +01:00
963f44dbeb feat: Added CData parsing
Some checks failed
test / test (push) Has been cancelled
2025-10-09 18:50:00 +01:00
43d0638fd8 feat: Added comment parsing in elements
Some checks failed
test / test (push) Has been cancelled
2025-10-09 18:32:12 +01:00
8e0e6f988a feat: Further element parsing
Some checks failed
test / test (push) Has been cancelled
2025-10-09 18:18:16 +01:00
0ea34f11f3 refactor: Minor tidy up
Some checks failed
test / test (push) Has been cancelled
2025-10-09 16:12:38 +01:00
1c6e76bace docs: gleam.toml changes 2025-10-09 16:12:16 +01:00
373 changed files with 4001 additions and 41 deletions

View File

@@ -4,9 +4,9 @@ version = "1.0.0"
# Fill out these fields if you intend to generate HTML documentation or publish
# your project to the Hex package manager.
#
# description = ""
# licences = ["Apache-2.0"]
# repository = { type = "github", user = "", repo = "" }
description = "Gleam XML parser"
licences = ["Apache-2.0"]
repository = { type = "gitea", host = "git.pendleton.ie", user = "pendletong", repo = "glxml" }
# links = [{ title = "Website", href = "" }]
#
# For a full reference of all the available options, you can have a look at

View File

@@ -6,8 +6,33 @@ import gleam/option.{type Option, None, Some}
import gleam/result
import gleam/string
/// Content specification of an `<!ELEMENT ...>` declaration.
pub type Content {
/// Produced for the `EMPTY` keyword.
Empty
/// Produced for the `ANY` keyword.
Any
/// Mixed content: the list starts with "#PCDATA" followed by the permitted
/// element names.
Mixed(content: List(String))
/// Holds a content particle.
// NOTE(review): presumably used for element-only (children) content models
// — confirm once parse_children is finished.
Choice(content: ContentParticle)
}
pub type Declaration {
Declaration(versioninfo: String, encoding: String, standalone: Bool)
XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
GEntityDecl
PEntityDecl(name: String, decl: String)
ElementDecl(name: String, content: Content)
AttListDecl
NotationDecl
}
/// Occurrence indicator attached to a content particle.
pub type Optional {
/// No suffix: the default assigned when a particle is first parsed.
One
/// `+` suffix.
OneOrMore
/// `*` suffix.
ZeroOrMore
/// `?` suffix.
ZeroOrOne
}
/// One node of an element content model; every node carries its own
/// occurrence indicator.
pub type ContentParticle {
/// A single element name.
ElParticle(name: String, optional: Optional)
/// Alternatives separated by `|` inside parentheses.
ChoiceParticle(choices: List(ContentParticle), optional: Optional)
/// Items separated by `,` inside parentheses.
SeqParticle(seq: List(ContentParticle), optional: Optional)
}
pub type Entity {
@@ -16,8 +41,17 @@ pub type Entity {
PublicExternalEntity(literal: String, pubidliteral: String)
}
/// External identifier parsed after the name in a `<!DOCTYPE ...>` declaration.
pub type ExternalID {
/// `SYSTEM` followed by a quoted system literal.
SystemID(system_literal: String)
/// `PUBLIC` followed by a quoted public literal and then a quoted system
/// literal (note the field order: system literal first).
PublicID(system_literal: String, public_literal: String)
}
pub type DocType {
DocType(name: String, entities: dict.Dict(String, Entity))
DocType(
name: String,
external_id: Option(ExternalID),
entities: dict.Dict(String, Entity),
)
}
pub type Document {
@@ -35,16 +69,29 @@ pub type Attribute {
/// A node produced while parsing a document: either an element or one of
/// the other items that may appear in content or in the prolog.
pub type Element {
/// An element with attributes and no content list.
EmptyElem(name: String, attrs: List(Attribute))
/// An element with a start tag, parsed content and a matching end tag.
Element(name: String, attrs: List(Attribute), elements: List(Element))
/// Character data found between markup.
Text(content: String)
/// The body of a `<!-- ... -->` comment.
Comment(content: String)
/// The body of a `<![CDATA[ ... ]]>` section.
CData(content: String)
/// A processing instruction: its target name and the remaining content.
PI(name: String, content: String)
/// Placeholder emitted for whitespace when it is parsed alongside comments
/// and processing instructions.
Whitespace
}
pub fn main() {
parse_document(
"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<a attr='ha&#x20;&#38;#38;ha' battr='baba' ref='&amp;'/>",
"<?xml version=\"1.1\"?>\r\n<!DOCTYPE el [\r\n<!ELEMENT div1 (head, (p | list | note)*, div2*)>\r\n]>",
//"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n<b blah:test='1'><a attr='ha&#x20;&#38;#38;ha' battr='baba' ref='&amp;'/><!-- ma comment --><![CDATA[Testing&&<haha>]]><?test asuhashd ?></b>",
// "<doc>
// "<doc>
// <A a=\"asdf>'&#34;>
// asdf
// ?>%\"/>
// <A a='\"\">&#39;&#34;'/>
// </doc>",
)
|> echo
}
pub fn default_entities() -> dict.Dict(String, Entity) {
fn default_entities() -> dict.Dict(String, Entity) {
dict.from_list([
#("lt", InternalEntity("&#60;")),
#("gt", InternalEntity("&#62;")),
@@ -70,7 +117,10 @@ fn parse_element(
doc: String,
doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
try_parsers([parse_empty_elem(_, doctype)], doc)
try_parsers(
[parse_empty_elem(_, doctype), parse_tagged_elem(_, doctype)],
doc,
)
}
fn parse_empty_elem(
@@ -91,6 +141,155 @@ fn parse_empty_elem(
}
}
/// Parses an element written as `<name attrs>content</name>`.
///
/// Returns the element together with the input left after the end tag, or
/// `Error(Nil)` when any part is missing or the end-tag name differs from
/// the start-tag name.
fn parse_tagged_elem(
  doc: String,
  doctype: Option(DocType),
) -> Result(#(Element, String), Nil) {
  use after_lt <- result.try(case doc {
    "<" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(name, rest) <- result.try(parse_name(after_lt))
  use #(attrs, rest) <- result.try(parse_attributes(rest, doctype, []))
  // Whitespace is allowed between the last attribute and `>`.
  use body <- result.try(case trim_space(rest) {
    ">" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(children, rest) <- result.try(parse_content(body, doctype, []))
  use after_close <- result.try(case rest {
    "</" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(close_name, rest) <- result.try(parse_name(after_close))
  case trim_space(rest) {
    // The end tag must name the same element as the start tag.
    ">" <> rest if name == close_name ->
      Ok(#(Element(name, attrs, children), rest))
    _ -> Error(Nil)
  }
}
/// Parses the content of an element: character data interleaved with child
/// elements, comments, CDATA sections and processing instructions.
///
/// `content` accumulates the nodes found so far in reverse order. Parsing
/// stops at the first position where no markup parser succeeds (for example
/// at an end tag), returning the nodes in document order plus the
/// unconsumed input.
fn parse_content(
  doc: String,
  doctype: Option(DocType),
  content: List(Element),
) -> Result(#(List(Element), String), Nil) {
  use #(text, rest) <- result.try(parse_chardata(doc, doctype, ""))
  // Empty runs of character data are not recorded as Text nodes.
  let collected = case text {
    "" -> content
    _ -> [Text(text), ..content]
  }
  let markup_parsers = [
    parse_element(_, doctype),
    parse_comment,
    parse_cdata,
    parse_pi,
  ]
  case rest {
    "<" <> _ ->
      case try_parsers(markup_parsers, rest) {
        Ok(#(node, remaining)) ->
          parse_content(remaining, doctype, [node, ..collected])
        Error(_) -> Ok(#(list.reverse(collected), rest))
      }
    _ -> Ok(#(list.reverse(collected), rest))
  }
}
/// Parses a processing instruction `<?target content?>`.
///
/// A target equal to "xml" in any letter case is rejected. Returns the `PI`
/// node and the input after the closing `?>`.
fn parse_pi(doc: String) -> Result(#(Element, String), Nil) {
  use after_open <- result.try(case doc {
    "<?" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(target, rest) <- result.try(parse_name(after_open))
  case string.lowercase(target) {
    "xml" -> Error(Nil)
    _ -> {
      use #(body, rest) <- result.try(parse_pi_content(trim_space(rest), ""))
      case rest {
        "?>" <> rest -> Ok(#(PI(target, body), rest))
        _ -> Error(Nil)
      }
    }
  }
}
/// Collects processing-instruction content up to, but not including, the
/// closing `?>`.
///
/// `pi` is the text accumulated so far. Hitting the end of input is an
/// error; a character that `parse_char` rejects simply ends the scan and the
/// caller decides whether what follows is a valid terminator.
fn parse_pi_content(doc: String, pi: String) -> Result(#(String, String), Nil) {
  case doc {
    "" -> Error(Nil)
    "?>" <> _ -> Ok(#(pi, doc))
    _ ->
      case parse_char(doc) {
        Error(_) -> Ok(#(pi, doc))
        Ok(#(char, rest)) -> parse_pi_content(rest, pi <> char)
      }
  }
}
/// Parses a `<![CDATA[ ... ]]>` section into a `CData` node, returning it
/// with the input that follows the closing `]]>`.
fn parse_cdata(doc: String) -> Result(#(Element, String), Nil) {
  case doc {
    "<![CDATA[" <> body ->
      do_parse_cdata(body, "")
      |> result.map(fn(parsed) { #(CData(parsed.0), parsed.1) })
    _ -> Error(Nil)
  }
}
/// Accumulates CDATA text into `cdata` until the closing `]]>` is found.
///
/// Returns the text and the input after `]]>`. Running out of input, or
/// meeting a character that `parse_char` rejects, is an error.
fn do_parse_cdata(doc: String, cdata: String) -> Result(#(String, String), Nil) {
  case doc {
    "" -> Error(Nil)
    "]]>" <> rest -> Ok(#(cdata, rest))
    _ ->
      // Kept as a plain case (not `use`) so the recursion stays a tail call.
      case parse_char(doc) {
        Ok(#(char, rest)) -> do_parse_cdata(rest, cdata <> char)
        _ -> Error(Nil)
      }
  }
}
/// Reads character data up to the next `<` (or the end of input).
///
/// `chardata` is the text accumulated so far. References starting with `&`
/// are resolved through `parse_reference` and their replacement text is
/// appended. The sequence `]]>` is not permitted in character data and
/// yields `Error(Nil)`.
///
/// Returns the accumulated text and the unconsumed input.
fn parse_chardata(
  doc: String,
  doctype: Option(DocType),
  chardata: String,
) -> Result(#(String, String), Nil) {
  case doc {
    "]]>" <> _ -> Error(Nil)
    "<" <> _ -> Ok(#(chardata, doc))
    "&" <> _ -> {
      use #(refval, doc) <- result.try(parse_reference(doc, doctype))
      parse_chardata(doc, doctype, chardata <> refval)
    }
    // End of input: hand back what was collected instead of discarding it.
    "" -> Ok(#(chardata, ""))
    _ -> {
      // `doc` is non-empty here, so popping a grapheme cannot fail.
      let assert Ok(#(char, tail)) = string.pop_grapheme(doc)
      parse_chardata(tail, doctype, chardata <> char)
    }
  }
}
fn parse_attributes(
doc: String,
doctype: Option(DocType),
@@ -106,7 +305,7 @@ fn parse_attribute(
doc: String,
doctype: Option(DocType),
) -> Result(#(Attribute, String), Nil) {
let doc = trim_space(doc)
use doc <- result.try(trim_mandatory_space(doc))
use #(name, doc) <- result.try(parse_name(doc))
case doc {
"=" <> tail -> {
@@ -203,7 +402,7 @@ fn parse_reference(
}
}
}
"&" as char <> tail -> {
"&" <> tail -> {
use #(name, doc) <- result.try(parse_name(tail))
case doc {
@@ -223,7 +422,7 @@ fn process_reference(
doctype: Option(DocType),
) -> Result(String, Nil) {
case doctype {
Some(DocType(_, entities)) -> {
Some(DocType(_, _, entities)) -> {
get_reference(entities, ref)
}
None -> {
@@ -267,41 +466,512 @@ fn do_parse_name(doc: String, name: String) -> Result(#(String, String), Nil) {
fn parse_prolog(
doc: String,
) -> Result(#(Declaration, Option(DocType), String), Nil) {
let #(decl, doc) = case parse_decl(doc) {
Ok(#(decl, doc)) -> #(decl, doc)
_ -> #(Declaration("1.0", "UTF-8", False), doc)
}
let #(decl, doc) =
parse_decl(doc) |> result.unwrap(#(XMLDecl("1.0", "UTF-8", False), doc))
let doc = parse_misc(doc)
Ok(#(decl, None, doc))
let #(doctype, doc) =
parse_doctype(doc)
|> result.map(fn(d) { #(Some(d.0), d.1) })
|> result.unwrap(#(None, doc))
Ok(#(decl, doctype, doc))
}
/// Parses a `<!DOCTYPE name [ExternalID] [ '[' internal subset ']' ] >`
/// declaration.
///
/// The external identifier and the internal subset are both optional.
/// Returns the `DocType` and the input following the closing `>`.
fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
  case doc {
    "<!DOCTYPE" <> tail -> {
      use doc <- result.try(trim_mandatory_space(tail))
      use #(name, doc) <- result.try(parse_name(doc))
      let #(external_id, doc) =
        parse_external_id(doc) |> result.unwrap(#(None, doc))
      let doc = trim_space(doc)
      // The parsed declarations are not stored on the DocType yet, so the
      // binding is deliberately ignored; only the remaining input is used.
      let #(_int_subset, doc) =
        parse_int_subset(doc) |> result.unwrap(#([], doc))
      // Whitespace may separate the closing `]` from `>`.
      case trim_space(doc) {
        ">" <> tail -> Ok(#(DocType(name, external_id, dict.new()), tail))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Parses a bracketed internal subset `[ ... ]`, skipping leading
/// whitespace, and returns its declarations with the input after `]`.
fn parse_int_subset(doc: String) -> Result(#(List(Declaration), String), Nil) {
  case trim_space(doc) {
    "[" <> body -> {
      use #(decls, rest) <- result.try(do_parse_int_subset(body, []))
      case rest {
        "]" <> rest -> Ok(#(decls, rest))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Parses the declarations inside an internal subset.
///
/// `decl_list` holds the declarations found so far, newest first. A
/// parameter-entity reference `%name;` is replaced by the text of a matching
/// `PEntityDecl` already in the list and parsing continues on the expanded
/// input; an unknown reference is an error. Parsing stops at the first
/// position where no declaration parser succeeds, returning the
/// declarations in source order and the unconsumed (whitespace-trimmed)
/// input.
fn do_parse_int_subset(
  doc: String,
  decl_list: List(Declaration),
) -> Result(#(List(Declaration), String), Nil) {
  case trim_space(doc) {
    "%" <> after_percent -> {
      use #(pe_name, rest) <- result.try(parse_name(after_percent))
      case rest {
        ";" <> rest ->
          case get_entity_replacement(pe_name, decl_list) {
            Some(replacement) ->
              do_parse_int_subset(replacement <> rest, decl_list)
            None -> Error(Nil)
          }
        _ -> Error(Nil)
      }
    }
    trimmed ->
      case try_parsers([parse_elementdecl], trimmed) {
        Ok(#(decl, rest)) -> do_parse_int_subset(rest, [decl, ..decl_list])
        Error(_) -> Ok(#(list.reverse(decl_list), trimmed))
      }
  }
}
/// Parses an element type declaration: `<!ELEMENT name contentspec >`.
///
/// `contentspec` is `EMPTY`, `ANY`, a mixed-content model or a children
/// model. Optional whitespace may precede the terminating `>`, which is
/// consumed here for every kind of content specification.
///
/// Returns the `ElementDecl` and the input following `>`.
fn parse_elementdecl(doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "<!ELEMENT" <> tail -> {
      use doc <- result.try(trim_mandatory_space(tail))
      use #(name, doc) <- result.try(parse_name(doc))
      use doc <- result.try(trim_mandatory_space(doc))
      use #(decl, doc) <- result.try(case doc {
        "EMPTY" <> tail -> Ok(#(ElementDecl(name, Empty), tail))
        "ANY" <> tail -> Ok(#(ElementDecl(name, Any), tail))
        "(" <> _ ->
          try_parsers([parse_mixed(name, _), parse_children(name, _)], doc)
        _ -> Error(Nil)
      })
      // The declaration ends with `>` (not `)`), e.g. `<!ELEMENT el EMPTY>`.
      case trim_space(doc) {
        ">" <> tail -> Ok(#(decl, tail))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Parses an element-only content model: a parenthesised choice or
/// sequence, optionally followed by `?`, `*` or `+`.
///
/// Returns an `ElementDecl` for `name` whose content wraps the parsed
/// particle, plus the input after the optional occurrence indicator. The
/// terminating `>` of the declaration is left for the caller.
fn parse_children(
  name: String,
  doc: String,
) -> Result(#(Declaration, String), Nil) {
  use #(children, doc) <- result.try(try_parsers([parse_choice, parse_seq], doc))
  // An occurrence indicator directly after `)` applies to the whole group.
  let #(children, doc) = case doc {
    "?" <> tail -> #(set_optional(children, ZeroOrOne), tail)
    "*" <> tail -> #(set_optional(children, ZeroOrMore), tail)
    "+" <> tail -> #(set_optional(children, OneOrMore), tail)
    _ -> #(children, doc)
  }
  // `Choice` is the only Content variant that carries a ContentParticle, so
  // it is used for both choice and sequence groups.
  Ok(#(ElementDecl(name, Choice(children)), doc))
}
/// Parses a parenthesised choice group `( cp | cp | ... )`.
///
/// At least two alternatives are required; a group with fewer is rejected
/// so that another parser can handle it. Returns the `ChoiceParticle` and
/// the input after the closing `)`.
fn parse_choice(doc: String) -> Result(#(ContentParticle, String), Nil) {
  case doc {
    "(" <> inner -> {
      use #(first, rest) <- result.try(parse_cp(trim_space(inner)))
      use #(particle, rest) <- result.try(do_parse_choice(rest, [first]))
      case particle, trim_space(rest) {
        ChoiceParticle([], _), _ | ChoiceParticle([_], _), _ -> Error(Nil)
        _, ")" <> rest -> Ok(#(particle, rest))
        _, _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Consumes zero or more `| cp` continuations of a choice group.
///
/// `acc` holds the alternatives parsed so far, newest first. When no `|`
/// follows, the alternatives are returned in source order as a
/// `ChoiceParticle` with occurrence `One`, along with the trimmed remainder.
fn do_parse_choice(
  doc: String,
  acc: List(ContentParticle),
) -> Result(#(ContentParticle, String), Nil) {
  case trim_space(doc) {
    "|" <> after_bar -> {
      use #(alternative, rest) <- result.try(parse_cp(trim_space(after_bar)))
      do_parse_choice(rest, [alternative, ..acc])
    }
    rest -> Ok(#(ChoiceParticle(list.reverse(acc), One), rest))
  }
}
/// Parses a parenthesised sequence group `( cp , cp , ... )`.
///
/// A single item is allowed. If parsing the continuation fails, the group
/// falls back to just the first item and the closing `)` check decides the
/// outcome. Returns the `SeqParticle` and the input after `)`.
fn parse_seq(doc: String) -> Result(#(ContentParticle, String), Nil) {
  case doc {
    "(" <> inner -> {
      use #(first, rest) <- result.try(parse_cp(trim_space(inner)))
      let #(particle, rest) =
        do_parse_seq(rest, [first])
        |> result.unwrap(#(SeqParticle([first], One), rest))
      case trim_space(rest) {
        ")" <> rest -> Ok(#(particle, rest))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Consumes zero or more `, cp` continuations of a sequence group.
///
/// `acc` holds the items parsed so far, newest first. When no `,` follows,
/// the items are returned in source order as a `SeqParticle` with occurrence
/// `One`, along with the trimmed remainder.
fn do_parse_seq(
  doc: String,
  acc: List(ContentParticle),
) -> Result(#(ContentParticle, String), Nil) {
  case trim_space(doc) {
    "," <> after_comma -> {
      use #(item, rest) <- result.try(parse_cp(trim_space(after_comma)))
      do_parse_seq(rest, [item, ..acc])
    }
    rest -> Ok(#(SeqParticle(list.reverse(acc), One), rest))
  }
}
/// Parses one content particle: an element name, a choice group or a
/// sequence group, followed by an optional `?`, `*` or `+`.
///
/// Returns the particle (with its occurrence set from the suffix, `One`
/// when there is none) and the remaining input.
fn parse_cp(doc: String) -> Result(#(ContentParticle, String), Nil) {
  let parse_name_particle = fn(input) {
    parse_name(input)
    |> result.map(fn(named) { #(ElParticle(named.0, One), named.1) })
  }
  use #(particle, rest) <- result.try(try_parsers(
    [parse_name_particle, parse_choice, parse_seq],
    doc,
  ))
  Ok(case rest {
    "?" <> rest -> #(set_optional(particle, ZeroOrOne), rest)
    "*" <> rest -> #(set_optional(particle, ZeroOrMore), rest)
    "+" <> rest -> #(set_optional(particle, OneOrMore), rest)
    _ -> #(particle, rest)
  })
}
/// Returns a copy of `el` with its occurrence indicator replaced by
/// `optional`; everything else is preserved.
fn set_optional(el: ContentParticle, optional: Optional) -> ContentParticle {
  case el {
    ElParticle(name:, ..) -> ElParticle(name, optional)
    ChoiceParticle(choices:, ..) -> ChoiceParticle(choices, optional)
    SeqParticle(seq:, ..) -> SeqParticle(seq, optional)
  }
}
/// Parses a mixed-content model for the element `name`.
///
/// Accepted forms (whitespace allowed around the tokens):
///   - `(#PCDATA)` or `(#PCDATA)*` when no element names are listed
///   - `(#PCDATA | a | b ...)*` when names are listed; the `*` is required
///
/// Returns an `ElementDecl` whose `Mixed` list starts with "#PCDATA"
/// followed by the listed names, plus the input after the closing token.
/// The terminating `>` of the declaration is left for the caller.
fn parse_mixed(name: String, doc: String) -> Result(#(Declaration, String), Nil) {
  case doc {
    "(" <> tail -> {
      case trim_space(tail) {
        "#PCDATA" <> tail -> {
          use #(els, doc) <- result.try(parse_mixed_elements(tail))
          case els, trim_space(doc) {
            // `)*` is tested before `)` so the star is consumed when present.
            [], ")*" <> tail ->
              Ok(#(ElementDecl(name, Mixed(["#PCDATA"])), tail))
            [], ")" <> tail ->
              Ok(#(ElementDecl(name, Mixed(["#PCDATA"])), tail))
            // With element names present the group must be starred.
            [_, ..], ")*" <> tail ->
              Ok(#(ElementDecl(name, Mixed(["#PCDATA", ..els])), tail))
            _, _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
/// Collects the `| name` entries that follow `#PCDATA` in a mixed-content
/// model. Always succeeds; the list is empty when no entries are present.
fn parse_mixed_elements(doc: String) -> Result(#(List(String), String), Nil) {
  let parse_alternative = fn(input) {
    case trim_space(input) {
      "|" <> after_bar -> parse_name(trim_space(after_bar))
      _ -> Error(Nil)
    }
  }
  Ok(parse_multiple_to_list(doc, parse_alternative, []))
}
/// Looks up the replacement text of the parameter entity called `entity`
/// among the declarations in `decl_list`, returning the first match or
/// `None` when there is none.
fn get_entity_replacement(
  entity: String,
  decl_list: List(Declaration),
) -> Option(String) {
  let lookup =
    list.find_map(decl_list, fn(candidate) {
      case candidate {
        PEntityDecl(name:, decl:) if name == entity -> Ok(decl)
        _ -> Error(Nil)
      }
    })
  case lookup {
    Ok(replacement) -> Some(replacement)
    Error(_) -> None
  }
}
/// Parses an external identifier: mandatory whitespace followed by either
/// `SYSTEM "system-literal"` or `PUBLIC "public-literal" "system-literal"`.
///
/// On success the identifier is always wrapped in `Some`; the second tuple
/// element is the input after the last literal.
fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
  use rest <- result.try(trim_mandatory_space(doc))
  case rest {
    "SYSTEM" <> rest -> {
      use rest <- result.try(trim_mandatory_space(rest))
      parse_system_literal(rest, None, "")
      |> result.map(fn(parsed) {
        #(Some(SystemID(system_literal: parsed.0)), parsed.1)
      })
    }
    "PUBLIC" <> rest -> {
      use rest <- result.try(trim_mandatory_space(rest))
      use #(public_literal, rest) <- result.try(parse_public_literal(
        rest,
        None,
        "",
      ))
      use rest <- result.try(trim_mandatory_space(rest))
      use #(system_literal, rest) <- result.try(parse_system_literal(
        rest,
        None,
        "",
      ))
      Ok(#(Some(PublicID(system_literal, public_literal)), rest))
    }
    _ -> Error(Nil)
  }
}
/// Parses a quoted public identifier literal.
///
/// `quote` is `None` until the opening `"` or `'` has been consumed, after
/// which it holds that quote character. `literal` is the text accumulated so
/// far; it is reset to "" when the opening quote is found.
///
/// Returns the literal text and the input following the closing quote.
/// Fails with `Error(Nil)` on end of input, when the input does not start
/// with a quote, or when a character outside the accepted set is met.
fn parse_public_literal(
doc: String,
quote: Option(String),
literal: String,
) -> Result(#(String, String), Nil) {
case doc, quote {
// Opening quote: remember which one was used and start collecting.
"\"" as q <> tail, None | "'" as q <> tail, None ->
parse_public_literal(tail, Some(q), "")
"", _ -> Error(Nil)
_, None -> Error(Nil)
// Closing quote: must be the same character that opened the literal.
"\"" <> tail, Some("\"") -> Ok(#(literal, tail))
"'" <> tail, Some("'") -> Ok(#(literal, tail))
// Accepted characters: space, CR, LF, ASCII letters and digits, and a
// fixed set of punctuation.
" " as char <> tail, Some(_)
| "\r" as char <> tail, Some(_)
| "\n" as char <> tail, Some(_)
| "0" as char <> tail, Some(_)
| "1" as char <> tail, Some(_)
| "2" as char <> tail, Some(_)
| "3" as char <> tail, Some(_)
| "4" as char <> tail, Some(_)
| "5" as char <> tail, Some(_)
| "6" as char <> tail, Some(_)
| "7" as char <> tail, Some(_)
| "8" as char <> tail, Some(_)
| "9" as char <> tail, Some(_)
| "a" as char <> tail, Some(_)
| "b" as char <> tail, Some(_)
| "c" as char <> tail, Some(_)
| "d" as char <> tail, Some(_)
| "e" as char <> tail, Some(_)
| "f" as char <> tail, Some(_)
| "g" as char <> tail, Some(_)
| "h" as char <> tail, Some(_)
| "i" as char <> tail, Some(_)
| "j" as char <> tail, Some(_)
| "k" as char <> tail, Some(_)
| "l" as char <> tail, Some(_)
| "m" as char <> tail, Some(_)
| "n" as char <> tail, Some(_)
| "o" as char <> tail, Some(_)
| "p" as char <> tail, Some(_)
| "q" as char <> tail, Some(_)
| "r" as char <> tail, Some(_)
| "s" as char <> tail, Some(_)
| "t" as char <> tail, Some(_)
| "u" as char <> tail, Some(_)
| "v" as char <> tail, Some(_)
| "w" as char <> tail, Some(_)
| "x" as char <> tail, Some(_)
| "y" as char <> tail, Some(_)
| "z" as char <> tail, Some(_)
| "A" as char <> tail, Some(_)
| "B" as char <> tail, Some(_)
| "C" as char <> tail, Some(_)
| "D" as char <> tail, Some(_)
| "E" as char <> tail, Some(_)
| "F" as char <> tail, Some(_)
| "G" as char <> tail, Some(_)
| "H" as char <> tail, Some(_)
| "I" as char <> tail, Some(_)
| "J" as char <> tail, Some(_)
| "K" as char <> tail, Some(_)
| "L" as char <> tail, Some(_)
| "M" as char <> tail, Some(_)
| "N" as char <> tail, Some(_)
| "O" as char <> tail, Some(_)
| "P" as char <> tail, Some(_)
| "Q" as char <> tail, Some(_)
| "R" as char <> tail, Some(_)
| "S" as char <> tail, Some(_)
| "T" as char <> tail, Some(_)
| "U" as char <> tail, Some(_)
| "V" as char <> tail, Some(_)
| "W" as char <> tail, Some(_)
| "X" as char <> tail, Some(_)
| "Y" as char <> tail, Some(_)
| "Z" as char <> tail, Some(_)
| "-" as char <> tail, Some(_)
| "(" as char <> tail, Some(_)
| ")" as char <> tail, Some(_)
| "+" as char <> tail, Some(_)
| "," as char <> tail, Some(_)
| "." as char <> tail, Some(_)
| "/" as char <> tail, Some(_)
| ":" as char <> tail, Some(_)
| "=" as char <> tail, Some(_)
| "?" as char <> tail, Some(_)
| ";" as char <> tail, Some(_)
| "!" as char <> tail, Some(_)
| "*" as char <> tail, Some(_)
| "#" as char <> tail, Some(_)
| "@" as char <> tail, Some(_)
| "$" as char <> tail, Some(_)
| "_" as char <> tail, Some(_)
| "%" as char <> tail, Some(_)
// An apostrophe is content only when the literal is delimited by `"`.
| "'" as char <> tail, Some("\"")
-> {
parse_public_literal(tail, quote, literal <> char)
}
// Any other character is not allowed in a public identifier.
_, _ -> Error(Nil)
}
}
/// Parses a quoted system literal.
///
/// `quote` is `None` until the opening `"` or `'` has been consumed, after
/// which it holds that quote character. `literal` is the text accumulated so
/// far; it is reset to "" when the opening quote is found. Any character
/// other than the matching quote is accepted as content.
///
/// Returns the literal text and the input following the closing quote, or
/// `Error(Nil)` when the input is exhausted or does not start with a quote.
fn parse_system_literal(
  doc: String,
  quote: Option(String),
  literal: String,
) -> Result(#(String, String), Nil) {
  case quote, doc {
    None, "\"" <> rest -> parse_system_literal(rest, Some("\""), "")
    None, "'" <> rest -> parse_system_literal(rest, Some("'"), "")
    None, _ -> Error(Nil)
    Some(_), "" -> Error(Nil)
    Some("\""), "\"" <> rest -> Ok(#(literal, rest))
    Some("'"), "'" <> rest -> Ok(#(literal, rest))
    Some(_), _ -> {
      // `doc` is non-empty here, so popping a grapheme cannot fail.
      let assert Ok(#(grapheme, rest)) = string.pop_grapheme(doc)
      parse_system_literal(rest, quote, literal <> grapheme)
    }
  }
}
fn parse_misc(doc: String) -> String {
let #(_, doc) =
parse_multiple_optional(
case
try_parsers(
[
parse_comment,
parse_pi,
fn(doc) {
parse_space(doc)
|> result.map(fn(sp) { #(Whitespace, sp.1) })
},
],
doc,
try_parsers([parse_comment, parse_space], _),
"",
)
doc
{
Ok(#(_element, doc)) -> parse_misc(doc)
Error(Nil) -> doc
}
}
fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
case doc {
"<?xml" <> tail -> {
use #(versioninfo, doc) <- result.try(parse_versioninfo(tail))
let #(encoding, doc) = case parse_encodingdecl(doc) {
Ok(e) -> e
Error(_) -> #("", doc)
}
let #(standalone, doc) = case parse_standalone(doc) {
Ok(e) -> e
Error(_) -> #(False, doc)
}
let #(encoding, doc) =
parse_encodingdecl(doc) |> result.unwrap(#("", doc))
let #(standalone, doc) =
parse_standalone(doc) |> result.unwrap(#(False, doc))
case trim_space(doc) {
"?>" <> tail ->
Ok(#(Declaration(versioninfo:, encoding:, standalone:), tail))
Ok(#(XMLDecl(versioninfo:, encoding:, standalone:), tail))
_ -> Error(Nil)
}
}
@@ -310,8 +980,8 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
}
fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
case trim_space(doc) {
"version=" <> tail -> {
case trim_mandatory_space(doc) {
Ok("version=" <> tail) -> {
use #(version, doc) <- result.try(parse_version(tail))
Ok(#(version, doc))
}
@@ -351,8 +1021,8 @@ fn do_parse_version(
}
fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
case trim_space(doc) {
"encoding=" <> tail -> {
case trim_mandatory_space(doc) {
Ok("encoding=" <> tail) -> {
case tail {
"\"" <> tail -> {
use #(encoding, doc) <- result.try(parse_encoding(tail))
@@ -402,10 +1072,11 @@ fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
}
/// Parses the standalone document declaration of the XML declaration:
/// mandatory whitespace followed by `standalone="yes"` or `standalone="no"`
/// (single or double quotes).
///
/// Returns `True` for "yes" and `False` for "no", together with the input
/// after the closing quote.
fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
  case trim_mandatory_space(doc) {
    Ok("standalone=\"yes\"" <> tail) | Ok("standalone='yes'" <> tail) ->
      Ok(#(True, tail))
    // "no" must yield False; returning True here made both values identical.
    Ok("standalone=\"no\"" <> tail) | Ok("standalone='no'" <> tail) ->
      Ok(#(False, tail))
    _ -> Error(Nil)
  }
}
@@ -426,7 +1097,7 @@ fn parse_digit(doc: String) -> Result(#(String, String), Nil) {
}
}
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
case str {
"0" as digit <> tail
| "1" as digit <> tail
@@ -513,12 +1184,12 @@ fn parse_alpha(doc: String) -> Result(#(String, String), Nil) {
}
}
fn parse_comment(doc: String) -> Result(#(String, String), Nil) {
fn parse_comment(doc: String) -> Result(#(Element, String), Nil) {
case doc {
"<!--" <> tail -> {
let #(comment, doc) = do_parse_comment(tail)
case doc {
"-->" <> tail -> Ok(#(comment, tail))
"-->" <> tail -> Ok(#(Comment(comment), tail))
_ -> Error(Nil)
}
}
@@ -634,6 +1305,18 @@ fn parse_name_char(doc: String) -> Result(#(String, String), Nil) {
}
}
/// Skips leading whitespace, requiring at least one whitespace item.
/// Returns the remaining input, or `Error(Nil)` when none was present.
fn trim_mandatory_space(doc: String) -> Result(String, Nil) {
  doc |> do_trim_mandatory_space(True)
}
/// Worker for `trim_mandatory_space`. `first` is `True` only while nothing
/// has been consumed yet; failing to parse whitespace in that state is an
/// error, afterwards it simply ends the loop.
fn do_trim_mandatory_space(doc: String, first: Bool) -> Result(String, Nil) {
  case parse_space(doc) {
    Ok(#(_, rest)) -> do_trim_mandatory_space(rest, False)
    Error(_) if first -> Error(Nil)
    Error(_) -> Ok(doc)
  }
}
fn trim_space(doc: String) -> String {
case parse_space(doc) {
Ok(#(_, doc)) -> trim_space(doc)
@@ -665,7 +1348,7 @@ fn try_parsers(
}
}
pub fn parse_multiple(
fn parse_multiple(
to_parse str: String,
with to_run: fn(String) -> Result(#(String, String), Nil),
) -> Result(#(String, String), Nil) {
@@ -675,6 +1358,21 @@ pub fn parse_multiple(
}
}
/// Applies `to_run` repeatedly to `str`, collecting each parsed value.
///
/// `ret` holds the values gathered so far, newest first. The loop ends when
/// the input is empty or `to_run` fails; `to_run` is never invoked on an
/// empty string. Returns the values in parse order together with the
/// unconsumed input.
fn parse_multiple_to_list(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
  acc ret: List(String),
) -> #(List(String), String) {
  let step = case str {
    "" -> Error(Nil)
    _ -> to_run(str)
  }
  case step {
    Ok(#(item, remaining)) ->
      parse_multiple_to_list(remaining, to_run, [item, ..ret])
    Error(_) -> #(list.reverse(ret), str)
  }
}
fn parse_multiple_optional(
to_parse str: String,
with to_run: fn(String) -> Result(#(String, String), Nil),

5
test/oasis/e2.xml Executable file
View File

@@ -0,0 +1,5 @@
<!DOCTYPE el [
<!ELEMENT el EMPTY>
<!ATTLIST el at (one|two|two) #IMPLIED>
]>
<e1 at="two"/>

1637
test/oasis/oasis.xml Executable file

File diff suppressed because it is too large Load Diff

11
test/oasis/p01fail1.xml Executable file
View File

@@ -0,0 +1,11 @@
<?xml version="1.0"?>
<doc>
<a><b><c/></b></a>
</doc>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>

10
test/oasis/p01fail2.xml Executable file
View File

@@ -0,0 +1,10 @@
<!--bad comment--><?xml version="1.0"?>
<doc>
<a><b><c/></b></a>
</doc>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>

7
test/oasis/p01fail3.xml Executable file
View File

@@ -0,0 +1,7 @@
<doc/><bad/>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>

1
test/oasis/p01fail4.xml Executable file
View File

@@ -0,0 +1 @@
<doc>

3
test/oasis/p01pass1.xml Executable file
View File

@@ -0,0 +1,3 @@
<doc>
<a><b><c/></b></a>
</doc>

23
test/oasis/p01pass2.xml Executable file
View File

@@ -0,0 +1,23 @@
<?PI before document element?>
<!-- comment after document element-->
<?PI before document element?>
<!-- comment after document element-->
<?PI before document element?>
<!-- comment after document element-->
<?PI before document element?>
<!DOCTYPE doc
[
<!ELEMENT doc ANY>
<!ELEMENT a ANY>
<!ELEMENT b ANY>
<!ELEMENT c ANY>
]>
<doc>
<a><b><c/></b></a>
</doc>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>

9
test/oasis/p01pass3.xml Executable file
View File

@@ -0,0 +1,9 @@
<doc>
<a><b><c/></b></a>
</doc>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>
<!-- comment after document element-->
<?PI after document element?>

BIN
test/oasis/p02fail1.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail10.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail11.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail12.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail13.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail14.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail15.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail16.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail17.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail18.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail19.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail2.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail20.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail21.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail22.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail23.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail24.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail25.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail26.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail27.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail28.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail29.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail3.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail30.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail31.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail4.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail5.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail6.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail7.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail8.xml Executable file

Binary file not shown.

BIN
test/oasis/p02fail9.xml Executable file

Binary file not shown.

BIN
test/oasis/p03fail1.xml Executable file

Binary file not shown.

1
test/oasis/p03fail10.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail11.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail12.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail13.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail14.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail15.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail16.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail17.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail18.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail19.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail2.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail20.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail21.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail22.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail23.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail24.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail25.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail26.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail27.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail28.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail29.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail3.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail4.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail5.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail7.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail8.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

1
test/oasis/p03fail9.xml Executable file
View File

@@ -0,0 +1 @@
<doc/>

2
test/oasis/p03pass1.xml Executable file
View File

@@ -0,0 +1,2 @@
<doc/>

1
test/oasis/p04fail1.xml Executable file
View File

@@ -0,0 +1 @@
<A@/>

1
test/oasis/p04fail2.xml Executable file
View File

@@ -0,0 +1 @@
<A#/>

1
test/oasis/p04fail3.xml Executable file
View File

@@ -0,0 +1 @@
<A$/>

6
test/oasis/p04pass1.xml Executable file
View File

@@ -0,0 +1,6 @@
<doc>
<abcdefghijklmnopqrstuvwxyz/>
<ABCDEFGHIJKLMNOPQRSTUVWXYZ/>
<A01234567890/>
<A.-:̀·/>
</doc>

1
test/oasis/p05fail1.xml Executable file
View File

@@ -0,0 +1 @@
<0A/>

1
test/oasis/p05fail2.xml Executable file
View File

@@ -0,0 +1 @@
<.A/>

1
test/oasis/p05fail3.xml Executable file
View File

@@ -0,0 +1 @@
<-A/>

1
test/oasis/p05fail4.xml Executable file
View File

@@ -0,0 +1 @@
<̀A/>

1
test/oasis/p05fail5.xml Executable file
View File

@@ -0,0 +1 @@
<·A/>

8
test/oasis/p05pass1.xml Executable file
View File

@@ -0,0 +1,8 @@
<doc>
<A:._-0/>
<::._-0/>
<_:._-0/>
<A/>
<_/>
<:/>
</doc>

13
test/oasis/p06fail1.xml Executable file
View File

@@ -0,0 +1,13 @@
<!--non-validating processors may pass this instance because they don't check the IDREFS attribute type-->
<!DOCTYPE doc
[
<!ELEMENT doc (a|refs)*>
<!ELEMENT a EMPTY>
<!ELEMENT refs EMPTY>
<!ATTLIST refs refs IDREFS #REQUIRED>
<!ATTLIST a id ID #REQUIRED>
]>
<doc>
<a id="A1"/><a id="A2"/><a id="A3"/>
<refs refs=""/>
</doc>

15
test/oasis/p06pass1.xml Executable file
View File

@@ -0,0 +1,15 @@
<!DOCTYPE doc
[
<!ELEMENT doc (a|refs)*>
<!ELEMENT a EMPTY>
<!ELEMENT refs EMPTY>
<!ATTLIST refs refs IDREFS #REQUIRED>
<!ATTLIST a id ID #REQUIRED>
]>
<doc>
<a id="A1"/><a id="A2"/><a id="A3"/>
<refs refs="A1 A2 A3"/>
<refs refs="A1
A2 A3"/>
<refs refs="A1"/>
</doc>

6
test/oasis/p07pass1.xml Executable file
View File

@@ -0,0 +1,6 @@
<!DOCTYPE doc
[
<!ELEMENT doc EMPTY>
<!ATTLIST doc att (0|35a|A|-a|:a|a:|.|_a) #IMPLIED>
]>
<doc/>

10
test/oasis/p08fail1.xml Executable file
View File

@@ -0,0 +1,10 @@
<!--note: non-validating parsers may accept this document-->
<!DOCTYPE doc
[
<!ELEMENT doc (A*)>
<!ELEMENT A EMPTY>
<!ATTLIST A att NMTOKENS #IMPLIED>
]>
<doc>
<A att=""/>
</doc>

10
test/oasis/p08fail2.xml Executable file
View File

@@ -0,0 +1,10 @@
<!--note: non-validating parsers may accept this document-->
<!DOCTYPE doc
[
<!ELEMENT doc (A*)>
<!ELEMENT A EMPTY>
<!ATTLIST A att NMTOKENS #IMPLIED>
]>
<doc>
<A att="abc / def"/>
</doc>

12
test/oasis/p08pass1.xml Executable file
View File

@@ -0,0 +1,12 @@
<!DOCTYPE doc
[
<!ELEMENT doc (A*)>
<!ELEMENT A EMPTY>
<!ATTLIST A att NMTOKENS #IMPLIED>
]>
<doc>
<A att="abc"/><A att="abc def . :"/><A att="
abc
def
"/>
</doc>

2
test/oasis/p09fail1.dtd Executable file
View File

@@ -0,0 +1,2 @@
<!ELEMENT doc EMPTY>
<!ENTITY % ent1 "asdf%">

2
test/oasis/p09fail1.xml Executable file
View File

@@ -0,0 +1,2 @@
<!DOCTYPE doc SYSTEM "p09fail1.dtd">
<doc/>

2
test/oasis/p09fail2.dtd Executable file
View File

@@ -0,0 +1,2 @@
<!ELEMENT doc EMPTY>
<!ENTITY % ent1 "asdf&">

2
test/oasis/p09fail2.xml Executable file
View File

@@ -0,0 +1,2 @@
<!DOCTYPE doc SYSTEM "p09fail2.dtd">
<doc/>

6
test/oasis/p09fail3.xml Executable file
View File

@@ -0,0 +1,6 @@
<!DOCTYPE doc
[
<!ELEMENT doc EMPTY>
<!ENTITY % ent1 "asdf&#65">
]>
<doc/>

6
test/oasis/p09fail4.xml Executable file
View File

@@ -0,0 +1,6 @@
<!DOCTYPE doc
[
<!ELEMENT doc EMPTY>
<!ENTITY % ent1 'a">
]>
<doc/>

6
test/oasis/p09fail5.xml Executable file
View File

@@ -0,0 +1,6 @@
<!DOCTYPE doc
[
<!ELEMENT doc EMPTY>
<!ENTITY % ent1 "a'>
]>
<doc/>

5
test/oasis/p09pass1.dtd Executable file
View File

@@ -0,0 +1,5 @@
<!ELEMENT doc EMPTY>
<!ENTITY % ent1 "">
<!ENTITY ent2 "text2">
<!ENTITY % ent3 "<!-- <!DOCTYPE <!ELEMENT <? '''&#34;&ent2; %ent1;">
<!ENTITY % ent4 '""&#x27;&#39;"'>

2
test/oasis/p09pass1.xml Executable file
View File

@@ -0,0 +1,2 @@
<!DOCTYPE doc SYSTEM "p09pass1.dtd">
<doc/>

1
test/oasis/p10fail1.xml Executable file
View File

@@ -0,0 +1 @@
<doc a="1 < 2"/>

1
test/oasis/p10fail2.xml Executable file
View File

@@ -0,0 +1 @@
<doc a="1 &"/>

1
test/oasis/p10fail3.xml Executable file
View File

@@ -0,0 +1 @@
<doc a='asd"/>

6
test/oasis/p10pass1.xml Executable file
View File

@@ -0,0 +1,6 @@
<doc>
<A a="asdf>'&#34;>
asdf
?>%"/>
<A a='"">&#39;&#34;'/>
</doc>

Some files were not shown because too many files have changed in this diff Show More