This commit is contained in:
280
src/glxml.gleam
280
src/glxml.gleam
@@ -7,7 +7,12 @@ import gleam/result
|
|||||||
import gleam/string
|
import gleam/string
|
||||||
|
|
||||||
pub type Declaration {
|
pub type Declaration {
|
||||||
Declaration(versioninfo: String, encoding: String, standalone: Bool)
|
XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
|
||||||
|
GEntityDecl
|
||||||
|
PEntityDecl(name: String, decl: String)
|
||||||
|
ElementDecl
|
||||||
|
AttListDecl
|
||||||
|
NotationDecl
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Entity {
|
pub type Entity {
|
||||||
@@ -16,8 +21,17 @@ pub type Entity {
|
|||||||
PublicExternalEntity(literal: String, pubidliteral: String)
|
PublicExternalEntity(literal: String, pubidliteral: String)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// External identifier attached to a document type declaration.
/// Values are built by `parse_external_id`; `DocType` stores one as an
/// `Option(ExternalID)`.
pub type ExternalID {
|
||||||
|
/// Built when the identifier starts with the `SYSTEM` keyword: carries only
/// the quoted system literal.
SystemID(system_literal: String)
|
||||||
|
/// Built when the identifier starts with the `PUBLIC` keyword: carries the
/// public literal and the system literal that follows it in the input.
PublicID(system_literal: String, public_literal: String)
|
||||||
|
}
|
||||||
|
|
||||||
pub type DocType {
|
pub type DocType {
|
||||||
DocType(name: String, entities: dict.Dict(String, Entity))
|
DocType(
|
||||||
|
name: String,
|
||||||
|
external_id: Option(ExternalID),
|
||||||
|
entities: dict.Dict(String, Entity),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Document {
|
pub type Document {
|
||||||
@@ -387,7 +401,7 @@ fn process_reference(
|
|||||||
doctype: Option(DocType),
|
doctype: Option(DocType),
|
||||||
) -> Result(String, Nil) {
|
) -> Result(String, Nil) {
|
||||||
case doctype {
|
case doctype {
|
||||||
Some(DocType(_, entities)) -> {
|
Some(DocType(_, _, entities)) -> {
|
||||||
get_reference(entities, ref)
|
get_reference(entities, ref)
|
||||||
}
|
}
|
||||||
None -> {
|
None -> {
|
||||||
@@ -431,13 +445,249 @@ fn do_parse_name(doc: String, name: String) -> Result(#(String, String), Nil) {
|
|||||||
fn parse_prolog(
|
fn parse_prolog(
|
||||||
doc: String,
|
doc: String,
|
||||||
) -> Result(#(Declaration, Option(DocType), String), Nil) {
|
) -> Result(#(Declaration, Option(DocType), String), Nil) {
|
||||||
let #(decl, doc) = case parse_decl(doc) {
|
let #(decl, doc) =
|
||||||
Ok(#(decl, doc)) -> #(decl, doc)
|
parse_decl(doc) |> result.unwrap(#(XMLDecl("1.0", "UTF-8", False), doc))
|
||||||
_ -> #(Declaration("1.0", "UTF-8", False), doc)
|
|
||||||
}
|
|
||||||
let doc = parse_misc(doc)
|
let doc = parse_misc(doc)
|
||||||
|
|
||||||
Ok(#(decl, None, doc))
|
let #(doctype, doc) =
|
||||||
|
parse_doctype(doc)
|
||||||
|
|> result.map(fn(d) { #(Some(d.0), d.1) })
|
||||||
|
|> result.unwrap(#(None, doc))
|
||||||
|
|
||||||
|
Ok(#(decl, doctype, doc))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a document type declaration from the start of `doc`.
///
/// Expected shape: `<!DOCTYPE`, a name, an optional external ID, an optional
/// bracketed internal subset, then `>`.
///
/// Returns the resulting `DocType` together with the input that follows the
/// closing `>`. Returns `Error(Nil)` when `doc` does not start with
/// `<!DOCTYPE`, when no name can be parsed, or when the closing `>` is
/// missing.
fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
  case doc {
    "<!DOCTYPE" <> tail -> {
      let doc = trim_space(tail)
      use #(name, doc) <- result.try(parse_name(doc))

      // The external ID is optional: when it cannot be parsed, carry on from
      // the same position with no identifier.
      let #(external_id, doc) =
        parse_external_id(doc) |> result.unwrap(#(None, doc))

      let doc = trim_space(doc)

      // The internal subset is optional as well. Its declarations are parsed
      // (so the input is consumed) but are not yet converted into entities,
      // hence the discarded binding and the empty dict below.
      let #(_int_subset, doc) =
        parse_int_subset(doc) |> result.unwrap(#([], doc))

      // The XML grammar permits whitespace between the subset's `]` and the
      // closing `>`, so skip it before matching.
      // NOTE(review): relies on `trim_space` dropping leading whitespace, as
      // its other uses in this function suggest.
      case trim_space(doc) {
        ">" <> tail -> Ok(#(DocType(name, external_id, dict.new()), tail))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
|
||||||
|
|
||||||
|
/// Parses a bracketed internal subset.
///
/// After `trim_space`, the input must open with `[`; the body is handed to
/// `do_parse_int_subset`, and whatever that leaves over must begin with `]`.
///
/// Returns the collected declarations and the input after the closing `]`,
/// or `Error(Nil)` when a bracket is missing or the body fails to parse.
fn parse_int_subset(doc: String) -> Result(#(List(Declaration), String), Nil) {
  case trim_space(doc) {
    "[" <> body ->
      do_parse_int_subset(body, [])
      |> result.try(fn(parsed) {
        let #(declarations, remainder) = parsed
        case remainder {
          "]" <> after -> Ok(#(declarations, after))
          _ -> Error(Nil)
        }
      })
    _ -> Error(Nil)
  }
}
|
||||||
|
|
||||||
|
/// Walks the body of an internal subset, accumulating declarations.
///
/// - `doc`: remaining input, positioned just after the opening `[`.
/// - `decl_list`: declarations gathered so far; also used to resolve
///   parameter-entity references.
///
/// A reference of the form `%name;` is replaced by the text of the matching
/// `PEntityDecl` and parsing resumes on the expanded input. An unknown name
/// or a reference without the closing `;` yields `Error(Nil)`.
///
/// When the input reaches `]`, the function returns the accumulated
/// declarations and leaves the `]` unconsumed, because `parse_int_subset`
/// matches it itself.
///
/// Any other content yields `Error(Nil)` rather than panicking.
/// TODO: parse markup declarations (entity, element, attlist, notation).
fn do_parse_int_subset(
  doc: String,
  decl_list: List(Declaration),
) -> Result(#(List(Declaration), String), Nil) {
  let doc = trim_space(doc)
  case doc {
    "%" <> tail -> {
      use #(name, doc) <- result.try(parse_name(tail))
      case doc {
        ";" <> tail ->
          case get_entity_replacement(name, decl_list) {
            // NOTE(review): an entity whose replacement text refers back to
            // itself would recurse without end here; confirm whether that
            // needs guarding.
            Some(decl) -> do_parse_int_subset(decl <> tail, decl_list)
            _ -> Error(Nil)
          }
        _ -> Error(Nil)
      }
    }
    // End of the subset: hand the `]` back to the caller.
    "]" <> _ -> Ok(#(decl_list, doc))
    // Unsupported or malformed content (including running out of input).
    _ -> Error(Nil)
  }
}
|
||||||
|
|
||||||
|
/// Looks up the replacement text of the parameter entity called `entity`.
///
/// Scans `decl_list` front to back and returns the `decl` field of the first
/// `PEntityDecl` whose name equals `entity`. Returns `None` when no such
/// declaration exists; all other declaration variants are skipped.
fn get_entity_replacement(
  entity: String,
  decl_list: List(Declaration),
) -> Option(String) {
  case decl_list {
    [] -> None
    [PEntityDecl(name, replacement), ..] if name == entity -> Some(replacement)
    [_, ..remaining] -> get_entity_replacement(entity, remaining)
  }
}
|
||||||
|
|
||||||
|
/// Parses an external identifier from the start of `doc` (after
/// `trim_space`).
///
/// - `SYSTEM` followed by a quoted system literal gives `SystemID`.
/// - `PUBLIC` followed by a quoted public literal and then a quoted system
///   literal gives `PublicID`.
///
/// On success the identifier is wrapped in `Some` and paired with the
/// remaining input. Any other keyword, or a literal that fails to parse,
/// gives `Error(Nil)`.
fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
  case trim_space(doc) {
    "SYSTEM" <> after_keyword ->
      parse_system_literal(trim_space(after_keyword), None, "")
      |> result.map(fn(parsed) {
        #(Some(SystemID(system_literal: parsed.0)), parsed.1)
      })

    "PUBLIC" <> after_keyword -> {
      use #(public_literal, after_public) <- result.try(parse_public_literal(
        trim_space(after_keyword),
        None,
        "",
      ))
      parse_system_literal(trim_space(after_public), None, "")
      |> result.map(fn(parsed) {
        #(Some(PublicID(system_literal: parsed.0, public_literal:)), parsed.1)
      })
    }

    _ -> Error(Nil)
  }
}
|
||||||
|
|
||||||
|
/// Parses a quoted public identifier literal.
///
/// - `doc`: remaining input.
/// - `quote`: `None` before the opening quote has been seen, afterwards
///   `Some` of the quote character that opened the literal.
/// - `literal`: characters collected so far (reset to `""` when the opening
///   quote is consumed).
///
/// Call with `None` and `""` to parse from the opening quote. Returns the
/// literal's contents (quotes excluded) and the input after the closing
/// quote. Returns `Error(Nil)` if the input does not start with a quote,
/// ends before the closing quote, or contains a character outside the
/// accepted set listed in the clauses below.
fn parse_public_literal(
  doc: String,
  quote: Option(String),
  literal: String,
) -> Result(#(String, String), Nil) {
  case doc, quote {
    // Opening quote: remember which one was used.
    "\"" as opener <> rest, None | "'" as opener <> rest, None ->
      parse_public_literal(rest, Some(opener), "")

    // Ran out of input, or the first character was not a quote.
    "", _ -> Error(Nil)
    _, None -> Error(Nil)

    // Closing quote: must be the same character that opened the literal.
    "\"" <> rest, Some("\"") -> Ok(#(literal, rest))
    "'" <> rest, Some("'") -> Ok(#(literal, rest))

    // Whitespace
    " " as ch <> rest, Some(_)
    | "\r" as ch <> rest, Some(_)
    | "\n" as ch <> rest, Some(_)
    // Digits
    | "0" as ch <> rest, Some(_) | "1" as ch <> rest, Some(_)
    | "2" as ch <> rest, Some(_) | "3" as ch <> rest, Some(_)
    | "4" as ch <> rest, Some(_) | "5" as ch <> rest, Some(_)
    | "6" as ch <> rest, Some(_) | "7" as ch <> rest, Some(_)
    | "8" as ch <> rest, Some(_) | "9" as ch <> rest, Some(_)
    // Lowercase letters
    | "a" as ch <> rest, Some(_) | "b" as ch <> rest, Some(_)
    | "c" as ch <> rest, Some(_) | "d" as ch <> rest, Some(_)
    | "e" as ch <> rest, Some(_) | "f" as ch <> rest, Some(_)
    | "g" as ch <> rest, Some(_) | "h" as ch <> rest, Some(_)
    | "i" as ch <> rest, Some(_) | "j" as ch <> rest, Some(_)
    | "k" as ch <> rest, Some(_) | "l" as ch <> rest, Some(_)
    | "m" as ch <> rest, Some(_) | "n" as ch <> rest, Some(_)
    | "o" as ch <> rest, Some(_) | "p" as ch <> rest, Some(_)
    | "q" as ch <> rest, Some(_) | "r" as ch <> rest, Some(_)
    | "s" as ch <> rest, Some(_) | "t" as ch <> rest, Some(_)
    | "u" as ch <> rest, Some(_) | "v" as ch <> rest, Some(_)
    | "w" as ch <> rest, Some(_) | "x" as ch <> rest, Some(_)
    | "y" as ch <> rest, Some(_) | "z" as ch <> rest, Some(_)
    // Uppercase letters
    | "A" as ch <> rest, Some(_) | "B" as ch <> rest, Some(_)
    | "C" as ch <> rest, Some(_) | "D" as ch <> rest, Some(_)
    | "E" as ch <> rest, Some(_) | "F" as ch <> rest, Some(_)
    | "G" as ch <> rest, Some(_) | "H" as ch <> rest, Some(_)
    | "I" as ch <> rest, Some(_) | "J" as ch <> rest, Some(_)
    | "K" as ch <> rest, Some(_) | "L" as ch <> rest, Some(_)
    | "M" as ch <> rest, Some(_) | "N" as ch <> rest, Some(_)
    | "O" as ch <> rest, Some(_) | "P" as ch <> rest, Some(_)
    | "Q" as ch <> rest, Some(_) | "R" as ch <> rest, Some(_)
    | "S" as ch <> rest, Some(_) | "T" as ch <> rest, Some(_)
    | "U" as ch <> rest, Some(_) | "V" as ch <> rest, Some(_)
    | "W" as ch <> rest, Some(_) | "X" as ch <> rest, Some(_)
    | "Y" as ch <> rest, Some(_) | "Z" as ch <> rest, Some(_)
    // Punctuation
    | "-" as ch <> rest, Some(_) | "(" as ch <> rest, Some(_)
    | ")" as ch <> rest, Some(_) | "+" as ch <> rest, Some(_)
    | "," as ch <> rest, Some(_) | "." as ch <> rest, Some(_)
    | "/" as ch <> rest, Some(_) | ":" as ch <> rest, Some(_)
    | "=" as ch <> rest, Some(_) | "?" as ch <> rest, Some(_)
    | ";" as ch <> rest, Some(_) | "!" as ch <> rest, Some(_)
    | "*" as ch <> rest, Some(_) | "#" as ch <> rest, Some(_)
    | "@" as ch <> rest, Some(_) | "$" as ch <> rest, Some(_)
    | "_" as ch <> rest, Some(_) | "%" as ch <> rest, Some(_)
    // An apostrophe is content only inside a double-quoted literal.
    | "'" as ch <> rest, Some("\"")
    -> parse_public_literal(rest, quote, literal <> ch)

    // Any other character is not allowed in a public literal.
    _, _ -> Error(Nil)
  }
}
|
||||||
|
|
||||||
|
/// Parses a quoted system literal.
///
/// - `doc`: remaining input.
/// - `quote`: `None` before the opening quote has been seen, afterwards
///   `Some` of the quote character that opened the literal.
/// - `literal`: text collected so far (reset to `""` when the opening quote
///   is consumed).
///
/// Call with `None` and `""` to parse from the opening quote. Everything up
/// to the matching closing quote is accepted, one grapheme at a time.
/// Returns the literal's contents (quotes excluded) and the input after the
/// closing quote. Returns `Error(Nil)` if the input does not start with a
/// quote or ends before the closing quote.
fn parse_system_literal(
  doc: String,
  quote: Option(String),
  literal: String,
) -> Result(#(String, String), Nil) {
  case doc, quote {
    // Opening quote: remember which one was used.
    "\"" as opener <> rest, None | "'" as opener <> rest, None ->
      parse_system_literal(rest, Some(opener), "")

    // Ran out of input, or the first character was not a quote.
    "", _ -> Error(Nil)
    _, None -> Error(Nil)

    // Closing quote: must be the same character that opened the literal.
    "\"" <> rest, Some("\"") -> Ok(#(literal, rest))
    "'" <> rest, Some("'") -> Ok(#(literal, rest))

    // Anything else is literal content.
    _, _ ->
      case string.pop_grapheme(doc) {
        Ok(#(grapheme, rest)) ->
          parse_system_literal(rest, quote, literal <> grapheme)
        // Not reachable: the empty string was handled above.
        Error(_) -> Error(Nil)
      }
  }
}
|
||||||
|
|
||||||
fn parse_misc(doc: String) -> String {
|
fn parse_misc(doc: String) -> String {
|
||||||
@@ -463,18 +713,14 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
|
|||||||
case doc {
|
case doc {
|
||||||
"<?xml" <> tail -> {
|
"<?xml" <> tail -> {
|
||||||
use #(versioninfo, doc) <- result.try(parse_versioninfo(tail))
|
use #(versioninfo, doc) <- result.try(parse_versioninfo(tail))
|
||||||
let #(encoding, doc) = case parse_encodingdecl(doc) {
|
let #(encoding, doc) =
|
||||||
Ok(e) -> e
|
parse_encodingdecl(doc) |> result.unwrap(#("", doc))
|
||||||
Error(_) -> #("", doc)
|
let #(standalone, doc) =
|
||||||
}
|
parse_standalone(doc) |> result.unwrap(#(False, doc))
|
||||||
let #(standalone, doc) = case parse_standalone(doc) {
|
|
||||||
Ok(e) -> e
|
|
||||||
Error(_) -> #(False, doc)
|
|
||||||
}
|
|
||||||
|
|
||||||
case trim_space(doc) {
|
case trim_space(doc) {
|
||||||
"?>" <> tail ->
|
"?>" <> tail ->
|
||||||
Ok(#(Declaration(versioninfo:, encoding:, standalone:), tail))
|
Ok(#(XMLDecl(versioninfo:, encoding:, standalone:), tail))
|
||||||
_ -> Error(Nil)
|
_ -> Error(Nil)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user