This commit is contained in:
280
src/glxml.gleam
280
src/glxml.gleam
@@ -7,7 +7,12 @@ import gleam/result
|
||||
import gleam/string
|
||||
|
||||
pub type Declaration {
|
||||
Declaration(versioninfo: String, encoding: String, standalone: Bool)
|
||||
XMLDecl(versioninfo: String, encoding: String, standalone: Bool)
|
||||
GEntityDecl
|
||||
PEntityDecl(name: String, decl: String)
|
||||
ElementDecl
|
||||
AttListDecl
|
||||
NotationDecl
|
||||
}
|
||||
|
||||
pub type Entity {
|
||||
@@ -16,8 +21,17 @@ pub type Entity {
|
||||
PublicExternalEntity(literal: String, pubidliteral: String)
|
||||
}
|
||||
|
||||
/// An external identifier attached to a document type declaration.
pub type ExternalID {
  /// Built by `parse_external_id` for the `SYSTEM` keyword: carries only the
  /// system literal.
  SystemID(
    system_literal: String,
  )
  /// Built by `parse_external_id` for the `PUBLIC` keyword: carries the
  /// public identifier literal together with the system literal.
  PublicID(
    system_literal: String,
    public_literal: String,
  )
}
|
||||
|
||||
pub type DocType {
|
||||
DocType(name: String, entities: dict.Dict(String, Entity))
|
||||
DocType(
|
||||
name: String,
|
||||
external_id: Option(ExternalID),
|
||||
entities: dict.Dict(String, Entity),
|
||||
)
|
||||
}
|
||||
|
||||
pub type Document {
|
||||
@@ -387,7 +401,7 @@ fn process_reference(
|
||||
doctype: Option(DocType),
|
||||
) -> Result(String, Nil) {
|
||||
case doctype {
|
||||
Some(DocType(_, entities)) -> {
|
||||
Some(DocType(_, _, entities)) -> {
|
||||
get_reference(entities, ref)
|
||||
}
|
||||
None -> {
|
||||
@@ -431,13 +445,249 @@ fn do_parse_name(doc: String, name: String) -> Result(#(String, String), Nil) {
|
||||
fn parse_prolog(
|
||||
doc: String,
|
||||
) -> Result(#(Declaration, Option(DocType), String), Nil) {
|
||||
let #(decl, doc) = case parse_decl(doc) {
|
||||
Ok(#(decl, doc)) -> #(decl, doc)
|
||||
_ -> #(Declaration("1.0", "UTF-8", False), doc)
|
||||
}
|
||||
let #(decl, doc) =
|
||||
parse_decl(doc) |> result.unwrap(#(XMLDecl("1.0", "UTF-8", False), doc))
|
||||
|
||||
let doc = parse_misc(doc)
|
||||
|
||||
Ok(#(decl, None, doc))
|
||||
let #(doctype, doc) =
|
||||
parse_doctype(doc)
|
||||
|> result.map(fn(d) { #(Some(d.0), d.1) })
|
||||
|> result.unwrap(#(None, doc))
|
||||
|
||||
Ok(#(decl, doctype, doc))
|
||||
}
|
||||
|
||||
/// Parses a document type declaration (`<!DOCTYPE ... >`) at the start of
/// `doc`.
///
/// On success returns the `DocType` together with the input that follows the
/// closing `>`. Returns `Error(Nil)` when `doc` does not start with
/// `<!DOCTYPE`, when no name can be parsed, or when the declaration is not
/// closed by `>`.
///
/// The external ID and the internal subset are both optional: if either
/// sub-parser fails, the input is left untouched and parsing continues.
fn parse_doctype(doc: String) -> Result(#(DocType, String), Nil) {
  case doc {
    "<!DOCTYPE" <> tail -> {
      use #(name, doc) <- result.try(parse_name(trim_space(tail)))

      // Optional external ID; fall back to `None` and the unconsumed input.
      let #(external_id, doc) =
        parse_external_id(doc) |> result.unwrap(#(None, doc))
      let doc = trim_space(doc)

      // Optional internal subset. The parsed declarations are not folded into
      // the `DocType` yet (the entity table below is still created empty), so
      // the binding is deliberately discarded.
      let #(_int_subset, doc) =
        parse_int_subset(doc) |> result.unwrap(#([], doc))

      // XML allows optional whitespace between the closing `]` of the
      // internal subset and the final `>`, so skip it before matching.
      case trim_space(doc) {
        ">" <> tail -> Ok(#(DocType(name, external_id, dict.new()), tail))
        _ -> Error(Nil)
      }
    }
    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Parses a bracketed internal subset (`[ ... ]`).
///
/// Leading whitespace is passed through `trim_space` first. The input must
/// then open with `[`; the contents are handed to `do_parse_int_subset`, and
/// whatever that leaves behind must start with `]`. Returns the collected
/// declarations and the input after the `]`, or `Error(Nil)` otherwise.
fn parse_int_subset(doc: String) -> Result(#(List(Declaration), String), Nil) {
  case trim_space(doc) {
    "[" <> body ->
      case do_parse_int_subset(body, []) {
        Ok(#(declarations, "]" <> remainder)) -> Ok(#(declarations, remainder))
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Worker for `parse_int_subset`: consumes the contents of an internal subset
/// while accumulating declarations in `decl_list`.
///
/// - At `]` the subset is finished: the accumulated declarations are returned
///   and the `]` is left in the remaining input for the caller to match.
/// - An exhausted input means the subset was never closed: `Error(Nil)`.
/// - A parameter-entity reference (`%name;`) is replaced by the text recorded
///   for that entity in `decl_list` and parsing continues on the result; an
///   unknown entity or a missing `;` yields `Error(Nil)`.
/// - Markup declarations are not implemented yet and still hit `todo`.
fn do_parse_int_subset(
  doc: String,
  decl_list: List(Declaration),
) -> Result(#(List(Declaration), String), Nil) {
  let doc = trim_space(doc)
  case doc {
    // End of the subset. Do not consume the bracket: `parse_int_subset`
    // checks for it itself.
    "]" <> _ -> Ok(#(decl_list, doc))
    // Ran out of input before the closing bracket.
    "" -> Error(Nil)
    "%" <> tail -> {
      use #(name, doc) <- result.try(parse_name(tail))
      case doc {
        ";" <> tail ->
          case get_entity_replacement(name, decl_list) {
            // Splice the replacement text in front of the remaining input.
            Some(decl) -> do_parse_int_subset(decl <> tail, decl_list)
            None -> Error(Nil)
          }
        _ -> Error(Nil)
      }
    }
    _ -> todo as "markup declarations in the internal subset are not parsed yet"
  }
}
|
||||
|
||||
/// Looks up the text stored for the parameter entity called `entity`.
///
/// Scans `decl_list` in order and returns the `decl` field of the first
/// `PEntityDecl` whose name equals `entity`, or `None` when there is no such
/// declaration.
fn get_entity_replacement(
  entity: String,
  decl_list: List(Declaration),
) -> Option(String) {
  let lookup =
    list.find_map(decl_list, fn(candidate) {
      case candidate {
        PEntityDecl(name, replacement) if name == entity -> Ok(replacement)
        _ -> Error(Nil)
      }
    })

  case lookup {
    Ok(replacement) -> Some(replacement)
    Error(_) -> None
  }
}
|
||||
|
||||
/// Parses an external identifier: `SYSTEM` followed by a system literal, or
/// `PUBLIC` followed by a public literal and then a system literal.
///
/// `trim_space` is applied before the keyword and before each literal. On
/// success the identifier is always wrapped in `Some`, paired with the
/// remaining input. Returns `Error(Nil)` when neither keyword is present or
/// when a literal fails to parse.
fn parse_external_id(doc: String) -> Result(#(Option(ExternalID), String), Nil) {
  case trim_space(doc) {
    "SYSTEM" <> after_keyword ->
      trim_space(after_keyword)
      |> parse_system_literal(None, "")
      |> result.map(fn(parsed) {
        let #(system_literal, remainder) = parsed
        #(Some(SystemID(system_literal:)), remainder)
      })

    "PUBLIC" <> after_keyword -> {
      use #(public_literal, after_public) <- result.try(
        parse_public_literal(trim_space(after_keyword), None, ""),
      )
      use #(system_literal, remainder) <- result.try(
        parse_system_literal(trim_space(after_public), None, ""),
      )
      Ok(#(Some(PublicID(system_literal:, public_literal:)), remainder))
    }

    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Parses a quoted public identifier literal.
///
/// Call with `quote` set to `None`: the input must then open with `"` or `'`,
/// which becomes the active quote (any `literal` passed in is discarded at
/// that point). With a quote active, characters are appended to `literal`
/// until the matching quote is reached, and the collected text plus the input
/// after the closing quote is returned.
///
/// Only the characters accepted by `take_pubid_char` are allowed inside the
/// literal, plus `'` when the literal is delimited by `"`. Any other
/// character, or running out of input, gives `Error(Nil)`.
fn parse_public_literal(
  doc: String,
  quote: Option(String),
  literal: String,
) -> Result(#(String, String), Nil) {
  case quote, doc {
    // Opening quote: remember which one and start with an empty literal.
    None, "\"" <> rest -> parse_public_literal(rest, Some("\""), "")
    None, "'" <> rest -> parse_public_literal(rest, Some("'"), "")
    None, _ -> Error(Nil)

    // Matching closing quote ends the literal.
    Some("\""), "\"" <> rest -> Ok(#(literal, rest))
    Some("'"), "'" <> rest -> Ok(#(literal, rest))

    // An apostrophe is ordinary content inside a double-quoted literal.
    Some("\""), "'" <> rest -> parse_public_literal(rest, quote, literal <> "'")

    Some(_), _ ->
      case take_pubid_char(doc) {
        Ok(#(ch, rest)) -> parse_public_literal(rest, quote, literal <> ch)
        Error(Nil) -> Error(Nil)
      }
  }
}

/// Splits one permitted public-identifier character off the front of `doc`.
///
/// Accepts space, carriage return, line feed, ASCII digits and letters, and
/// the punctuation `-()+,./:=?;!*#@$_%`. Returns the character and the rest
/// of the input, or `Error(Nil)` for anything else (including empty input).
fn take_pubid_char(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    " " as ch <> rest
    | "\r" as ch <> rest
    | "\n" as ch <> rest
    | "0" as ch <> rest
    | "1" as ch <> rest
    | "2" as ch <> rest
    | "3" as ch <> rest
    | "4" as ch <> rest
    | "5" as ch <> rest
    | "6" as ch <> rest
    | "7" as ch <> rest
    | "8" as ch <> rest
    | "9" as ch <> rest
    | "a" as ch <> rest
    | "b" as ch <> rest
    | "c" as ch <> rest
    | "d" as ch <> rest
    | "e" as ch <> rest
    | "f" as ch <> rest
    | "g" as ch <> rest
    | "h" as ch <> rest
    | "i" as ch <> rest
    | "j" as ch <> rest
    | "k" as ch <> rest
    | "l" as ch <> rest
    | "m" as ch <> rest
    | "n" as ch <> rest
    | "o" as ch <> rest
    | "p" as ch <> rest
    | "q" as ch <> rest
    | "r" as ch <> rest
    | "s" as ch <> rest
    | "t" as ch <> rest
    | "u" as ch <> rest
    | "v" as ch <> rest
    | "w" as ch <> rest
    | "x" as ch <> rest
    | "y" as ch <> rest
    | "z" as ch <> rest
    | "A" as ch <> rest
    | "B" as ch <> rest
    | "C" as ch <> rest
    | "D" as ch <> rest
    | "E" as ch <> rest
    | "F" as ch <> rest
    | "G" as ch <> rest
    | "H" as ch <> rest
    | "I" as ch <> rest
    | "J" as ch <> rest
    | "K" as ch <> rest
    | "L" as ch <> rest
    | "M" as ch <> rest
    | "N" as ch <> rest
    | "O" as ch <> rest
    | "P" as ch <> rest
    | "Q" as ch <> rest
    | "R" as ch <> rest
    | "S" as ch <> rest
    | "T" as ch <> rest
    | "U" as ch <> rest
    | "V" as ch <> rest
    | "W" as ch <> rest
    | "X" as ch <> rest
    | "Y" as ch <> rest
    | "Z" as ch <> rest
    | "-" as ch <> rest
    | "(" as ch <> rest
    | ")" as ch <> rest
    | "+" as ch <> rest
    | "," as ch <> rest
    | "." as ch <> rest
    | "/" as ch <> rest
    | ":" as ch <> rest
    | "=" as ch <> rest
    | "?" as ch <> rest
    | ";" as ch <> rest
    | "!" as ch <> rest
    | "*" as ch <> rest
    | "#" as ch <> rest
    | "@" as ch <> rest
    | "$" as ch <> rest
    | "_" as ch <> rest
    | "%" as ch <> rest
    -> Ok(#(ch, rest))
    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Parses a quoted system literal.
///
/// Call with `quote` set to `None`: the input must then open with `"` or `'`,
/// which becomes the active quote (any `literal` passed in is discarded at
/// that point). With a quote active, every grapheme is appended to `literal`
/// until the matching quote is reached; the other quote character counts as
/// ordinary content. Returns the collected text and the input after the
/// closing quote, or `Error(Nil)` when the opening quote is missing or the
/// input ends before the literal is closed.
fn parse_system_literal(
  doc: String,
  quote: Option(String),
  literal: String,
) -> Result(#(String, String), Nil) {
  case quote, doc {
    // Opening quote: remember which one and start with an empty literal.
    None, "\"" <> rest -> parse_system_literal(rest, Some("\""), "")
    None, "'" <> rest -> parse_system_literal(rest, Some("'"), "")
    None, _ -> Error(Nil)

    // Input exhausted before the closing quote.
    Some(_), "" -> Error(Nil)

    // Matching closing quote ends the literal.
    Some("\""), "\"" <> rest -> Ok(#(literal, rest))
    Some("'"), "'" <> rest -> Ok(#(literal, rest))

    Some(_), _ -> {
      // `doc` is non-empty here (the empty case is handled above), so
      // popping a grapheme cannot fail.
      let assert Ok(#(grapheme, rest)) = string.pop_grapheme(doc)
      parse_system_literal(rest, quote, literal <> grapheme)
    }
  }
}
|
||||
|
||||
fn parse_misc(doc: String) -> String {
|
||||
@@ -463,18 +713,14 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
|
||||
case doc {
|
||||
"<?xml" <> tail -> {
|
||||
use #(versioninfo, doc) <- result.try(parse_versioninfo(tail))
|
||||
let #(encoding, doc) = case parse_encodingdecl(doc) {
|
||||
Ok(e) -> e
|
||||
Error(_) -> #("", doc)
|
||||
}
|
||||
let #(standalone, doc) = case parse_standalone(doc) {
|
||||
Ok(e) -> e
|
||||
Error(_) -> #(False, doc)
|
||||
}
|
||||
let #(encoding, doc) =
|
||||
parse_encodingdecl(doc) |> result.unwrap(#("", doc))
|
||||
let #(standalone, doc) =
|
||||
parse_standalone(doc) |> result.unwrap(#(False, doc))
|
||||
|
||||
case trim_space(doc) {
|
||||
"?>" <> tail ->
|
||||
Ok(#(Declaration(versioninfo:, encoding:, standalone:), tail))
|
||||
Ok(#(XMLDecl(versioninfo:, encoding:, standalone:), tail))
|
||||
_ -> Error(Nil)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user