This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import gleam/option.{type Option, None}
|
import gleam/option.{type Option, None}
|
||||||
import gleam/result
|
import gleam/result
|
||||||
|
import gleam/string
|
||||||
|
|
||||||
pub type Declaration {
|
pub type Declaration {
|
||||||
Declaration(versioninfo: String, encoding: String, standalone: Bool)
|
Declaration(versioninfo: String, encoding: String, standalone: Bool)
|
||||||
@@ -14,7 +15,10 @@ pub type Document {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn main() {
|
pub fn main() {
|
||||||
parse_document("<?xml version=\"1.1\" encoding='UTF-8'?>") |> echo
|
parse_document(
|
||||||
|
"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n",
|
||||||
|
)
|
||||||
|
|> echo
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_document(doc: String) -> Result(Document, Nil) {
|
fn parse_document(doc: String) -> Result(Document, Nil) {
|
||||||
@@ -30,7 +34,14 @@ fn parse_prolog(
|
|||||||
Ok(#(decl, doc)) -> #(decl, doc)
|
Ok(#(decl, doc)) -> #(decl, doc)
|
||||||
_ -> #(Declaration("1.0", "UTF-8", False), doc)
|
_ -> #(Declaration("1.0", "UTF-8", False), doc)
|
||||||
}
|
}
|
||||||
|
let #(comment, doc) =
|
||||||
|
parse_multiple_optional(
|
||||||
|
doc,
|
||||||
|
try_parsers([parse_comment, parse_space], _),
|
||||||
|
"",
|
||||||
|
)
|
||||||
|
comment |> echo
|
||||||
|
doc |> echo
|
||||||
Ok(#(decl, None, doc))
|
Ok(#(decl, None, doc))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -237,6 +248,67 @@ fn do_parse_alpha(doc: String) -> Result(#(String, String), Nil) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tries to read one `<!-- ... -->` comment from the very start of `doc`.
///
/// On success returns `Ok(#(text, rest))`, where `text` is whatever
/// `do_parse_comment` collected between the delimiters and `rest` is the input
/// that follows the closing `-->`.
///
/// Returns `Error(Nil)` when `doc` does not begin with `<!--`, or when the
/// input left over by `do_parse_comment` does not begin with `-->`.
fn parse_comment(doc: String) -> Result(#(String, String), Nil) {
  // Strip the opening delimiter first; anything else is not a comment.
  use body <- result.try(case doc {
    "<!--" <> after_open -> Ok(after_open)
    _ -> Error(Nil)
  })

  // The body parser stops at the first position it cannot consume; the
  // closing delimiter must sit exactly there.
  case do_parse_comment(body) {
    #(text, "-->" <> rest) -> Ok(#(text, rest))
    _ -> Error(Nil)
  }
}
|
||||||
|
|
||||||
|
/// Consumes the body of a comment (the part after `<!--`).
///
/// Two alternatives are handed to `try_parsers`, in this order:
///   1. any character accepted by `parse_char_except_dash`;
///   2. a single `-` immediately followed by such a character.
/// Neither alternative can consume two consecutive dashes.
///
/// The alternatives are driven by `parse_multiple_optional` with `""` as its
/// third argument, and its `#(String, String)` result is returned unchanged.
/// NOTE(review): presumably that is #(accumulated text, remaining input), as
/// `parse_comment` treats it that way; confirm against `parse_multiple_optional`.
fn do_parse_comment(doc: String) -> #(String, String) {
  // A dash is only acceptable when the character after it is not a dash.
  let dash_then_char = fn(input) {
    case input {
      "-" <> after_dash ->
        parse_char_except_dash(after_dash)
        |> result.map(fn(parsed) {
          let #(char, rest) = parsed
          #("-" <> char, rest)
        })
      _ -> Error(Nil)
    }
  }

  let comment_unit = try_parsers([parse_char_except_dash, dash_then_char], _)

  parse_multiple_optional(doc, comment_unit, "")
}
|
||||||
|
|
||||||
|
/// Parses one character from the front of `doc`, refusing a leading `-`.
///
/// Returns `Error(Nil)` when `doc` starts with `-`; otherwise returns whatever
/// `parse_char` returns for the same input.
fn parse_char_except_dash(doc: String) -> Result(#(String, String), Nil) {
  case string.starts_with(doc, "-") {
    True -> Error(Nil)
    False -> parse_char(doc)
  }
}
|
||||||
|
|
||||||
|
/// Parses a single character (one grapheme cluster) from the front of `doc`.
///
/// The grapheme is accepted only when every codepoint in it is one of:
/// `#x9`, `#xA`, `#xD`, `#x20-#xD7FF`, `#xE000-#xFFFD` or `#x10000-#x10FFFF`.
///
/// Returns `Ok(#(grapheme, rest))` on success, and `Error(Nil)` when `doc` is
/// empty or the grapheme contains a codepoint outside those ranges.
///
/// A grapheme cluster may hold several codepoints (for example `"\r\n"`, or a
/// letter followed by a combining mark), so all of them are checked rather
/// than asserting that there is exactly one, which would crash on such input.
fn parse_char(doc: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(doc) {
    Ok(#(grapheme, tail)) ->
      case all_codepoints_are_xml_chars(string.to_utf_codepoints(grapheme)) {
        True -> Ok(#(grapheme, tail))
        False -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}

/// Returns `True` when every codepoint in the list is an allowed character.
fn all_codepoints_are_xml_chars(codepoints: List(UtfCodepoint)) -> Bool {
  case codepoints {
    [] -> True
    [codepoint, ..rest] ->
      case is_xml_char_value(string.utf_codepoint_to_int(codepoint)) {
        True -> all_codepoints_are_xml_chars(rest)
        False -> False
      }
  }
}

/// Returns `True` when the integer codepoint value `i` is an allowed character.
fn is_xml_char_value(i: Int) -> Bool {
  case i {
    // Tab, line feed and carriage return are the only permitted controls.
    0x9 | 0xA | 0xD -> True
    _ if i >= 0x20 && i <= 0xD7FF -> True
    _ if i >= 0xE000 && i <= 0xFFFD -> True
    _ if i >= 0x10000 && i <= 0x10FFFF -> True
    _ -> False
  }
}
|
||||||
|
|
||||||
fn trim_space(doc: String) -> String {
|
fn trim_space(doc: String) -> String {
|
||||||
case parse_space(doc) {
|
case parse_space(doc) {
|
||||||
Ok(#(_, doc)) -> trim_space(doc)
|
Ok(#(_, doc)) -> trim_space(doc)
|
||||||
|
|||||||
Reference in New Issue
Block a user