This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import gleam/option.{type Option, None}
|
||||
import gleam/result
|
||||
import gleam/string
|
||||
|
||||
pub type Declaration {
|
||||
Declaration(versioninfo: String, encoding: String, standalone: Bool)
|
||||
@@ -14,7 +15,10 @@ pub type Document {
|
||||
}
|
||||
|
||||
/// Manual smoke test: runs `parse_document` on two sample inputs and prints
/// each result with `echo`.
pub fn main() {
  // A document that consists of nothing but the XML declaration.
  let declaration_only = "<?xml version=\"1.1\" encoding='UTF-8'?>"
  echo parse_document(declaration_only)

  // The same declaration followed by a CRLF, spaces and a comment.
  let declaration_and_comment =
    "<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n"
  echo parse_document(declaration_and_comment)
}
|
||||
|
||||
fn parse_document(doc: String) -> Result(Document, Nil) {
|
||||
@@ -30,7 +34,14 @@ fn parse_prolog(
|
||||
Ok(#(decl, doc)) -> #(decl, doc)
|
||||
_ -> #(Declaration("1.0", "UTF-8", False), doc)
|
||||
}
|
||||
|
||||
let #(comment, doc) =
|
||||
parse_multiple_optional(
|
||||
doc,
|
||||
try_parsers([parse_comment, parse_space], _),
|
||||
"",
|
||||
)
|
||||
comment |> echo
|
||||
doc |> echo
|
||||
Ok(#(decl, None, doc))
|
||||
}
|
||||
|
||||
@@ -237,6 +248,67 @@ fn do_parse_alpha(doc: String) -> Result(#(String, String), Nil) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses one XML comment (`<!-- ... -->`) from the front of `doc`.
///
/// Returns `Ok(#(text, rest))` where `text` is what `do_parse_comment`
/// collected between the delimiters and `rest` is the input after `-->`.
/// Returns `Error(Nil)` when `doc` does not start with `<!--` or when the
/// collected body is not immediately followed by `-->`.
fn parse_comment(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    "<!--" <> body ->
      // The body parser stops at the first position it cannot consume; a
      // well-formed comment must have its closing delimiter exactly there.
      case do_parse_comment(body) {
        #(text, "-->" <> rest) -> Ok(#(text, rest))
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}
|
||||
|
||||
/// Consumes the text of a comment body from the front of `doc`.
///
/// Two alternatives are tried at each step: a single non-dash character, or a
/// dash immediately followed by a non-dash character. A `--` sequence matches
/// neither alternative.
///
/// NOTE(review): `parse_multiple_optional` and `try_parsers` are defined
/// elsewhere; presumably the former applies the parser repeatedly, appends
/// each piece to the initial `""`, and returns `#(collected, rest)` — confirm
/// against their definitions.
fn do_parse_comment(doc: String) -> #(String, String) {
  // A dash is only accepted together with the non-dash character after it,
  // and the pair is returned as one piece.
  let dash_then_char = fn(input) {
    case input {
      "-" <> rest ->
        parse_char_except_dash(rest)
        |> result.map(fn(parsed) {
          let #(char, remaining) = parsed
          #("-" <> char, remaining)
        })
      _ -> Error(Nil)
    }
  }

  let comment_piece = fn(input) {
    try_parsers([parse_char_except_dash, dash_then_char], input)
  }

  parse_multiple_optional(doc, comment_piece, "")
}
|
||||
|
||||
/// Same as `parse_char`, except that input beginning with `-` is rejected
/// with `Error(Nil)` before `parse_char` is consulted.
fn parse_char_except_dash(doc: String) -> Result(#(String, String), Nil) {
  case string.starts_with(doc, "-") {
    True -> Error(Nil)
    False -> parse_char(doc)
  }
}
|
||||
|
||||
/// Consumes one grapheme from the front of `doc`, provided every codepoint in
/// it is a legal XML `Char`.
///
/// Returns `Ok(#(grapheme, rest))` on success. Returns `Error(Nil)` when
/// `doc` is empty or the grapheme contains a codepoint outside the XML `Char`
/// ranges.
///
/// A grapheme can span several codepoints (`"\r\n"`, a base letter plus a
/// combining mark, ZWJ emoji sequences), so each codepoint is validated
/// rather than asserting that there is exactly one.
fn parse_char(doc: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(doc) {
    Ok(#(grapheme, tail)) ->
      case all_xml_char_codes(string.to_utf_codepoints(grapheme)) {
        True -> Ok(#(grapheme, tail))
        False -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}

/// True when every codepoint in the list is a legal XML `Char`.
fn all_xml_char_codes(codepoints: List(UtfCodepoint)) -> Bool {
  case codepoints {
    [] -> True
    [first, ..rest] ->
      case is_xml_char_code(string.utf_codepoint_to_int(first)) {
        True -> all_xml_char_codes(rest)
        False -> False
      }
  }
}

/// True when `code` lies in one of the XML `Char` ranges: tab, line feed,
/// carriage return, 0x20-0xD7FF, 0xE000-0xFFFD or 0x10000-0x10FFFF.
fn is_xml_char_code(code: Int) -> Bool {
  code == 0x9
  || code == 0xA
  || code == 0xD
  || { code >= 0x20 && code <= 0xD7FF }
  || { code >= 0xE000 && code <= 0xFFFD }
  || { code >= 0x10000 && code <= 0x10FFFF }
}
|
||||
|
||||
fn trim_space(doc: String) -> String {
|
||||
case parse_space(doc) {
|
||||
Ok(#(_, doc)) -> trim_space(doc)
|
||||
|
||||
Reference in New Issue
Block a user