feat: Added comment parsing
Some checks failed
test / test (push) Has been cancelled

This commit is contained in:
2025-10-09 01:26:55 +01:00
parent 25fe8c7264
commit 26499833c0

View File

@@ -1,5 +1,6 @@
import gleam/option.{type Option, None}
import gleam/result
import gleam/string
pub type Declaration {
Declaration(versioninfo: String, encoding: String, standalone: Bool)
@@ -14,7 +15,10 @@ pub type Document {
}
/// Development entry point: runs `parse_document` on hard-coded sample XML
/// inputs and prints each result with `echo`.
pub fn main() {
// Input consisting of an XML declaration only.
parse_document("<?xml version=\"1.1\" encoding='UTF-8'?>") |> echo
// Declaration followed by a CRLF, a space, a comment, and trailing whitespace.
parse_document(
"<?xml version=\"1.1\" encoding='UTF-8'?>\r\n <!-- hello-world --> \n",
)
|> echo
}
fn parse_document(doc: String) -> Result(Document, Nil) {
@@ -30,7 +34,14 @@ fn parse_prolog(
Ok(#(decl, doc)) -> #(decl, doc)
_ -> #(Declaration("1.0", "UTF-8", False), doc)
}
let #(comment, doc) =
parse_multiple_optional(
doc,
try_parsers([parse_comment, parse_space], _),
"",
)
comment |> echo
doc |> echo
Ok(#(decl, None, doc))
}
@@ -237,6 +248,67 @@ fn do_parse_alpha(doc: String) -> Result(#(String, String), Nil) {
}
}
/// Parses one XML comment from the start of `doc`.
///
/// The input must begin with `<!--`. The text after the opener is handed to
/// `do_parse_comment`, and whatever that leaves unconsumed must begin with
/// `-->`.
///
/// Returns `Ok(#(body, rest))`, where `body` is the text between the
/// delimiters and `rest` is the input after `-->`. Returns `Error(Nil)` when
/// either delimiter is missing.
fn parse_comment(doc: String) -> Result(#(String, String), Nil) {
  case doc {
    "<!--" <> after_opener ->
      case do_parse_comment(after_opener) {
        #(body, "-->" <> rest) -> Ok(#(body, rest))
        _ -> Error(Nil)
      }
    _ -> Error(Nil)
  }
}
/// Consumes the body of a comment (the text after `<!--`).
///
/// Two alternatives are offered to `try_parsers`:
///   1. any character accepted by `parse_char_except_dash`;
///   2. a single `-` immediately followed by such a character.
/// A `--` pair therefore matches neither alternative.
///
/// The combined parser is passed to `parse_multiple_optional` with `""` as
/// the initial value (presumably the accumulator for the matched text —
/// confirm against that helper). Returns its `#(String, String)` result.
fn do_parse_comment(doc: String) -> #(String, String) {
  // Alternative 2: a lone dash that is not followed by another dash.
  let dash_then_char = fn(input) {
    case input {
      "-" <> after_dash ->
        case parse_char_except_dash(after_dash) {
          Ok(#(char, remaining)) -> Ok(#("-" <> char, remaining))
          Error(Nil) -> Error(Nil)
        }
      _ -> Error(Nil)
    }
  }
  let comment_piece = try_parsers([parse_char_except_dash, dash_then_char], _)
  parse_multiple_optional(doc, comment_piece, "")
}
/// Parses one character via `parse_char`, rejecting input that starts
/// with `-`.
///
/// Returns `Error(Nil)` when `doc` begins with a dash; otherwise returns
/// whatever `parse_char(doc)` returns.
fn parse_char_except_dash(doc: String) -> Result(#(String, String), Nil) {
  case string.starts_with(doc, "-") {
    True -> Error(Nil)
    False -> parse_char(doc)
  }
}
/// Parses a single grapheme from the start of `doc`, accepting it only if
/// every codepoint in it is a legal XML `Char`:
/// `#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`.
///
/// Returns `Ok(#(grapheme, rest))` on success. Returns `Error(Nil)` when
/// `doc` is empty or the grapheme contains a disallowed codepoint.
///
/// A grapheme may span several codepoints (`\r\n`, a base letter plus
/// combining marks, ZWJ emoji sequences), so each codepoint is validated
/// rather than asserting that there is exactly one.
fn parse_char(doc: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(doc) {
    Ok(#(char, tail)) ->
      case is_xml_char_sequence(string.to_utf_codepoints(char)) {
        True -> Ok(#(char, tail))
        False -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}

/// Returns `True` when every codepoint in the list is a legal XML `Char`.
fn is_xml_char_sequence(codepoints: List(UtfCodepoint)) -> Bool {
  case codepoints {
    [] -> True
    [codepoint, ..rest] ->
      case string.utf_codepoint_to_int(codepoint) {
        // Tab, line feed and carriage return are the only allowed controls.
        i if i == 0x9 || i == 0xA || i == 0xD -> is_xml_char_sequence(rest)
        i if i >= 0x20 && i <= 0xD7FF -> is_xml_char_sequence(rest)
        i if i >= 0xE000 && i <= 0xFFFD -> is_xml_char_sequence(rest)
        i if i >= 0x10000 && i <= 0x10FFFF -> is_xml_char_sequence(rest)
        _ -> False
      }
  }
}
fn trim_space(doc: String) -> String {
case parse_space(doc) {
Ok(#(_, doc)) -> trim_space(doc)