diff --git a/src/glxml.gleam b/src/glxml.gleam
index ccc1578..deb633c 100644
--- a/src/glxml.gleam
+++ b/src/glxml.gleam
@@ -1,5 +1,6 @@
 import gleam/option.{type Option, None}
 import gleam/result
+import gleam/string
 
 pub type Declaration {
   Declaration(versioninfo: String, encoding: String, standalone: Bool)
@@ -14,7 +15,10 @@ pub type Document {
 }
 
 pub fn main() {
-  parse_document("") |> echo
+  parse_document(
+    "\r\n \n",
+  )
+  |> echo
 }
 
 fn parse_document(doc: String) -> Result(Document, Nil) {
@@ -30,7 +34,14 @@ fn parse_prolog(
     Ok(#(decl, doc)) -> #(decl, doc)
     _ -> #(Declaration("1.0", "UTF-8", False), doc)
   }
-
+  let #(comment, doc) =
+    parse_multiple_optional(
+      doc,
+      try_parsers([parse_comment, parse_space], _),
+      "",
+    )
+  comment |> echo
+  doc |> echo
   Ok(#(decl, None, doc))
 }
 
@@ -237,6 +248,99 @@ fn do_parse_alpha(doc: String) -> Result(#(String, String), Nil) {
   }
 }
 
+/// Parses an XML comment: the opener `<!--`, a body, then the closer `-->`.
+/// Returns the comment body and the remaining input, or `Error(Nil)` when the
+/// input does not start with a well-formed comment.
+///
+/// NOTE(review): the `<!--` and `-->` literals were missing from the copy of
+/// this patch under review (the text looks markup-stripped); they are
+/// restored here from the surrounding structure. Confirm against the repo.
+fn parse_comment(doc: String) -> Result(#(String, String), Nil) {
+  case doc {
+    "<!--" <> tail -> {
+      let #(comment, doc) = do_parse_comment(tail)
+      case doc {
+        "-->" <> tail -> Ok(#(comment, tail))
+        _ -> Error(Nil)
+      }
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Consumes the body of a comment: each step accepts either one character
+/// that is not `-`, or a `-` immediately followed by a character that is not
+/// `-`, so a double dash is never consumed.
+/// Presumably `parse_multiple_optional` repeats the parser and accumulates
+/// onto the `""` seed; verify against its definition.
+fn do_parse_comment(doc: String) -> #(String, String) {
+  parse_multiple_optional(
+    doc,
+    try_parsers(
+      [
+        parse_char_except_dash,
+        fn(doc) {
+          case doc {
+            "-" <> tail -> {
+              use #(char, doc) <- result.try(parse_char_except_dash(tail))
+              Ok(#("-" <> char, doc))
+            }
+            _ -> Error(Nil)
+          }
+        },
+      ],
+      _,
+    ),
+    "",
+  )
+}
+
+/// Parses one character via `parse_char`, rejecting input that starts with `-`.
+fn parse_char_except_dash(doc: String) -> Result(#(String, String), Nil) {
+  case doc {
+    "-" <> _ -> Error(Nil)
+    _ -> parse_char(doc)
+  }
+}
+
+/// Pops one grapheme off `doc` and accepts it when every codepoint in it is a
+/// legal XML 1.0 `Char`. Returns the grapheme and the remaining input, or
+/// `Error(Nil)` for empty input or a disallowed codepoint.
+///
+/// A grapheme may span several codepoints ("\r\n", combining marks, emoji
+/// sequences), so all of them are checked rather than asserting exactly one.
+fn parse_char(doc: String) -> Result(#(String, String), Nil) {
+  case string.pop_grapheme(doc) {
+    Ok(#(char, tail)) ->
+      case all_xml_chars(string.to_utf_codepoints(char)) {
+        True -> Ok(#(char, tail))
+        False -> Error(Nil)
+      }
+    Error(_) -> Error(Nil)
+  }
+}
+
+/// True when every codepoint in the list is a legal XML 1.0 `Char`.
+fn all_xml_chars(codepoints: List(UtfCodepoint)) -> Bool {
+  case codepoints {
+    [] -> True
+    [first, ..rest] ->
+      is_xml_char(string.utf_codepoint_to_int(first)) && all_xml_chars(rest)
+  }
+}
+
+/// XML 1.0 `Char` production: #x9 | #xA | #xD | [#x20-#xD7FF] |
+/// [#xE000-#xFFFD] | [#x10000-#x10FFFF].
+fn is_xml_char(i: Int) -> Bool {
+  case i {
+    _ if i == 0x9 || i == 0xA || i == 0xD -> True
+    _ if i >= 0x20 && i <= 0xD7FF -> True
+    _ if i >= 0xE000 && i <= 0xFFFD -> True
+    _ if i >= 0x10000 && i <= 0x10FFFF -> True
+    _ -> False
+  }
+}
+
 fn trim_space(doc: String) -> String {
   case parse_space(doc) {
     Ok(#(_, doc)) -> trim_space(doc)