From 9961cce70d5b67e265df58b41c3f380e40735c36 Mon Sep 17 00:00:00 2001
From: Gareth Pendleton
Date: Thu, 9 Oct 2025 00:32:02 +0100
Subject: [PATCH] initial local commit

---
 .github/workflows/test.yml |  23 +++
 .gitignore                 |   4 +
 README.md                  |  23 ++-
 gleam.toml                 |  19 +++
 manifest.toml              |  11 ++
 src/glxml.gleam            | 293 +++++++++++++++++++++++++++++++++++++
 test/glxml_test.gleam      |  13 ++
 7 files changed, 385 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/test.yml
 create mode 100644 .gitignore
 create mode 100644 gleam.toml
 create mode 100644 manifest.toml
 create mode 100644 src/glxml.gleam
 create mode 100644 test/glxml_test.gleam

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..4a7fe22
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,23 @@
+name: test
+
+on:
+  push:
+    branches:
+      - master
+      - main
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: erlef/setup-beam@v1
+        with:
+          otp-version: "27.1.2"
+          gleam-version: "1.12.0"
+          rebar3-version: "3"
+          # elixir-version: "1"
+      - run: gleam deps download
+      - run: gleam test
+      - run: gleam format --check src test
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..599be4e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.beam
+*.ez
+/build
+erl_crash.dump
diff --git a/README.md b/README.md
index e70f5ab..c385797 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,24 @@
 # glxml
 
-Gleam XML Parser
\ No newline at end of file
+[![Package Version](https://img.shields.io/hexpm/v/glxml)](https://hex.pm/packages/glxml)
+[![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/glxml/)
+
+```sh
+gleam add glxml@1
+```
+```gleam
+import glxml
+
+pub fn main() -> Nil {
+  // TODO: An example of the project in use
+}
+```
+
+Further documentation can be found at <https://hexdocs.pm/glxml>.
+
+## Development
+
+```sh
+gleam run   # Run the project
+gleam test  # Run the tests
+```
diff --git a/gleam.toml b/gleam.toml
new file mode 100644
index 0000000..31b5a6c
--- /dev/null
+++ b/gleam.toml
@@ -0,0 +1,19 @@
+name = "glxml"
+version = "1.0.0"
+
+# Fill out these fields if you intend to generate HTML documentation or publish
+# your project to the Hex package manager.
+#
+# description = ""
+# licences = ["Apache-2.0"]
+# repository = { type = "github", user = "", repo = "" }
+# links = [{ title = "Website", href = "" }]
+#
+# For a full reference of all the available options, you can have a look at
+# https://gleam.run/writing-gleam/gleam-toml/.
+
+[dependencies]
+gleam_stdlib = ">= 0.44.0 and < 2.0.0"
+
+[dev-dependencies]
+gleeunit = ">= 1.0.0 and < 2.0.0"
diff --git a/manifest.toml b/manifest.toml
new file mode 100644
index 0000000..b3c53aa
--- /dev/null
+++ b/manifest.toml
@@ -0,0 +1,11 @@
+# This file was generated by Gleam
+# You typically do not need to edit this file
+
+packages = [
+  { name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" },
+  { name = "gleeunit", version = "1.6.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "FDC68A8C492B1E9B429249062CD9BAC9B5538C6FBF584817205D0998C42E1DAC" },
+]
+
+[requirements]
+gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" }
+gleeunit = { version = ">= 1.0.0 and < 2.0.0" }
diff --git a/src/glxml.gleam b/src/glxml.gleam
new file mode 100644
index 0000000..9857965
--- /dev/null
+++ b/src/glxml.gleam
@@ -0,0 +1,293 @@
+import gleam/result
+
+/// The XML declaration: version number and encoding (`""` when absent).
+pub type Declaration {
+  Declaration(versioninfo: String, encoding: String)
+}
+
+/// Document type declaration. The parser currently always yields `None`.
+pub type DocType {
+  None
+  DocType(name: String)
+}
+
+/// A parsed document: for now only the prolog is represented.
+pub type Document {
+  Document(decl: Declaration, doctype: DocType)
+}
+
+/// Debug entry point: parses an empty document and echoes the result.
+pub fn main() {
+  parse_document("") |> echo
+}
+
+/// Parses the prolog of `doc`; anything after the prolog is discarded.
+fn parse_document(doc: String) -> Result(Document, Nil) {
+  use #(decl, doctype, _doc) <- result.try(parse_prolog(doc))
+
+  Ok(Document(decl, doctype))
+}
+
+/// Parses the XML declaration; the doctype is not parsed yet and is `None`.
+fn parse_prolog(doc: String) -> Result(#(Declaration, DocType, String), Nil) {
+  use #(decl, doc) <- result.try(parse_decl(doc))
+
+  Ok(#(decl, None, doc))
+}
+
+/// Parses `<?xml version=... [encoding=...] ?>`, returning the unparsed rest.
+fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
+  case doc {
+    "<?xml" <> tail -> {
+      use #(versioninfo, doc) <- result.try(parse_versioninfo(tail))
+      let #(encoding, doc) = case parse_encodingdecl(doc) {
+        Ok(e) -> e
+        Error(_) -> #("", doc)
+      }
+
+      case trim_space(doc) {
+        "?>" <> tail -> Ok(#(Declaration(versioninfo:, encoding:), tail))
+        _ -> Error(Nil)
+      }
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Parses one whitespace character, then `version=` and a quoted version.
+fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
+  use #(_, doc) <- result.try(parse_space(doc))
+  case doc {
+    "version=" <> tail -> {
+      use #(version, doc) <- result.try(parse_version(tail))
+      Ok(#(version, doc))
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Parses a single- or double-quoted `1.x` version number.
+fn parse_version(doc: String) -> Result(#(String, String), Nil) {
+  case doc {
+    "\"1." <> tail -> {
+      use #(version, doc) <- result.try(do_parse_version(tail, "1."))
+      case doc {
+        "\"" <> tail -> Ok(#(version, tail))
+        _ -> Error(Nil)
+      }
+    }
+    "'1." <> tail -> {
+      use #(version, doc) <- result.try(do_parse_version(tail, "1."))
+      case doc {
+        "'" <> tail -> Ok(#(version, tail))
+        _ -> Error(Nil)
+      }
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Accumulates digits onto `version`. Callers seed it with `"1."`, so an
+/// accumulator still equal to `"1."` means no digit followed the dot.
+fn do_parse_version(
+  doc: String,
+  version: String,
+) -> Result(#(String, String), Nil) {
+  case do_parse_digit(doc) {
+    Ok(#(digit, doc)) -> do_parse_version(doc, version <> digit)
+    Error(_) if version == "1." -> Error(Nil)
+    Error(_) -> Ok(#(version, doc))
+  }
+}
+
+/// Parses one whitespace character, then `encoding=` and a quoted name.
+fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
+  use #(_, doc) <- result.try(parse_space(doc))
+
+  case doc {
+    "encoding=" <> tail -> {
+      case tail {
+        "\"" <> tail -> {
+          use #(encoding, doc) <- result.try(parse_encoding(tail))
+          case doc {
+            "\"" <> tail -> Ok(#(encoding, tail))
+            _ -> Error(Nil)
+          }
+        }
+        "'" <> tail -> {
+          use #(encoding, doc) <- result.try(parse_encoding(tail))
+          case doc {
+            "'" <> tail -> Ok(#(encoding, tail))
+            _ -> Error(Nil)
+          }
+        }
+        _ -> Error(Nil)
+      }
+    }
+    _ -> Error(Nil)
+  }
+}
+
+/// Parses an encoding name: a letter, then letters, digits, `.`, `_` or `-`.
+fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
+  case do_parse_alpha(doc) {
+    Ok(#(char, doc)) -> {
+      Ok(parse_multiple_optional(
+        doc,
+        try_parsers(
+          [
+            do_parse_alpha,
+            do_parse_digit,
+            fn(doc) {
+              case doc {
+                "." as char <> tail | "_" as char <> tail | "-" as char <> tail ->
+                  Ok(#(char, tail))
+                _ -> Error(Nil)
+              }
+            },
+          ],
+          _,
+        ),
+        char,
+      ))
+    }
+    Error(_) -> Error(Nil)
+  }
+}
+
+/// Consumes one ASCII digit from the front of `doc`.
+fn do_parse_digit(doc: String) -> Result(#(String, String), Nil) {
+  case doc {
+    "0" as digit <> tail
+    | "1" as digit <> tail
+    | "2" as digit <> tail
+    | "3" as digit <> tail
+    | "4" as digit <> tail
+    | "5" as digit <> tail
+    | "6" as digit <> tail
+    | "7" as digit <> tail
+    | "8" as digit <> tail
+    | "9" as digit <> tail -> Ok(#(digit, tail))
+    _ -> Error(Nil)
+  }
+}
+
+/// Consumes one ASCII letter from the front of `doc`.
+fn do_parse_alpha(doc: String) -> Result(#(String, String), Nil) {
+  case doc {
+    "a" as char <> tail
+    | "b" as char <> tail
+    | "c" as char <> tail
+    | "d" as char <> tail
+    | "e" as char <> tail
+    | "f" as char <> tail
+    | "g" as char <> tail
+    | "h" as char <> tail
+    | "i" as char <> tail
+    | "j" as char <> tail
+    | "k" as char <> tail
+    | "l" as char <> tail
+    | "m" as char <> tail
+    | "n" as char <> tail
+    | "o" as char <> tail
+    | "p" as char <> tail
+    | "q" as char <> tail
+    | "r" as char <> tail
+    | "s" as char <> tail
+    | "t" as char <> tail
+    | "u" as char <> tail
+    | "v" as char <> tail
+    | "w" as char <> tail
+    | "x" as char <> tail
+    | "y" as char <> tail
+    | "z" as char <> tail
+    | "A" as char <> tail
+    | "B" as char <> tail
+    | "C" as char <> tail
+    | "D" as char <> tail
+    | "E" as char <> tail
+    | "F" as char <> tail
+    | "G" as char <> tail
+    | "H" as char <> tail
+    | "I" as char <> tail
+    | "J" as char <> tail
+    | "K" as char <> tail
+    | "L" as char <> tail
+    | "M" as char <> tail
+    | "N" as char <> tail
+    | "O" as char <> tail
+    | "P" as char <> tail
+    | "Q" as char <> tail
+    | "R" as char <> tail
+    | "S" as char <> tail
+    | "T" as char <> tail
+    | "U" as char <> tail
+    | "V" as char <> tail
+    | "W" as char <> tail
+    | "X" as char <> tail
+    | "Y" as char <> tail
+    | "Z" as char <> tail -> Ok(#(char, tail))
+    _ -> Error(Nil)
+  }
+}
+
+/// Drops all leading whitespace from `doc`.
+fn trim_space(doc: String) -> String {
+  case parse_space(doc) {
+    Ok(#(_, doc)) -> trim_space(doc)
+    Error(_) -> doc
+  }
+}
+
+/// Consumes exactly one space, tab, line feed or carriage return.
+fn parse_space(doc: String) -> Result(#(String, String), Nil) {
+  case doc {
+    " " as ws <> tail
+    | "\t" as ws <> tail
+    | "\n" as ws <> tail
+    | "\r" as ws <> tail -> Ok(#(ws, tail))
+    _ -> Error(Nil)
+  }
+}
+
+/// Runs each parser against `static_data` and returns the first success.
+fn try_parsers(
+  over list: List(fn(String) -> Result(#(a, String), Nil)),
+  against static_data: String,
+) -> Result(#(a, String), Nil) {
+  case list {
+    [] -> Error(Nil)
+    [first, ..rest] ->
+      case first(static_data) {
+        Error(_) -> try_parsers(rest, static_data)
+        Ok(r) -> Ok(r)
+      }
+  }
+}
+
+/// Applies `to_run` repeatedly; fails when the accumulated result is empty.
+pub fn parse_multiple(
+  to_parse str: String,
+  with to_run: fn(String) -> Result(#(String, String), Nil),
+) -> Result(#(String, String), Nil) {
+  case parse_multiple_optional(str, to_run, "") {
+    #("", _) -> Error(Nil)
+    #(r, rest) -> Ok(#(r, rest))
+  }
+}
+
+/// Applies `to_run` until it fails or input ends, appending results to `ret`.
+fn parse_multiple_optional(
+  to_parse str: String,
+  with to_run: fn(String) -> Result(#(String, String), Nil),
+  acc ret: String,
+) -> #(String, String) {
+  case str {
+    "" -> #(ret, str)
+    _ ->
+      case to_run(str) {
+        Ok(#(r, rest)) -> parse_multiple_optional(rest, to_run, ret <> r)
+        Error(_) -> #(ret, str)
+      }
+  }
+}
diff --git a/test/glxml_test.gleam b/test/glxml_test.gleam
new file mode 100644
index 0000000..fba3c88
--- /dev/null
+++ b/test/glxml_test.gleam
@@ -0,0 +1,13 @@
+import gleeunit
+
+pub fn main() -> Nil {
+  gleeunit.main()
+}
+
+// gleeunit test functions end in `_test`
+pub fn hello_world_test() {
+  let name = "Joe"
+  let greeting = "Hello, " <> name <> "!"
+
+  assert greeting == "Hello, Joe!"
+}