This commit is contained in:
		
							
								
								
									
										258
									
								
								src/glxml.gleam
									
									
									
									
									
								
							
							
						
						
									
										258
									
								
								src/glxml.gleam
									
									
									
									
									
								
							@@ -1,4 +1,7 @@
 | 
			
		||||
import gleam/option.{type Option, None}
 | 
			
		||||
import gleam/bool
 | 
			
		||||
import gleam/dict
 | 
			
		||||
import gleam/list
 | 
			
		||||
import gleam/option.{type Option, None, Some}
 | 
			
		||||
import gleam/result
 | 
			
		||||
import gleam/string
 | 
			
		||||
 | 
			
		||||
@@ -6,25 +9,185 @@ pub type Declaration {
 | 
			
		||||
  Declaration(versioninfo: String, encoding: String, standalone: Bool)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub type Entity {
 | 
			
		||||
  Entity
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub type DocType {
 | 
			
		||||
  DocType(name: String)
 | 
			
		||||
  DocType(name: String, entities: dict.Dict(String, Entity))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub type Document {
 | 
			
		||||
  Document(decl: Declaration, doctype: Option(DocType))
 | 
			
		||||
  Document(
 | 
			
		||||
    decl: Declaration,
 | 
			
		||||
    doctype: Option(DocType),
 | 
			
		||||
    element: Option(Element),
 | 
			
		||||
  )
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub type Attribute {
 | 
			
		||||
  Attribute(name: String, value: String)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub type Element {
 | 
			
		||||
  EmptyElem(name: String, attrs: List(Attribute))
 | 
			
		||||
  Element(name: String, attrs: List(Attribute), elements: List(Element))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn main() {
 | 
			
		||||
  parse_document(
 | 
			
		||||
    "<?xml version=\"1.1\" encoding='UTF-8'?>\r\n   <!-- hello-world -->   \n",
 | 
			
		||||
    "<?xml version=\"1.1\" encoding='UTF-8'?>\r\n   <!-- hello-world -->   \n<a attr='haha' battr='baba' ref='&ref;'/>",
 | 
			
		||||
  )
 | 
			
		||||
  |> echo
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_document(doc: String) -> Result(Document, Nil) {
 | 
			
		||||
  use #(decl, doctype, _doc) <- result.try(parse_prolog(doc))
 | 
			
		||||
  use #(decl, doctype, doc) <- result.try(parse_prolog(doc))
 | 
			
		||||
  use <- bool.guard(when: doc == "", return: Ok(Document(decl, doctype, None)))
 | 
			
		||||
  use #(element, doc) <- result.try(parse_element(doc, doctype))
 | 
			
		||||
  let doc = parse_misc(doc)
 | 
			
		||||
 | 
			
		||||
  Ok(Document(decl, doctype))
 | 
			
		||||
  case doc |> echo {
 | 
			
		||||
    "" -> Ok(Document(decl, doctype, Some(element)))
 | 
			
		||||
    _ -> Error(Nil)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_element(
 | 
			
		||||
  doc: String,
 | 
			
		||||
  doctype: Option(DocType),
 | 
			
		||||
) -> Result(#(Element, String), Nil) {
 | 
			
		||||
  try_parsers([parse_empty_elem(_, doctype)], doc)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_empty_elem(
 | 
			
		||||
  doc: String,
 | 
			
		||||
  doctype: Option(DocType),
 | 
			
		||||
) -> Result(#(Element, String), Nil) {
 | 
			
		||||
  case doc |> echo {
 | 
			
		||||
    "<" <> tail -> {
 | 
			
		||||
      use #(name, doc) <- result.try(parse_name(tail))
 | 
			
		||||
      use #(attrs, doc) <- result.try(parse_attributes(doc, doctype, []))
 | 
			
		||||
      let doc = trim_space(doc)
 | 
			
		||||
      case doc {
 | 
			
		||||
        "/>" <> tail -> Ok(#(EmptyElem(name, attrs), tail))
 | 
			
		||||
        _ -> Error(Nil)
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    _ -> Error(Nil)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_attributes(
 | 
			
		||||
  doc: String,
 | 
			
		||||
  doctype: Option(DocType),
 | 
			
		||||
  attrs: List(Attribute),
 | 
			
		||||
) -> Result(#(List(Attribute), String), Nil) {
 | 
			
		||||
  case parse_attribute(doc, doctype) {
 | 
			
		||||
    Ok(#(attr, doc)) -> parse_attributes(doc, doctype, [attr, ..attrs])
 | 
			
		||||
    Error(_) -> Ok(#(list.reverse(attrs), doc))
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_attribute(
 | 
			
		||||
  doc: String,
 | 
			
		||||
  doctype: Option(DocType),
 | 
			
		||||
) -> Result(#(Attribute, String), Nil) {
 | 
			
		||||
  let doc = trim_space(doc)
 | 
			
		||||
  use #(name, doc) <- result.try(parse_name(doc))
 | 
			
		||||
  case doc {
 | 
			
		||||
    "=" <> tail -> {
 | 
			
		||||
      case tail {
 | 
			
		||||
        "\"" <> tail -> {
 | 
			
		||||
          let #(value, doc) =
 | 
			
		||||
            parse_multiple_optional(
 | 
			
		||||
              tail,
 | 
			
		||||
              try_parsers(
 | 
			
		||||
                [
 | 
			
		||||
                  fn(doc) {
 | 
			
		||||
                    case string.pop_grapheme(doc) {
 | 
			
		||||
                      Ok(#(char, _doc))
 | 
			
		||||
                        if char == "<" || char == "&" || char == "\""
 | 
			
		||||
                      -> Error(Nil)
 | 
			
		||||
                      Ok(#(char, doc)) -> Ok(#(char, doc))
 | 
			
		||||
                      Error(_) -> Error(Nil)
 | 
			
		||||
                    }
 | 
			
		||||
                  },
 | 
			
		||||
                  parse_reference(_, doctype),
 | 
			
		||||
                ],
 | 
			
		||||
                _,
 | 
			
		||||
              ),
 | 
			
		||||
              "",
 | 
			
		||||
            )
 | 
			
		||||
          case doc {
 | 
			
		||||
            "\"" <> tail -> Ok(#(Attribute(name, value), tail))
 | 
			
		||||
            _ -> Error(Nil)
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        "'" <> tail -> {
 | 
			
		||||
          let #(value, doc) =
 | 
			
		||||
            parse_multiple_optional(
 | 
			
		||||
              tail,
 | 
			
		||||
              try_parsers(
 | 
			
		||||
                [
 | 
			
		||||
                  fn(doc) {
 | 
			
		||||
                    case string.pop_grapheme(doc) {
 | 
			
		||||
                      Ok(#(char, _doc))
 | 
			
		||||
                        if char == "<" || char == "&" || char == "'"
 | 
			
		||||
                      -> Error(Nil)
 | 
			
		||||
                      Ok(#(char, doc)) -> Ok(#(char, doc))
 | 
			
		||||
                      Error(_) -> Error(Nil)
 | 
			
		||||
                    }
 | 
			
		||||
                  },
 | 
			
		||||
                  parse_reference(_, doctype),
 | 
			
		||||
                ],
 | 
			
		||||
                _,
 | 
			
		||||
              ),
 | 
			
		||||
              "",
 | 
			
		||||
            )
 | 
			
		||||
          case doc {
 | 
			
		||||
            "'" <> tail -> Ok(#(Attribute(name, value), tail))
 | 
			
		||||
            _ -> Error(Nil)
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        _ -> Error(Nil)
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    _ -> Error(Nil)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_reference(
 | 
			
		||||
  doc: String,
 | 
			
		||||
  doctype: Option(DocType),
 | 
			
		||||
) -> Result(#(String, String), Nil) {
 | 
			
		||||
  case doc {
 | 
			
		||||
    "&" as char <> tail | "%" as char <> tail -> {
 | 
			
		||||
      use #(name, doc) <- result.try(parse_name(tail))
 | 
			
		||||
 | 
			
		||||
      case doc {
 | 
			
		||||
        ";" <> tail -> Ok(#(char <> name <> ";", tail))
 | 
			
		||||
        _ -> Error(Nil)
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    _ -> Error(Nil)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_name(doc: String) -> Result(#(String, String), Nil) {
 | 
			
		||||
  case parse_name_start_char(doc) {
 | 
			
		||||
    Ok(#(char, tail)) -> {
 | 
			
		||||
      do_parse_name(tail, char)
 | 
			
		||||
    }
 | 
			
		||||
    Error(_) -> Error(Nil)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn do_parse_name(doc: String, name: String) -> Result(#(String, String), Nil) {
 | 
			
		||||
  case parse_name_char(doc) {
 | 
			
		||||
    Ok(#(char, tail)) -> do_parse_name(tail, name <> char)
 | 
			
		||||
    Error(_) -> Ok(#(name, doc))
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_prolog(
 | 
			
		||||
@@ -34,15 +197,19 @@ fn parse_prolog(
 | 
			
		||||
    Ok(#(decl, doc)) -> #(decl, doc)
 | 
			
		||||
    _ -> #(Declaration("1.0", "UTF-8", False), doc)
 | 
			
		||||
  }
 | 
			
		||||
  let #(comment, doc) =
 | 
			
		||||
  let doc = parse_misc(doc)
 | 
			
		||||
 | 
			
		||||
  Ok(#(decl, None, doc))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_misc(doc: String) -> String {
 | 
			
		||||
  let #(_, doc) =
 | 
			
		||||
    parse_multiple_optional(
 | 
			
		||||
      doc,
 | 
			
		||||
      try_parsers([parse_comment, parse_space], _),
 | 
			
		||||
      "",
 | 
			
		||||
    )
 | 
			
		||||
  comment |> echo
 | 
			
		||||
  doc |> echo
 | 
			
		||||
  Ok(#(decl, None, doc))
 | 
			
		||||
  doc
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
 | 
			
		||||
@@ -69,8 +236,7 @@ fn parse_decl(doc: String) -> Result(#(Declaration, String), Nil) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_versioninfo(doc: String) -> Result(#(String, String), Nil) {
 | 
			
		||||
  use #(_, doc) <- result.try(parse_space(doc))
 | 
			
		||||
  case doc {
 | 
			
		||||
  case trim_space(doc) {
 | 
			
		||||
    "version=" <> tail -> {
 | 
			
		||||
      use #(version, doc) <- result.try(parse_version(tail))
 | 
			
		||||
      Ok(#(version, doc))
 | 
			
		||||
@@ -111,9 +277,7 @@ fn do_parse_version(
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_encodingdecl(doc: String) -> Result(#(String, String), Nil) {
 | 
			
		||||
  use #(_, doc) <- result.try(parse_space(doc))
 | 
			
		||||
 | 
			
		||||
  case doc {
 | 
			
		||||
  case trim_space(doc) {
 | 
			
		||||
    "encoding=" <> tail -> {
 | 
			
		||||
      case tail {
 | 
			
		||||
        "\"" <> tail -> {
 | 
			
		||||
@@ -164,9 +328,7 @@ fn parse_encoding(doc: String) -> Result(#(String, String), Nil) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_standalone(doc: String) -> Result(#(Bool, String), Nil) {
 | 
			
		||||
  use #(_, doc) <- result.try(parse_space(doc))
 | 
			
		||||
 | 
			
		||||
  case doc {
 | 
			
		||||
  case trim_space(doc) {
 | 
			
		||||
    "standalone=\"yes\"" <> tail | "standalone='yes'" <> tail ->
 | 
			
		||||
      Ok(#(True, tail))
 | 
			
		||||
    "standalone=\"no\"" <> tail | "standalone='no'" <> tail -> Ok(#(True, tail))
 | 
			
		||||
@@ -309,6 +471,66 @@ fn parse_char(doc: String) -> Result(#(String, String), Nil) {
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_name_start_char(doc: String) -> Result(#(String, String), Nil) {
 | 
			
		||||
  case string.pop_grapheme(doc) {
 | 
			
		||||
    Ok(#(":", tail)) -> Ok(#(":", tail))
 | 
			
		||||
    Ok(#("_", tail)) -> Ok(#("_", tail))
 | 
			
		||||
    Ok(#(char, tail)) -> {
 | 
			
		||||
      let assert [codepoint] = string.to_utf_codepoints(char)
 | 
			
		||||
      case string.utf_codepoint_to_int(codepoint) {
 | 
			
		||||
        i if i >= 0x41 && i <= 0x5A -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x61 && i <= 0x7A -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xC0 && i <= 0xD6 -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xD8 && i <= 0xF6 -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xF8 && i <= 0x2FF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x370 && i <= 0x37D -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x37F && i <= 0x1FFF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x200C && i <= 0x200D -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x2070 && i <= 0x218F -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x2C00 && i <= 0x2FEF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x3000 && i <= 0xD7FF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xF900 && i <= 0xFDCF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xFDF0 && i <= 0xFFFD -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x10000 && i <= 0xEFFFF -> Ok(#(char, tail))
 | 
			
		||||
        _ -> Error(Nil)
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    Error(_) -> Error(Nil)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn parse_name_char(doc: String) -> Result(#(String, String), Nil) {
 | 
			
		||||
  case string.pop_grapheme(doc) {
 | 
			
		||||
    Ok(#(":", tail)) -> Ok(#(":", tail))
 | 
			
		||||
    Ok(#("_", tail)) -> Ok(#("_", tail))
 | 
			
		||||
    Ok(#("-", tail)) -> Ok(#("-", tail))
 | 
			
		||||
    Ok(#(".", tail)) -> Ok(#(".", tail))
 | 
			
		||||
    Ok(#(char, tail)) -> {
 | 
			
		||||
      let assert [codepoint] = string.to_utf_codepoints(char)
 | 
			
		||||
      case string.utf_codepoint_to_int(codepoint) {
 | 
			
		||||
        i if i >= 0x30 && i <= 0x39 -> Ok(#(char, tail))
 | 
			
		||||
        i if i == 0xB7 -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x41 && i <= 0x5A -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x61 && i <= 0x7A -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xC0 && i <= 0xD6 -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xD8 && i <= 0xF6 -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xF8 && i <= 0x37D -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x37F && i <= 0x1FFF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x200C && i <= 0x200D -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x203F && i <= 0x2040 -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x2070 && i <= 0x218F -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x2C00 && i <= 0x2FEF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x3000 && i <= 0xD7FF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xF900 && i <= 0xFDCF -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0xFDF0 && i <= 0xFFFD -> Ok(#(char, tail))
 | 
			
		||||
        i if i >= 0x10000 && i <= 0xEFFFF -> Ok(#(char, tail))
 | 
			
		||||
        _ -> Error(Nil)
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    Error(_) -> Error(Nil)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn trim_space(doc: String) -> String {
 | 
			
		||||
  case parse_space(doc) {
 | 
			
		||||
    Ok(#(_, doc)) -> trim_space(doc)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user