6 Commits

5 changed files with 107 additions and 120 deletions

View File

@@ -16,3 +16,4 @@
## v2.0.2
- Minor performance improvement for URIs with userinfo
- More performance improvements for ASCII/digit parsing

View File

@@ -1,5 +1,5 @@
name = "gluri"
version = "2.0.1"
version = "2.0.2"
# Fill out these fields if you intend to generate HTML documentation or publish
# your project to the Hex package manager.

View File

@@ -1,4 +1,3 @@
import gleam/bool
import gleam/int
import gleam/list
import gleam/option.{type Option, None, Some}
@@ -181,8 +180,10 @@ fn parse_authority_part(str: String) -> Result(#(Uri, String), Nil) {
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
// Splits an optional userinfo component off the front of `str`.
//
// When `str` contains no "@" there can be no userinfo, so `#(None, str)` is
// returned immediately without scanning character by character. Otherwise the
// work is delegated to `do_parse_userinfo`, starting with an empty accumulator.
fn parse_userinfo(str: String) -> #(Option(String), String) {
  case string.contains(str, "@") {
    True -> do_parse_userinfo(str, "")
    False -> #(None, str)
  }
}
fn do_parse_userinfo(str: String, userinfo: String) -> #(Option(String), String) {
@@ -726,53 +727,54 @@ fn parse_unreserved(str: String) -> Result(#(String, String), Nil) {
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
// / "*" / "+" / "," / ";" / "="
fn parse_sub_delim(str: String) {
case str {
"!" as l <> rest
| "$" as l <> rest
| "&" as l <> rest
| "'" as l <> rest
| "(" as l <> rest
| ")" as l <> rest
| "*" as l <> rest
| "+" as l <> rest
| "," as l <> rest
| ";" as l <> rest
| "=" as l <> rest -> Ok(#(l, rest))
// %21 / %24 / %26 / %27 / %28 / %29
// / %2A / %2B / %2C / %3B / %3D
// Consumes one leading sub-delim character from `str`.
//
// Returns `Ok(#(char, rest))` when the first grapheme is one of
// "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=", and `Error(Nil)` when `str`
// is empty or starts with anything else.
fn parse_sub_delim(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#(char, tail)) ->
      // A grapheme cluster may consist of several codepoints (e.g. "\r\n" or
      // a base letter plus a combining mark). Such a cluster is never a
      // sub-delim, so reject it instead of asserting a single codepoint.
      case string.to_utf_codepoints(char) {
        [codepoint] -> {
          let i = string.utf_codepoint_to_int(codepoint)
          case i {
            // "&" "'" "(" ")" "*" "+" ","
            _ if i >= 0x26 && i <= 0x2C -> Ok(#(char, tail))
            // "!" "$" ";" "="
            _ if i == 0x21 || i == 0x24 || i == 0x3B || i == 0x3D ->
              Ok(#(char, tail))
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}
// DIGIT = %x30-39
// Consumes one leading ASCII digit ("0".."9") from `str`.
//
// Returns `Ok(#(digit, rest))` on success, `Error(Nil)` when `str` is empty
// or does not start with a digit.
fn parse_digit(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#(char, tail)) ->
      // A grapheme cluster may hold more than one codepoint (e.g. "\r\n");
      // that can never be a digit, so reject it rather than crash.
      case string.to_utf_codepoints(char) {
        [codepoint] -> {
          let i = string.utf_codepoint_to_int(codepoint)
          case i {
            _ if i >= 0x30 && i <= 0x39 -> Ok(#(char, tail))
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}
// DIGIT (non-zero) = %x31-39
// Consumes one leading non-zero ASCII digit ("1".."9") from `str`.
//
// Returns `Ok(#(digit, rest))` on success, `Error(Nil)` when `str` is empty
// or starts with "0" or any non-digit.
fn parse_digit_nz(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#(char, tail)) ->
      // Multi-codepoint grapheme clusters are never digits; reject them
      // instead of asserting that exactly one codepoint is present.
      case string.to_utf_codepoints(char) {
        [codepoint] -> {
          let i = string.utf_codepoint_to_int(codepoint)
          case i {
            _ if i >= 0x31 && i <= 0x39 -> Ok(#(char, tail))
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}
fn parse_digits(str: String, digits: String) {
@@ -786,61 +788,18 @@ fn parse_digits(str: String, digits: String) {
// ALPHA = %x41-5A / %x61-7A
// Consumes one leading ASCII letter ("A".."Z" or "a".."z") from `str`.
//
// Returns `Ok(#(letter, rest))` on success, `Error(Nil)` when `str` is empty
// or does not start with an ASCII letter.
fn parse_alpha(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#(char, tail)) ->
      // A grapheme such as "e" followed by a combining accent is one cluster
      // made of two codepoints. It is not an ASCII letter, so reject it
      // instead of crashing on a single-codepoint assertion.
      case string.to_utf_codepoints(char) {
        [codepoint] -> {
          let i = string.utf_codepoint_to_int(codepoint)
          case i {
            // "A".."Z"
            _ if i >= 0x41 && i <= 0x5A -> Ok(#(char, tail))
            // "a".."z"
            _ if i >= 0x61 && i <= 0x7A -> Ok(#(char, tail))
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}
pub fn parse_query_parts(query: String) -> Result(List(#(String, String)), Nil) {

View File

@@ -370,32 +370,20 @@ fn unescape_percent(str: String) -> String {
}
}
pub fn parse_hex_digit(str) {
case str {
"0" as l <> rest
| "1" as l <> rest
| "2" as l <> rest
| "3" as l <> rest
| "4" as l <> rest
| "5" as l <> rest
| "6" as l <> rest
| "7" as l <> rest
| "8" as l <> rest
| "9" as l <> rest
| "a" as l <> rest
| "b" as l <> rest
| "c" as l <> rest
| "d" as l <> rest
| "e" as l <> rest
| "f" as l <> rest
| "A" as l <> rest
| "B" as l <> rest
| "C" as l <> rest
| "D" as l <> rest
| "E" as l <> rest
| "F" as l <> rest -> Ok(#(l, rest))
/// Consumes one leading hexadecimal digit ("0".."9", "A".."F", "a".."f")
/// from `str`.
///
/// Returns `Ok(#(digit, rest))` on success, `Error(Nil)` when `str` is empty
/// or does not start with a hex digit.
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#(char, tail)) ->
      // A grapheme cluster can contain several codepoints (e.g. "\r\n");
      // such input is not a hex digit and must yield an error, not a panic.
      case string.to_utf_codepoints(char) {
        [codepoint] -> {
          let i = string.utf_codepoint_to_int(codepoint)
          case i {
            // "0".."9"
            _ if i >= 0x30 && i <= 0x39 -> Ok(#(char, tail))
            // "A".."F"
            _ if i >= 0x41 && i <= 0x46 -> Ok(#(char, tail))
            // "a".."f"
            _ if i >= 0x61 && i <= 0x66 -> Ok(#(char, tail))
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}
pub fn parse_hex_digits(str, min, max) {

View File

@@ -27,6 +27,45 @@ pub fn parse_general_tests() {
),
))
}),
it("ai gen pass", fn() {
let _ = uri.parse("https://example.com") |> should.be_ok
let _ =
uri.parse("http://www.example.org/resource?id=123&lang=en")
|> should.be_ok
let _ =
uri.parse("ftp://ftp.example.net/pub/files/archive.tar.gz")
|> should.be_ok
let _ = uri.parse("mailto:user+alias@example.com") |> should.be_ok
let _ = uri.parse("urn:isbn:978-3-16-148410-0") |> should.be_ok
let _ =
uri.parse("ws://socket.example.com:8080/chat?room=42#section2")
|> should.be_ok
let _ =
uri.parse("https://sub.domain.co.uk/path/to/resource/") |> should.be_ok
let _ =
uri.parse("file:///C:/Windows/System32/drivers/etc/hosts")
|> should.be_ok
let _ =
uri.parse("git+ssh://git@example.com:2222/repo.git") |> should.be_ok
let _ =
uri.parse(
"https://xn--fsqu00a.xn--0zwm56d/%E8%B7%AF%E5%BE%84?%E6%9F%A5%E8%AF%A2=%E5%80%BC#%E7%89%87%E6%AE%B5",
)
|> should.be_ok
Nil
}),
it("ai gen fail", fn() {
let _ = uri.parse("ht!tp://example.com") |> should.be_error
let _ = uri.parse("http://exa mple.com") |> should.be_error
let _ = uri.parse("://missing-scheme.com") |> should.be_error
let _ = uri.parse("http://example.com:80a/") |> should.be_error
let _ = uri.parse("http://[2001:db8::1") |> should.be_error
let _ = uri.parse("http://example.com/%ZZ") |> should.be_error
let _ = uri.parse("http://example.com?%") |> should.be_error
let _ = uri.parse("`https://example.com/invalid") |> should.be_error
let _ = uri.parse("http://example.com?foo=bar%2") |> should.be_error
let _ = uri.parse("http://example.com:12345abc/") |> should.be_error
}),
])
}