Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3cd6d5d4af | |||
| a00af69b56 | |||
| c6ee27fa7a | |||
| 5c4a444231 | |||
| 452117db63 | |||
| 5da4ea66b1 | |||
| 321e203778 | |||
| 1ac5e05e1a | |||
| cc110b414f | |||
| 246706d4fc | |||
| 897124be27 |
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: erlef/setup-beam@v1
|
||||
with:
|
||||
otp-version: "28.0.4"
|
||||
otp-version: "28.1"
|
||||
gleam-version: "1.12.0"
|
||||
rebar3-version: "3.25.1"
|
||||
elixir-version: "1.18.4"
|
||||
|
||||
@@ -12,3 +12,8 @@
|
||||
|
||||
- Improved parsing performance significantly and reduced memory usage up to 50%
|
||||
- Significantly improved IPv4 parsing performance
|
||||
|
||||
## v2.0.2
|
||||
|
||||
- Minor performance improvement for URIs with userinfo
|
||||
- More performance improvements for ASCII/digit parsing
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
name = "gluri"
|
||||
version = "2.0.1"
|
||||
version = "2.0.2"
|
||||
|
||||
# Fill out these fields if you intend to generate HTML documentation or publish
|
||||
# your project to the Hex package manager.
|
||||
|
||||
@@ -16,7 +16,7 @@ packages = [
|
||||
{ name = "gleam_json", version = "3.0.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_json", source = "hex", outer_checksum = "874FA3C3BB6E22DD2BB111966BD40B3759E9094E05257899A7C08F5DE77EC049" },
|
||||
{ name = "gleam_otp", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "7987CBEBC8060B88F14575DEF546253F3116EBE2A5DA6FD82F38243FCE97C54B" },
|
||||
{ name = "gleam_regexp", version = "1.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "9C215C6CA84A5B35BB934A9B61A9A306EC743153BE2B0425A0D032E477B062A9" },
|
||||
{ name = "gleam_stdlib", version = "0.63.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "E1D5EC07638F606E48F0EA1556044DD805F2ACE9092A6F6AFBE4A0CC4DA21C2F" },
|
||||
{ name = "gleam_stdlib", version = "0.63.2", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "962B25C667DA07F4CAB32001F44D3C41C1A89E58E3BBA54F183B482CF6122150" },
|
||||
{ name = "gleam_time", version = "1.4.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_time", source = "hex", outer_checksum = "DCDDC040CE97DA3D2A925CDBBA08D8A78681139745754A83998641C8A3F6587E" },
|
||||
{ name = "gleam_yielder", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_yielder", source = "hex", outer_checksum = "8E4E4ECFA7982859F430C57F549200C7749823C106759F4A19A78AEA6687717A" },
|
||||
{ name = "gleeunit", version = "1.6.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "FDC68A8C492B1E9B429249062CD9BAC9B5538C6FBF584817205D0998C42E1DAC" },
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import gleam/bool
|
||||
import gleam/int
|
||||
import gleam/list
|
||||
import gleam/option.{type Option, None, Some}
|
||||
@@ -168,7 +167,7 @@ fn parse_authority(str: String) -> Result(#(Uri, String), Nil) {
|
||||
}
|
||||
|
||||
fn parse_authority_part(str: String) -> Result(#(Uri, String), Nil) {
|
||||
let #(userinfo, rest) = parse_userinfo(str, "")
|
||||
let #(userinfo, rest) = parse_userinfo(str)
|
||||
|
||||
use #(host, rest) <- result.try(parse_host(rest))
|
||||
|
||||
@@ -180,8 +179,14 @@ fn parse_authority_part(str: String) -> Result(#(Uri, String), Nil) {
|
||||
}
|
||||
|
||||
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
|
||||
fn parse_userinfo(str: String, userinfo: String) -> #(Option(String), String) {
|
||||
use <- bool.guard(when: !string.contains(str, "@"), return: #(None, str))
|
||||
fn parse_userinfo(str: String) -> #(Option(String), String) {
|
||||
case string.contains(str, "@") {
|
||||
True -> do_parse_userinfo(str, "")
|
||||
False -> #(None, str)
|
||||
}
|
||||
}
|
||||
|
||||
fn do_parse_userinfo(str: String, userinfo: String) -> #(Option(String), String) {
|
||||
case str {
|
||||
"@" <> rest -> #(Some(userinfo), rest)
|
||||
"" -> #(None, userinfo <> str)
|
||||
@@ -202,7 +207,7 @@ fn parse_userinfo(str: String, userinfo: String) -> #(Option(String), String) {
|
||||
str,
|
||||
)
|
||||
{
|
||||
Ok(#(part, rest)) -> parse_userinfo(rest, userinfo <> part)
|
||||
Ok(#(part, rest)) -> do_parse_userinfo(rest, userinfo <> part)
|
||||
Error(_) -> #(None, userinfo <> str)
|
||||
}
|
||||
}
|
||||
@@ -421,7 +426,7 @@ fn parse_ipv4address(str: String) {
|
||||
// / "1" 2DIGIT ; 100-199
|
||||
// / "2" %x30-34 DIGIT ; 200-249
|
||||
// / "25" %x30-35 ; 250-255
|
||||
pub fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) {
|
||||
fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) {
|
||||
try_parsers(
|
||||
[
|
||||
parse_this_then(_, [
|
||||
@@ -486,7 +491,7 @@ pub fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) {
|
||||
}
|
||||
|
||||
// reg-name = *( unreserved / pct-encoded / sub-delims )
|
||||
pub fn parse_reg_name(str: String) {
|
||||
fn parse_reg_name(str: String) {
|
||||
// can't error
|
||||
|
||||
case do_parse_reg_name(str, "") {
|
||||
@@ -722,52 +727,53 @@ fn parse_unreserved(str: String) -> Result(#(String, String), Nil) {
|
||||
|
||||
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
|
||||
// / "*" / "+" / "," / ";" / "="
|
||||
fn parse_sub_delim(str: String) {
|
||||
case str {
|
||||
"!" as l <> rest
|
||||
| "$" as l <> rest
|
||||
| "&" as l <> rest
|
||||
| "'" as l <> rest
|
||||
| "(" as l <> rest
|
||||
| ")" as l <> rest
|
||||
| "*" as l <> rest
|
||||
| "+" as l <> rest
|
||||
| "," as l <> rest
|
||||
| ";" as l <> rest
|
||||
| "=" as l <> rest -> Ok(#(l, rest))
|
||||
_ -> Error(Nil)
|
||||
// %21 / %24 / %26 / %27 / %28 / %29
|
||||
// / %2A / %2B / %2C / %3B / %3D
|
||||
// Tries to consume one `sub-delims` character from the front of `str`.
//
// Returns `Ok(#(char, rest))` when the first grapheme is one of
// "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=", and `Error(Nil)` when `str`
// is empty or starts with anything else.
fn parse_sub_delim(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#(char, tail)) ->
      // A grapheme cluster can span several codepoints (for example "\r\n",
      // or a letter followed by a combining mark). Such a cluster can never
      // be a sub-delim, so it is rejected here instead of asserting that the
      // codepoint list has exactly one element, which would crash the parser.
      case string.to_utf_codepoints(char) {
        [codepoint] -> {
          let i = string.utf_codepoint_to_int(codepoint)
          case i {
            // "&" "'" "(" ")" "*" "+" ","
            _ if i >= 0x26 && i <= 0x2C -> Ok(#(char, tail))
            // "!"
            _ if i == 0x21 -> Ok(#(char, tail))
            // "$"
            _ if i == 0x24 -> Ok(#(char, tail))
            // ";"
            _ if i == 0x3B -> Ok(#(char, tail))
            // "="
            _ if i == 0x3D -> Ok(#(char, tail))
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}
|
||||
|
||||
// DIGIT = %x30–39
|
||||
fn parse_digit(str: String) -> Result(#(String, String), Nil) {
|
||||
case str {
|
||||
"0" as l <> rest
|
||||
| "1" as l <> rest
|
||||
| "2" as l <> rest
|
||||
| "3" as l <> rest
|
||||
| "4" as l <> rest
|
||||
| "5" as l <> rest
|
||||
| "6" as l <> rest
|
||||
| "7" as l <> rest
|
||||
| "8" as l <> rest
|
||||
| "9" as l <> rest -> Ok(#(l, rest))
|
||||
_ -> Error(Nil)
|
||||
case string.pop_grapheme(str) {
|
||||
Ok(#(char, tail)) -> {
|
||||
let assert [codepoint] = string.to_utf_codepoints(char)
|
||||
let i = string.utf_codepoint_to_int(codepoint)
|
||||
case i {
|
||||
_ if i >= 0x30 && i <= 0x39 -> Ok(#(char, tail))
|
||||
_ -> Error(Nil)
|
||||
}
|
||||
}
|
||||
Error(_) -> Error(Nil)
|
||||
}
|
||||
}
|
||||
|
||||
// DIGIT (non-zero) = %x31–39
|
||||
fn parse_digit_nz(str: String) -> Result(#(String, String), Nil) {
|
||||
case str {
|
||||
"1" as l <> rest
|
||||
| "2" as l <> rest
|
||||
| "3" as l <> rest
|
||||
| "4" as l <> rest
|
||||
| "5" as l <> rest
|
||||
| "6" as l <> rest
|
||||
| "7" as l <> rest
|
||||
| "8" as l <> rest
|
||||
| "9" as l <> rest -> Ok(#(l, rest))
|
||||
_ -> Error(Nil)
|
||||
case string.pop_grapheme(str) {
|
||||
Ok(#(char, tail)) -> {
|
||||
let assert [codepoint] = string.to_utf_codepoints(char)
|
||||
let i = string.utf_codepoint_to_int(codepoint)
|
||||
case i {
|
||||
_ if i >= 0x31 && i <= 0x39 -> Ok(#(char, tail))
|
||||
_ -> Error(Nil)
|
||||
}
|
||||
}
|
||||
Error(_) -> Error(Nil)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -782,60 +788,17 @@ fn parse_digits(str: String, digits: String) {
|
||||
|
||||
// ALPHA = %x41–5A | %x61–7A
|
||||
fn parse_alpha(str: String) -> Result(#(String, String), Nil) {
|
||||
case str {
|
||||
"a" as l <> rest
|
||||
| "b" as l <> rest
|
||||
| "c" as l <> rest
|
||||
| "d" as l <> rest
|
||||
| "e" as l <> rest
|
||||
| "f" as l <> rest
|
||||
| "g" as l <> rest
|
||||
| "h" as l <> rest
|
||||
| "i" as l <> rest
|
||||
| "j" as l <> rest
|
||||
| "k" as l <> rest
|
||||
| "l" as l <> rest
|
||||
| "m" as l <> rest
|
||||
| "n" as l <> rest
|
||||
| "o" as l <> rest
|
||||
| "p" as l <> rest
|
||||
| "q" as l <> rest
|
||||
| "r" as l <> rest
|
||||
| "s" as l <> rest
|
||||
| "t" as l <> rest
|
||||
| "u" as l <> rest
|
||||
| "v" as l <> rest
|
||||
| "w" as l <> rest
|
||||
| "x" as l <> rest
|
||||
| "y" as l <> rest
|
||||
| "z" as l <> rest
|
||||
| "A" as l <> rest
|
||||
| "B" as l <> rest
|
||||
| "C" as l <> rest
|
||||
| "D" as l <> rest
|
||||
| "E" as l <> rest
|
||||
| "F" as l <> rest
|
||||
| "G" as l <> rest
|
||||
| "H" as l <> rest
|
||||
| "I" as l <> rest
|
||||
| "J" as l <> rest
|
||||
| "K" as l <> rest
|
||||
| "L" as l <> rest
|
||||
| "M" as l <> rest
|
||||
| "N" as l <> rest
|
||||
| "O" as l <> rest
|
||||
| "P" as l <> rest
|
||||
| "Q" as l <> rest
|
||||
| "R" as l <> rest
|
||||
| "S" as l <> rest
|
||||
| "T" as l <> rest
|
||||
| "U" as l <> rest
|
||||
| "V" as l <> rest
|
||||
| "W" as l <> rest
|
||||
| "X" as l <> rest
|
||||
| "Y" as l <> rest
|
||||
| "Z" as l <> rest -> Ok(#(l, rest))
|
||||
_ -> Error(Nil)
|
||||
case string.pop_grapheme(str) {
|
||||
Ok(#(char, tail)) -> {
|
||||
let assert [codepoint] = string.to_utf_codepoints(char)
|
||||
let i = string.utf_codepoint_to_int(codepoint)
|
||||
case i {
|
||||
_ if i >= 0x41 && i <= 0x5A -> Ok(#(char, tail))
|
||||
_ if i >= 0x61 && i <= 0x7A -> Ok(#(char, tail))
|
||||
_ -> Error(Nil)
|
||||
}
|
||||
}
|
||||
Error(_) -> Error(Nil)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -370,31 +370,19 @@ fn unescape_percent(str: String) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_hex_digit(str) {
|
||||
case str {
|
||||
"0" as l <> rest
|
||||
| "1" as l <> rest
|
||||
| "2" as l <> rest
|
||||
| "3" as l <> rest
|
||||
| "4" as l <> rest
|
||||
| "5" as l <> rest
|
||||
| "6" as l <> rest
|
||||
| "7" as l <> rest
|
||||
| "8" as l <> rest
|
||||
| "9" as l <> rest
|
||||
| "a" as l <> rest
|
||||
| "b" as l <> rest
|
||||
| "c" as l <> rest
|
||||
| "d" as l <> rest
|
||||
| "e" as l <> rest
|
||||
| "f" as l <> rest
|
||||
| "A" as l <> rest
|
||||
| "B" as l <> rest
|
||||
| "C" as l <> rest
|
||||
| "D" as l <> rest
|
||||
| "E" as l <> rest
|
||||
| "F" as l <> rest -> Ok(#(l, rest))
|
||||
_ -> Error(Nil)
|
||||
/// Tries to consume one hexadecimal digit (`0-9`, `A-F`, `a-f`) from the
/// front of `str`.
///
/// Returns `Ok(#(digit, rest))` on success, and `Error(Nil)` when `str` is
/// empty or its first grapheme is not a hexadecimal digit.
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#(char, tail)) ->
      // A grapheme cluster can span several codepoints (for example "\r\n",
      // or a letter followed by a combining mark). Such a cluster is never a
      // hex digit, so it is rejected here instead of asserting that the
      // codepoint list has exactly one element, which would crash the caller.
      case string.to_utf_codepoints(char) {
        [codepoint] -> {
          let i = string.utf_codepoint_to_int(codepoint)
          case i {
            // "0" - "9"
            _ if i >= 0x30 && i <= 0x39 -> Ok(#(char, tail))
            // "A" - "F"
            _ if i >= 0x41 && i <= 0x46 -> Ok(#(char, tail))
            // "a" - "f"
            _ if i >= 0x61 && i <= 0x66 -> Ok(#(char, tail))
            _ -> Error(Nil)
          }
        }
        _ -> Error(Nil)
      }
    Error(_) -> Error(Nil)
  }
}
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import gleam/uri as uri2
|
||||
import gluri as uri
|
||||
import gluri/internal/parser
|
||||
import glychee/benchmark
|
||||
import glychee/configuration
|
||||
|
||||
@@ -15,43 +14,43 @@ pub fn main() {
|
||||
// ip_benchmark()
|
||||
}
|
||||
|
||||
@target(erlang)
|
||||
pub fn ip_benchmark() {
|
||||
benchmark.run(
|
||||
[
|
||||
benchmark.Function("ip_benchmark", fn(data) {
|
||||
fn() {
|
||||
let _ = parser.parse_dec_octet(data)
|
||||
Nil
|
||||
}
|
||||
}),
|
||||
],
|
||||
[
|
||||
benchmark.Data("173", "173"),
|
||||
benchmark.Data("5", "5"),
|
||||
benchmark.Data("200", "200"),
|
||||
benchmark.Data("255", "255"),
|
||||
benchmark.Data("fail", "2b"),
|
||||
],
|
||||
)
|
||||
}
|
||||
// @target(erlang)
|
||||
// pub fn ip_benchmark() {
|
||||
// benchmark.run(
|
||||
// [
|
||||
// benchmark.Function("ip_benchmark", fn(data) {
|
||||
// fn() {
|
||||
// let _ = parser.parse_dec_octet(data)
|
||||
// Nil
|
||||
// }
|
||||
// }),
|
||||
// ],
|
||||
// [
|
||||
// benchmark.Data("173", "173"),
|
||||
// benchmark.Data("5", "5"),
|
||||
// benchmark.Data("200", "200"),
|
||||
// benchmark.Data("255", "255"),
|
||||
// benchmark.Data("fail", "2b"),
|
||||
// ],
|
||||
// )
|
||||
// }
|
||||
|
||||
@target(erlang)
|
||||
pub fn reg_name_benchmark() {
|
||||
benchmark.run(
|
||||
[
|
||||
benchmark.Function("reg_name_benchmark", fn(data) {
|
||||
fn() {
|
||||
let _ = parser.parse_reg_name(data)
|
||||
Nil
|
||||
}
|
||||
}),
|
||||
],
|
||||
[
|
||||
benchmark.Data("long", "github.com"),
|
||||
],
|
||||
)
|
||||
}
|
||||
// @target(erlang)
|
||||
// pub fn reg_name_benchmark() {
|
||||
// benchmark.run(
|
||||
// [
|
||||
// benchmark.Function("reg_name_benchmark", fn(data) {
|
||||
// fn() {
|
||||
// let _ = parser.parse_reg_name(data)
|
||||
// Nil
|
||||
// }
|
||||
// }),
|
||||
// ],
|
||||
// [
|
||||
// benchmark.Data("long", "github.com"),
|
||||
// ],
|
||||
// )
|
||||
// }
|
||||
|
||||
@target(erlang)
|
||||
pub fn parse_benchmark() {
|
||||
@@ -75,6 +74,10 @@ pub fn parse_benchmark() {
|
||||
"long",
|
||||
"https://github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
|
||||
),
|
||||
benchmark.Data(
|
||||
"with user",
|
||||
"https://test_name:user%20$$$@github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
|
||||
),
|
||||
benchmark.Data("ipv4", "https://192.255.36.4/"),
|
||||
],
|
||||
)
|
||||
|
||||
@@ -9,6 +9,66 @@ pub fn main() {
|
||||
startest.run(startest.default_config())
|
||||
}
|
||||
|
||||
pub fn parse_general_tests() {
|
||||
describe("general parsing", [
|
||||
it("mailto parsing", fn() {
|
||||
uri.parse("mailto:Joe@example.com")
|
||||
|> should.equal(Ok(
|
||||
Uri(..empty, scheme: Some("mailto"), path: "Joe@example.com"),
|
||||
))
|
||||
uri.parse("mailto:Joe@example.com?hello#bye")
|
||||
|> should.equal(Ok(
|
||||
Uri(
|
||||
..empty,
|
||||
scheme: Some("mailto"),
|
||||
path: "Joe@example.com",
|
||||
query: Some("hello"),
|
||||
fragment: Some("bye"),
|
||||
),
|
||||
))
|
||||
}),
|
||||
it("ai gen pass", fn() {
|
||||
let _ = uri.parse("https://example.com") |> should.be_ok
|
||||
let _ =
|
||||
uri.parse("http://www.example.org/resource?id=123&lang=en")
|
||||
|> should.be_ok
|
||||
let _ =
|
||||
uri.parse("ftp://ftp.example.net/pub/files/archive.tar.gz")
|
||||
|> should.be_ok
|
||||
let _ = uri.parse("mailto:user+alias@example.com") |> should.be_ok
|
||||
let _ = uri.parse("urn:isbn:978-3-16-148410-0") |> should.be_ok
|
||||
let _ =
|
||||
uri.parse("ws://socket.example.com:8080/chat?room=42#section2")
|
||||
|> should.be_ok
|
||||
let _ =
|
||||
uri.parse("https://sub.domain.co.uk/path/to/resource/") |> should.be_ok
|
||||
let _ =
|
||||
uri.parse("file:///C:/Windows/System32/drivers/etc/hosts")
|
||||
|> should.be_ok
|
||||
let _ =
|
||||
uri.parse("git+ssh://git@example.com:2222/repo.git") |> should.be_ok
|
||||
let _ =
|
||||
uri.parse(
|
||||
"https://xn--fsqu00a.xn--0zwm56d/%E8%B7%AF%E5%BE%84?%E6%9F%A5%E8%AF%A2=%E5%80%BC#%E7%89%87%E6%AE%B5",
|
||||
)
|
||||
|> should.be_ok
|
||||
Nil
|
||||
}),
|
||||
it("ai gen fail", fn() {
|
||||
let _ = uri.parse("ht!tp://example.com") |> should.be_error
|
||||
let _ = uri.parse("http://exa mple.com") |> should.be_error
|
||||
let _ = uri.parse("://missing-scheme.com") |> should.be_error
|
||||
let _ = uri.parse("http://example.com:80a/") |> should.be_error
|
||||
let _ = uri.parse("http://[2001:db8::1") |> should.be_error
|
||||
let _ = uri.parse("http://example.com/%ZZ") |> should.be_error
|
||||
let _ = uri.parse("http://example.com?%") |> should.be_error
|
||||
let _ = uri.parse("`https://example.com/invalid") |> should.be_error
|
||||
let _ = uri.parse("http://example.com?foo=bar%2") |> should.be_error
|
||||
let _ = uri.parse("http://example.com:12345abc/") |> should.be_error
|
||||
}),
|
||||
])
|
||||
}
|
||||
|
||||
pub fn parse_scheme_tests() {
|
||||
describe("scheme parsing", [
|
||||
it("simple parse", fn() {
|
||||
|
||||
Reference in New Issue
Block a user