20 Commits
v2.0.1 ... main

Author SHA1 Message Date
af619a54da docs: Updated changelog
Some checks failed
test / test (push) Has been cancelled
2025-10-27 19:19:06 +00:00
e51116c2b2 fix: Fix uri encoding/decoding and add new tests
Some checks failed
test / test (push) Has been cancelled
2025-10-27 19:13:37 +00:00
56e3682237 build: Update version & repository 2025-10-27 19:13:37 +00:00
7f631dc13c perf: Changed benchmarks titles 2025-10-27 19:13:37 +00:00
91807aac61 perf: Removed erlang/js specific parsing
Some checks failed
test / test (push) Has been cancelled
This currently seems to compile exactly the same in 1.12.0 and 1.13.0 so
therefore has a performance regression
2025-10-19 19:16:33 +01:00
7b7e689892 perf: Added benchmarks and changelog
Some checks failed
test / test (push) Has been cancelled
2025-10-03 11:15:59 +01:00
22d13bdf7d perf: Split parsers into erlang/js targets 2025-10-03 11:10:36 +01:00
3fc9a61afe build: Updated version 2025-10-03 10:59:46 +01:00
b9d1077425 perf: Revert back to pattern matching for ranges
More in-depth checks may be needed, and these optimisations may need to be
removed if the core Gleam compiler fixes the performance issues with
pattern matching + capture.
Pattern matching may be better for smaller ranges, but benchmarks would be
needed to find out what the cut-off point is.
2025-10-03 10:55:48 +01:00
3cd6d5d4af perf: Fix minor perf issue for JS 2025-09-22 12:55:09 +01:00
a00af69b56 build: Updated version 2025-09-22 11:53:13 +01:00
c6ee27fa7a docs: Updated changelog 2025-09-22 11:46:34 +01:00
5c4a444231 perf: Reworked ascii/digit parsing to speed up things 2025-09-22 11:45:20 +01:00
452117db63 test: Added some random uris for testing 2025-09-22 11:44:54 +01:00
5da4ea66b1 refactor: Rewrote bool.guard to standard pattern match 2025-09-18 18:22:34 +01:00
321e203778 build: Update erlang version for github actions 2025-09-17 18:24:19 +01:00
1ac5e05e1a build: Update stdlib dependency 2025-09-16 09:58:15 +01:00
cc110b414f docs: Updated changelog 2025-09-15 15:26:57 +01:00
246706d4fc test: Added tests for mailto 2025-09-15 15:03:36 +01:00
897124be27 perf: Tweak to userinfo parse to not repeat @ check 2025-09-15 14:39:39 +01:00
12 changed files with 589 additions and 193 deletions

View File

@@ -14,7 +14,7 @@ jobs:
- uses: actions/checkout@v4
- uses: erlef/setup-beam@v1
with:
otp-version: "28.0.4"
otp-version: "28.1"
gleam-version: "1.12.0"
rebar3-version: "3.25.1"
elixir-version: "1.18.4"

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
*.ez
/build
erl_crash.dump
node_modules

View File

@@ -12,3 +12,18 @@
- Improved parsing performance significantly and reduced memory usage up to 50%
- Significantly improved IPV4 parsing performance
## v2.0.2
- Minor performance improvement for uris with userinfo
- More performance improvements for ascii/digit parsing
## v2.0.3
- Minor performance improvement for erlang
- Major performance improvement for js
## v2.0.4
- Reverted some optimisations as they are unnecessary for Gleam v1.14.0+
- Fix uri encoding/decoding (I think)

View File

@@ -1,12 +1,12 @@
name = "gluri"
version = "2.0.1"
version = "2.0.4"
# Fill out these fields if you intend to generate HTML documentation or publish
# your project to the Hex package manager.
#
description = "Uri (RFC 3986) library for Gleam"
licences = ["Apache-2.0"]
repository = { type = "github", user = "pendletong", repo = "uri" }
repository = { type = "gitea", host = "git.pendleton.ie", user = "pendletong", repo = "uri" }
links = [{ title = "RFC 3986", href = "https://www.ietf.org/rfc/rfc3986.txt" }]
#
# For a full reference of all the available options, you can have a look at

View File

@@ -3,7 +3,7 @@
packages = [
{ name = "argv", version = "1.0.2", build_tools = ["gleam"], requirements = [], otp_app = "argv", source = "hex", outer_checksum = "BA1FF0929525DEBA1CE67256E5ADF77A7CDDFE729E3E3F57A5BDCAA031DED09D" },
{ name = "benchee", version = "1.4.0", build_tools = ["mix"], requirements = ["deep_merge", "statistex", "table"], otp_app = "benchee", source = "hex", outer_checksum = "299CD10DD8CE51C9EA3DDB74BB150F93D25E968F93E4C1FA31698A8E4FA5D715" },
{ name = "benchee", version = "1.5.0", build_tools = ["mix"], requirements = ["deep_merge", "statistex", "table"], otp_app = "benchee", source = "hex", outer_checksum = "5B075393AEA81B8AE74EADD1C28B1D87E8A63696C649D8293DB7C4DF3EB67535" },
{ name = "bigben", version = "1.0.1", build_tools = ["gleam"], requirements = ["birl", "gleam_erlang", "gleam_otp", "gleam_stdlib"], otp_app = "bigben", source = "hex", outer_checksum = "190E489610A80D76C48BACC75EB8314BD184FF0220AB0F251ABE760B993B91BB" },
{ name = "birl", version = "1.8.0", build_tools = ["gleam"], requirements = ["gleam_regexp", "gleam_stdlib", "ranger"], otp_app = "birl", source = "hex", outer_checksum = "2AC7BA26F998E3DFADDB657148BD5DDFE966958AD4D6D6957DD0D22E5B56C400" },
{ name = "deep_merge", version = "1.0.0", build_tools = ["mix"], requirements = [], otp_app = "deep_merge", source = "hex", outer_checksum = "CE708E5F094B9CD4E8F2BE4F00D2F4250C4095BE93F8CD6D018C753894885430" },
@@ -14,9 +14,9 @@ packages = [
{ name = "gleam_erlang", version = "1.3.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_erlang", source = "hex", outer_checksum = "1124AD3AA21143E5AF0FC5CF3D9529F6DB8CA03E43A55711B60B6B7B3874375C" },
{ name = "gleam_javascript", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_javascript", source = "hex", outer_checksum = "EF6C77A506F026C6FB37941889477CD5E4234FCD4337FF0E9384E297CB8F97EB" },
{ name = "gleam_json", version = "3.0.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_json", source = "hex", outer_checksum = "874FA3C3BB6E22DD2BB111966BD40B3759E9094E05257899A7C08F5DE77EC049" },
{ name = "gleam_otp", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "7987CBEBC8060B88F14575DEF546253F3116EBE2A5DA6FD82F38243FCE97C54B" },
{ name = "gleam_otp", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "BA6A294E295E428EC1562DC1C11EA7530DCB981E8359134BEABC8493B7B2258E" },
{ name = "gleam_regexp", version = "1.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "9C215C6CA84A5B35BB934A9B61A9A306EC743153BE2B0425A0D032E477B062A9" },
{ name = "gleam_stdlib", version = "0.63.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "E1D5EC07638F606E48F0EA1556044DD805F2ACE9092A6F6AFBE4A0CC4DA21C2F" },
{ name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" },
{ name = "gleam_time", version = "1.4.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_time", source = "hex", outer_checksum = "DCDDC040CE97DA3D2A925CDBBA08D8A78681139745754A83998641C8A3F6587E" },
{ name = "gleam_yielder", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_yielder", source = "hex", outer_checksum = "8E4E4ECFA7982859F430C57F549200C7749823C106759F4A19A78AEA6687717A" },
{ name = "gleeunit", version = "1.6.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "FDC68A8C492B1E9B429249062CD9BAC9B5538C6FBF584817205D0998C42E1DAC" },

18
package-lock.json generated Normal file
View File

@@ -0,0 +1,18 @@
{
"name": "uri",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"mitata": "^1.0.34"
}
},
"node_modules/mitata": {
"version": "1.0.34",
"resolved": "https://registry.npmjs.org/mitata/-/mitata-1.0.34.tgz",
"integrity": "sha512-Mc3zrtNBKIMeHSCQ0XqRLo1vbdIx1wvFV9c8NJAiyho6AjNfMY8bVhbS12bwciUdd1t4rj8099CH3N3NFahaUA==",
"license": "MIT"
}
}
}

6
package.json Normal file
View File

@@ -0,0 +1,6 @@
{
"type": "module",
"dependencies": {
"mitata": "^1.0.34"
}
}

View File

@@ -1,4 +1,3 @@
import gleam/bool
import gleam/int
import gleam/list
import gleam/option.{type Option, None, Some}
@@ -168,7 +167,7 @@ fn parse_authority(str: String) -> Result(#(Uri, String), Nil) {
}
fn parse_authority_part(str: String) -> Result(#(Uri, String), Nil) {
let #(userinfo, rest) = parse_userinfo(str, "")
let #(userinfo, rest) = parse_userinfo(str)
use #(host, rest) <- result.try(parse_host(rest))
@@ -180,8 +179,14 @@ fn parse_authority_part(str: String) -> Result(#(Uri, String), Nil) {
}
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
fn parse_userinfo(str: String, userinfo: String) -> #(Option(String), String) {
use <- bool.guard(when: !string.contains(str, "@"), return: #(None, str))
fn parse_userinfo(str: String) -> #(Option(String), String) {
case string.contains(str, "@") {
True -> do_parse_userinfo(str, "")
False -> #(None, str)
}
}
fn do_parse_userinfo(str: String, userinfo: String) -> #(Option(String), String) {
case str {
"@" <> rest -> #(Some(userinfo), rest)
"" -> #(None, userinfo <> str)
@@ -202,7 +207,7 @@ fn parse_userinfo(str: String, userinfo: String) -> #(Option(String), String) {
str,
)
{
Ok(#(part, rest)) -> parse_userinfo(rest, userinfo <> part)
Ok(#(part, rest)) -> do_parse_userinfo(rest, userinfo <> part)
Error(_) -> #(None, userinfo <> str)
}
}
@@ -421,7 +426,7 @@ fn parse_ipv4address(str: String) {
// / "1" 2DIGIT ; 100-199
// / "2" %x30-34 DIGIT ; 200-249
// / "25" %x30-35 ; 250-255
pub fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) {
fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) {
try_parsers(
[
parse_this_then(_, [
@@ -486,7 +491,7 @@ pub fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) {
}
// reg-name = *( unreserved / pct-encoded / sub-delims )
pub fn parse_reg_name(str: String) {
fn parse_reg_name(str: String) {
// can't error
case do_parse_reg_name(str, "") {
@@ -722,19 +727,21 @@ fn parse_unreserved(str: String) -> Result(#(String, String), Nil) {
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
// / "*" / "+" / "," / ";" / "="
fn parse_sub_delim(str: String) {
// %21 / %24 / %26 / %27 / %28 / %29
// / %2A / %2B / %2C / %3B / %3D
fn parse_sub_delim(str: String) -> Result(#(String, String), Nil) {
case str {
"!" as l <> rest
| "$" as l <> rest
| "&" as l <> rest
| "'" as l <> rest
| "(" as l <> rest
| ")" as l <> rest
| "*" as l <> rest
| "+" as l <> rest
| "," as l <> rest
| ";" as l <> rest
| "=" as l <> rest -> Ok(#(l, rest))
"!" as char <> tail
| "$" as char <> tail
| "&" as char <> tail
| "'" as char <> tail
| "(" as char <> tail
| ")" as char <> tail
| "*" as char <> tail
| "+" as char <> tail
| "," as char <> tail
| ";" as char <> tail
| "=" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}
@@ -742,31 +749,34 @@ fn parse_sub_delim(str: String) {
// DIGIT = %x30-39
fn parse_digit(str: String) -> Result(#(String, String), Nil) {
case str {
"0" as l <> rest
| "1" as l <> rest
| "2" as l <> rest
| "3" as l <> rest
| "4" as l <> rest
| "5" as l <> rest
| "6" as l <> rest
| "7" as l <> rest
| "8" as l <> rest
| "9" as l <> rest -> Ok(#(l, rest))
"0" as char <> tail
| "1" as char <> tail
| "2" as char <> tail
| "3" as char <> tail
| "4" as char <> tail
| "5" as char <> tail
| "6" as char <> tail
| "7" as char <> tail
| "8" as char <> tail
| "9" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}
// DIGIT (non-zero) = %x31-39
fn parse_digit_nz(str: String) -> Result(#(String, String), Nil) {
case str {
"1" as l <> rest
| "2" as l <> rest
| "3" as l <> rest
| "4" as l <> rest
| "5" as l <> rest
| "6" as l <> rest
| "7" as l <> rest
| "8" as l <> rest
| "9" as l <> rest -> Ok(#(l, rest))
"1" as char <> tail
| "2" as char <> tail
| "3" as char <> tail
| "4" as char <> tail
| "5" as char <> tail
| "6" as char <> tail
| "7" as char <> tail
| "8" as char <> tail
| "9" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}
@@ -783,58 +793,59 @@ fn parse_digits(str: String, digits: String) {
// ALPHA = %x41-5A / %x61-7A
fn parse_alpha(str: String) -> Result(#(String, String), Nil) {
case str {
"a" as l <> rest
| "b" as l <> rest
| "c" as l <> rest
| "d" as l <> rest
| "e" as l <> rest
| "f" as l <> rest
| "g" as l <> rest
| "h" as l <> rest
| "i" as l <> rest
| "j" as l <> rest
| "k" as l <> rest
| "l" as l <> rest
| "m" as l <> rest
| "n" as l <> rest
| "o" as l <> rest
| "p" as l <> rest
| "q" as l <> rest
| "r" as l <> rest
| "s" as l <> rest
| "t" as l <> rest
| "u" as l <> rest
| "v" as l <> rest
| "w" as l <> rest
| "x" as l <> rest
| "y" as l <> rest
| "z" as l <> rest
| "A" as l <> rest
| "B" as l <> rest
| "C" as l <> rest
| "D" as l <> rest
| "E" as l <> rest
| "F" as l <> rest
| "G" as l <> rest
| "H" as l <> rest
| "I" as l <> rest
| "J" as l <> rest
| "K" as l <> rest
| "L" as l <> rest
| "M" as l <> rest
| "N" as l <> rest
| "O" as l <> rest
| "P" as l <> rest
| "Q" as l <> rest
| "R" as l <> rest
| "S" as l <> rest
| "T" as l <> rest
| "U" as l <> rest
| "V" as l <> rest
| "W" as l <> rest
| "X" as l <> rest
| "Y" as l <> rest
| "Z" as l <> rest -> Ok(#(l, rest))
"a" as char <> tail
| "b" as char <> tail
| "c" as char <> tail
| "d" as char <> tail
| "e" as char <> tail
| "f" as char <> tail
| "g" as char <> tail
| "h" as char <> tail
| "i" as char <> tail
| "j" as char <> tail
| "k" as char <> tail
| "l" as char <> tail
| "m" as char <> tail
| "n" as char <> tail
| "o" as char <> tail
| "p" as char <> tail
| "q" as char <> tail
| "r" as char <> tail
| "s" as char <> tail
| "t" as char <> tail
| "u" as char <> tail
| "v" as char <> tail
| "w" as char <> tail
| "x" as char <> tail
| "y" as char <> tail
| "z" as char <> tail
| "A" as char <> tail
| "B" as char <> tail
| "C" as char <> tail
| "D" as char <> tail
| "E" as char <> tail
| "F" as char <> tail
| "G" as char <> tail
| "H" as char <> tail
| "I" as char <> tail
| "J" as char <> tail
| "K" as char <> tail
| "L" as char <> tail
| "M" as char <> tail
| "N" as char <> tail
| "O" as char <> tail
| "P" as char <> tail
| "Q" as char <> tail
| "R" as char <> tail
| "S" as char <> tail
| "T" as char <> tail
| "U" as char <> tail
| "V" as char <> tail
| "W" as char <> tail
| "X" as char <> tail
| "Y" as char <> tail
| "Z" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}

View File

@@ -109,7 +109,7 @@ pub fn parse_min_max(
do_parse_min_max(str, "", min, max, parse_fn)
}
pub fn do_parse_min_max(
fn do_parse_min_max(
str: d,
acc: String,
min: Int,
@@ -370,30 +370,31 @@ fn unescape_percent(str: String) -> String {
}
}
pub fn parse_hex_digit(str) {
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
case str {
"0" as l <> rest
| "1" as l <> rest
| "2" as l <> rest
| "3" as l <> rest
| "4" as l <> rest
| "5" as l <> rest
| "6" as l <> rest
| "7" as l <> rest
| "8" as l <> rest
| "9" as l <> rest
| "a" as l <> rest
| "b" as l <> rest
| "c" as l <> rest
| "d" as l <> rest
| "e" as l <> rest
| "f" as l <> rest
| "A" as l <> rest
| "B" as l <> rest
| "C" as l <> rest
| "D" as l <> rest
| "E" as l <> rest
| "F" as l <> rest -> Ok(#(l, rest))
"0" as char <> tail
| "1" as char <> tail
| "2" as char <> tail
| "3" as char <> tail
| "4" as char <> tail
| "5" as char <> tail
| "6" as char <> tail
| "7" as char <> tail
| "8" as char <> tail
| "9" as char <> tail
| "a" as char <> tail
| "b" as char <> tail
| "c" as char <> tail
| "d" as char <> tail
| "e" as char <> tail
| "f" as char <> tail
| "A" as char <> tail
| "B" as char <> tail
| "C" as char <> tail
| "D" as char <> tail
| "E" as char <> tail
| "F" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}
@@ -402,14 +403,6 @@ pub fn parse_hex_digits(str, min, max) {
parse_min_max(str, min, max, parse_hex_digit)
}
fn encoding_not_needed(i: Int) -> Bool {
// $-_.+!*'()
case i {
36 | 45 | 95 | 46 | 43 | 33 | 42 | 39 | 40 | 41 -> True
_ -> False
}
}
fn is_unreserved_char(i: Int) -> Bool {
case i {
45 | 46 | 95 | 126 -> True
@@ -659,13 +652,13 @@ pub fn do_percent_encode(str: String) -> String {
fn encode_codepoint(codepoint: Int) -> String {
case codepoint <= 127 {
True -> {
case is_unreserved_char(codepoint) || encoding_not_needed(codepoint) {
case is_unreserved_char(codepoint) {
True -> {
let assert Ok(cpnt) = string.utf_codepoint(codepoint)
string.from_utf_codepoints([cpnt])
}
False -> {
"%" <> int.to_base16(codepoint)
"%" <> string.pad_start(int.to_base16(codepoint), 2, "0")
}
}
}

View File

@@ -1,6 +1,6 @@
import gleam/string
import gleam/uri as uri2
import gluri as uri
import gluri/internal/parser
import glychee/benchmark
import glychee/configuration
@@ -10,48 +10,49 @@ pub fn main() {
configuration.set_pair(configuration.Warmup, 2)
configuration.set_pair(configuration.Parallel, 2)
// pop_benchmark()
parse_benchmark()
// reg_name_benchmark()
// ip_benchmark()
}
@target(erlang)
pub fn ip_benchmark() {
benchmark.run(
[
benchmark.Function("ip_benchmark", fn(data) {
fn() {
let _ = parser.parse_dec_octet(data)
Nil
}
}),
],
[
benchmark.Data("173", "173"),
benchmark.Data("5", "5"),
benchmark.Data("200", "200"),
benchmark.Data("255", "255"),
benchmark.Data("fail", "2b"),
],
)
}
// @target(erlang)
// pub fn ip_benchmark() {
// benchmark.run(
// [
// benchmark.Function("ip_benchmark", fn(data) {
// fn() {
// let _ = parser.parse_dec_octet(data)
// Nil
// }
// }),
// ],
// [
// benchmark.Data("173", "173"),
// benchmark.Data("5", "5"),
// benchmark.Data("200", "200"),
// benchmark.Data("255", "255"),
// benchmark.Data("fail", "2b"),
// ],
// )
// }
@target(erlang)
pub fn reg_name_benchmark() {
benchmark.run(
[
benchmark.Function("reg_name_benchmark", fn(data) {
fn() {
let _ = parser.parse_reg_name(data)
Nil
}
}),
],
[
benchmark.Data("long", "github.com"),
],
)
}
// @target(erlang)
// pub fn reg_name_benchmark() {
// benchmark.run(
// [
// benchmark.Function("reg_name_benchmark", fn(data) {
// fn() {
// let _ = parser.parse_reg_name(data)
// Nil
// }
// }),
// ],
// [
// benchmark.Data("long", "github.com"),
// ],
// )
// }
@target(erlang)
pub fn parse_benchmark() {
@@ -75,7 +76,288 @@ pub fn parse_benchmark() {
"long",
"https://github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
),
benchmark.Data(
"with user",
"https://test_name:user%20$$$@github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
),
benchmark.Data("ipv4", "https://192.255.36.4/"),
],
)
}
// Compares five ways of consuming a leading run of ASCII letters from a
// string (pop, pop2, pop3, pop4 and pop5).
//
// Each benchmark.Function maps the input data to a zero-argument closure,
// which is presumably what glychee times -- confirm against glychee's docs.
// The single data set is the full lower- and upper-case ASCII alphabet, so
// every candidate walks all 52 characters before stopping on the empty string.
@target(erlang)
pub fn pop_benchmark() {
  benchmark.run(
    [
      benchmark.Function("pop with range", fn(data) { fn() { pop(data, "") } }),
      benchmark.Function("pop check char", fn(data) { fn() { pop4(data, "") } }),
      benchmark.Function("pop check result", fn(data) {
        fn() { pop5(data, "") }
      }),
      benchmark.Function("letter as var <> tail", fn(data) {
        fn() { pop2(data, "") }
      }),
      benchmark.Function("letter <> tail", fn(data) { fn() { pop3(data, "") } }),
    ],
    [
      // Labelled after what the data actually is; the previous "with user"
      // label was left over from the URI parsing benchmark.
      benchmark.Data(
        "alphabet",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
      ),
    ],
  )
}
// Benchmark candidate "pop with range": pops one grapheme at a time and keeps
// recursing while its codepoint lies in 0x41-0x5A (A-Z) or 0x61-0x7A (a-z).
// Always evaluates to Nil. The second argument is discarded; recursive calls
// pass the character that was just matched.
pub fn pop(input, _) {
case string.pop_grapheme(input) {
Ok(#(char, tail)) -> {
// Panics unless the grapheme consists of exactly one codepoint.
let assert [codepoint] = string.to_utf_codepoints(char)
let i = string.utf_codepoint_to_int(codepoint)
case i {
_ if i >= 0x41 && i <= 0x5A -> pop(tail, char)
_ if i >= 0x61 && i <= 0x7A -> pop(tail, char)
// Any other character stops the walk.
_ -> Nil
}
}
// Empty input: nothing left to pop.
Error(_) -> Nil
}
}
// Benchmark candidate "letter as var <> tail": matches a leading ASCII letter
// with a string-prefix pattern, binds the matched letter to `j`, and recurses
// on the remainder. Always evaluates to Nil; stops on the first input that
// does not start with an ASCII letter (including the empty string).
// The second argument is discarded.
pub fn pop2(input, _) {
case input {
"a" as j <> tail
| "b" as j <> tail
| "c" as j <> tail
| "d" as j <> tail
| "e" as j <> tail
| "f" as j <> tail
| "g" as j <> tail
| "h" as j <> tail
| "i" as j <> tail
| "j" as j <> tail
| "k" as j <> tail
| "l" as j <> tail
| "m" as j <> tail
| "n" as j <> tail
| "o" as j <> tail
| "p" as j <> tail
| "q" as j <> tail
| "r" as j <> tail
| "s" as j <> tail
| "t" as j <> tail
| "u" as j <> tail
| "v" as j <> tail
| "w" as j <> tail
| "x" as j <> tail
| "y" as j <> tail
| "z" as j <> tail
| "A" as j <> tail
| "B" as j <> tail
| "C" as j <> tail
| "D" as j <> tail
| "E" as j <> tail
| "F" as j <> tail
| "G" as j <> tail
| "H" as j <> tail
| "I" as j <> tail
| "J" as j <> tail
| "K" as j <> tail
| "L" as j <> tail
| "M" as j <> tail
| "N" as j <> tail
| "O" as j <> tail
| "P" as j <> tail
| "Q" as j <> tail
| "R" as j <> tail
| "S" as j <> tail
| "T" as j <> tail
| "U" as j <> tail
| "V" as j <> tail
| "W" as j <> tail
| "X" as j <> tail
| "Y" as j <> tail
| "Z" as j <> tail -> pop2(tail, j)
// Not an ASCII letter, or nothing left.
_ -> Nil
}
}
// Benchmark candidate "letter <> tail": same walk as pop2, but the matched
// letter is not captured -- only the remainder is bound -- and "" is passed
// as the (discarded) second argument on recursion.
// Always evaluates to Nil; stops on the first input that does not start with
// an ASCII letter (including the empty string).
pub fn pop3(input, _) {
case input {
"a" <> tail
| "b" <> tail
| "c" <> tail
| "d" <> tail
| "e" <> tail
| "f" <> tail
| "g" <> tail
| "h" <> tail
| "i" <> tail
| "j" <> tail
| "k" <> tail
| "l" <> tail
| "m" <> tail
| "n" <> tail
| "o" <> tail
| "p" <> tail
| "q" <> tail
| "r" <> tail
| "s" <> tail
| "t" <> tail
| "u" <> tail
| "v" <> tail
| "w" <> tail
| "x" <> tail
| "y" <> tail
| "z" <> tail
| "A" <> tail
| "B" <> tail
| "C" <> tail
| "D" <> tail
| "E" <> tail
| "F" <> tail
| "G" <> tail
| "H" <> tail
| "I" <> tail
| "J" <> tail
| "K" <> tail
| "L" <> tail
| "M" <> tail
| "N" <> tail
| "O" <> tail
| "P" <> tail
| "Q" <> tail
| "R" <> tail
| "S" <> tail
| "T" <> tail
| "U" <> tail
| "V" <> tail
| "W" <> tail
| "X" <> tail
| "Y" <> tail
| "Z" <> tail -> pop3(tail, "")
// Not an ASCII letter, or nothing left.
_ -> Nil
}
}
// Benchmark candidate "pop check char": pops one grapheme with
// string.pop_grapheme, then compares the popped character against every ASCII
// letter literal in a second case expression, recursing on the remainder
// while it matches. Always evaluates to Nil. The second argument is
// discarded; recursive calls pass the character that was just matched.
pub fn pop4(input, _) {
case string.pop_grapheme(input) {
Ok(#(char, tail)) -> {
case char {
"a"
| "b"
| "c"
| "d"
| "e"
| "f"
| "g"
| "h"
| "i"
| "j"
| "k"
| "l"
| "m"
| "n"
| "o"
| "p"
| "q"
| "r"
| "s"
| "t"
| "u"
| "v"
| "w"
| "x"
| "y"
| "z"
| "A"
| "B"
| "C"
| "D"
| "E"
| "F"
| "G"
| "H"
| "I"
| "J"
| "K"
| "L"
| "M"
| "N"
| "O"
| "P"
| "Q"
| "R"
| "S"
| "T"
| "U"
| "V"
| "W"
| "X"
| "Y"
| "Z" -> pop4(tail, char)
// Any other character stops the walk.
_ -> Nil
}
}
// Empty input: nothing left to pop.
Error(_) -> Nil
}
}
// Benchmark candidate "pop check result": pops one grapheme with
// string.pop_grapheme and matches the letter directly inside the returned
// Ok(#(char, tail)) tuple, recursing on the remainder while it is an ASCII
// letter. Always evaluates to Nil. The second argument is discarded;
// recursive calls pass the character that was just matched.
//
// The recursion goes back into pop5 itself. It previously called pop4, which
// meant only the first character was handled by this strategy and the rest of
// the string was measured with pop4's.
pub fn pop5(input, _) {
  case string.pop_grapheme(input) {
    Ok(#("a" as char, tail))
    | Ok(#("b" as char, tail))
    | Ok(#("c" as char, tail))
    | Ok(#("d" as char, tail))
    | Ok(#("e" as char, tail))
    | Ok(#("f" as char, tail))
    | Ok(#("g" as char, tail))
    | Ok(#("h" as char, tail))
    | Ok(#("i" as char, tail))
    | Ok(#("j" as char, tail))
    | Ok(#("k" as char, tail))
    | Ok(#("l" as char, tail))
    | Ok(#("m" as char, tail))
    | Ok(#("n" as char, tail))
    | Ok(#("o" as char, tail))
    | Ok(#("p" as char, tail))
    | Ok(#("q" as char, tail))
    | Ok(#("r" as char, tail))
    | Ok(#("s" as char, tail))
    | Ok(#("t" as char, tail))
    | Ok(#("u" as char, tail))
    | Ok(#("v" as char, tail))
    | Ok(#("w" as char, tail))
    | Ok(#("x" as char, tail))
    | Ok(#("y" as char, tail))
    | Ok(#("z" as char, tail))
    | Ok(#("A" as char, tail))
    | Ok(#("B" as char, tail))
    | Ok(#("C" as char, tail))
    | Ok(#("D" as char, tail))
    | Ok(#("E" as char, tail))
    | Ok(#("F" as char, tail))
    | Ok(#("G" as char, tail))
    | Ok(#("H" as char, tail))
    | Ok(#("I" as char, tail))
    | Ok(#("J" as char, tail))
    | Ok(#("K" as char, tail))
    | Ok(#("L" as char, tail))
    | Ok(#("M" as char, tail))
    | Ok(#("N" as char, tail))
    | Ok(#("O" as char, tail))
    | Ok(#("P" as char, tail))
    | Ok(#("Q" as char, tail))
    | Ok(#("R" as char, tail))
    | Ok(#("S" as char, tail))
    | Ok(#("T" as char, tail))
    | Ok(#("U" as char, tail))
    | Ok(#("V" as char, tail))
    | Ok(#("W" as char, tail))
    | Ok(#("X" as char, tail))
    | Ok(#("Y" as char, tail))
    | Ok(#("Z" as char, tail)) -> pop5(tail, char)
    // Non-letter character or empty input ends the walk.
    _ -> Nil
  }
}

16
test/benchmark.js Normal file
View File

@@ -0,0 +1,16 @@
// Micro-benchmark: runs gluri's parser and the Gleam stdlib URI parser on the
// same URI using mitata. Both parsers are loaded from the compiled JavaScript
// output under build/dev, so the project must be built for the JavaScript
// target before running this script.
import { run, bench, boxplot, summary } from "mitata";
import { parse as gluriParse } from "../build/dev/javascript/gluri/gluri.mjs";
import { parse as stdlibParse } from "../build/dev/javascript/gleam_stdlib/gleam/uri.mjs";

// Input shared by both candidates: scheme, userinfo with a percent-escape,
// host, path and fragment.
const SAMPLE_URI =
  "https://test_name:user%20$$$@github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480";

// Benchmark label -> parser under test, registered in this order.
const candidates = [
  ["parse", gluriParse],
  ["parse2", stdlibParse],
];

for (const [label, parseFn] of candidates) {
  bench(label, () => parseFn(SAMPLE_URI));
}

await run();

View File

@@ -9,6 +9,66 @@ pub fn main() {
startest.run(startest.default_config())
}
// General end-to-end checks for uri.parse:
// - "mailto parsing" pins the exact Uri produced for mailto URIs, with and
//   without a query and fragment.
// - "ai gen pass" lists URIs that must parse successfully.
// - "ai gen fail" lists malformed inputs that must be rejected.
//
// The pass/fail cases are table-driven through list.each, so both closures
// evaluate to Nil instead of one of them ending on a `let _ =` binding.
pub fn parse_general_tests() {
  describe("general parsing", [
    it("mailto parsing", fn() {
      uri.parse("mailto:Joe@example.com")
      |> should.equal(Ok(
        Uri(..empty, scheme: Some("mailto"), path: "Joe@example.com"),
      ))
      uri.parse("mailto:Joe@example.com?hello#bye")
      |> should.equal(Ok(
        Uri(
          ..empty,
          scheme: Some("mailto"),
          path: "Joe@example.com",
          query: Some("hello"),
          fragment: Some("bye"),
        ),
      ))
    }),
    it("ai gen pass", fn() {
      [
        "https://example.com",
        "http://www.example.org/resource?id=123&lang=en",
        "ftp://ftp.example.net/pub/files/archive.tar.gz",
        "mailto:user+alias@example.com",
        "urn:isbn:978-3-16-148410-0",
        "ws://socket.example.com:8080/chat?room=42#section2",
        "https://sub.domain.co.uk/path/to/resource/",
        "file:///C:/Windows/System32/drivers/etc/hosts",
        "git+ssh://git@example.com:2222/repo.git",
        "https://xn--fsqu00a.xn--0zwm56d/%E8%B7%AF%E5%BE%84?%E6%9F%A5%E8%AF%A2=%E5%80%BC#%E7%89%87%E6%AE%B5",
      ]
      |> list.each(fn(input) { uri.parse(input) |> should.be_ok })
    }),
    it("ai gen fail", fn() {
      [
        "ht!tp://example.com",
        "http://exa mple.com",
        "://missing-scheme.com",
        "http://example.com:80a/",
        "http://[2001:db8::1",
        "http://example.com/%ZZ",
        "http://example.com?%",
        "`https://example.com/invalid",
        "http://example.com?foo=bar%2",
        "http://example.com:12345abc/",
      ]
      |> list.each(fn(input) { uri.parse(input) |> should.be_error })
    }),
  ])
}
pub fn parse_scheme_tests() {
describe("scheme parsing", [
it("simple parse", fn() {
@@ -1086,37 +1146,31 @@ pub fn equivalence_tests() {
])
}
const percent_codec_fixtures = [
#(" ", "%20"),
#(",", "%2C"),
#(";", "%3B"),
#(":", "%3A"),
#("!", "!"),
#("?", "%3F"),
#("'", "'"),
#("(", "("),
#(")", ")"),
#("[", "%5B"),
#("@", "%40"),
#("/", "%2F"),
#("\\", "%5C"),
const percent_encode_examples = [
#("", ""),
#("%", "%25"),
#("%%", "%25%25"),
#(" \r\n\t\u{B}\f", "%20%0D%0A%09%0B%0C"),
#(
"-_.~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
"-_.~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
),
#("\u{0}", "%00"),
#("abc\u{00}def", "abc%00def"),
#("&", "%26"),
#("#", "%23"),
#("=", "%3D"),
#("~", "~"),
#("ñ", "%C3%B1"),
#("-", "-"),
#("_", "_"),
#(".", "."),
#("*", "*"),
#("+", "+"),
#("100% great+fun", "100%25%20great+fun"),
#(
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
"%21%22%23%24%25%26%27%28%29%2A%2B%2C-.%2F%3A%3B%3C%3D%3E%3F%40%5B%5C%5D%5E_%60%7B%7C%7D~",
),
#("“Aha”", "%E2%80%9CAha%E2%80%9D"),
#("\u{201C}Aha\u{201D}", "%E2%80%9CAha%E2%80%9D"),
#("*+,=>/", "%2A%2B%2C%3D%3E%2F"),
]
pub fn percent_encode_tests() {
describe("percent encoding", [
it("encoding", fn() {
percent_codec_fixtures
percent_encode_examples
|> list.map(fn(t) {
let #(a, b) = t
uri.percent_encode(a)
@@ -1125,7 +1179,7 @@ pub fn percent_encode_tests() {
Nil
}),
it("decoding", fn() {
percent_codec_fixtures
percent_encode_examples
|> list.map(fn(t) {
let #(a, b) = t
uri.percent_decode(b)