35 Commits

Author SHA1 Message Date
af619a54da docs: Updated changelog
Some checks failed
test / test (push) Has been cancelled
2025-10-27 19:19:06 +00:00
e51116c2b2 fix: Fix uri encoding/decoding and add new tests
Some checks failed
test / test (push) Has been cancelled
2025-10-27 19:13:37 +00:00
56e3682237 build: Update version & repository 2025-10-27 19:13:37 +00:00
7f631dc13c perf: Changed benchmarks titles 2025-10-27 19:13:37 +00:00
91807aac61 perf: Removed erlang/js specific parsing
Some checks failed
test / test (push) Has been cancelled
This currently seems to compile exactly the same in 1.12.0 and 1.13.0 so
therefore has a performance regression
2025-10-19 19:16:33 +01:00
7b7e689892 perf: Added benchmarks and changelog
Some checks failed
test / test (push) Has been cancelled
2025-10-03 11:15:59 +01:00
22d13bdf7d perf: Split parsers into erlang/js targets 2025-10-03 11:10:36 +01:00
3fc9a61afe build: Updated version 2025-10-03 10:59:46 +01:00
b9d1077425 perf: Revert back to pattern matching for ranges
May need to do more in depth checks and optimisations may need to be
removed if the core gleam compiler fixes the performance issues with
pattern matching + capture.
Possibly pattern matching may be better for smaller ranges but would
need to do benchmarks to find out what the cut-off point is.
2025-10-03 10:55:48 +01:00
3cd6d5d4af perf: Fix minor perf issue for JS 2025-09-22 12:55:09 +01:00
a00af69b56 build: Updated version 2025-09-22 11:53:13 +01:00
c6ee27fa7a docs: Updated changelog 2025-09-22 11:46:34 +01:00
5c4a444231 perf: Reworked ascii/digit parsing to speed up things 2025-09-22 11:45:20 +01:00
452117db63 test: Added some random uris for testing 2025-09-22 11:44:54 +01:00
5da4ea66b1 refactor: Rewrote bool.guard to standard pattern match 2025-09-18 18:22:34 +01:00
321e203778 build: Update erlang version for github actions 2025-09-17 18:24:19 +01:00
1ac5e05e1a build: Update stdlib dependency 2025-09-16 09:58:15 +01:00
cc110b414f docs: Updated changelog 2025-09-15 15:26:57 +01:00
246706d4fc test: Added tests for mailto 2025-09-15 15:03:36 +01:00
897124be27 perf: Tweak to userinfo parse to not repeat @ check 2025-09-15 14:39:39 +01:00
027f94e666 build: Update dependencies and increase version 2025-09-14 22:16:51 +01:00
869c5cf06c perf: Don't parse for userinfo if not necessary
If the URI doesn't contain @ then there can't be a userinfo so
completely ignore this part of parsing to improve performance
2025-09-14 22:14:49 +01:00
67798d1dcf style: Removed unused Stop/Continue imports 2025-09-14 21:37:08 +01:00
6131aa01e7 perf: Improved dec_octet parsing
Removed the list folding method and reverted to a standard
try_parser/parse_this_then method as used in the rest of the parser
2025-09-14 21:28:30 +01:00
2ee6741308 refactor: Renamed some fns and removed duplicate fns 2025-09-14 19:40:32 +01:00
e5b5545bd1 perf: Rewrote parse_min_max to avoid folding 2025-09-14 18:47:05 +01:00
47da8071cf refactor: Moved the order of functions around to follow the ABNF doc 2025-09-14 17:51:31 +01:00
8b8d3e577e perf: Add tweak to parse known schema more quickly
http, https, and a few other urls can be assumed to be possible schemes.
If we check for these then we cut down on the character by character
parsing that would otherwise happen
2025-09-14 17:11:04 +01:00
4cad0c5bc3 refactor: Correctly name abempty parse fn 2025-09-14 17:09:37 +01:00
0e293fc85e refactor: Tweak port parsing 2025-09-14 17:09:04 +01:00
4d29a5de5a test: Modified scratch tests 2025-09-14 12:03:30 +01:00
1174a17c97 perf: Optimisations for parsing between options
Added a specific parse try function which takes the essence of
list.fold_until but makes it specific for fn(String)->Result(#(a,
String),Nil) parsers

???

??
2025-09-14 12:03:29 +01:00
428bd53002 refactor: Changed tuple to type for clarity 2025-09-14 11:49:59 +01:00
91bfe0285f test: Added benchmark tests 2025-09-14 11:49:59 +01:00
e13f80c483 build: github action update 2025-09-14 11:49:58 +01:00
15 changed files with 1378 additions and 744 deletions

View File

@@ -14,10 +14,10 @@ jobs:
- uses: actions/checkout@v4
- uses: erlef/setup-beam@v1
with:
otp-version: "27.1.2"
otp-version: "28.1"
gleam-version: "1.12.0"
rebar3-version: "3"
# elixir-version: "1"
rebar3-version: "3.25.1"
elixir-version: "1.18.4"
- run: gleam deps download
- run: gleam test
- run: gleam format --check src test

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
*.ez
/build
erl_crash.dump
node_modules

View File

@@ -7,3 +7,23 @@
## v2.0.0
- Removed types.Uri. Now gluri uses the stdlib Uri type (and empty)
## v2.0.1
- Improved parsing performance significantly and reduced memory usage up to 50%
- Significantly improved IPV4 parsing performance
## v2.0.2
- Minor performance improvement for uris with userinfo
- More performance improvements for ascii/digit parsing
## v2.0.3
- Minor performance improvement for erlang
- Major performance improvement for js
## v2.0.4
- Reverted some optimisations as they are unnecessary for Gleam v1.14.0+
- Fixed URI percent-encoding/decoding

View File

@@ -1,12 +1,12 @@
name = "gluri"
version = "2.0.0"
version = "2.0.4"
# Fill out these fields if you intend to generate HTML documentation or publish
# your project to the Hex package manager.
#
description = "Uri (RFC 3986) library for Gleam"
licences = ["Apache-2.0"]
repository = { type = "github", user = "pendletong", repo = "uri" }
repository = { type = "gitea", host = "git.pendleton.ie", user = "pendletong", repo = "uri" }
links = [{ title = "RFC 3986", href = "https://www.ietf.org/rfc/rfc3986.txt" }]
#
# For a full reference of all the available options, you can have a look at
@@ -20,3 +20,4 @@ splitter = ">= 1.1.0 and < 2.0.0"
[dev-dependencies]
gleeunit = ">= 1.0.0 and < 2.0.0"
startest = ">= 0.7.0 and < 1.0.0"
glychee = ">= 1.1.2 and < 2.0.0"

View File

@@ -3,8 +3,10 @@
packages = [
{ name = "argv", version = "1.0.2", build_tools = ["gleam"], requirements = [], otp_app = "argv", source = "hex", outer_checksum = "BA1FF0929525DEBA1CE67256E5ADF77A7CDDFE729E3E3F57A5BDCAA031DED09D" },
{ name = "benchee", version = "1.5.0", build_tools = ["mix"], requirements = ["deep_merge", "statistex", "table"], otp_app = "benchee", source = "hex", outer_checksum = "5B075393AEA81B8AE74EADD1C28B1D87E8A63696C649D8293DB7C4DF3EB67535" },
{ name = "bigben", version = "1.0.1", build_tools = ["gleam"], requirements = ["birl", "gleam_erlang", "gleam_otp", "gleam_stdlib"], otp_app = "bigben", source = "hex", outer_checksum = "190E489610A80D76C48BACC75EB8314BD184FF0220AB0F251ABE760B993B91BB" },
{ name = "birl", version = "1.8.0", build_tools = ["gleam"], requirements = ["gleam_regexp", "gleam_stdlib", "ranger"], otp_app = "birl", source = "hex", outer_checksum = "2AC7BA26F998E3DFADDB657148BD5DDFE966958AD4D6D6957DD0D22E5B56C400" },
{ name = "deep_merge", version = "1.0.0", build_tools = ["mix"], requirements = [], otp_app = "deep_merge", source = "hex", outer_checksum = "CE708E5F094B9CD4E8F2BE4F00D2F4250C4095BE93F8CD6D018C753894885430" },
{ name = "exception", version = "2.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "exception", source = "hex", outer_checksum = "329D269D5C2A314F7364BD2711372B6F2C58FA6F39981572E5CA68624D291F8C" },
{ name = "filepath", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "B06A9AF0BF10E51401D64B98E4B627F1D2E48C154967DA7AF4D0914780A6D40A" },
{ name = "gleam_community_ansi", version = "1.4.3", build_tools = ["gleam"], requirements = ["gleam_community_colour", "gleam_regexp", "gleam_stdlib"], otp_app = "gleam_community_ansi", source = "hex", outer_checksum = "8A62AE9CC6EA65BEA630D95016D6C07E4F9973565FA3D0DE68DC4200D8E0DD27" },
@@ -12,23 +14,26 @@ packages = [
{ name = "gleam_erlang", version = "1.3.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_erlang", source = "hex", outer_checksum = "1124AD3AA21143E5AF0FC5CF3D9529F6DB8CA03E43A55711B60B6B7B3874375C" },
{ name = "gleam_javascript", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_javascript", source = "hex", outer_checksum = "EF6C77A506F026C6FB37941889477CD5E4234FCD4337FF0E9384E297CB8F97EB" },
{ name = "gleam_json", version = "3.0.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_json", source = "hex", outer_checksum = "874FA3C3BB6E22DD2BB111966BD40B3759E9094E05257899A7C08F5DE77EC049" },
{ name = "gleam_otp", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "7987CBEBC8060B88F14575DEF546253F3116EBE2A5DA6FD82F38243FCE97C54B" },
{ name = "gleam_otp", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "BA6A294E295E428EC1562DC1C11EA7530DCB981E8359134BEABC8493B7B2258E" },
{ name = "gleam_regexp", version = "1.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "9C215C6CA84A5B35BB934A9B61A9A306EC743153BE2B0425A0D032E477B062A9" },
{ name = "gleam_stdlib", version = "0.63.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "5E216C7D5E8BE22359C9D7DAA2CFBD66039BC12565542F34CD033C5BB57071ED" },
{ name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" },
{ name = "gleam_time", version = "1.4.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_time", source = "hex", outer_checksum = "DCDDC040CE97DA3D2A925CDBBA08D8A78681139745754A83998641C8A3F6587E" },
{ name = "gleam_yielder", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_yielder", source = "hex", outer_checksum = "8E4E4ECFA7982859F430C57F549200C7749823C106759F4A19A78AEA6687717A" },
{ name = "gleeunit", version = "1.6.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "FDC68A8C492B1E9B429249062CD9BAC9B5538C6FBF584817205D0998C42E1DAC" },
{ name = "glint", version = "1.2.1", build_tools = ["gleam"], requirements = ["gleam_community_ansi", "gleam_community_colour", "gleam_stdlib", "snag"], otp_app = "glint", source = "hex", outer_checksum = "2214C7CEFDE457CEE62140C3D4899B964E05236DA74E4243DFADF4AF29C382BB" },
{ name = "glychee", version = "1.1.2", build_tools = ["gleam"], requirements = ["benchee"], otp_app = "glychee", source = "hex", outer_checksum = "41784216C213F223095BB3FC3EDDB60CC537835B2340A868EA3931193F7F3824" },
{ name = "ranger", version = "1.4.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_yielder"], otp_app = "ranger", source = "hex", outer_checksum = "C8988E8F8CDBD3E7F4D8F2E663EF76490390899C2B2885A6432E942495B3E854" },
{ name = "simplifile", version = "2.3.0", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "0A868DAC6063D9E983477981839810DC2E553285AB4588B87E3E9C96A7FB4CB4" },
{ name = "snag", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "snag", source = "hex", outer_checksum = "7E9F06390040EB5FAB392CE642771484136F2EC103A92AE11BA898C8167E6E17" },
{ name = "splitter", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "splitter", source = "hex", outer_checksum = "05564A381580395DCDEFF4F88A64B021E8DAFA6540AE99B4623962F52976AA9D" },
{ name = "startest", version = "0.7.0", build_tools = ["gleam"], requirements = ["argv", "bigben", "birl", "exception", "gleam_community_ansi", "gleam_erlang", "gleam_javascript", "gleam_regexp", "gleam_stdlib", "glint", "simplifile", "tom"], otp_app = "startest", source = "hex", outer_checksum = "71B9CB82C4B8779A4BD54C7151DF7D0B0F778D0DDE805B782B44EFA7BA8F50DA" },
{ name = "statistex", version = "1.1.0", build_tools = ["mix"], requirements = [], otp_app = "statistex", source = "hex", outer_checksum = "F5950EA26AD43246BA2CCE54324AC394A4E7408FDCF98B8E230F503A0CBA9CF5" },
{ name = "tom", version = "2.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_time"], otp_app = "tom", source = "hex", outer_checksum = "74D0C5A3761F7A7D06994755D4D5AD854122EF8E9F9F76A3E7547606D8C77091" },
]
[requirements]
gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" }
gleeunit = { version = ">= 1.0.0 and < 2.0.0" }
glychee = { version = ">= 1.1.2 and < 2.0.0" }
splitter = { version = ">= 1.1.0 and < 2.0.0" }
startest = { version = ">= 0.7.0 and < 1.0.0" }

18
package-lock.json generated Normal file
View File

@@ -0,0 +1,18 @@
{
"name": "uri",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"mitata": "^1.0.34"
}
},
"node_modules/mitata": {
"version": "1.0.34",
"resolved": "https://registry.npmjs.org/mitata/-/mitata-1.0.34.tgz",
"integrity": "sha512-Mc3zrtNBKIMeHSCQ0XqRLo1vbdIx1wvFV9c8NJAiyho6AjNfMY8bVhbS12bwciUdd1t4rj8099CH3N3NFahaUA==",
"license": "MIT"
}
}
}

6
package.json Normal file
View File

@@ -0,0 +1,6 @@
{
"type": "module",
"dependencies": {
"mitata": "^1.0.34"
}
}

View File

@@ -3,7 +3,7 @@ import gleam/int
import gleam/list
import gleam/option.{Some}
import gleam/string
import gleam/uri.{type Uri, Uri}
import gleam/uri.{type Uri}
import gluri/internal/parser
import gluri/internal/utils

File diff suppressed because it is too large Load Diff

View File

@@ -7,17 +7,21 @@ import gleam/string
import gleam/uri.{type Uri, Uri}
import splitter.{type Splitter}
pub const scheme_port = [
#("http", 80),
#("https", 443),
#("ftp", 21),
#("ws", 80),
#("wss", 443),
type Scheme {
Scheme(name: String, port: Int)
}
const scheme_port = [
Scheme("http", 80),
Scheme("https", 443),
Scheme("ftp", 21),
Scheme("ws", 80),
Scheme("wss", 443),
]
pub fn get_port_for_scheme(scheme: String) -> Option(Int) {
list.find(scheme_port, fn(sp) { sp.0 == scheme })
|> result.map(fn(sp) { sp.1 })
list.find(scheme_port, fn(sp) { sp.name == scheme })
|> result.map(fn(sp) { sp.port })
|> option.from_result
}
@@ -82,6 +86,152 @@ fn merge_paths(base: Uri, relative: Uri) -> String {
}
}
/// Tries each parser in `list`, in order, against the same `static_data`
/// and returns the first `Ok` result unchanged.
///
/// Returns `Error(Nil)` when the list is empty or every parser fails.
pub fn try_parsers(
  over list: List(fn(String) -> Result(#(a, String), Nil)),
  against static_data: String,
) -> Result(#(a, String), Nil) {
  case list {
    [candidate, ..fallbacks] -> {
      let attempt = candidate(static_data)
      case attempt {
        // First success wins; later parsers are never run.
        Ok(_) -> attempt
        Error(_) -> try_parsers(over: fallbacks, against: static_data)
      }
    }
    [] -> Error(Nil)
  }
}
/// Repeatedly applies `parse_fn` to `str`, concatenating the string pieces it
/// yields, by delegating to `do_parse_min_max` with an empty accumulator.
///
/// `min` and `max` are passed through as the counters that decide when a
/// failure of `parse_fn` is acceptable and when to stop after a success.
/// Any error from `parse_fn` is reported as `Error(Nil)`.
pub fn parse_min_max(
str: f,
min: Int,
max: Int,
parse_fn: fn(f) -> Result(#(String, f), g),
) -> Result(#(String, f), Nil) {
do_parse_min_max(str, "", min, max, parse_fn)
}
/// Recursive worker behind `parse_min_max`.
///
/// Each successful `parse_fn` call appends its output to `acc` and decrements
/// both counters. Parsing stops with `Ok` right after a success that happens
/// while `max` is exactly 1. When `parse_fn` fails, the result is `Error(Nil)`
/// if `min` is still above zero, otherwise `Ok` with what was accumulated and
/// the unconsumed input.
///
/// The stop check only matches `max == 1`, so a `max` of 0 or less is never
/// treated as a limit: parsing then continues until `parse_fn` fails.
fn do_parse_min_max(
  str: d,
  acc: String,
  min: Int,
  max: Int,
  parse_fn: fn(d) -> Result(#(String, d), e),
) -> Result(#(String, d), Nil) {
  case parse_fn(str) {
    // Last permitted repetition: take it and stop.
    Ok(#(piece, remaining)) if max == 1 -> Ok(#(acc <> piece, remaining))
    Ok(#(piece, remaining)) ->
      do_parse_min_max(remaining, acc <> piece, min - 1, max - 1, parse_fn)
    // Failed before the required number of repetitions was reached.
    Error(_) if min > 0 -> Error(Nil)
    Error(_) -> Ok(#(acc, str))
  }
}
/// Runs `opt_fn` on `str` and returns its `#(parsed, rest)` pair on success.
///
/// When `opt_fn` fails, nothing is consumed: the result is `#("", str)`.
pub fn parse_optional(
  to_parse str: String,
  with opt_fn: fn(String) -> Result(#(String, String), Nil),
) -> #(String, String) {
  str
  |> opt_fn
  |> result.unwrap(#("", str))
}
/// Same as `parse_optional`, but wraps the pair in `Ok` so it has the
/// `Result(#(String, String), Nil)` shape used by the other parser functions.
/// Because `parse_optional` always produces a pair, this never returns `Error`.
pub fn parse_optional_result(
to_parse str: String,
with opt_fn: fn(String) -> Result(#(String, String), Nil),
) -> Result(#(String, String), Nil) {
parse_optional(str, opt_fn) |> Ok
}
/// Runs every parser in `parsers` in sequence, each one starting from the
/// input left over by the previous one, and concatenates their outputs.
///
/// Delegates to `do_parse_this_then` with an empty initial string. Returns
/// `Error(Nil)` as soon as any parser fails; an empty `parsers` list yields
/// `Ok(#("", str))`.
pub fn parse_this_then(
to_parse str: String,
with parsers: List(fn(String) -> Result(#(String, String), Nil)),
) -> Result(#(String, String), Nil) {
do_parse_this_then(str, "", parsers)
}
/// Recursive worker behind `parse_this_then`.
///
/// Applies the parsers one after another, threading the unconsumed input
/// through and appending each parser's output to `initial`. Stops with
/// `Error(Nil)` at the first failing parser; once the list is exhausted it
/// returns the accumulated string together with the remaining input.
fn do_parse_this_then(
  to_parse str: String,
  from initial: String,
  with parsers: List(fn(String) -> Result(#(String, String), Nil)),
) -> Result(#(String, String), Nil) {
  case parsers {
    [] -> Ok(#(initial, str))
    [parser, ..pending] ->
      case parser(str) {
        Error(_) -> Error(Nil)
        Ok(#(parsed, leftover)) ->
          do_parse_this_then(
            to_parse: leftover,
            from: initial <> parsed,
            with: pending,
          )
      }
  }
}
/// Applies `to_run` to `str` as many times as it keeps succeeding and returns
/// the concatenated output with the remaining input.
///
/// At least something must be produced: if the combined output is the empty
/// string the result is `Error(Nil)`.
pub fn parse_multiple(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
) -> Result(#(String, String), Nil) {
  case do_parse_multiple(to_parse: str, with: to_run, acc: "") {
    Ok(#(matched, remainder)) if matched != "" -> Ok(#(matched, remainder))
    _ -> Error(Nil)
  }
}
/// Recursive worker behind `parse_multiple`.
///
/// Keeps applying `to_run`, appending each output to `ret`, until the input
/// is empty or `to_run` fails. It always returns `Ok` with whatever has been
/// accumulated and the input that was not consumed; `to_run` is not called
/// on an empty input.
fn do_parse_multiple(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
  acc ret: String,
) -> Result(#(String, String), Nil) {
  case str {
    "" -> Ok(#(ret, ""))
    remaining ->
      case to_run(remaining) {
        Error(_) -> Ok(#(ret, remaining))
        Ok(#(chunk, leftover)) ->
          do_parse_multiple(to_parse: leftover, with: to_run, acc: ret <> chunk)
      }
  }
}
/// Merges a list of URIs into one, field by field.
///
/// Starting from an empty `Uri`, each URI in turn overrides the fields it
/// actually carries: an optional field replaces the accumulated one only when
/// it is `Some`, and `path` replaces it only when it is not the empty string.
/// Later URIs therefore take precedence over earlier ones.
pub fn combine_uris(uris: List(Uri)) -> Uri {
  let blank =
    Uri(
      scheme: None,
      userinfo: None,
      host: None,
      port: None,
      path: "",
      query: None,
      fragment: None,
    )
  use merged, next <- list.fold(uris, blank)
  Uri(
    scheme: option.or(next.scheme, merged.scheme),
    userinfo: option.or(next.userinfo, merged.userinfo),
    host: option.or(next.host, merged.host),
    port: option.or(next.port, merged.port),
    // An empty path means "not provided", so the accumulated one is kept.
    path: case next.path {
      "" -> merged.path
      provided -> provided
    },
    query: option.or(next.query, merged.query),
    fragment: option.or(next.fragment, merged.fragment),
  )
}
pub fn normalise(uri: Uri) -> Uri {
let percent_splitter = splitter.new(["%"])
let percent_normaliser = normalise_percent(percent_splitter, _)
@@ -220,40 +370,37 @@ fn unescape_percent(str: String) -> String {
}
}
pub fn parse_hex_digit(str) {
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
case str {
"0" as l <> rest
| "1" as l <> rest
| "2" as l <> rest
| "3" as l <> rest
| "4" as l <> rest
| "5" as l <> rest
| "6" as l <> rest
| "7" as l <> rest
| "8" as l <> rest
| "9" as l <> rest
| "a" as l <> rest
| "b" as l <> rest
| "c" as l <> rest
| "d" as l <> rest
| "e" as l <> rest
| "f" as l <> rest
| "A" as l <> rest
| "B" as l <> rest
| "C" as l <> rest
| "D" as l <> rest
| "E" as l <> rest
| "F" as l <> rest -> Ok(#(l, rest))
"0" as char <> tail
| "1" as char <> tail
| "2" as char <> tail
| "3" as char <> tail
| "4" as char <> tail
| "5" as char <> tail
| "6" as char <> tail
| "7" as char <> tail
| "8" as char <> tail
| "9" as char <> tail
| "a" as char <> tail
| "b" as char <> tail
| "c" as char <> tail
| "d" as char <> tail
| "e" as char <> tail
| "f" as char <> tail
| "A" as char <> tail
| "B" as char <> tail
| "C" as char <> tail
| "D" as char <> tail
| "E" as char <> tail
| "F" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}
fn encoding_not_needed(i: Int) -> Bool {
// $-_.+!*'()
case i {
36 | 45 | 95 | 46 | 43 | 33 | 42 | 39 | 40 | 41 -> True
_ -> False
}
pub fn parse_hex_digits(str, min, max) {
parse_min_max(str, min, max, parse_hex_digit)
}
fn is_unreserved_char(i: Int) -> Bool {
@@ -505,13 +652,13 @@ pub fn do_percent_encode(str: String) -> String {
fn encode_codepoint(codepoint: Int) -> String {
case codepoint <= 127 {
True -> {
case is_unreserved_char(codepoint) || encoding_not_needed(codepoint) {
case is_unreserved_char(codepoint) {
True -> {
let assert Ok(cpnt) = string.utf_codepoint(codepoint)
string.from_utf_codepoints([cpnt])
}
False -> {
"%" <> int.to_base16(codepoint)
"%" <> string.pad_start(int.to_base16(codepoint), 2, "0")
}
}
}

363
test/benchmark.gleam Normal file
View File

@@ -0,0 +1,363 @@
import gleam/string
import gleam/uri as uri2
import gluri as uri
import glychee/benchmark
import glychee/configuration
/// Benchmark entry point (Erlang target only).
///
/// Initialises the glychee configuration, sets the `Warmup` and `Parallel`
/// configuration pairs to 2, then runs `parse_benchmark`. The other
/// benchmarks are left commented out so they can be enabled one at a time.
@target(erlang)
pub fn main() {
configuration.initialize()
configuration.set_pair(configuration.Warmup, 2)
configuration.set_pair(configuration.Parallel, 2)
// pop_benchmark()
parse_benchmark()
// reg_name_benchmark()
// ip_benchmark()
}
// @target(erlang)
// pub fn ip_benchmark() {
// benchmark.run(
// [
// benchmark.Function("ip_benchmark", fn(data) {
// fn() {
// let _ = parser.parse_dec_octet(data)
// Nil
// }
// }),
// ],
// [
// benchmark.Data("173", "173"),
// benchmark.Data("5", "5"),
// benchmark.Data("200", "200"),
// benchmark.Data("255", "255"),
// benchmark.Data("fail", "2b"),
// ],
// )
// }
// @target(erlang)
// pub fn reg_name_benchmark() {
// benchmark.run(
// [
// benchmark.Function("reg_name_benchmark", fn(data) {
// fn() {
// let _ = parser.parse_reg_name(data)
// Nil
// }
// }),
// ],
// [
// benchmark.Data("long", "github.com"),
// ],
// )
// }
/// Benchmarks `gluri.parse` (imported as `uri`) against the stdlib
/// `gleam/uri.parse` (imported as `uri2`) on the same inputs.
///
/// Three inputs are used: a long https URL with a fragment, the same URL with
/// a userinfo part, and a URL whose host is an IPv4 address. The parse results
/// are discarded; only the call itself is being measured.
@target(erlang)
pub fn parse_benchmark() {
benchmark.run(
[
benchmark.Function("parse_benchmark", fn(data) {
fn() {
let _ = uri.parse(data)
Nil
}
}),
benchmark.Function("stdlib_parse_benchmark", fn(data) {
fn() {
let _ = uri2.parse(data)
Nil
}
}),
],
[
benchmark.Data(
"long",
"https://github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
),
benchmark.Data(
"with user",
"https://test_name:user%20$$$@github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
),
benchmark.Data("ipv4", "https://192.255.36.4/"),
],
)
}
/// Compares five ways of consuming a run of ASCII letters from the front of
/// a string: `pop` (code point ranges), `pop4` (match on the popped
/// grapheme), `pop5` (match on the `pop_grapheme` result), `pop2`
/// (`"x" as var <> tail` patterns) and `pop3` (`"x" <> tail` patterns).
///
/// The single active input is the lower- and upper-case alphabet.
@target(erlang)
pub fn pop_benchmark() {
benchmark.run(
[
benchmark.Function("pop with range", fn(data) { fn() { pop(data, "") } }),
benchmark.Function("pop check char", fn(data) { fn() { pop4(data, "") } }),
benchmark.Function("pop check result", fn(data) {
fn() { pop5(data, "") }
}),
benchmark.Function("letter as var <> tail", fn(data) {
fn() { pop2(data, "") }
}),
benchmark.Function("letter <> tail", fn(data) { fn() { pop3(data, "") } }),
],
[
// benchmark.Data("long", "abcdefghijklmnopqrstuvwxyz"),
// NOTE(review): the "with user" label looks copied from parse_benchmark;
// the data is just the alphabet - confirm and rename if unintended.
benchmark.Data(
"with user",
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
),
// benchmark.Data("ipv4", "https://192.255.36.4/"),
],
)
}
/// Benchmark subject: consumes `input` one grapheme at a time for as long as
/// each grapheme is an ASCII letter, decided by code point range
/// (0x41-0x5A or 0x61-0x7A). Always returns `Nil`; the second argument is
/// ignored and only exists so the matched character is passed along.
pub fn pop(input, _) {
case string.pop_grapheme(input) {
Ok(#(char, tail)) -> {
// Asserts the grapheme is exactly one code point; a grapheme made of
// several code points would fail this assertion.
let assert [codepoint] = string.to_utf_codepoints(char)
let i = string.utf_codepoint_to_int(codepoint)
case i {
_ if i >= 0x41 && i <= 0x5A -> pop(tail, char)
_ if i >= 0x61 && i <= 0x7A -> pop(tail, char)
_ -> Nil
}
}
Error(_) -> Nil
}
}
/// Benchmark subject: consumes leading ASCII letters from `input` using one
/// `"x" as j <> tail` string-prefix pattern per letter, so the matched letter
/// is captured and passed to the recursive call. Always returns `Nil`; the
/// second argument is ignored.
pub fn pop2(input, _) {
case input {
"a" as j <> tail
| "b" as j <> tail
| "c" as j <> tail
| "d" as j <> tail
| "e" as j <> tail
| "f" as j <> tail
| "g" as j <> tail
| "h" as j <> tail
| "i" as j <> tail
| "j" as j <> tail
| "k" as j <> tail
| "l" as j <> tail
| "m" as j <> tail
| "n" as j <> tail
| "o" as j <> tail
| "p" as j <> tail
| "q" as j <> tail
| "r" as j <> tail
| "s" as j <> tail
| "t" as j <> tail
| "u" as j <> tail
| "v" as j <> tail
| "w" as j <> tail
| "x" as j <> tail
| "y" as j <> tail
| "z" as j <> tail
| "A" as j <> tail
| "B" as j <> tail
| "C" as j <> tail
| "D" as j <> tail
| "E" as j <> tail
| "F" as j <> tail
| "G" as j <> tail
| "H" as j <> tail
| "I" as j <> tail
| "J" as j <> tail
| "K" as j <> tail
| "L" as j <> tail
| "M" as j <> tail
| "N" as j <> tail
| "O" as j <> tail
| "P" as j <> tail
| "Q" as j <> tail
| "R" as j <> tail
| "S" as j <> tail
| "T" as j <> tail
| "U" as j <> tail
| "V" as j <> tail
| "W" as j <> tail
| "X" as j <> tail
| "Y" as j <> tail
| "Z" as j <> tail -> pop2(tail, j)
// Anything else (including the empty string) ends the run.
_ -> Nil
}
}
/// Benchmark subject: consumes leading ASCII letters from `input` using one
/// `"x" <> tail` string-prefix pattern per letter, without capturing the
/// matched letter (the recursive call is given `""` instead). Always returns
/// `Nil`; the second argument is ignored.
pub fn pop3(input, _) {
case input {
"a" <> tail
| "b" <> tail
| "c" <> tail
| "d" <> tail
| "e" <> tail
| "f" <> tail
| "g" <> tail
| "h" <> tail
| "i" <> tail
| "j" <> tail
| "k" <> tail
| "l" <> tail
| "m" <> tail
| "n" <> tail
| "o" <> tail
| "p" <> tail
| "q" <> tail
| "r" <> tail
| "s" <> tail
| "t" <> tail
| "u" <> tail
| "v" <> tail
| "w" <> tail
| "x" <> tail
| "y" <> tail
| "z" <> tail
| "A" <> tail
| "B" <> tail
| "C" <> tail
| "D" <> tail
| "E" <> tail
| "F" <> tail
| "G" <> tail
| "H" <> tail
| "I" <> tail
| "J" <> tail
| "K" <> tail
| "L" <> tail
| "M" <> tail
| "N" <> tail
| "O" <> tail
| "P" <> tail
| "Q" <> tail
| "R" <> tail
| "S" <> tail
| "T" <> tail
| "U" <> tail
| "V" <> tail
| "W" <> tail
| "X" <> tail
| "Y" <> tail
| "Z" <> tail -> pop3(tail, "")
// Anything else (including the empty string) ends the run.
_ -> Nil
}
}
/// Benchmark subject: pops one grapheme at a time with `string.pop_grapheme`
/// and then matches the popped grapheme against every ASCII letter literal.
/// Recurses while the grapheme is a letter. Always returns `Nil`; the second
/// argument is ignored.
pub fn pop4(input, _) {
case string.pop_grapheme(input) {
Ok(#(char, tail)) -> {
case char {
"a"
| "b"
| "c"
| "d"
| "e"
| "f"
| "g"
| "h"
| "i"
| "j"
| "k"
| "l"
| "m"
| "n"
| "o"
| "p"
| "q"
| "r"
| "s"
| "t"
| "u"
| "v"
| "w"
| "x"
| "y"
| "z"
| "A"
| "B"
| "C"
| "D"
| "E"
| "F"
| "G"
| "H"
| "I"
| "J"
| "K"
| "L"
| "M"
| "N"
| "O"
| "P"
| "Q"
| "R"
| "S"
| "T"
| "U"
| "V"
| "W"
| "X"
| "Y"
| "Z" -> pop4(tail, char)
// First non-letter grapheme ends the run.
_ -> Nil
}
}
// pop_grapheme failed, so there is nothing left to pop.
Error(_) -> Nil
}
}
/// Benchmark subject: pops one grapheme at a time with `string.pop_grapheme`
/// and matches directly on the `Ok(#(letter, tail))` result, one alternative
/// per ASCII letter. Recurses while the grapheme is a letter. Always returns
/// `Nil`; the second argument is ignored.
///
/// The recursion goes back into `pop5` itself so that the whole run is
/// consumed with this strategy. It previously recursed into `pop4`, which
/// meant only the first character was handled by this function.
pub fn pop5(input, _) {
  case string.pop_grapheme(input) {
    Ok(#("a" as char, tail))
    | Ok(#("b" as char, tail))
    | Ok(#("c" as char, tail))
    | Ok(#("d" as char, tail))
    | Ok(#("e" as char, tail))
    | Ok(#("f" as char, tail))
    | Ok(#("g" as char, tail))
    | Ok(#("h" as char, tail))
    | Ok(#("i" as char, tail))
    | Ok(#("j" as char, tail))
    | Ok(#("k" as char, tail))
    | Ok(#("l" as char, tail))
    | Ok(#("m" as char, tail))
    | Ok(#("n" as char, tail))
    | Ok(#("o" as char, tail))
    | Ok(#("p" as char, tail))
    | Ok(#("q" as char, tail))
    | Ok(#("r" as char, tail))
    | Ok(#("s" as char, tail))
    | Ok(#("t" as char, tail))
    | Ok(#("u" as char, tail))
    | Ok(#("v" as char, tail))
    | Ok(#("w" as char, tail))
    | Ok(#("x" as char, tail))
    | Ok(#("y" as char, tail))
    | Ok(#("z" as char, tail))
    | Ok(#("A" as char, tail))
    | Ok(#("B" as char, tail))
    | Ok(#("C" as char, tail))
    | Ok(#("D" as char, tail))
    | Ok(#("E" as char, tail))
    | Ok(#("F" as char, tail))
    | Ok(#("G" as char, tail))
    | Ok(#("H" as char, tail))
    | Ok(#("I" as char, tail))
    | Ok(#("J" as char, tail))
    | Ok(#("K" as char, tail))
    | Ok(#("L" as char, tail))
    | Ok(#("M" as char, tail))
    | Ok(#("N" as char, tail))
    | Ok(#("O" as char, tail))
    | Ok(#("P" as char, tail))
    | Ok(#("Q" as char, tail))
    | Ok(#("R" as char, tail))
    | Ok(#("S" as char, tail))
    | Ok(#("T" as char, tail))
    | Ok(#("U" as char, tail))
    | Ok(#("V" as char, tail))
    | Ok(#("W" as char, tail))
    | Ok(#("X" as char, tail))
    | Ok(#("Y" as char, tail))
    | Ok(#("Z" as char, tail)) -> pop5(tail, char)
    // A non-letter grapheme or an exhausted input ends the run.
    _ -> Nil
  }
}

16
test/benchmark.js Normal file
View File

@@ -0,0 +1,16 @@
import { run, bench, boxplot, summary } from "mitata";
import { parse } from "../build/dev/javascript/gluri/gluri.mjs";
import { parse as parse2 } from "../build/dev/javascript/gleam_stdlib/gleam/uri.mjs";
// Same URI (with userinfo and a fragment) is fed to both parsers so the
// numbers are directly comparable.
const SAMPLE_URI =
  "https://test_name:user%20$$$@github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480";

// "parse" is gluri's parser, "parse2" is the Gleam stdlib one.
const contenders = [
  ["parse", parse],
  ["parse2", parse2],
];

for (const [label, parseFn] of contenders) {
  bench(label, () => parseFn(SAMPLE_URI));
}

await run();

View File

@@ -9,6 +9,66 @@ pub fn main() {
startest.run(startest.default_config())
}
/// General end-to-end checks for `uri.parse`: exact results for `mailto:`
/// URIs, plus two smoke lists of inputs that must parse and inputs that must
/// be rejected.
pub fn parse_general_tests() {
describe("general parsing", [
it("mailto parsing", fn() {
// No authority part here: everything after the scheme is expected in
// `path`, including the `@`.
uri.parse("mailto:Joe@example.com")
|> should.equal(Ok(
Uri(..empty, scheme: Some("mailto"), path: "Joe@example.com"),
))
uri.parse("mailto:Joe@example.com?hello#bye")
|> should.equal(Ok(
Uri(
..empty,
scheme: Some("mailto"),
path: "Joe@example.com",
query: Some("hello"),
fragment: Some("bye"),
),
))
}),
// Only checks that parsing succeeds; the parsed fields are not inspected.
it("ai gen pass", fn() {
let _ = uri.parse("https://example.com") |> should.be_ok
let _ =
uri.parse("http://www.example.org/resource?id=123&lang=en")
|> should.be_ok
let _ =
uri.parse("ftp://ftp.example.net/pub/files/archive.tar.gz")
|> should.be_ok
let _ = uri.parse("mailto:user+alias@example.com") |> should.be_ok
let _ = uri.parse("urn:isbn:978-3-16-148410-0") |> should.be_ok
let _ =
uri.parse("ws://socket.example.com:8080/chat?room=42#section2")
|> should.be_ok
let _ =
uri.parse("https://sub.domain.co.uk/path/to/resource/") |> should.be_ok
let _ =
uri.parse("file:///C:/Windows/System32/drivers/etc/hosts")
|> should.be_ok
let _ =
uri.parse("git+ssh://git@example.com:2222/repo.git") |> should.be_ok
let _ =
uri.parse(
"https://xn--fsqu00a.xn--0zwm56d/%E8%B7%AF%E5%BE%84?%E6%9F%A5%E8%AF%A2=%E5%80%BC#%E7%89%87%E6%AE%B5",
)
|> should.be_ok
Nil
}),
// Each input is expected to be rejected: bad scheme characters, a space in
// the host, a missing scheme, non-numeric ports, an unclosed IP literal,
// and malformed or truncated percent-escapes.
it("ai gen fail", fn() {
let _ = uri.parse("ht!tp://example.com") |> should.be_error
let _ = uri.parse("http://exa mple.com") |> should.be_error
let _ = uri.parse("://missing-scheme.com") |> should.be_error
let _ = uri.parse("http://example.com:80a/") |> should.be_error
let _ = uri.parse("http://[2001:db8::1") |> should.be_error
let _ = uri.parse("http://example.com/%ZZ") |> should.be_error
let _ = uri.parse("http://example.com?%") |> should.be_error
// The leading backtick is part of the test input.
let _ = uri.parse("`https://example.com/invalid") |> should.be_error
let _ = uri.parse("http://example.com?foo=bar%2") |> should.be_error
let _ = uri.parse("http://example.com:12345abc/") |> should.be_error
}),
])
}
pub fn parse_scheme_tests() {
describe("scheme parsing", [
it("simple parse", fn() {
@@ -1086,37 +1146,31 @@ pub fn equivalence_tests() {
])
}
const percent_codec_fixtures = [
#(" ", "%20"),
#(",", "%2C"),
#(";", "%3B"),
#(":", "%3A"),
#("!", "!"),
#("?", "%3F"),
#("'", "'"),
#("(", "("),
#(")", ")"),
#("[", "%5B"),
#("@", "%40"),
#("/", "%2F"),
#("\\", "%5C"),
const percent_encode_examples = [
#("", ""),
#("%", "%25"),
#("%%", "%25%25"),
#(" \r\n\t\u{B}\f", "%20%0D%0A%09%0B%0C"),
#(
"-_.~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
"-_.~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
),
#("\u{0}", "%00"),
#("abc\u{00}def", "abc%00def"),
#("&", "%26"),
#("#", "%23"),
#("=", "%3D"),
#("~", "~"),
#("ñ", "%C3%B1"),
#("-", "-"),
#("_", "_"),
#(".", "."),
#("*", "*"),
#("+", "+"),
#("100% great+fun", "100%25%20great+fun"),
#(
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
"%21%22%23%24%25%26%27%28%29%2A%2B%2C-.%2F%3A%3B%3C%3D%3E%3F%40%5B%5C%5D%5E_%60%7B%7C%7D~",
),
#("“Aha”", "%E2%80%9CAha%E2%80%9D"),
#("\u{201C}Aha\u{201D}", "%E2%80%9CAha%E2%80%9D"),
#("*+,=>/", "%2A%2B%2C%3D%3E%2F"),
]
pub fn percent_encode_tests() {
describe("percent encoding", [
it("encoding", fn() {
percent_codec_fixtures
percent_encode_examples
|> list.map(fn(t) {
let #(a, b) = t
uri.percent_encode(a)
@@ -1125,7 +1179,7 @@ pub fn percent_encode_tests() {
Nil
}),
it("decoding", fn() {
percent_codec_fixtures
percent_encode_examples
|> list.map(fn(t) {
let #(a, b) = t
uri.percent_decode(b)

View File

@@ -1,12 +1,20 @@
// import gleam/result
// import gleam/uri as uri2
import gleam/uri as uri2
// import splitter
// import types.{Uri}
import gluri as uri
pub fn main() {
uri.parse("http://my_host.com") |> echo
// uri.parse("https://192.255.36.4/") |> echo
// uri.parse(
// "https://github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
// )
// |> echo
let _ = uri.parse("/abc/def") |> echo
let _ = uri2.parse("/abc/def") |> echo
let _ = uri.parse("/abc/") |> echo
Nil
}

View File

@@ -90,5 +90,3 @@ RFC 3986 URI Generic Syntax January 2005
gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
/ "*" / "+" / "," / ";" / "="
" # % < > [ \ ] ^ ` { | }