15 Commits

Author SHA1 Message Date
027f94e666 build: Update dependencies and increase version 2025-09-14 22:16:51 +01:00
869c5cf06c perf: Don't parse for userinfo if not necessary
If the URI doesn't contain @ then there can't be a userinfo so
completely ignore this part of parsing to improve performance
2025-09-14 22:14:49 +01:00
67798d1dcf style: Removed unused Stop/Continue imports 2025-09-14 21:37:08 +01:00
6131aa01e7 perf: Improved dec_octet parsing
Removed the list folding method and reverted to a standard
try_parser/parse_this_then method as used in the rest of the parser
2025-09-14 21:28:30 +01:00
2ee6741308 refactor: Renamed some fns and removed duplicate fns 2025-09-14 19:40:32 +01:00
e5b5545bd1 perf: Rewrote parse_min_max to avoid folding 2025-09-14 18:47:05 +01:00
47da8071cf refactor: Moved the order of functions around to follow the ABNF doc 2025-09-14 17:51:31 +01:00
8b8d3e577e perf: Add tweak to parse known schemes more quickly
http, https, and a few other common schemes are checked for first.
If one of them matches then we cut down on the character-by-character
parsing that would otherwise happen
2025-09-14 17:11:04 +01:00
4cad0c5bc3 refactor: Correctly name abempty parse fn 2025-09-14 17:09:37 +01:00
0e293fc85e refactor: Tweak port parsing 2025-09-14 17:09:04 +01:00
4d29a5de5a test: Modified scratch tests 2025-09-14 12:03:30 +01:00
1174a17c97 perf: Optimisations for parsing between options
Added a specific parse try function which takes the essence of
list.fold_until but makes it specific for fn(String)->Result(#(a,
String),Nil) parsers

???

??
2025-09-14 12:03:29 +01:00
428bd53002 refactor: Changed tuple to type for clarity 2025-09-14 11:49:59 +01:00
91bfe0285f test: Added benchmark tests 2025-09-14 11:49:59 +01:00
e13f80c483 build: github action update 2025-09-14 11:49:58 +01:00
10 changed files with 873 additions and 635 deletions

View File

@@ -14,10 +14,10 @@ jobs:
- uses: actions/checkout@v4
- uses: erlef/setup-beam@v1
with:
otp-version: "27.1.2"
otp-version: "28.0.4"
gleam-version: "1.12.0"
rebar3-version: "3"
# elixir-version: "1"
rebar3-version: "3.25.1"
elixir-version: "1.18.4"
- run: gleam deps download
- run: gleam test
- run: gleam format --check src test

View File

@@ -7,3 +7,8 @@
## v2.0.0
- Removed types.Uri. Now gluri uses the stdlib Uri type (and empty)
## v2.0.1
- Improved parsing performance significantly and reduced memory usage by up to 50%
- Significantly improved IPv4 parsing performance

View File

@@ -1,5 +1,5 @@
name = "gluri"
version = "2.0.0"
version = "2.0.1"
# Fill out these fields if you intend to generate HTML documentation or publish
# your project to the Hex package manager.
@@ -20,3 +20,4 @@ splitter = ">= 1.1.0 and < 2.0.0"
[dev-dependencies]
gleeunit = ">= 1.0.0 and < 2.0.0"
startest = ">= 0.7.0 and < 1.0.0"
glychee = ">= 1.1.2 and < 2.0.0"

View File

@@ -3,8 +3,10 @@
packages = [
{ name = "argv", version = "1.0.2", build_tools = ["gleam"], requirements = [], otp_app = "argv", source = "hex", outer_checksum = "BA1FF0929525DEBA1CE67256E5ADF77A7CDDFE729E3E3F57A5BDCAA031DED09D" },
{ name = "benchee", version = "1.4.0", build_tools = ["mix"], requirements = ["deep_merge", "statistex", "table"], otp_app = "benchee", source = "hex", outer_checksum = "299CD10DD8CE51C9EA3DDB74BB150F93D25E968F93E4C1FA31698A8E4FA5D715" },
{ name = "bigben", version = "1.0.1", build_tools = ["gleam"], requirements = ["birl", "gleam_erlang", "gleam_otp", "gleam_stdlib"], otp_app = "bigben", source = "hex", outer_checksum = "190E489610A80D76C48BACC75EB8314BD184FF0220AB0F251ABE760B993B91BB" },
{ name = "birl", version = "1.8.0", build_tools = ["gleam"], requirements = ["gleam_regexp", "gleam_stdlib", "ranger"], otp_app = "birl", source = "hex", outer_checksum = "2AC7BA26F998E3DFADDB657148BD5DDFE966958AD4D6D6957DD0D22E5B56C400" },
{ name = "deep_merge", version = "1.0.0", build_tools = ["mix"], requirements = [], otp_app = "deep_merge", source = "hex", outer_checksum = "CE708E5F094B9CD4E8F2BE4F00D2F4250C4095BE93F8CD6D018C753894885430" },
{ name = "exception", version = "2.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "exception", source = "hex", outer_checksum = "329D269D5C2A314F7364BD2711372B6F2C58FA6F39981572E5CA68624D291F8C" },
{ name = "filepath", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "B06A9AF0BF10E51401D64B98E4B627F1D2E48C154967DA7AF4D0914780A6D40A" },
{ name = "gleam_community_ansi", version = "1.4.3", build_tools = ["gleam"], requirements = ["gleam_community_colour", "gleam_regexp", "gleam_stdlib"], otp_app = "gleam_community_ansi", source = "hex", outer_checksum = "8A62AE9CC6EA65BEA630D95016D6C07E4F9973565FA3D0DE68DC4200D8E0DD27" },
@@ -14,21 +16,24 @@ packages = [
{ name = "gleam_json", version = "3.0.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_json", source = "hex", outer_checksum = "874FA3C3BB6E22DD2BB111966BD40B3759E9094E05257899A7C08F5DE77EC049" },
{ name = "gleam_otp", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "7987CBEBC8060B88F14575DEF546253F3116EBE2A5DA6FD82F38243FCE97C54B" },
{ name = "gleam_regexp", version = "1.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "9C215C6CA84A5B35BB934A9B61A9A306EC743153BE2B0425A0D032E477B062A9" },
{ name = "gleam_stdlib", version = "0.63.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "5E216C7D5E8BE22359C9D7DAA2CFBD66039BC12565542F34CD033C5BB57071ED" },
{ name = "gleam_stdlib", version = "0.63.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "E1D5EC07638F606E48F0EA1556044DD805F2ACE9092A6F6AFBE4A0CC4DA21C2F" },
{ name = "gleam_time", version = "1.4.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_time", source = "hex", outer_checksum = "DCDDC040CE97DA3D2A925CDBBA08D8A78681139745754A83998641C8A3F6587E" },
{ name = "gleam_yielder", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_yielder", source = "hex", outer_checksum = "8E4E4ECFA7982859F430C57F549200C7749823C106759F4A19A78AEA6687717A" },
{ name = "gleeunit", version = "1.6.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "FDC68A8C492B1E9B429249062CD9BAC9B5538C6FBF584817205D0998C42E1DAC" },
{ name = "glint", version = "1.2.1", build_tools = ["gleam"], requirements = ["gleam_community_ansi", "gleam_community_colour", "gleam_stdlib", "snag"], otp_app = "glint", source = "hex", outer_checksum = "2214C7CEFDE457CEE62140C3D4899B964E05236DA74E4243DFADF4AF29C382BB" },
{ name = "glychee", version = "1.1.2", build_tools = ["gleam"], requirements = ["benchee"], otp_app = "glychee", source = "hex", outer_checksum = "41784216C213F223095BB3FC3EDDB60CC537835B2340A868EA3931193F7F3824" },
{ name = "ranger", version = "1.4.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_yielder"], otp_app = "ranger", source = "hex", outer_checksum = "C8988E8F8CDBD3E7F4D8F2E663EF76490390899C2B2885A6432E942495B3E854" },
{ name = "simplifile", version = "2.3.0", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "0A868DAC6063D9E983477981839810DC2E553285AB4588B87E3E9C96A7FB4CB4" },
{ name = "snag", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "snag", source = "hex", outer_checksum = "7E9F06390040EB5FAB392CE642771484136F2EC103A92AE11BA898C8167E6E17" },
{ name = "splitter", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "splitter", source = "hex", outer_checksum = "05564A381580395DCDEFF4F88A64B021E8DAFA6540AE99B4623962F52976AA9D" },
{ name = "startest", version = "0.7.0", build_tools = ["gleam"], requirements = ["argv", "bigben", "birl", "exception", "gleam_community_ansi", "gleam_erlang", "gleam_javascript", "gleam_regexp", "gleam_stdlib", "glint", "simplifile", "tom"], otp_app = "startest", source = "hex", outer_checksum = "71B9CB82C4B8779A4BD54C7151DF7D0B0F778D0DDE805B782B44EFA7BA8F50DA" },
{ name = "statistex", version = "1.1.0", build_tools = ["mix"], requirements = [], otp_app = "statistex", source = "hex", outer_checksum = "F5950EA26AD43246BA2CCE54324AC394A4E7408FDCF98B8E230F503A0CBA9CF5" },
{ name = "tom", version = "2.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_time"], otp_app = "tom", source = "hex", outer_checksum = "74D0C5A3761F7A7D06994755D4D5AD854122EF8E9F9F76A3E7547606D8C77091" },
]
[requirements]
gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" }
gleeunit = { version = ">= 1.0.0 and < 2.0.0" }
glychee = { version = ">= 1.1.2 and < 2.0.0" }
splitter = { version = ">= 1.1.0 and < 2.0.0" }
startest = { version = ">= 0.7.0 and < 1.0.0" }

View File

@@ -3,7 +3,7 @@ import gleam/int
import gleam/list
import gleam/option.{Some}
import gleam/string
import gleam/uri.{type Uri, Uri}
import gleam/uri.{type Uri}
import gluri/internal/parser
import gluri/internal/utils

File diff suppressed because it is too large Load Diff

View File

@@ -7,17 +7,21 @@ import gleam/string
import gleam/uri.{type Uri, Uri}
import splitter.{type Splitter}
pub const scheme_port = [
#("http", 80),
#("https", 443),
#("ftp", 21),
#("ws", 80),
#("wss", 443),
type Scheme {
Scheme(name: String, port: Int)
}
const scheme_port = [
Scheme("http", 80),
Scheme("https", 443),
Scheme("ftp", 21),
Scheme("ws", 80),
Scheme("wss", 443),
]
pub fn get_port_for_scheme(scheme: String) -> Option(Int) {
list.find(scheme_port, fn(sp) { sp.0 == scheme })
|> result.map(fn(sp) { sp.1 })
list.find(scheme_port, fn(sp) { sp.name == scheme })
|> result.map(fn(sp) { sp.port })
|> option.from_result
}
@@ -82,6 +86,152 @@ fn merge_paths(base: Uri, relative: Uri) -> String {
}
}
/// Tries each parser in order against the same input and returns the first
/// `Ok` result unchanged.
///
/// Returns `Error(Nil)` when the list is empty or every parser fails.
pub fn try_parsers(
  over parsers: List(fn(String) -> Result(#(a, String), Nil)),
  against static_data: String,
) -> Result(#(a, String), Nil) {
  case parsers {
    // Nothing left to try: no alternative matched.
    [] -> Error(Nil)
    [parser, ..remaining] ->
      case parser(static_data) {
        Ok(_) as success -> success
        // Every attempt restarts from the untouched input.
        Error(_) -> try_parsers(remaining, static_data)
      }
  }
}
/// Applies `parse_fn` repeatedly to `str`, concatenating the parsed pieces.
///
/// This is the entry point for `do_parse_min_max`: it starts the accumulator
/// as the empty string and passes `min` and `max` through unchanged.
pub fn parse_min_max(
  str: input,
  min: Int,
  max: Int,
  parse_fn: fn(input) -> Result(#(String, input), error),
) -> Result(#(String, input), Nil) {
  str |> do_parse_min_max("", min, max, parse_fn)
}
/// Recursive worker behind `parse_min_max`.
///
/// Each successful `parse_fn` call appends its output to `acc` and decrements
/// both `min` and `max`.
///
/// - When `parse_fn` fails, the result is `Error(Nil)` if `min` is still above
///   zero, otherwise `Ok` with what has been accumulated and the unconsumed
///   input.
/// - When `parse_fn` succeeds while `max` is exactly 1, parsing stops and the
///   accumulated string is returned with the remaining input.
/// - Any other `max` (including 0 or a negative number) keeps going, so such
///   values only stop when `parse_fn` fails.
pub fn do_parse_min_max(
  str: d,
  acc: String,
  min: Int,
  max: Int,
  parse_fn: fn(d) -> Result(#(String, d), e),
) -> Result(#(String, d), Nil) {
  case parse_fn(str), max {
    // Failed before the required number of matches was reached.
    Error(_), _ if min > 0 -> Error(Nil)
    // Failed after the minimum was satisfied: keep what we have.
    Error(_), _ -> Ok(#(acc, str))
    // This match used up the last allowed repetition.
    Ok(#(piece, rest)), 1 -> Ok(#(acc <> piece, rest))
    Ok(#(piece, rest)), _ ->
      do_parse_min_max(rest, acc <> piece, min - 1, max - 1, parse_fn)
  }
}
/// Runs `opt_fn` on the input and treats a failure as "nothing matched".
///
/// On success the parser's own `#(parsed, rest)` pair is returned; on failure
/// the result is `#("", str)`, i.e. an empty match with the input untouched.
pub fn parse_optional(
  to_parse input: String,
  with parser: fn(String) -> Result(#(String, String), Nil),
) -> #(String, String) {
  case parser(input) {
    Ok(parsed) -> parsed
    Error(_) -> #("", input)
  }
}
/// Same as `parse_optional`, but wraps the pair in `Ok` so it has the same
/// shape as the other `Result`-returning parsers. It never returns an error.
pub fn parse_optional_result(
  to_parse input: String,
  with parser: fn(String) -> Result(#(String, String), Nil),
) -> Result(#(String, String), Nil) {
  Ok(parse_optional(input, parser))
}
/// Runs the given parsers one after another, each on the input left over by
/// the previous one, and concatenates their outputs.
///
/// Delegates to `do_parse_this_then` with an empty starting string; the result
/// is `Error(Nil)` as soon as any parser in the list fails.
pub fn parse_this_then(
  to_parse input: String,
  with parsers: List(fn(String) -> Result(#(String, String), Nil)),
) -> Result(#(String, String), Nil) {
  input |> do_parse_this_then("", parsers)
}
/// Recursive worker for `parse_this_then`.
///
/// `initial` carries the text matched so far. Each parser consumes from the
/// remainder left by the one before it; the first failure aborts the whole
/// sequence with `Error(Nil)`. An empty parser list returns what has been
/// accumulated together with the remaining input.
fn do_parse_this_then(
  to_parse input: String,
  from matched_so_far: String,
  with parsers: List(fn(String) -> Result(#(String, String), Nil)),
) -> Result(#(String, String), Nil) {
  case parsers {
    [] -> Ok(#(matched_so_far, input))
    [parser, ..remaining] ->
      case parser(input) {
        Error(_) -> Error(Nil)
        Ok(#(matched, leftover)) ->
          do_parse_this_then(leftover, matched_so_far <> matched, remaining)
      }
  }
}
/// Applies `to_run` as many times as it keeps succeeding and returns the
/// concatenated matches with the remaining input.
///
/// At least some text must be matched: if the accumulated result is the empty
/// string (or the worker reports an error) the result is `Error(Nil)`.
pub fn parse_multiple(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
) -> Result(#(String, String), Nil) {
  case do_parse_multiple(str, to_run, "") {
    Ok(#(matched, rest)) if matched != "" -> Ok(#(matched, rest))
    // Either nothing was consumed or the worker failed.
    _ -> Error(Nil)
  }
}
/// Recursive worker for `parse_multiple`.
///
/// Keeps calling `to_run` on the remaining input, appending each match to
/// `ret`, until the input is empty or `to_run` fails. It always returns `Ok`
/// with whatever was accumulated and the unconsumed input; `to_run` is not
/// called at all on an empty string.
fn do_parse_multiple(
  to_parse str: String,
  with to_run: fn(String) -> Result(#(String, String), Nil),
  acc ret: String,
) -> Result(#(String, String), Nil) {
  case str {
    "" -> Ok(#(ret, ""))
    remaining ->
      case to_run(remaining) {
        Error(_) -> Ok(#(ret, remaining))
        Ok(#(matched, rest)) -> do_parse_multiple(rest, to_run, ret <> matched)
      }
  }
}
/// Folds a list of URIs into one, field by field.
///
/// Starting from a URI with every optional field `None` and an empty path,
/// each URI in the list overrides a field of the running result only where it
/// actually carries a value (`Some(..)`, or a non-empty path). Later URIs
/// therefore win over earlier ones, and fields never revert to empty.
pub fn combine_uris(uris: List(Uri)) -> Uri {
  let empty =
    Uri(
      scheme: None,
      userinfo: None,
      host: None,
      port: None,
      path: "",
      query: None,
      fragment: None,
    )
  list.fold(over: uris, from: empty, with: fn(merged, next) {
    Uri(
      // `option.or` keeps the first argument when it is `Some`, otherwise it
      // falls back to the value merged so far.
      scheme: option.or(next.scheme, merged.scheme),
      userinfo: option.or(next.userinfo, merged.userinfo),
      host: option.or(next.host, merged.host),
      port: option.or(next.port, merged.port),
      // The path is a plain string, so "empty" plays the role of `None`.
      path: case next.path {
        "" -> merged.path
        path -> path
      },
      query: option.or(next.query, merged.query),
      fragment: option.or(next.fragment, merged.fragment),
    )
  })
}
pub fn normalise(uri: Uri) -> Uri {
let percent_splitter = splitter.new(["%"])
let percent_normaliser = normalise_percent(percent_splitter, _)
@@ -248,6 +398,10 @@ pub fn parse_hex_digit(str) {
}
}
/// Applies `parse_hex_digit` repeatedly through `parse_min_max`, passing the
/// `min` and `max` repetition bounds straight through.
pub fn parse_hex_digits(str, min, max) {
  str |> parse_min_max(min, max, parse_hex_digit)
}
fn encoding_not_needed(i: Int) -> Bool {
// $-_.+!*'()
case i {

81
test/benchmark.gleam Normal file
View File

@@ -0,0 +1,81 @@
import gleam/uri as uri2
import gluri as uri
import gluri/internal/parser
import glychee/benchmark
import glychee/configuration
/// Benchmark entry point (Erlang target only): initialises the glychee
/// configuration, sets two options, then runs the enabled benchmark.
@target(erlang)
pub fn main() {
configuration.initialize()
// NOTE(review): both options are set to 2; presumably these map to Benchee's
// warmup time and number of parallel processes - confirm against glychee.
configuration.set_pair(configuration.Warmup, 2)
configuration.set_pair(configuration.Parallel, 2)
// Only the whole-URI benchmark is enabled; the narrower benchmarks below are
// left commented out.
parse_benchmark()
// reg_name_benchmark()
// ip_benchmark()
}
/// Benchmarks `parser.parse_dec_octet` on a handful of short inputs,
/// including one labelled "fail" (`"2b"`). The parse result is discarded.
@target(erlang)
pub fn ip_benchmark() {
  let functions = [
    benchmark.Function("ip_benchmark", fn(data) {
      fn() {
        let _ = parser.parse_dec_octet(data)
        Nil
      }
    }),
  ]
  let inputs = [
    benchmark.Data("173", "173"),
    benchmark.Data("5", "5"),
    benchmark.Data("200", "200"),
    benchmark.Data("255", "255"),
    benchmark.Data("fail", "2b"),
  ]
  benchmark.run(functions, inputs)
}
/// Benchmarks `parser.parse_reg_name` on a single host name input. The parse
/// result is discarded.
@target(erlang)
pub fn reg_name_benchmark() {
  let functions = [
    benchmark.Function("reg_name_benchmark", fn(data) {
      fn() {
        let _ = parser.parse_reg_name(data)
        Nil
      }
    }),
  ]
  let inputs = [benchmark.Data("long", "github.com")]
  benchmark.run(functions, inputs)
}
/// Benchmarks this package's `uri.parse` against the standard library's
/// `gleam/uri` parser (imported here as `uri2`) on the same inputs: one long
/// https URL with a fragment and one URL with an IPv4 host. Parse results are
/// discarded.
@target(erlang)
pub fn parse_benchmark() {
  let functions = [
    benchmark.Function("parse_benchmark", fn(data) {
      fn() {
        let _ = uri.parse(data)
        Nil
      }
    }),
    benchmark.Function("stdlib_parse_benchmark", fn(data) {
      fn() {
        let _ = uri2.parse(data)
        Nil
      }
    }),
  ]
  let inputs = [
    benchmark.Data(
      "long",
      "https://github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
    ),
    benchmark.Data("ipv4", "https://192.255.36.4/"),
  ]
  benchmark.run(functions, inputs)
}

View File

@@ -1,12 +1,20 @@
// import gleam/result
// import gleam/uri as uri2
import gleam/uri as uri2
// import splitter
// import types.{Uri}
import gluri as uri
pub fn main() {
uri.parse("http://my_host.com") |> echo
// uri.parse("https://192.255.36.4/") |> echo
// uri.parse(
// "https://github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480",
// )
// |> echo
let _ = uri.parse("/abc/def") |> echo
let _ = uri2.parse("/abc/def") |> echo
let _ = uri.parse("/abc/") |> echo
Nil
}

View File

@@ -90,5 +90,3 @@ RFC 3986 URI Generic Syntax January 2005
gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
/ "*" / "+" / "," / ";" / "="
" # % < > [ \ ] ^ ` { | }