perf: Optimisations for parsing between options

Added a dedicated `try_parsers` function that captures the essence of
`list.fold_until` but is specialised for parsers of type
`fn(String) -> Result(#(a, String), Nil)`.

???

??
This commit is contained in:
2025-09-14 11:44:08 +01:00
parent 428bd53002
commit 1174a17c97
4 changed files with 108 additions and 110 deletions

View File

@@ -7,3 +7,7 @@
## v2.0.0 ## v2.0.0
- Removed types.Uri. Now gluri uses the stdlib Uri type (and empty) - Removed types.Uri. Now gluri uses the stdlib Uri type (and empty)
## v2.0.1
- Improved parsing performance slightly and reduced memory usage by up to 50%

View File

@@ -3,7 +3,7 @@ import gleam/int
import gleam/list import gleam/list
import gleam/option.{Some} import gleam/option.{Some}
import gleam/string import gleam/string
import gleam/uri.{type Uri, Uri} import gleam/uri.{type Uri}
import gluri/internal/parser import gluri/internal/parser
import gluri/internal/utils import gluri/internal/utils

View File

@@ -40,7 +40,8 @@ pub fn parse(uri: String) -> Result(Uri, Nil) {
fn parse_query(str: String) -> Result(#(Uri, String), Nil) { fn parse_query(str: String) -> Result(#(Uri, String), Nil) {
case str { case str {
"?" <> rest -> { "?" <> rest -> {
let #(query, rest) = get_multiple_optional(parse_query_fragment, rest) let #(query, rest) =
utils.get_multiple_optional(parse_query_fragment, rest)
Ok(#(Uri(..empty, query: Some(query)), rest)) Ok(#(Uri(..empty, query: Some(query)), rest))
} }
_ -> Ok(#(empty, str)) _ -> Ok(#(empty, str))
@@ -50,7 +51,8 @@ fn parse_query(str: String) -> Result(#(Uri, String), Nil) {
fn parse_fragment(str: String) -> Result(#(Uri, String), Nil) { fn parse_fragment(str: String) -> Result(#(Uri, String), Nil) {
case str { case str {
"#" <> rest -> { "#" <> rest -> {
let #(fragment, rest) = get_multiple_optional(parse_query_fragment, rest) let #(fragment, rest) =
utils.get_multiple_optional(parse_query_fragment, rest)
Ok(#(Uri(..empty, fragment: Some(fragment)), rest)) Ok(#(Uri(..empty, fragment: Some(fragment)), rest))
} }
_ -> Ok(#(empty, str)) _ -> Ok(#(empty, str))
@@ -58,29 +60,27 @@ fn parse_fragment(str: String) -> Result(#(Uri, String), Nil) {
} }
fn parse_hier_part(str: String) -> Result(#(Uri, String), Nil) { fn parse_hier_part(str: String) -> Result(#(Uri, String), Nil) {
list.fold_until( utils.try_parsers(
[parse_authority, parse_absolute, parse_rootless, parse_empty], [parse_authority, parse_absolute, parse_rootless, parse_empty],
Error(Nil), str,
get_parser_fn(str),
) )
} }
fn parse_relative_part(str: String) -> Result(#(Uri, String), Nil) { fn parse_relative_part(str: String) -> Result(#(Uri, String), Nil) {
list.fold_until( utils.try_parsers(
[parse_authority, parse_absolute, parse_noscheme, parse_empty], [parse_authority, parse_absolute, parse_noscheme, parse_empty],
Error(Nil), str,
get_parser_fn(str),
) )
} }
fn parse_absolute(str: String) -> Result(#(Uri, String), Nil) { fn parse_absolute(str: String) -> Result(#(Uri, String), Nil) {
case str { case str {
"/" <> rest -> { "/" <> rest -> {
let assert Ok(#(seg, rest)) = use #(seg, rest) <- result.try(
parse_optional(rest, parse_this_then( parse_optional(rest, parse_this_then(
[ [
do_parse_segment_nz, do_parse_segment_nz,
get_multiple_optional_result( utils.get_multiple_optional_result(
fn(str) { fn(str) {
case str { case str {
"/" <> rest -> { "/" <> rest -> {
@@ -93,7 +93,8 @@ fn parse_absolute(str: String) -> Result(#(Uri, String), Nil) {
), ),
], ],
_, _,
)) )),
)
Ok(#(Uri(None, None, None, None, "/" <> seg, None, None), rest)) Ok(#(Uri(None, None, None, None, "/" <> seg, None, None), rest))
} }
@@ -105,7 +106,7 @@ fn parse_rootless(str: String) -> Result(#(Uri, String), Nil) {
use #(seg1, rest) <- result.try(do_parse_segment_nz(str)) use #(seg1, rest) <- result.try(do_parse_segment_nz(str))
let #(segs, rest) = let #(segs, rest) =
get_multiple_optional( utils.get_multiple_optional(
fn(str) { fn(str) {
case str { case str {
"/" <> rest -> { "/" <> rest -> {
@@ -124,7 +125,7 @@ fn parse_noscheme(str: String) -> Result(#(Uri, String), Nil) {
use #(seg1, rest) <- result.try(do_parse_segment_nz_nc(str)) use #(seg1, rest) <- result.try(do_parse_segment_nz_nc(str))
let #(segs, rest) = let #(segs, rest) =
get_multiple_optional( utils.get_multiple_optional(
fn(str) { fn(str) {
case str { case str {
"/" <> rest -> { "/" <> rest -> {
@@ -146,17 +147,6 @@ fn parse_optional(str, opt_fn) {
} }
} }
fn get_multiple_optional_result(opt_fn, str: String) {
get_multiple_optional(opt_fn, str) |> Ok
}
fn get_multiple_optional(opt_fn, str: String) {
case get_multiple(opt_fn, str) {
Error(_) -> #("", str)
Ok(r) -> r
}
}
fn parse_empty(str: String) -> Result(#(Uri, String), Nil) { fn parse_empty(str: String) -> Result(#(Uri, String), Nil) {
Ok(#(Uri(None, None, None, None, "", None, None), str)) Ok(#(Uri(None, None, None, None, "", None, None), str))
} }
@@ -211,20 +201,15 @@ fn parse_digits(str: String, digits: String) {
} }
fn parse_host(str: String) { fn parse_host(str: String) {
list.fold_until( utils.try_parsers([parse_ip_literal, parse_ipv4, parse_reg_name], str)
[parse_ip_literal, parse_ipv4, parse_reg_name],
Error(Nil),
get_parser_fn(str),
)
} }
fn parse_ip_literal(str: String) { fn parse_ip_literal(str: String) {
case str { case str {
"[" <> rest -> { "[" <> rest -> {
use #(ip, rest) <- result.try(list.fold_until( use #(ip, rest) <- result.try(utils.try_parsers(
[parse_ipv6, parse_ipfuture], [parse_ipv6, parse_ipfuture],
Error(Nil), rest,
get_parser_fn(rest),
)) ))
case rest { case rest {
"]" <> rest -> Ok(#(ip, rest)) "]" <> rest -> Ok(#(ip, rest))
@@ -236,7 +221,7 @@ fn parse_ip_literal(str: String) {
} }
fn parse_ipv6(str: String) { fn parse_ipv6(str: String) {
list.fold_until( utils.try_parsers(
[ [
parse_this_then([parse_min_max(_, 6, 6, parse_h16_colon), parse_ls32], _), parse_this_then([parse_min_max(_, 6, 6, parse_h16_colon), parse_ls32], _),
parse_this_then( parse_this_then(
@@ -303,8 +288,7 @@ fn parse_ipv6(str: String) {
_, _,
), ),
], ],
Error(Nil), str,
get_parser_fn(str),
) )
} }
@@ -335,7 +319,7 @@ fn parse_this_then(
} }
fn parse_ls32(str: String) -> Result(#(String, String), Nil) { fn parse_ls32(str: String) -> Result(#(String, String), Nil) {
list.fold_until([parse_h16_pair, parse_ipv4], Error(Nil), get_parser_fn(str)) utils.try_parsers([parse_h16_pair, parse_ipv4], str)
} }
fn parse_h16_pair(str: String) { fn parse_h16_pair(str: String) {
@@ -364,13 +348,16 @@ fn parse_h16_colon(str: String) {
fn parse_ipfuture(str: String) { fn parse_ipfuture(str: String) {
case str { case str {
"v" <> rest -> { "v" <> rest -> {
use #(v, rest) <- result.try(get_multiple(utils.parse_hex_digit, rest)) use #(v, rest) <- result.try(utils.get_multiple(
utils.parse_hex_digit,
rest,
))
case rest { case rest {
"." <> rest -> { "." <> rest -> {
use #(i, rest) <- result.try(get_multiple( use #(i, rest) <- result.try(utils.get_multiple(
fn(str) { fn(str) {
list.fold_until( utils.try_parsers(
[ [
parse_unreserved, parse_unreserved,
parse_sub_delim, parse_sub_delim,
@@ -381,8 +368,7 @@ fn parse_ipfuture(str: String) {
} }
}, },
], ],
Error(Nil), str,
get_parser_fn(str),
) )
}, },
rest, rest,
@@ -396,33 +382,8 @@ fn parse_ipfuture(str: String) {
} }
} }
fn get_multiple(
to_run: fn(String) -> Result(#(String, String), Nil),
str: String,
) -> Result(#(String, String), Nil) {
case do_get_multiple(to_run, str, "") {
Ok(#("", _)) | Error(Nil) -> Error(Nil)
Ok(#(r, rest)) -> Ok(#(r, rest))
}
}
fn do_get_multiple(
to_run: fn(String) -> Result(#(String, String), Nil),
str: String,
ret: String,
) -> Result(#(String, String), Nil) {
case str {
"" -> Ok(#(ret, str))
_ ->
case to_run(str) {
Ok(#(r, rest)) -> do_get_multiple(to_run, rest, ret <> r)
Error(_) -> Ok(#(ret, str))
}
}
}
fn parse_query_fragment(str: String) { fn parse_query_fragment(str: String) {
list.fold_until( utils.try_parsers(
[ [
do_parse_pchar, do_parse_pchar,
fn(str: String) { fn(str: String) {
@@ -432,13 +393,12 @@ fn parse_query_fragment(str: String) {
} }
}, },
], ],
Error(Nil), str,
get_parser_fn(str),
) )
} }
fn parse_abs_empty(str: String) -> #(String, String) { fn parse_abs_empty(str: String) -> #(String, String) {
get_multiple_optional( utils.get_multiple_optional(
fn(str) { fn(str) {
case str { case str {
"/" <> rest -> { "/" <> rest -> {
@@ -483,7 +443,7 @@ fn do_parse_segment_nz_nc(str: String) {
} }
fn do_parse_pchar(str: String) { fn do_parse_pchar(str: String) {
list.fold_until( utils.try_parsers(
[ [
parse_unreserved, parse_unreserved,
parse_pct_encoded, parse_pct_encoded,
@@ -495,13 +455,12 @@ fn do_parse_pchar(str: String) {
} }
}, },
], ],
Error(Nil), str,
get_parser_fn(str),
) )
} }
fn do_parse_pchar_nc(str: String) { fn do_parse_pchar_nc(str: String) {
list.fold_until( utils.try_parsers(
[ [
parse_unreserved, parse_unreserved,
parse_pct_encoded, parse_pct_encoded,
@@ -513,12 +472,11 @@ fn do_parse_pchar_nc(str: String) {
} }
}, },
], ],
Error(Nil), str,
get_parser_fn(str),
) )
} }
fn parse_reg_name(str: String) { pub fn parse_reg_name(str: String) {
// can't error // can't error
case do_parse_reg_name(str, "") { case do_parse_reg_name(str, "") {
@@ -529,10 +487,9 @@ fn parse_reg_name(str: String) {
fn do_parse_reg_name(str: String, reg_name: String) { fn do_parse_reg_name(str: String, reg_name: String) {
case case
list.fold_until( utils.try_parsers(
[parse_unreserved, parse_pct_encoded, parse_sub_delim], [parse_unreserved, parse_pct_encoded, parse_sub_delim],
Error(Nil), str,
get_parser_fn(str),
) )
{ {
Error(Nil) | Ok(#("", _)) -> Ok(#(reg_name, str)) Error(Nil) | Ok(#("", _)) -> Ok(#(reg_name, str))
@@ -589,16 +546,16 @@ fn parse_ipv4(str: String) {
Ok(#(oct1 <> "." <> oct2 <> "." <> oct3 <> "." <> oct4, rest)) Ok(#(oct1 <> "." <> oct2 <> "." <> oct3 <> "." <> oct4, rest))
} }
fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) { const octet_matches = [
let matches = [ ["2", "5", "012345"],
["2", "5", "012345"], ["2", "01234", "0123456789"],
["2", "01234", "0123456789"], ["1", "0123456789", "0123456789"],
["1", "0123456789", "0123456789"], ["123456789", "0123456789"],
["123456789", "0123456789"], ["0123456789"],
["0123456789"], ]
]
list.fold_until(matches, Error(Nil), fn(_, chars) { fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) {
list.fold_until(octet_matches, Error(Nil), fn(_, chars) {
case case
list.fold_until(chars, #("", str), fn(acc, charset) { list.fold_until(chars, #("", str), fn(acc, charset) {
let #(octet, str) = acc let #(octet, str) = acc
@@ -627,7 +584,7 @@ fn parse_userinfo(
"@" <> rest -> Ok(#(userinfo, rest)) "@" <> rest -> Ok(#(userinfo, rest))
"" -> Error(Nil) "" -> Error(Nil)
_ -> { _ -> {
use #(part, rest) <- result.try(list.fold_until( use #(part, rest) <- result.try(utils.try_parsers(
[ [
parse_unreserved, parse_unreserved,
parse_pct_encoded, parse_pct_encoded,
@@ -639,8 +596,7 @@ fn parse_userinfo(
} }
}, },
], ],
Error(Nil), str,
get_parser_fn(str),
)) ))
parse_userinfo(rest, userinfo <> part) parse_userinfo(rest, userinfo <> part)
} }
@@ -668,7 +624,7 @@ fn do_parse_scheme(
":" <> rest -> Ok(#(scheme, rest)) ":" <> rest -> Ok(#(scheme, rest))
"" -> Error(Nil) "" -> Error(Nil)
_ -> { _ -> {
use #(part, rest) <- result.try(list.fold_until( use #(part, rest) <- result.try(utils.try_parsers(
[ [
parse_alpha, parse_alpha,
parse_digit, parse_digit,
@@ -680,25 +636,13 @@ fn do_parse_scheme(
} }
}, },
], ],
Error(Nil), str,
get_parser_fn(str),
)) ))
do_parse_scheme(rest, scheme <> part) do_parse_scheme(rest, scheme <> part)
} }
} }
} }
fn get_parser_fn(
str: String,
) -> fn(a, fn(String) -> Result(b, c)) -> list.ContinueOrStop(Result(b, Nil)) {
fn(_, parse_fn) {
case parse_fn(str) {
Ok(r) -> Stop(Ok(r))
Error(_) -> Continue(Error(Nil))
}
}
}
fn parse_min_max(str, min, max, parse_fn) { fn parse_min_max(str, min, max, parse_fn) {
use <- bool.guard(when: min < 0 || max <= 0 || min > max, return: Error(Nil)) use <- bool.guard(when: min < 0 || max <= 0 || min > max, return: Error(Nil))
case case
@@ -799,7 +743,7 @@ fn parse_alpha(str: String) -> Result(#(String, String), Nil) {
} }
fn parse_unreserved(str: String) -> Result(#(String, String), Nil) { fn parse_unreserved(str: String) -> Result(#(String, String), Nil) {
list.fold_until( utils.try_parsers(
[ [
parse_alpha, parse_alpha,
parse_digit, parse_digit,
@@ -813,8 +757,7 @@ fn parse_unreserved(str: String) -> Result(#(String, String), Nil) {
} }
}, },
], ],
Error(Nil), str,
get_parser_fn(str),
) )
} }

View File

@@ -1,6 +1,7 @@
import gleam/bool import gleam/bool
import gleam/int import gleam/int
import gleam/list import gleam/list
import gleam/option.{type Option, None, Some} import gleam/option.{type Option, None, Some}
import gleam/result import gleam/result
import gleam/string import gleam/string
@@ -86,6 +87,56 @@ fn merge_paths(base: Uri, relative: Uri) -> String {
} }
} }
/// Tries each parser in `list`, in order, against the same input
/// `static_data` and returns the first `Ok` result produced.
///
/// Returns `Error(Nil)` when the list is empty or when every parser fails.
/// Parsers after the first success are never called.
pub fn try_parsers(
  over list: List(fn(String) -> Result(#(a, String), Nil)),
  against static_data: String,
) -> Result(#(a, String), Nil) {
  case list {
    [parser, ..remaining] -> {
      let outcome = parser(static_data)
      case outcome {
        Ok(_) -> outcome
        // This candidate did not match; move on to the next one.
        Error(_) -> try_parsers(remaining, static_data)
      }
    }
    [] -> Error(Nil)
  }
}
/// Applies `to_run` repeatedly from the start of `str` (via
/// `do_get_multiple`), concatenating what each successful application yields.
///
/// Returns `Ok(#(matched, rest))` only when the accumulated match is
/// non-empty; an empty match is reported as `Error(Nil)`.
pub fn get_multiple(
  to_run: fn(String) -> Result(#(String, String), Nil),
  str: String,
) -> Result(#(String, String), Nil) {
  let collected = do_get_multiple(to_run, str, "")
  case collected {
    Ok(#(matched, _)) if matched != "" -> collected
    // Either nothing was consumed or the helper reported an error.
    _ -> Error(Nil)
  }
}
/// Variant of `get_multiple` that cannot fail: when `get_multiple` returns
/// an error, the result is an empty match paired with the untouched `str`.
pub fn get_multiple_optional(opt_fn, str: String) {
  case get_multiple(opt_fn, str) {
    Ok(matched_and_rest) -> matched_and_rest
    // No match is acceptable here: consume nothing.
    Error(_) -> #("", str)
  }
}
/// Wraps `get_multiple_optional` in `Ok` so it can be used where a
/// `Result`-returning parser is expected.
pub fn get_multiple_optional_result(opt_fn, str: String) {
  Ok(get_multiple_optional(opt_fn, str))
}
/// Accumulating worker for `get_multiple`.
///
/// Runs `to_run` on `str`; on success appends the produced piece to `ret`
/// and recurses on the remaining input. Stops when the input is empty or
/// `to_run` fails, returning `Ok(#(ret, str))` with whatever input is left.
/// This function never returns `Error`.
fn do_get_multiple(
  to_run: fn(String) -> Result(#(String, String), Nil),
  str: String,
  ret: String,
) -> Result(#(String, String), Nil) {
  // Empty input is treated like a failed step so the parser is never
  // invoked on "".
  let step = case str {
    "" -> Error(Nil)
    _ -> to_run(str)
  }
  case step {
    Ok(#(piece, remaining)) -> do_get_multiple(to_run, remaining, ret <> piece)
    Error(_) -> Ok(#(ret, str))
  }
}
pub fn normalise(uri: Uri) -> Uri { pub fn normalise(uri: Uri) -> Uri {
let percent_splitter = splitter.new(["%"]) let percent_splitter = splitter.new(["%"])
let percent_normaliser = normalise_percent(percent_splitter, _) let percent_normaliser = normalise_percent(percent_splitter, _)