From 1174a17c97c849a0740c25545c89baf1b43331fb Mon Sep 17 00:00:00 2001 From: Gareth Pendleton Date: Sun, 14 Sep 2025 11:44:08 +0100 Subject: [PATCH] perf: Optimisations for parsing between options Added a specific parse try function which takes the essence of list.fold_until but makes it specific for fn(String)->Result(#(a, String),Nil) parsers ??? ?? --- CHANGELOG.md | 4 + src/gluri.gleam | 2 +- src/gluri/internal/parser.gleam | 161 +++++++++++--------------------- src/gluri/internal/utils.gleam | 51 ++++++++++ 4 files changed, 108 insertions(+), 110 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 280f6ae..30575b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,3 +7,7 @@ ## v2.0.0 - Removed types.Uri. Now gluri uses the stdlib Uri type (and empty) + +## v2.0.1 + +- Improved parsing performance slightly and reduced memory usage up to 50% diff --git a/src/gluri.gleam b/src/gluri.gleam index 1243357..40f52a3 100644 --- a/src/gluri.gleam +++ b/src/gluri.gleam @@ -3,7 +3,7 @@ import gleam/int import gleam/list import gleam/option.{Some} import gleam/string -import gleam/uri.{type Uri, Uri} +import gleam/uri.{type Uri} import gluri/internal/parser import gluri/internal/utils diff --git a/src/gluri/internal/parser.gleam b/src/gluri/internal/parser.gleam index 81014ce..937e3bd 100644 --- a/src/gluri/internal/parser.gleam +++ b/src/gluri/internal/parser.gleam @@ -40,7 +40,8 @@ pub fn parse(uri: String) -> Result(Uri, Nil) { fn parse_query(str: String) -> Result(#(Uri, String), Nil) { case str { "?" <> rest -> { - let #(query, rest) = get_multiple_optional(parse_query_fragment, rest) + let #(query, rest) = + utils.get_multiple_optional(parse_query_fragment, rest) Ok(#(Uri(..empty, query: Some(query)), rest)) } _ -> Ok(#(empty, str)) @@ -50,7 +51,8 @@ fn parse_query(str: String) -> Result(#(Uri, String), Nil) { fn parse_fragment(str: String) -> Result(#(Uri, String), Nil) { case str { "#" <> rest -> { - let #(fragment, rest) = get_multiple_optional(parse_query_fragment, rest) + let #(fragment, rest) = + utils.get_multiple_optional(parse_query_fragment, rest) Ok(#(Uri(..empty, fragment: Some(fragment)), rest)) } _ -> Ok(#(empty, str)) @@ -58,29 +60,27 @@ fn parse_fragment(str: String) -> Result(#(Uri, String), Nil) { } fn parse_hier_part(str: String) -> Result(#(Uri, String), Nil) { - list.fold_until( + utils.try_parsers( [parse_authority, parse_absolute, parse_rootless, parse_empty], - Error(Nil), - get_parser_fn(str), + str, ) } fn parse_relative_part(str: String) -> Result(#(Uri, String), Nil) { - list.fold_until( + utils.try_parsers( [parse_authority, parse_absolute, parse_noscheme, parse_empty], - Error(Nil), - get_parser_fn(str), + str, ) } fn parse_absolute(str: String) -> Result(#(Uri, String), Nil) { case str { "/" <> rest -> { - let assert Ok(#(seg, rest)) = + use #(seg, rest) <- result.try( parse_optional(rest, parse_this_then( [ do_parse_segment_nz, - get_multiple_optional_result( + utils.get_multiple_optional_result( fn(str) { case str { "/" <> rest -> { @@ -93,7 +93,8 @@ fn parse_absolute(str: String) -> Result(#(Uri, String), Nil) { ), ], _, - )) + )), + ) Ok(#(Uri(None, None, None, None, "/" <> seg, None, None), rest)) } @@ -105,7 +106,7 @@ fn parse_rootless(str: String) -> Result(#(Uri, String), Nil) { use #(seg1, rest) <- result.try(do_parse_segment_nz(str)) let #(segs, rest) = - get_multiple_optional( + utils.get_multiple_optional( fn(str) { case str { "/" <> rest -> { @@ -124,7 +125,7 @@ fn parse_noscheme(str: String) -> Result(#(Uri, String), Nil) { use #(seg1, rest) <- result.try(do_parse_segment_nz_nc(str)) let #(segs, rest) = - get_multiple_optional( + utils.get_multiple_optional( fn(str) { case str { "/" <> rest -> { @@ -146,17 +147,6 @@ fn parse_optional(str, opt_fn) { } } -fn get_multiple_optional_result(opt_fn, str: String) { - get_multiple_optional(opt_fn, str) |> Ok -} - -fn get_multiple_optional(opt_fn, str: String) { - case get_multiple(opt_fn, str) { - Error(_) -> #("", str) - Ok(r) -> r - } -} - fn parse_empty(str: String) -> Result(#(Uri, String), Nil) { Ok(#(Uri(None, None, None, None, "", None, None), str)) } @@ -211,20 +201,15 @@ fn parse_digits(str: String, digits: String) { } fn parse_host(str: String) { - list.fold_until( - [parse_ip_literal, parse_ipv4, parse_reg_name], - Error(Nil), - get_parser_fn(str), - ) + utils.try_parsers([parse_ip_literal, parse_ipv4, parse_reg_name], str) } fn parse_ip_literal(str: String) { case str { "[" <> rest -> { - use #(ip, rest) <- result.try(list.fold_until( + use #(ip, rest) <- result.try(utils.try_parsers( [parse_ipv6, parse_ipfuture], - Error(Nil), - get_parser_fn(rest), + rest, )) case rest { "]" <> rest -> Ok(#(ip, rest)) @@ -236,7 +221,7 @@ fn parse_ip_literal(str: String) { } fn parse_ipv6(str: String) { - list.fold_until( + utils.try_parsers( [ parse_this_then([parse_min_max(_, 6, 6, parse_h16_colon), parse_ls32], _), parse_this_then( @@ -303,8 +288,7 @@ fn parse_ipv6(str: String) { _, ), ], - Error(Nil), - get_parser_fn(str), + str, ) } @@ -335,7 +319,7 @@ fn parse_this_then( } fn parse_ls32(str: String) -> Result(#(String, String), Nil) { - list.fold_until([parse_h16_pair, parse_ipv4], Error(Nil), get_parser_fn(str)) + utils.try_parsers([parse_h16_pair, parse_ipv4], str) } fn parse_h16_pair(str: String) { @@ -364,13 +348,16 @@ fn parse_h16_colon(str: String) { fn parse_ipfuture(str: String) { case str { "v" <> rest -> { - use #(v, rest) <- result.try(get_multiple(utils.parse_hex_digit, rest)) + use #(v, rest) <- result.try(utils.get_multiple( + utils.parse_hex_digit, + rest, + )) case rest { "." <> rest -> { - use #(i, rest) <- result.try(get_multiple( + use #(i, rest) <- result.try(utils.get_multiple( fn(str) { - list.fold_until( + utils.try_parsers( [ parse_unreserved, parse_sub_delim, @@ -381,8 +368,7 @@ fn parse_ipfuture(str: String) { } }, ], - Error(Nil), - get_parser_fn(str), + str, ) }, rest, @@ -396,33 +382,8 @@ fn parse_ipfuture(str: String) { } } -fn get_multiple( - to_run: fn(String) -> Result(#(String, String), Nil), - str: String, -) -> Result(#(String, String), Nil) { - case do_get_multiple(to_run, str, "") { - Ok(#("", _)) | Error(Nil) -> Error(Nil) - Ok(#(r, rest)) -> Ok(#(r, rest)) - } -} - -fn do_get_multiple( - to_run: fn(String) -> Result(#(String, String), Nil), - str: String, - ret: String, -) -> Result(#(String, String), Nil) { - case str { - "" -> Ok(#(ret, str)) - _ -> - case to_run(str) { - Ok(#(r, rest)) -> do_get_multiple(to_run, rest, ret <> r) - Error(_) -> Ok(#(ret, str)) - } - } -} - fn parse_query_fragment(str: String) { - list.fold_until( + utils.try_parsers( [ do_parse_pchar, fn(str: String) { @@ -432,13 +393,12 @@ fn parse_query_fragment(str: String) { } }, ], - Error(Nil), - get_parser_fn(str), + str, ) } fn parse_abs_empty(str: String) -> #(String, String) { - get_multiple_optional( + utils.get_multiple_optional( fn(str) { case str { "/" <> rest -> { @@ -483,7 +443,7 @@ fn do_parse_segment_nz_nc(str: String) { } fn do_parse_pchar(str: String) { - list.fold_until( + utils.try_parsers( [ parse_unreserved, parse_pct_encoded, @@ -495,13 +455,12 @@ fn do_parse_pchar(str: String) { } }, ], - Error(Nil), - get_parser_fn(str), + str, ) } fn do_parse_pchar_nc(str: String) { - list.fold_until( + utils.try_parsers( [ parse_unreserved, parse_pct_encoded, @@ -513,12 +472,11 @@ fn do_parse_pchar_nc(str: String) { } }, ], - Error(Nil), - get_parser_fn(str), + str, ) } -fn parse_reg_name(str: String) { +pub fn parse_reg_name(str: String) { // can't error case do_parse_reg_name(str, "") { @@ -529,10 +487,9 @@ fn parse_reg_name(str: String) { fn do_parse_reg_name(str: String, reg_name: String) { case - list.fold_until( + utils.try_parsers( [parse_unreserved, parse_pct_encoded, parse_sub_delim], - Error(Nil), - get_parser_fn(str), + str, ) { Error(Nil) | Ok(#("", _)) -> Ok(#(reg_name, str)) @@ -589,16 +546,16 @@ fn parse_ipv4(str: String) { Ok(#(oct1 <> "." <> oct2 <> "." <> oct3 <> "." <> oct4, rest)) } -fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) { - let matches = [ - ["2", "5", "012345"], - ["2", "01234", "0123456789"], - ["1", "0123456789", "0123456789"], - ["123456789", "0123456789"], - ["0123456789"], - ] +const octet_matches = [ + ["2", "5", "012345"], + ["2", "01234", "0123456789"], + ["1", "0123456789", "0123456789"], + ["123456789", "0123456789"], + ["0123456789"], +] - list.fold_until(matches, Error(Nil), fn(_, chars) { +fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) { + list.fold_until(octet_matches, Error(Nil), fn(_, chars) { case list.fold_until(chars, #("", str), fn(acc, charset) { let #(octet, str) = acc @@ -627,7 +584,7 @@ fn parse_userinfo( "@" <> rest -> Ok(#(userinfo, rest)) "" -> Error(Nil) _ -> { - use #(part, rest) <- result.try(list.fold_until( + use #(part, rest) <- result.try(utils.try_parsers( [ parse_unreserved, parse_pct_encoded, @@ -639,8 +596,7 @@ fn parse_userinfo( } }, ], - Error(Nil), - get_parser_fn(str), + str, )) parse_userinfo(rest, userinfo <> part) } @@ -668,7 +624,7 @@ fn do_parse_scheme( ":" <> rest -> Ok(#(scheme, rest)) "" -> Error(Nil) _ -> { - use #(part, rest) <- result.try(list.fold_until( + use #(part, rest) <- result.try(utils.try_parsers( [ parse_alpha, parse_digit, @@ -680,25 +636,13 @@ fn do_parse_scheme( } }, ], - Error(Nil), - get_parser_fn(str), + str, )) do_parse_scheme(rest, scheme <> part) } } } -fn get_parser_fn( - str: String, -) -> fn(a, fn(String) -> Result(b, c)) -> list.ContinueOrStop(Result(b, Nil)) { - fn(_, parse_fn) { - case parse_fn(str) { - Ok(r) -> Stop(Ok(r)) - Error(_) -> Continue(Error(Nil)) - } - } -} - fn parse_min_max(str, min, max, parse_fn) { use <- bool.guard(when: min < 0 || max <= 0 || min > max, return: Error(Nil)) case @@ -799,7 +743,7 @@ fn parse_alpha(str: String) -> Result(#(String, String), Nil) { } fn parse_unreserved(str: String) -> Result(#(String, String), Nil) { - list.fold_until( + utils.try_parsers( [ parse_alpha, parse_digit, @@ -813,8 +757,7 @@ fn parse_unreserved(str: String) -> Result(#(String, String), Nil) { } }, ], - Error(Nil), - get_parser_fn(str), + str, ) } diff --git a/src/gluri/internal/utils.gleam b/src/gluri/internal/utils.gleam index 369f393..328e5f3 100644 --- a/src/gluri/internal/utils.gleam +++ b/src/gluri/internal/utils.gleam @@ -1,6 +1,7 @@ import gleam/bool import gleam/int import gleam/list + import gleam/option.{type Option, None, Some} import gleam/result import gleam/string @@ -86,6 +87,56 @@ fn merge_paths(base: Uri, relative: Uri) -> String { } } +pub fn try_parsers( + over list: List(fn(String) -> Result(#(a, String), Nil)), + against static_data: String, +) -> Result(#(a, String), Nil) { + case list { + [] -> Error(Nil) + [first, ..rest] -> + case first(static_data) { + Error(_) -> try_parsers(rest, static_data) + Ok(r) -> Ok(r) + } + } +} + +pub fn get_multiple( + to_run: fn(String) -> Result(#(String, String), Nil), + str: String, +) -> Result(#(String, String), Nil) { + case do_get_multiple(to_run, str, "") { + Ok(#("", _)) | Error(Nil) -> Error(Nil) + Ok(#(r, rest)) -> Ok(#(r, rest)) + } +} + +pub fn get_multiple_optional(opt_fn, str: String) { + case get_multiple(opt_fn, str) { + Error(_) -> #("", str) + Ok(r) -> r + } +} + +pub fn get_multiple_optional_result(opt_fn, str: String) { + get_multiple_optional(opt_fn, str) |> Ok +} + +fn do_get_multiple( + to_run: fn(String) -> Result(#(String, String), Nil), + str: String, + ret: String, +) -> Result(#(String, String), Nil) { + case str { + "" -> Ok(#(ret, str)) + _ -> + case to_run(str) { + Ok(#(r, rest)) -> do_get_multiple(to_run, rest, ret <> r) + Error(_) -> Ok(#(ret, str)) + } + } +} + pub fn normalise(uri: Uri) -> Uri { let percent_splitter = splitter.new(["%"]) let percent_normaliser = normalise_percent(percent_splitter, _)