diff --git a/CHANGELOG.md b/CHANGELOG.md index 30575b6..127315f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,3 +11,4 @@ ## v2.0.1 - Improved parsing performance slightly and reduced memory usage up to 50% +- Significantly improved IPv4 parsing performance diff --git a/src/gluri/internal/parser.gleam b/src/gluri/internal/parser.gleam index 9a0dc80..a2acb6a 100644 --- a/src/gluri/internal/parser.gleam +++ b/src/gluri/internal/parser.gleam @@ -421,39 +421,73 @@ fn parse_ipv4address(str: String) { Ok(#(oct1 <> "." <> oct2 <> "." <> oct3 <> "." <> oct4, rest)) } -const octet_matches = [ - ["2", "5", "012345"], - ["2", "01234", "0123456789"], - ["1", "0123456789", "0123456789"], - ["123456789", "0123456789"], - ["0123456789"], -] - // dec-octet = DIGIT ; 0-9 // / %x31-39 DIGIT ; 10-99 // / "1" 2DIGIT ; 100-199 // / "2" %x30-34 DIGIT ; 200-249 // / "25" %x30-35 ; 250-255 -fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) { - list.fold_until(octet_matches, Error(Nil), fn(_, chars) { - case - list.fold_until(chars, #("", str), fn(acc, charset) { - let #(octet, str) = acc - case string.pop_grapheme(str) { - Error(_) -> Stop(#("", "")) - Ok(#(char, rest)) -> { - case string.contains(charset, char) { - True -> Continue(#(octet <> char, rest)) - False -> Stop(#("", "")) - } +pub fn parse_dec_octet(str: String) -> Result(#(String, String), Nil) { + try_parsers( + [ + parse_this_then(_, [ + fn(str) { + case str { + "2" as l <> rest -> Ok(#(l, rest)) + _ -> Error(Nil) } - } - }) - { - #("", _) -> Continue(Error(Nil)) - #(octet, rest) -> Stop(Ok(#(octet, rest))) - } - }) + }, + fn(str) { + case str { + "5" as l <> rest -> Ok(#(l, rest)) + _ -> Error(Nil) + } + }, + fn(str) { + case str { + "0" as l <> rest + | "1" as l <> rest + | "2" as l <> rest + | "3" as l <> rest + | "4" as l <> rest + | "5" as l <> rest -> Ok(#(l, rest)) + _ -> Error(Nil) + } + }, + ]), + parse_this_then(_, [ + fn(str) { + case str { + "2" as l <> rest -> Ok(#(l, rest)) + _ -> 
Error(Nil) + } + }, + fn(str) { + case str { + "0" as l <> rest + | "1" as l <> rest + | "2" as l <> rest + | "3" as l <> rest + | "4" as l <> rest -> Ok(#(l, rest)) + _ -> Error(Nil) + } + }, + parse_digit, + ]), + parse_this_then(_, [ + fn(str) { + case str { + "1" as l <> rest -> Ok(#(l, rest)) + _ -> Error(Nil) + } + }, + parse_digit, + parse_digit, + ]), + parse_this_then(_, [parse_digit_nz, parse_digit]), + parse_digit, + ], + str, + ) } // reg-name = *( unreserved / pct-encoded / sub-delims ) @@ -727,6 +761,21 @@ fn parse_digit(str: String) -> Result(#(String, String), Nil) { } } +fn parse_digit_nz(str: String) -> Result(#(String, String), Nil) { + case str { + "1" as l <> rest + | "2" as l <> rest + | "3" as l <> rest + | "4" as l <> rest + | "5" as l <> rest + | "6" as l <> rest + | "7" as l <> rest + | "8" as l <> rest + | "9" as l <> rest -> Ok(#(l, rest)) + _ -> Error(Nil) + } +} + fn parse_digits(str: String, digits: String) { case parse_digit(str) { Ok(#(d, rest)) -> { diff --git a/src/gluri/internal/utils.gleam b/src/gluri/internal/utils.gleam index e7ac767..8d71711 100644 --- a/src/gluri/internal/utils.gleam +++ b/src/gluri/internal/utils.gleam @@ -154,15 +154,23 @@ pub fn parse_this_then( to_parse str: String, with parsers: List(fn(String) -> Result(#(String, String), Nil)), ) -> Result(#(String, String), Nil) { - list.fold_until(parsers, Ok(#("", str)), fn(acc, parser) { - let assert Ok(#(res, str)) = acc - case parser(str) { - Ok(#(res2, rest)) -> { - Continue(Ok(#(res <> res2, rest))) + do_parse_this_then(str, "", parsers) +} + +fn do_parse_this_then( + to_parse str: String, + from initial: String, + with parsers: List(fn(String) -> Result(#(String, String), Nil)), +) -> Result(#(String, String), Nil) { + case parsers { + [] -> Ok(#(initial, str)) + [head, ..tail] -> { + case head(str) { + Ok(#(res, rest)) -> do_parse_this_then(rest, initial <> res, tail) + Error(_) -> Error(Nil) } - Error(Nil) -> Stop(Error(Nil)) } - }) + } } pub fn 
parse_multiple( diff --git a/test/benchmark.gleam b/test/benchmark.gleam index 2e44eb8..7b6ac06 100644 --- a/test/benchmark.gleam +++ b/test/benchmark.gleam @@ -12,6 +12,28 @@ pub fn main() { parse_benchmark() // reg_name_benchmark() + // ip_benchmark() +} + +@target(erlang) +pub fn ip_benchmark() { + benchmark.run( + [ + benchmark.Function("ip_benchmark", fn(data) { + fn() { + let _ = parser.parse_dec_octet(data) + Nil + } + }), + ], + [ + benchmark.Data("173", "173"), + benchmark.Data("5", "5"), + benchmark.Data("200", "200"), + benchmark.Data("255", "255"), + benchmark.Data("fail", "2b"), + ], + ) } @target(erlang)