4 Commits

Author SHA1 Message Date
7b7e689892 perf: Added benchmarks and changelog
Some checks failed
test / test (push) Has been cancelled
2025-10-03 11:15:59 +01:00
22d13bdf7d perf: Split parsers into erlang/js targets 2025-10-03 11:10:36 +01:00
3fc9a61afe build: Updated version 2025-10-03 10:59:46 +01:00
b9d1077425 perf: Revert back to pattern matching for ranges
May need to do more in depth checks and optimisations may need to be
removed if the core gleam compiler fixes the performance issues with
pattern matching + capture.
Possibly pattern matching may be better for smaller ranges but would
need to do benchmarks to find out what the cut-off point is.
2025-10-03 10:55:48 +01:00
9 changed files with 583 additions and 53 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
*.ez
/build
erl_crash.dump
node_modules

View File

@@ -17,3 +17,8 @@
- Minor performance improvement for uris with userinfo
- More performance improvements for ascii/digit parsing
## 2.0.3
- Minor performance improvement for erlang
- Major performance improvement for js

View File

@@ -1,5 +1,5 @@
name = "gluri"
version = "2.0.2"
version = "2.0.3"
# Fill out these fields if you intend to generate HTML documentation or publish
# your project to the Hex package manager.

18
package-lock.json generated Normal file
View File

@@ -0,0 +1,18 @@
{
"name": "uri",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"mitata": "^1.0.34"
}
},
"node_modules/mitata": {
"version": "1.0.34",
"resolved": "https://registry.npmjs.org/mitata/-/mitata-1.0.34.tgz",
"integrity": "sha512-Mc3zrtNBKIMeHSCQ0XqRLo1vbdIx1wvFV9c8NJAiyho6AjNfMY8bVhbS12bwciUdd1t4rj8099CH3N3NFahaUA==",
"license": "MIT"
}
}
}

6
package.json Normal file
View File

@@ -0,0 +1,6 @@
{
"type": "module",
"dependencies": {
"mitata": "^1.0.34"
}
}

View File

@@ -729,51 +729,111 @@ fn parse_unreserved(str: String) -> Result(#(String, String), Nil) {
// / "*" / "+" / "," / ";" / "="
// %21 / %24 / %26 / %27 / %28 / %29
// / %2A / %2B / %2C / %3B / %3D
// Erlang-target variant of the sub-delims parser.
//
// Pops the first grapheme of `str` and accepts it only when it is one of the
// eleven sub-delim characters, returning `Ok(#(char, tail))`. Anything else,
// including empty input, yields `Error(Nil)`.
//
// The popped result is matched against string literals directly instead of
// converting it to a codepoint first, so a grapheme made of several
// codepoints falls through to `Error(Nil)` rather than failing a
// `let assert [codepoint]`.
@target(erlang)
fn parse_sub_delim(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#("!" as char, tail))
    | Ok(#("$" as char, tail))
    | Ok(#("&" as char, tail))
    | Ok(#("'" as char, tail))
    | Ok(#("(" as char, tail))
    | Ok(#(")" as char, tail))
    | Ok(#("*" as char, tail))
    | Ok(#("+" as char, tail))
    | Ok(#("," as char, tail))
    | Ok(#(";" as char, tail))
    | Ok(#("=" as char, tail)) -> Ok(#(char, tail))
    // Covers both a non-matching grapheme and the empty-string Error case.
    _ -> Error(Nil)
  }
}
// JavaScript-target variant of the sub-delims parser.
//
// Matches one of the eleven sub-delim characters as a literal prefix of `str`
// and returns `Ok(#(char, tail))`, where `char` is the matched literal and
// `tail` is the remainder of the string. Any other input, including the empty
// string, yields `Error(Nil)`.
//
// NOTE(review): this matches a string prefix, whereas the Erlang variant pops
// a whole grapheme first; presumably the two can disagree when a sub-delim is
// followed by a combining mark. Confirm whether callers care.
@target(javascript)
fn parse_sub_delim(str: String) -> Result(#(String, String), Nil) {
case str {
"!" as char <> tail
| "$" as char <> tail
| "&" as char <> tail
| "'" as char <> tail
| "(" as char <> tail
| ")" as char <> tail
| "*" as char <> tail
| "+" as char <> tail
| "," as char <> tail
| ";" as char <> tail
| "=" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}
// DIGIT = %x30-39
// Erlang-target variant of the DIGIT parser.
//
// Pops the first grapheme of `str` and accepts it only when it is one of
// "0" through "9", returning `Ok(#(char, tail))`. Anything else, including
// empty input, yields `Error(Nil)`.
//
// Matching the popped result against literals avoids the codepoint
// conversion, so a multi-codepoint grapheme is rejected with `Error(Nil)`
// instead of failing a `let assert [codepoint]`.
@target(erlang)
fn parse_digit(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#("0" as char, tail))
    | Ok(#("1" as char, tail))
    | Ok(#("2" as char, tail))
    | Ok(#("3" as char, tail))
    | Ok(#("4" as char, tail))
    | Ok(#("5" as char, tail))
    | Ok(#("6" as char, tail))
    | Ok(#("7" as char, tail))
    | Ok(#("8" as char, tail))
    | Ok(#("9" as char, tail)) -> Ok(#(char, tail))
    // Covers both a non-digit grapheme and the empty-string Error case.
    _ -> Error(Nil)
  }
}
// JavaScript-target variant of the DIGIT parser.
//
// Matches one of "0" through "9" as a literal prefix of `str` and returns
// `Ok(#(char, tail))`, where `char` is the matched digit and `tail` is the
// remainder. Any other input, including the empty string, yields `Error(Nil)`.
@target(javascript)
fn parse_digit(str: String) -> Result(#(String, String), Nil) {
case str {
"0" as char <> tail
| "1" as char <> tail
| "2" as char <> tail
| "3" as char <> tail
| "4" as char <> tail
| "5" as char <> tail
| "6" as char <> tail
| "7" as char <> tail
| "8" as char <> tail
| "9" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}
// DIGIT (non-zero) = %x31-39
// Erlang-target variant of the non-zero DIGIT parser.
//
// Pops the first grapheme of `str` and accepts it only when it is one of
// "1" through "9" ("0" is deliberately excluded), returning
// `Ok(#(char, tail))`. Anything else, including empty input, yields
// `Error(Nil)`.
//
// Matching the popped result against literals avoids the codepoint
// conversion, so a multi-codepoint grapheme is rejected with `Error(Nil)`
// instead of failing a `let assert [codepoint]`.
@target(erlang)
fn parse_digit_nz(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#("1" as char, tail))
    | Ok(#("2" as char, tail))
    | Ok(#("3" as char, tail))
    | Ok(#("4" as char, tail))
    | Ok(#("5" as char, tail))
    | Ok(#("6" as char, tail))
    | Ok(#("7" as char, tail))
    | Ok(#("8" as char, tail))
    | Ok(#("9" as char, tail)) -> Ok(#(char, tail))
    // Covers "0", any non-digit grapheme, and the empty-string Error case.
    _ -> Error(Nil)
  }
}
// JavaScript-target variant of the non-zero DIGIT parser.
//
// Matches one of "1" through "9" as a literal prefix of `str` ("0" is not
// listed, so it is rejected) and returns `Ok(#(char, tail))`. Any other
// input, including the empty string, yields `Error(Nil)`.
@target(javascript)
fn parse_digit_nz(str: String) -> Result(#(String, String), Nil) {
case str {
"1" as char <> tail
| "2" as char <> tail
| "3" as char <> tail
| "4" as char <> tail
| "5" as char <> tail
| "6" as char <> tail
| "7" as char <> tail
| "8" as char <> tail
| "9" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}
@@ -787,18 +847,123 @@ fn parse_digits(str: String, digits: String) {
}
// ALPHA = %x41-5A / %x61-7A
// Erlang-target variant of the ALPHA parser.
//
// Pops the first grapheme of `str` and accepts it only when it is an ASCII
// letter ("a"-"z" or "A"-"Z"), returning `Ok(#(char, tail))`. Anything else,
// including empty input, yields `Error(Nil)`.
//
// Matching the popped result against literals avoids the codepoint
// conversion, so a multi-codepoint grapheme is rejected with `Error(Nil)`
// instead of failing a `let assert [codepoint]`.
@target(erlang)
fn parse_alpha(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#("a" as char, tail))
    | Ok(#("b" as char, tail))
    | Ok(#("c" as char, tail))
    | Ok(#("d" as char, tail))
    | Ok(#("e" as char, tail))
    | Ok(#("f" as char, tail))
    | Ok(#("g" as char, tail))
    | Ok(#("h" as char, tail))
    | Ok(#("i" as char, tail))
    | Ok(#("j" as char, tail))
    | Ok(#("k" as char, tail))
    | Ok(#("l" as char, tail))
    | Ok(#("m" as char, tail))
    | Ok(#("n" as char, tail))
    | Ok(#("o" as char, tail))
    | Ok(#("p" as char, tail))
    | Ok(#("q" as char, tail))
    | Ok(#("r" as char, tail))
    | Ok(#("s" as char, tail))
    | Ok(#("t" as char, tail))
    | Ok(#("u" as char, tail))
    | Ok(#("v" as char, tail))
    | Ok(#("w" as char, tail))
    | Ok(#("x" as char, tail))
    | Ok(#("y" as char, tail))
    | Ok(#("z" as char, tail))
    | Ok(#("A" as char, tail))
    | Ok(#("B" as char, tail))
    | Ok(#("C" as char, tail))
    | Ok(#("D" as char, tail))
    | Ok(#("E" as char, tail))
    | Ok(#("F" as char, tail))
    | Ok(#("G" as char, tail))
    | Ok(#("H" as char, tail))
    | Ok(#("I" as char, tail))
    | Ok(#("J" as char, tail))
    | Ok(#("K" as char, tail))
    | Ok(#("L" as char, tail))
    | Ok(#("M" as char, tail))
    | Ok(#("N" as char, tail))
    | Ok(#("O" as char, tail))
    | Ok(#("P" as char, tail))
    | Ok(#("Q" as char, tail))
    | Ok(#("R" as char, tail))
    | Ok(#("S" as char, tail))
    | Ok(#("T" as char, tail))
    | Ok(#("U" as char, tail))
    | Ok(#("V" as char, tail))
    | Ok(#("W" as char, tail))
    | Ok(#("X" as char, tail))
    | Ok(#("Y" as char, tail))
    | Ok(#("Z" as char, tail)) -> Ok(#(char, tail))
    // Covers both a non-letter grapheme and the empty-string Error case.
    _ -> Error(Nil)
  }
}
// JavaScript-target variant of the ALPHA parser.
//
// Matches one ASCII letter ("a"-"z" or "A"-"Z") as a literal prefix of `str`
// and returns `Ok(#(char, tail))`, where `char` is the matched letter and
// `tail` is the remainder. Any other input, including the empty string,
// yields `Error(Nil)`.
@target(javascript)
fn parse_alpha(str: String) -> Result(#(String, String), Nil) {
case str {
"a" as char <> tail
| "b" as char <> tail
| "c" as char <> tail
| "d" as char <> tail
| "e" as char <> tail
| "f" as char <> tail
| "g" as char <> tail
| "h" as char <> tail
| "i" as char <> tail
| "j" as char <> tail
| "k" as char <> tail
| "l" as char <> tail
| "m" as char <> tail
| "n" as char <> tail
| "o" as char <> tail
| "p" as char <> tail
| "q" as char <> tail
| "r" as char <> tail
| "s" as char <> tail
| "t" as char <> tail
| "u" as char <> tail
| "v" as char <> tail
| "w" as char <> tail
| "x" as char <> tail
| "y" as char <> tail
| "z" as char <> tail
| "A" as char <> tail
| "B" as char <> tail
| "C" as char <> tail
| "D" as char <> tail
| "E" as char <> tail
| "F" as char <> tail
| "G" as char <> tail
| "H" as char <> tail
| "I" as char <> tail
| "J" as char <> tail
| "K" as char <> tail
| "L" as char <> tail
| "M" as char <> tail
| "N" as char <> tail
| "O" as char <> tail
| "P" as char <> tail
| "Q" as char <> tail
| "R" as char <> tail
| "S" as char <> tail
| "T" as char <> tail
| "U" as char <> tail
| "V" as char <> tail
| "W" as char <> tail
| "X" as char <> tail
| "Y" as char <> tail
| "Z" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}

View File

@@ -370,19 +370,63 @@ fn unescape_percent(str: String) -> String {
}
}
// Erlang-target variant of the hexadecimal digit parser.
//
// Pops the first grapheme of `str` and accepts it only when it is one of
// "0"-"9", "a"-"f" or "A"-"F", returning `Ok(#(char, tail))` with the
// character in its original case. Anything else, including empty input,
// yields `Error(Nil)`.
//
// Matching the popped result against literals avoids the codepoint
// conversion, so a multi-codepoint grapheme is rejected with `Error(Nil)`
// instead of failing a `let assert [codepoint]`.
@target(erlang)
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
  case string.pop_grapheme(str) {
    Ok(#("0" as char, tail))
    | Ok(#("1" as char, tail))
    | Ok(#("2" as char, tail))
    | Ok(#("3" as char, tail))
    | Ok(#("4" as char, tail))
    | Ok(#("5" as char, tail))
    | Ok(#("6" as char, tail))
    | Ok(#("7" as char, tail))
    | Ok(#("8" as char, tail))
    | Ok(#("9" as char, tail))
    | Ok(#("a" as char, tail))
    | Ok(#("b" as char, tail))
    | Ok(#("c" as char, tail))
    | Ok(#("d" as char, tail))
    | Ok(#("e" as char, tail))
    | Ok(#("f" as char, tail))
    | Ok(#("A" as char, tail))
    | Ok(#("B" as char, tail))
    | Ok(#("C" as char, tail))
    | Ok(#("D" as char, tail))
    | Ok(#("E" as char, tail))
    | Ok(#("F" as char, tail)) -> Ok(#(char, tail))
    // Covers both a non-hex grapheme and the empty-string Error case.
    _ -> Error(Nil)
  }
}
// JavaScript-target variant of the hexadecimal digit parser.
//
// Matches one of "0"-"9", "a"-"f" or "A"-"F" as a literal prefix of `str` and
// returns `Ok(#(char, tail))`, where `char` is the matched character in its
// original case and `tail` is the remainder. Any other input, including the
// empty string, yields `Error(Nil)`.
@target(javascript)
pub fn parse_hex_digit(str: String) -> Result(#(String, String), Nil) {
case str {
"0" as char <> tail
| "1" as char <> tail
| "2" as char <> tail
| "3" as char <> tail
| "4" as char <> tail
| "5" as char <> tail
| "6" as char <> tail
| "7" as char <> tail
| "8" as char <> tail
| "9" as char <> tail
| "a" as char <> tail
| "b" as char <> tail
| "c" as char <> tail
| "d" as char <> tail
| "e" as char <> tail
| "f" as char <> tail
| "A" as char <> tail
| "B" as char <> tail
| "C" as char <> tail
| "D" as char <> tail
| "E" as char <> tail
| "F" as char <> tail -> Ok(#(char, tail))
_ -> Error(Nil)
}
}

View File

@@ -1,3 +1,4 @@
import gleam/string
import gleam/uri as uri2
import gluri as uri
import glychee/benchmark
@@ -9,6 +10,7 @@ pub fn main() {
configuration.set_pair(configuration.Warmup, 2)
configuration.set_pair(configuration.Parallel, 2)
// pop_benchmark()
parse_benchmark()
// reg_name_benchmark()
// ip_benchmark()
@@ -82,3 +84,276 @@ pub fn parse_benchmark() {
],
)
}
// Micro-benchmark comparing the different ways of consuming ASCII letters
// from the front of a string that are defined below (pop, pop2 ... pop5).
// Only compiled for the Erlang target.
//
// Beware that the benchmark labels do not line up with the function names:
//   "pop"     -> pop   (pop_grapheme + codepoint range guards)
//   "pop2"    -> pop4  (pop_grapheme + case on the popped character)
//   "pop3"    -> pop5  (case directly on the pop_grapheme result)
//   "match"   -> pop2  (string prefix patterns with `as` capture)
//   "match_2" -> pop3  (string prefix patterns without capture)
@target(erlang)
pub fn pop_benchmark() {
benchmark.run(
[
benchmark.Function("pop", fn(data) { fn() { pop(data, "") } }),
benchmark.Function("pop2", fn(data) { fn() { pop4(data, "") } }),
benchmark.Function("pop3", fn(data) { fn() { pop5(data, "") } }),
benchmark.Function("match", fn(data) { fn() { pop2(data, "") } }),
benchmark.Function("match_2", fn(data) { fn() { pop3(data, "") } }),
],
[
// benchmark.Data("long", "abcdefghijklmnopqrstuvwxyz"),
// The only active data set: every lower- and upper-case ASCII letter once,
// so each candidate walks all 52 accepted characters.
benchmark.Data(
"with user",
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
),
// benchmark.Data("ipv4", "https://192.255.36.4/"),
],
)
}
// Benchmark candidate: codepoint range guards.
//
// Pops one grapheme at a time, converts it to its integer codepoint and keeps
// recursing on the tail while the value lies in 0x41-0x5A or 0x61-0x7A.
// Returns Nil as soon as another character or the end of input is reached.
// The second argument is ignored; recursive calls pass the character just
// consumed.
pub fn pop(input, _) {
case string.pop_grapheme(input) {
Ok(#(char, tail)) -> {
// The single-element pattern means this assert fails for any grapheme that
// is made of more than one codepoint.
let assert [codepoint] = string.to_utf_codepoints(char)
let i = string.utf_codepoint_to_int(codepoint)
case i {
_ if i >= 0x41 && i <= 0x5A -> pop(tail, char)
_ if i >= 0x61 && i <= 0x7A -> pop(tail, char)
_ -> Nil
}
}
Error(_) -> Nil
}
}
// Benchmark candidate: string prefix patterns with capture.
//
// Matches an ASCII letter as a literal prefix of `input`, binds it to `j` via
// `as`, and recurses on the remainder with `j` as the (ignored) second
// argument. Returns Nil once the input no longer starts with an ASCII letter,
// which includes the empty string.
pub fn pop2(input, _) {
case input {
"a" as j <> tail
| "b" as j <> tail
| "c" as j <> tail
| "d" as j <> tail
| "e" as j <> tail
| "f" as j <> tail
| "g" as j <> tail
| "h" as j <> tail
| "i" as j <> tail
| "j" as j <> tail
| "k" as j <> tail
| "l" as j <> tail
| "m" as j <> tail
| "n" as j <> tail
| "o" as j <> tail
| "p" as j <> tail
| "q" as j <> tail
| "r" as j <> tail
| "s" as j <> tail
| "t" as j <> tail
| "u" as j <> tail
| "v" as j <> tail
| "w" as j <> tail
| "x" as j <> tail
| "y" as j <> tail
| "z" as j <> tail
| "A" as j <> tail
| "B" as j <> tail
| "C" as j <> tail
| "D" as j <> tail
| "E" as j <> tail
| "F" as j <> tail
| "G" as j <> tail
| "H" as j <> tail
| "I" as j <> tail
| "J" as j <> tail
| "K" as j <> tail
| "L" as j <> tail
| "M" as j <> tail
| "N" as j <> tail
| "O" as j <> tail
| "P" as j <> tail
| "Q" as j <> tail
| "R" as j <> tail
| "S" as j <> tail
| "T" as j <> tail
| "U" as j <> tail
| "V" as j <> tail
| "W" as j <> tail
| "X" as j <> tail
| "Y" as j <> tail
| "Z" as j <> tail -> pop2(tail, j)
_ -> Nil
}
}
// Benchmark candidate: string prefix patterns without capture.
//
// Same walk as pop2, but the matched letter is not bound to a name; the
// recursive call passes "" as the (ignored) second argument instead. Returns
// Nil once the input no longer starts with an ASCII letter, which includes
// the empty string.
pub fn pop3(input, _) {
case input {
"a" <> tail
| "b" <> tail
| "c" <> tail
| "d" <> tail
| "e" <> tail
| "f" <> tail
| "g" <> tail
| "h" <> tail
| "i" <> tail
| "j" <> tail
| "k" <> tail
| "l" <> tail
| "m" <> tail
| "n" <> tail
| "o" <> tail
| "p" <> tail
| "q" <> tail
| "r" <> tail
| "s" <> tail
| "t" <> tail
| "u" <> tail
| "v" <> tail
| "w" <> tail
| "x" <> tail
| "y" <> tail
| "z" <> tail
| "A" <> tail
| "B" <> tail
| "C" <> tail
| "D" <> tail
| "E" <> tail
| "F" <> tail
| "G" <> tail
| "H" <> tail
| "I" <> tail
| "J" <> tail
| "K" <> tail
| "L" <> tail
| "M" <> tail
| "N" <> tail
| "O" <> tail
| "P" <> tail
| "Q" <> tail
| "R" <> tail
| "S" <> tail
| "T" <> tail
| "U" <> tail
| "V" <> tail
| "W" <> tail
| "X" <> tail
| "Y" <> tail
| "Z" <> tail -> pop3(tail, "")
_ -> Nil
}
}
// Benchmark candidate: pop a grapheme, then match the character.
//
// Pops one grapheme at a time and compares it against the ASCII letters in a
// nested case expression, recursing on the tail while it matches. Returns Nil
// on the first non-letter or when the input is exhausted. The second argument
// is ignored; recursive calls pass the character just consumed.
pub fn pop4(input, _) {
case string.pop_grapheme(input) {
Ok(#(char, tail)) -> {
case char {
"a"
| "b"
| "c"
| "d"
| "e"
| "f"
| "g"
| "h"
| "i"
| "j"
| "k"
| "l"
| "m"
| "n"
| "o"
| "p"
| "q"
| "r"
| "s"
| "t"
| "u"
| "v"
| "w"
| "x"
| "y"
| "z"
| "A"
| "B"
| "C"
| "D"
| "E"
| "F"
| "G"
| "H"
| "I"
| "J"
| "K"
| "L"
| "M"
| "N"
| "O"
| "P"
| "Q"
| "R"
| "S"
| "T"
| "U"
| "V"
| "W"
| "X"
| "Y"
| "Z" -> pop4(tail, char)
_ -> Nil
}
}
Error(_) -> Nil
}
}
// Benchmark candidate: match directly on the pop_grapheme result.
//
// Pops one grapheme at a time and matches the whole `Ok(#(char, tail))`
// result against the ASCII letters in a single case expression, recursing on
// the tail while it matches. Returns Nil on the first non-letter or when the
// input is exhausted. The second argument is ignored; recursive calls pass
// the character just consumed.
//
// The recursive call targets pop5 itself. It previously called pop4, which
// meant only the first character was handled by this strategy and the
// benchmark was effectively timing pop4 for the rest of the input.
pub fn pop5(input, _) {
  case string.pop_grapheme(input) {
    Ok(#("a" as char, tail))
    | Ok(#("b" as char, tail))
    | Ok(#("c" as char, tail))
    | Ok(#("d" as char, tail))
    | Ok(#("e" as char, tail))
    | Ok(#("f" as char, tail))
    | Ok(#("g" as char, tail))
    | Ok(#("h" as char, tail))
    | Ok(#("i" as char, tail))
    | Ok(#("j" as char, tail))
    | Ok(#("k" as char, tail))
    | Ok(#("l" as char, tail))
    | Ok(#("m" as char, tail))
    | Ok(#("n" as char, tail))
    | Ok(#("o" as char, tail))
    | Ok(#("p" as char, tail))
    | Ok(#("q" as char, tail))
    | Ok(#("r" as char, tail))
    | Ok(#("s" as char, tail))
    | Ok(#("t" as char, tail))
    | Ok(#("u" as char, tail))
    | Ok(#("v" as char, tail))
    | Ok(#("w" as char, tail))
    | Ok(#("x" as char, tail))
    | Ok(#("y" as char, tail))
    | Ok(#("z" as char, tail))
    | Ok(#("A" as char, tail))
    | Ok(#("B" as char, tail))
    | Ok(#("C" as char, tail))
    | Ok(#("D" as char, tail))
    | Ok(#("E" as char, tail))
    | Ok(#("F" as char, tail))
    | Ok(#("G" as char, tail))
    | Ok(#("H" as char, tail))
    | Ok(#("I" as char, tail))
    | Ok(#("J" as char, tail))
    | Ok(#("K" as char, tail))
    | Ok(#("L" as char, tail))
    | Ok(#("M" as char, tail))
    | Ok(#("N" as char, tail))
    | Ok(#("O" as char, tail))
    | Ok(#("P" as char, tail))
    | Ok(#("Q" as char, tail))
    | Ok(#("R" as char, tail))
    | Ok(#("S" as char, tail))
    | Ok(#("T" as char, tail))
    | Ok(#("U" as char, tail))
    | Ok(#("V" as char, tail))
    | Ok(#("W" as char, tail))
    | Ok(#("X" as char, tail))
    | Ok(#("Y" as char, tail))
    | Ok(#("Z" as char, tail)) -> pop5(tail, char)
    // Covers both a non-letter grapheme and the empty-string Error case.
    _ -> Nil
  }
}

16
test/benchmark.js Normal file
View File

@@ -0,0 +1,16 @@
// Compares gluri's `parse` with the Gleam standard library's URI parser using
// mitata. Both modules are imported from the compiled output under
// ../build/dev/javascript, so the project presumably has to be built for the
// JavaScript target before running this script.
import { run, bench } from "mitata";
import { parse } from "../build/dev/javascript/gluri/gluri.mjs";
import { parse as parse2 } from "../build/dev/javascript/gleam_stdlib/gleam/uri.mjs";

// Shared input so both parsers are guaranteed to be measured on the same URI
// (scheme, userinfo with a percent-escape and sub-delims, host, path and
// fragment).
const SAMPLE_URI =
  "https://test_name:user%20$$$@github.com/gleam-lang/stdlib/issues/523#issuecomment-3288230480";

bench("parse", () => parse(SAMPLE_URI));
bench("parse2", () => parse2(SAMPLE_URI));

await run();