feat: More work on uri api

This commit is contained in:
2025-09-08 00:33:07 +01:00
parent e8f17bbba1
commit 7f19120f08
3 changed files with 676 additions and 250 deletions

505
src/internal/utils.gleam Normal file
View File

@@ -0,0 +1,505 @@
import gleam/bool
import gleam/int
import gleam/list
import gleam/option.{None, Some}
import gleam/result
import gleam/string
import internal/parser
import splitter.{type Splitter}
import types.{type Uri, Uri}
/// Resolve `relative` against `base` and return the resulting URI.
///
/// Returns `Error(Nil)` when `base` has no scheme. Otherwise:
/// - a `relative` with its own scheme is used as-is (dot segments removed);
/// - a `relative` with a host, port or userinfo keeps everything but takes
///   the scheme from `base`;
/// - an empty relative path keeps the base path and falls back to the base
///   query when `relative` has none;
/// - a relative path starting with "/" replaces the base path;
/// - any other relative path is merged with the base path.
///
/// The fragment of the result always comes from `relative`.
pub fn merge(base: Uri, relative: Uri) -> Result(Uri, Nil) {
  use <- bool.guard(when: option.is_none(base.scheme), return: Error(Nil))
  let has_own_authority =
    option.is_some(relative.host)
    || option.is_some(relative.port)
    || option.is_some(relative.userinfo)
  let resolved = case relative.scheme, has_own_authority, relative.path {
    Some(_), _, _ -> Uri(..relative, path: remove_dot_segments(relative.path))
    None, True, _ ->
      Uri(
        ..relative,
        scheme: base.scheme,
        path: remove_dot_segments(relative.path),
      )
    None, False, "" ->
      Uri(..base, query: option.or(relative.query, base.query))
    None, False, "/" <> _ ->
      Uri(
        ..base,
        path: remove_dot_segments(relative.path),
        query: relative.query,
      )
    None, False, _ -> {
      let merged = merge_paths(base, relative)
      Uri(..base, path: remove_dot_segments(merged), query: relative.query)
    }
  }
  Ok(Uri(..resolved, fragment: relative.fragment))
}
/// True when the URI has a host component.
fn has_authority(uri: Uri) -> Bool {
  option.is_some(uri.host)
}
/// Merge a relative path with the path of `base`.
///
/// - base has an authority and an empty path: the result is "/" followed by
///   the relative path;
/// - base has no authority and its path contains no "/": the whole base path
///   is discarded and the relative path is returned unchanged (previously a
///   spurious leading "/" was added, turning `urn:a` + `b` into `urn:/b`);
/// - otherwise: everything after the last "/" of the base path is replaced
///   by the relative path.
fn merge_paths(base: Uri, relative: Uri) -> String {
  case has_authority(base), base.path, string.contains(base.path, "/") {
    True, "", _ -> "/" <> relative.path
    False, _, False -> relative.path
    _, _, _ -> remove_segment(base.path) <> "/" <> relative.path
  }
}
/// Normalise a URI so that equivalent URIs compare equal.
///
/// - the scheme and host are lowercased;
/// - percent-escapes of unreserved characters are decoded, and the hex
///   digits of every remaining escape are uppercased;
/// - dot segments ("." and "..") are removed from the path.
///
/// Only the scheme and host are case-folded; userinfo, path, query and
/// fragment keep their case.
pub fn normalise(uri: Uri) -> Uri {
  let percent_splitter = splitter.new(["%"])
  let percent_normaliser = normalise_percent(percent_splitter, _)
  let scheme = uri.scheme |> option.map(string.lowercase)
  let userinfo = uri.userinfo |> option.map(percent_normaliser)
  let port = uri.port
  // Decode escapes first so that a decoded letter (e.g. `%41`) is lowercased
  // with the rest of the host, then normalise again to restore the uppercase
  // hex digits of the escapes that lowercasing touched.
  let host =
    uri.host
    |> option.map(percent_normaliser)
    |> option.map(string.lowercase)
    |> option.map(percent_normaliser)
  let path = uri.path |> percent_normaliser |> remove_dot_segments
  let query = uri.query |> option.map(percent_normaliser)
  let fragment = uri.fragment |> option.map(percent_normaliser)
  Uri(scheme, userinfo, host, port, path, query, fragment)
}

/// Remove "." and ".." segments from a path.
fn remove_dot_segments(path: String) -> String {
  do_remove_dot_segments(path, "")
}

/// Worker for `remove_dot_segments`: `path` is the unprocessed input and
/// `acc` the output built so far.
fn do_remove_dot_segments(path: String, acc: String) -> String {
  case path {
    // A leading "../" or "./" is only a dot segment at the very start of the
    // path. Input is consumed one grapheme at a time, so without the guard
    // "/a../b" would lose its "../" once "/a" had been consumed.
    "../" <> rest | "./" <> rest if acc == "" ->
      do_remove_dot_segments(rest, acc)
    "/./" <> rest -> do_remove_dot_segments("/" <> rest, acc)
    "/." -> acc <> "/"
    "/../" <> rest -> do_remove_dot_segments("/" <> rest, remove_segment(acc))
    "/.." -> remove_segment(acc) <> "/"
    "." | ".." | "" -> acc <> path
    _ -> {
      // `path` is non-empty here, so there is always a grapheme to pop.
      let assert Ok(#(char, rest)) = string.pop_grapheme(path)
      do_remove_dot_segments(rest, acc <> char)
    }
  }
}

/// Drop the last segment of `path` together with the "/" that precedes it.
/// A path without any "/" becomes "".
fn remove_segment(path: String) -> String {
  path |> string.reverse |> do_remove_segment |> string.reverse
}

/// Worker for `remove_segment`, operating on the reversed path: returns
/// everything after the first "/", or "" when there is none.
fn do_remove_segment(path: String) -> String {
  case path {
    "/" <> rest -> rest
    "" -> ""
    _ -> {
      do_remove_segment(path |> string.drop_start(1))
    }
  }
}

/// Normalise the percent-escapes in `str` using a splitter built for "%".
fn normalise_percent(percent_splitter: Splitter, str: String) -> String {
  do_normalise_percent(percent_splitter, str, "")
}

/// Worker for `normalise_percent`; `res` accumulates the output.
///
/// For each "%" followed by two hex digits: an escape of an unreserved
/// character is replaced by that character (case preserved), any other
/// escape is kept with its hex digits uppercased. A "%" that is not followed
/// by two hex digits is dropped from the output.
fn do_normalise_percent(
  percent_splitter: Splitter,
  str: String,
  res: String,
) -> String {
  let #(before, pc, after) = splitter.split(percent_splitter, str)
  case pc {
    // No "%" left.
    "" -> res <> before
    _ -> {
      case after {
        // Trailing "%" with nothing after it.
        "" -> res <> before
        _ -> {
          let #(pc_val, rest) = case parser.parse_hex_digit(after) {
            Ok(#(pc1, rest)) -> {
              case parser.parse_hex_digit(rest) {
                Ok(#(pc2, rest)) -> {
                  let hex = pc1 <> pc2
                  let v = unescape_percent(hex)
                  case v == hex {
                    // Not an unreserved character: keep the escape.
                    True -> #("%" <> string.uppercase(v), rest)
                    // Decoded character is emitted verbatim; case folding
                    // is the caller's decision, not this helper's.
                    False -> #(v, rest)
                  }
                }
                Error(_) -> #("", after)
              }
            }
            Error(_) -> #("", after)
          }
          do_normalise_percent(percent_splitter, rest, res <> before <> pc_val)
        }
      }
    }
  }
}
/// Given the two hex digits of a percent-escape, return the character they
/// encode when it is an unreserved character; otherwise return `str`
/// unchanged (also when `str` is not valid hexadecimal).
fn unescape_percent(str: String) -> String {
  let code = int.base_parse(str, 16)
  case code, result.map(code, is_unreserved_char) {
    Ok(value), Ok(True) -> {
      let assert Ok(cpnt) = string.utf_codepoint(value)
      string.from_utf_codepoints([cpnt])
    }
    _, _ -> str
  }
}
/// True for the code points of the characters `$-_.+!*'()`, which the
/// encoder leaves as they are.
fn encoding_not_needed(i: Int) -> Bool {
  // ! $ ' ( ) * + - . _
  list.contains([33, 36, 39, 40, 41, 42, 43, 45, 46, 95], i)
}
/// True when the code point is an ASCII letter, an ASCII digit, or one of
/// `-`, `.`, `_`, `~`.
fn is_unreserved_char(i: Int) -> Bool {
  let is_digit = i >= 48 && i <= 57
  let is_upper = i >= 65 && i <= 90
  let is_lower = i >= 97 && i <= 122
  let is_mark = i == 45 || i == 46 || i == 95 || i == 126
  is_digit || is_upper || is_lower || is_mark
}
/// Decode every percent-escape in `str`.
///
/// Returns `Error(Nil)` when an escape is malformed or the escaped bytes do
/// not decode to a valid code point.
pub fn percent_decode(str: String) -> Result(String, Nil) {
  ["%"]
  |> splitter.new
  |> do_percent_decode(str, "")
}
/// Worker for `percent_decode`; `acc` holds the text decoded so far.
///
/// Splits at the next "%", reads the two hex digits that follow and, based
/// on the high bits of that first byte, decodes a 1-, 2-, 3- or 4-byte
/// sequence before continuing with the remaining input.
fn do_percent_decode(
  splitter: splitter.Splitter,
  str: String,
  acc: String,
) -> Result(String, Nil) {
  case splitter.split(splitter, str) {
    #(before, "", "") -> Ok(acc <> before)
    #(before, "%", after) -> {
      use #(hd1, rest) <- result.try(parser.parse_hex_digit(after))
      use #(hd2, rest) <- result.try(parser.parse_hex_digit(rest))
      let lead = hd1 <> hd2
      use byte <- result.try(int.base_parse(lead, 16))
      // The masks are tested in order: 0xxxxxxx, 110xxxxx, 1110xxxx,
      // 11110xxx. Anything else cannot start a sequence.
      let decoded = case
        int.bitwise_and(byte, 128),
        int.bitwise_and(byte, 224),
        int.bitwise_and(byte, 240),
        int.bitwise_and(byte, 248)
      {
        0, _, _, _ -> {
          use cpnt <- result.map(string.utf_codepoint(byte))
          #(string.from_utf_codepoints([cpnt]), rest)
        }
        _, 192, _, _ -> decode_2byte_utf(lead, rest)
        _, _, 224, _ -> decode_3byte_utf(lead, rest)
        _, _, _, 240 -> decode_4byte_utf(lead, rest)
        _, _, _, _ -> Error(Nil)
      }
      use #(char, rest) <- result.try(decoded)
      do_percent_decode(splitter, rest, acc <> before <> char)
    }
    _ -> Error(Nil)
  }
}
/// Decode a percent-encoded 3-byte UTF-8 sequence.
///
/// `first_byte` holds the two hex digits of the lead byte (already
/// consumed by the caller); `rest` must start with the two remaining
/// escapes (`%XX%XX`). Returns the decoded character and the input left
/// after the sequence.
///
/// Returns `Error(Nil)` when an escape is missing or malformed, when the
/// lead byte is not `1110xxxx`, when a continuation byte is not `10xxxxxx`,
/// when the sequence is an overlong encoding (code point below U+0800), or
/// when the result is not a valid code point.
pub fn decode_3byte_utf(
  first_byte: String,
  rest: String,
) -> Result(#(String, String), Nil) {
  use rest <- result.try(case rest {
    "%" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(hd3, rest) <- result.try(parser.parse_hex_digit(rest))
  use #(hd4, rest) <- result.try(parser.parse_hex_digit(rest))
  use rest <- result.try(case rest {
    "%" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(hd5, rest) <- result.try(parser.parse_hex_digit(rest))
  use #(hd6, rest) <- result.try(parser.parse_hex_digit(rest))
  use bytes <- result.try(int.base_parse(
    first_byte <> hd3 <> hd4 <> hd5 <> hd6,
    16,
  ))
  case <<bytes:size(24)>> {
    <<
      lead:size(4),
      w:size(4),
      c1:size(2),
      x:size(4),
      y1:size(2),
      c2:size(2),
      y2:size(2),
      z:size(4),
    >>
      if lead == 14 && c1 == 2 && c2 == 2
    -> {
      // Reassemble the 16 payload bits into the code point.
      let assert <<i:size(16)>> = <<
        w:size(4),
        x:size(4),
        y1:size(2),
        y2:size(2),
        z:size(4),
      >>
      // Values below U+0800 fit in fewer bytes: overlong encoding.
      use <- bool.guard(when: i < 2048, return: Error(Nil))
      use res <- result.try(string.utf_codepoint(i))
      Ok(#(string.from_utf_codepoints([res]), rest))
    }
    _ -> Error(Nil)
  }
}
/// Decode a percent-encoded 2-byte UTF-8 sequence.
///
/// `first_byte` holds the two hex digits of the lead byte (already
/// consumed by the caller); `rest` must start with the remaining escape
/// (`%XX`). Returns the decoded character and the input left after the
/// sequence.
///
/// Returns `Error(Nil)` when the escape is missing or malformed, when the
/// lead byte is not `110xxxxx`, when the continuation byte is not
/// `10xxxxxx`, when the sequence is an overlong encoding (code point below
/// U+0080), or when the result is not a valid code point.
pub fn decode_2byte_utf(
  first_byte: String,
  rest: String,
) -> Result(#(String, String), Nil) {
  use rest <- result.try(case rest {
    "%" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(hd3, rest) <- result.try(parser.parse_hex_digit(rest))
  use #(hd4, rest) <- result.try(parser.parse_hex_digit(rest))
  use bytes <- result.try(int.base_parse(first_byte <> hd3 <> hd4, 16))
  case <<bytes:size(16)>> {
    <<
      lead:size(3),
      x:size(3),
      y1:size(2),
      cont:size(2),
      y2:size(2),
      z:size(4),
    >>
      if lead == 6 && cont == 2
    -> {
      // Reassemble the 11 payload bits into the code point.
      let assert <<i:size(16)>> = <<
        0:size(5),
        x:size(3),
        y1:size(2),
        y2:size(2),
        z:size(4),
      >>
      // Values below U+0080 fit in one byte: overlong encoding.
      use <- bool.guard(when: i < 128, return: Error(Nil))
      use res <- result.try(string.utf_codepoint(i))
      Ok(#(string.from_utf_codepoints([res]), rest))
    }
    _ -> Error(Nil)
  }
}
/// Decode a percent-encoded 4-byte UTF-8 sequence.
///
/// `first_byte` holds the two hex digits of the lead byte (already
/// consumed by the caller); `rest` must start with the three remaining
/// escapes (`%XX%XX%XX`). Returns the decoded character and the input left
/// after the sequence.
///
/// Returns `Error(Nil)` when an escape is missing or malformed, when the
/// lead byte is not `11110xxx`, when a continuation byte is not `10xxxxxx`,
/// when the sequence is an overlong encoding (code point below U+10000), or
/// when the result is not a valid code point.
fn decode_4byte_utf(
  first_byte: String,
  rest: String,
) -> Result(#(String, String), Nil) {
  use rest <- result.try(case rest {
    "%" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(hd3, rest) <- result.try(parser.parse_hex_digit(rest))
  use #(hd4, rest) <- result.try(parser.parse_hex_digit(rest))
  use rest <- result.try(case rest {
    "%" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(hd5, rest) <- result.try(parser.parse_hex_digit(rest))
  use #(hd6, rest) <- result.try(parser.parse_hex_digit(rest))
  use rest <- result.try(case rest {
    "%" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(hd7, rest) <- result.try(parser.parse_hex_digit(rest))
  use #(hd8, rest) <- result.try(parser.parse_hex_digit(rest))
  use bytes <- result.try(int.base_parse(
    first_byte <> hd3 <> hd4 <> hd5 <> hd6 <> hd7 <> hd8,
    16,
  ))
  case <<bytes:size(32)>> {
    <<
      lead:size(5),
      u:size(1),
      v1:size(2),
      c1:size(2),
      v2:size(2),
      w:size(4),
      c2:size(2),
      x:size(4),
      y1:size(2),
      c3:size(2),
      y2:size(2),
      z:size(4),
    >>
      if lead == 30 && c1 == 2 && c2 == 2 && c3 == 2
    -> {
      // Reassemble the 21 payload bits into the code point.
      let assert <<i:size(24)>> = <<
        0:size(3),
        u:size(1),
        v1:size(2),
        v2:size(2),
        w:size(4),
        x:size(4),
        y1:size(2),
        y2:size(2),
        z:size(4),
      >>
      // Values below U+10000 fit in fewer bytes: overlong encoding.
      use <- bool.guard(when: i < 65_536, return: Error(Nil))
      use res <- result.try(string.utf_codepoint(i))
      Ok(#(string.from_utf_codepoints([res]), rest))
    }
    _ -> Error(Nil)
  }
}
/// Percent-encode `str` one code point at a time and join the pieces.
pub fn do_percent_encode(str: String) -> String {
  str
  |> string.to_utf_codepoints
  |> list.map(fn(cpnt) {
    cpnt |> string.utf_codepoint_to_int |> encode_codepoint
  })
  |> string.concat
}
/// Percent-encode a single code point.
///
/// ASCII characters that are unreserved, or that `encoding_not_needed`
/// accepts, are returned as-is. Every other code point is converted to its
/// UTF-8 bytes and each byte is written as "%" followed by exactly two
/// uppercase hex digits.
fn encode_codepoint(codepoint: Int) -> String {
  let is_literal =
    codepoint <= 127
    && { is_unreserved_char(codepoint) || encoding_not_needed(codepoint) }
  case is_literal {
    True -> {
      let assert Ok(cpnt) = string.utf_codepoint(codepoint)
      string.from_utf_codepoints([cpnt])
    }
    False ->
      codepoint
      |> utf8_bytes
      |> list.map(percent_encode_byte)
      |> string.concat
  }
}

/// Split a code point into the bytes of its UTF-8 encoding.
fn utf8_bytes(codepoint: Int) -> List(Int) {
  case codepoint {
    // 0xxxxxxx
    _ if codepoint <= 127 -> [codepoint]
    // 110xxxyy 10yyzzzz
    _ if codepoint <= 2047 -> {
      let assert <<_:size(5), x:size(3), y1:size(2), y2:size(2), z:size(4)>> = <<
        codepoint:size(16),
      >>
      let assert <<b1:size(8), b2:size(8)>> = <<
        6:size(3),
        x:size(3),
        y1:size(2),
        2:size(2),
        y2:size(2),
        z:size(4),
      >>
      [b1, b2]
    }
    // 1110wwww 10xxxxyy 10yyzzzz
    _ if codepoint <= 65_535 -> {
      let assert <<w:size(4), x:size(4), y1:size(2), y2:size(2), z:size(4)>> = <<
        codepoint:size(16),
      >>
      let assert <<b1:size(8), b2:size(8), b3:size(8)>> = <<
        14:size(4),
        w:size(4),
        2:size(2),
        x:size(4),
        y1:size(2),
        2:size(2),
        y2:size(2),
        z:size(4),
      >>
      [b1, b2, b3]
    }
    // 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
    _ -> {
      let assert <<
        _:size(3),
        u:size(1),
        v1:size(2),
        v2:size(2),
        w:size(4),
        x:size(4),
        y1:size(2),
        y2:size(2),
        z:size(4),
      >> = <<codepoint:size(24)>>
      let assert <<b1:size(8), b2:size(8), b3:size(8), b4:size(8)>> = <<
        30:size(5),
        u:size(1),
        v1:size(2),
        2:size(2),
        v2:size(2),
        w:size(4),
        2:size(2),
        x:size(4),
        y1:size(2),
        2:size(2),
        y2:size(2),
        z:size(4),
      >>
      [b1, b2, b3, b4]
    }
  }
}

/// Write one byte as a percent-escape with exactly two hex digits.
fn percent_encode_byte(byte: Int) -> String {
  let hex = int.to_base16(byte)
  case byte < 16 {
    // `int.to_base16` gives a single digit below 16; an escape needs two,
    // otherwise "\n" would become "%A", which cannot be decoded again.
    True -> "%0" <> hex
    False -> "%" <> hex
  }
}

View File

@@ -1,10 +1,9 @@
import gleam/bool
import gleam/int
import gleam/option.{None, Some}
import gleam/string
import gleam/uri
import internal/parser
import splitter.{type Splitter}
import internal/utils
import types.{type Uri, Uri}
pub fn parse(uri: String) -> Result(Uri, Nil) {
@@ -12,92 +11,47 @@ pub fn parse(uri: String) -> Result(Uri, Nil) {
}
pub fn to_string(uri: Uri) -> String {
let parts = case uri.fragment {
Some(fragment) -> ["#", fragment]
None -> []
let uri_string = case uri.scheme {
Some(scheme) -> scheme <> ":"
_ -> ""
}
let parts = case uri.query {
Some(query) -> ["?", query, ..parts]
None -> parts
let uri_string = case uri.host {
Some(_) -> {
uri_string
<> "//"
<> case uri.userinfo {
Some(userinfo) -> userinfo <> "@"
_ -> ""
}
<> case uri.host {
Some(host) -> host
_ -> ""
}
<> case uri.port {
Some(port) -> ":" <> int.to_string(port)
_ -> ""
}
}
_ -> uri_string
}
let parts = [uri.path, ..parts]
let parts = case uri.host, string.starts_with(uri.path, "/") {
Some(host), False if host != "" -> ["/", ..parts]
_, _ -> parts
}
let parts = case uri.host, uri.port {
Some(_), Some(port) -> [":", int.to_string(port), ..parts]
_, _ -> parts
}
let parts = case uri.scheme, uri.userinfo, uri.host {
Some(s), Some(u), Some(h) -> [s, "://", u, "@", h, ..parts]
Some(s), None, Some(h) -> [s, "://", h, ..parts]
Some(s), Some(_), None | Some(s), None, None -> [s, ":", ..parts]
None, None, Some(h) -> ["//", h, ..parts]
_, _, _ -> parts
}
string.concat(parts)
let uri_string = uri_string <> uri.path
let uri_string =
uri_string
<> case uri.query {
Some(query) -> "?" <> query
_ -> ""
}
let uri_string =
uri_string
<> case uri.fragment {
Some(fragment) -> "#" <> fragment
_ -> ""
}
uri_string
}
pub fn merge(base: Uri, relative: Uri) -> Result(Uri, Nil) {
use <- bool.guard(when: base.scheme == None, return: Error(Nil))
let uri = case relative.scheme {
Some(_) -> {
Uri(..relative, path: remove_dot_segments(relative.path))
}
None -> {
let scheme = base.scheme
case relative.host, relative.port, relative.userinfo {
Some(_), _, _ | _, Some(_), _ | _, _, Some(_) -> {
Uri(..relative, scheme:, path: remove_dot_segments(relative.path))
}
_, _, _ -> {
case relative.path {
"" -> {
let query = case relative.query {
Some(_) -> relative.query
_ -> base.query
}
Uri(..base, query:)
}
"/" <> _ -> {
Uri(
..base,
path: remove_dot_segments(relative.path),
query: relative.query,
)
}
_ -> {
let path = merge_paths(base, relative)
Uri(
..base,
path: remove_dot_segments(path),
query: relative.query,
)
}
}
}
}
}
}
Uri(..uri, fragment: relative.fragment) |> Ok
}
fn has_authority(uri: Uri) -> Bool {
case uri.host, uri.userinfo, uri.port {
Some(_), _, _ | _, Some(_), _ | _, _, Some(_) -> True
_, _, _ -> False
}
}
fn merge_paths(base: Uri, relative: Uri) -> String {
case has_authority(base), base.path {
True, "" -> "/" <> relative.path
_, _ -> {
remove_segment(base.path) <> "/" <> relative.path
}
}
utils.merge(base, relative)
}
pub fn normalize(uri: Uri) -> Uri {
@@ -105,170 +59,7 @@ pub fn normalize(uri: Uri) -> Uri {
}
pub fn normalise(uri: Uri) -> Uri {
let percent_splitter = splitter.new(["%"])
let percent_normaliser = normalise_percent(percent_splitter, _)
let scheme = uri.scheme |> option.map(string.lowercase)
let userinfo = uri.userinfo |> option.map(percent_normaliser)
let port = uri.port
let host =
uri.host |> option.map(string.lowercase) |> option.map(percent_normaliser)
let path = uri.path |> percent_normaliser |> remove_dot_segments
let query = uri.query |> option.map(percent_normaliser)
let fragment = uri.fragment |> option.map(percent_normaliser)
Uri(scheme, userinfo, host, port, path, query, fragment)
}
fn remove_dot_segments(path: String) -> String {
do_remove_dot_segments(path, "")
}
fn do_remove_dot_segments(path: String, acc: String) -> String {
case path {
"../" <> rest | "./" <> rest -> do_remove_dot_segments(rest, acc)
"/./" <> rest -> do_remove_dot_segments("/" <> rest, acc)
"/." -> acc <> "/"
"/../" <> rest -> do_remove_dot_segments("/" <> rest, remove_segment(acc))
"/.." -> remove_segment(acc) <> "/"
"." | ".." | "" -> acc <> path
_ -> {
let assert Ok(#(char, rest)) = string.pop_grapheme(path)
do_remove_dot_segments(rest, acc <> char)
}
}
}
fn remove_segment(path: String) -> String {
path |> echo |> string.reverse |> do_remove_segment |> string.reverse
}
fn do_remove_segment(path: String) -> String {
case path {
"/" <> rest -> rest
"" -> ""
_ -> {
do_remove_segment(path |> string.drop_start(1))
}
}
}
fn normalise_percent(percent_splitter: Splitter, str: String) -> String {
do_normalise_percent(percent_splitter, str, "")
}
fn do_normalise_percent(
percent_splitter: Splitter,
str: String,
res: String,
) -> String {
let #(before, pc, after) = splitter.split(percent_splitter, str)
case pc {
"" -> res <> before
_ -> {
case after {
"" -> res <> before
_ -> {
let #(pc_val, rest) = case parser.parse_hex_digit(after) {
Ok(#(pc1, rest)) -> {
case parser.parse_hex_digit(rest) {
Ok(#(pc2, rest)) -> {
let hex = pc1 <> pc2
let v = unescape_percent(hex)
case v == hex {
True -> #("%" <> string.uppercase(v), rest)
False -> #(v, rest)
}
}
Error(_) -> #("", after)
}
}
Error(_) -> #("", after)
}
do_normalise_percent(percent_splitter, rest, res <> before <> pc_val)
}
}
}
}
}
fn unescape_percent(str: String) -> String {
case int.base_parse(str, 16) {
Error(_) -> str
Ok(ascii) -> {
case ascii {
45
| 46
| 95
| 126
| 48
| 49
| 50
| 51
| 52
| 53
| 54
| 55
| 56
| 57
| 65
| 66
| 67
| 68
| 69
| 70
| 71
| 72
| 73
| 74
| 75
| 76
| 77
| 78
| 79
| 80
| 81
| 82
| 83
| 84
| 85
| 86
| 87
| 88
| 89
| 90
| 97
| 98
| 99
| 100
| 101
| 102
| 103
| 104
| 105
| 106
| 107
| 108
| 109
| 110
| 111
| 112
| 113
| 114
| 115
| 116
| 117
| 118
| 119
| 120
| 121
| 122 -> {
let assert Ok(cpnt) = string.utf_codepoint(ascii)
string.from_utf_codepoints([cpnt])
}
_ -> str
}
}
}
utils.normalise(uri)
}
pub fn are_equivalent(uri1: Uri, uri2: Uri) {
@@ -277,9 +68,7 @@ pub fn are_equivalent(uri1: Uri, uri2: Uri) {
let uri1 = normalise(uri1)
let uri2 = normalise(uri2)
use <- bool.guard(when: uri1 == uri2, return: True)
False
uri1 == uri2
}
pub fn to_uri(uri: Uri) -> uri.Uri {
@@ -305,3 +94,23 @@ pub fn from_uri(uri: uri.Uri) -> Uri {
uri.fragment,
)
}
/// Decode the percent-escapes in `value`; delegates to
/// `utils.percent_decode`.
pub fn percent_decode(value: String) -> Result(String, Nil) {
  value |> utils.percent_decode
}
/// Percent-encode `value`; delegates to `utils.do_percent_encode`.
pub fn percent_encode(value: String) -> String {
  value |> utils.do_percent_encode
}
/// Not implemented yet: the body is `todo`, so calling this crashes at
/// runtime.
// NOTE(review): presumably meant to serialise key/value pairs into a query
// string — confirm the intended format before implementing.
pub fn query_to_string(query: List(#(String, String))) -> String {
todo
}
/// Not implemented yet: the body is `todo`, so calling this crashes at
/// runtime.
// NOTE(review): presumably meant to parse a query string into key/value
// pairs — confirm the intended format before implementing.
pub fn parse_query(query: String) -> Result(List(#(String, String)), Nil) {
todo
}
/// Not implemented yet: the body is `todo`, so calling this crashes at
/// runtime.
// NOTE(review): presumably meant to return the origin of the URI — confirm
// the intended definition before implementing.
pub fn origin(uri: Uri) -> Result(String, Nil) {
todo
}

View File

@@ -1,3 +1,4 @@
import gleam/list
import gleam/option.{None, Some}
import gleeunit/should
import startest.{describe, it}
@@ -1094,6 +1095,117 @@ pub fn normalise_tests() {
}),
])
}
/// Tests for `uri.to_string`: a URI with every component set, and URIs
/// that only carry a path.
pub fn to_string_tests() {
  describe("to_string test", [
    it("simple test", fn() {
      types.Uri(
        scheme: Some("https"),
        userinfo: Some("weebl:bob"),
        host: Some("example.com"),
        port: Some(1234),
        path: "/path",
        query: Some("query=true"),
        fragment: Some("fragment"),
      )
      |> uri.to_string
      |> should.equal(
        "https://weebl:bob@example.com:1234/path?query=true#fragment",
      )
    }),
    it("path only", fn() {
      [
        #(types.Uri(..types.empty_uri, path: "/"), "/"),
        #(types.Uri(..types.empty_uri, path: "/blah"), "/blah"),
        // userinfo without a host is expected to be left out of the output
        #(
          types.Uri(..types.empty_uri, userinfo: Some("user"), path: "/blah"),
          "/blah",
        ),
        #(types.Uri(..types.empty_uri, path: ""), ""),
      ]
      |> list.each(fn(fixture) {
        let #(input, expected) = fixture
        input
        |> uri.to_string
        |> should.equal(expected)
      })
    }),
  ])
}
/// Tests for `uri.are_equivalent`: each pair must be reported as equivalent
/// even though the two URIs differ in case, percent-escapes or dot segments.
pub fn equivalence_tests() {
  describe("equivalence tests", [
    it("equal", fn() {
      [
        // scheme and host case
        #("http://example.com", "HTTP://EXAMPLE.COM"),
        // percent-escaped unreserved character in the host
        #("http://example.com", "HTTP://EX%41MPLE.COM"),
        // dot segments inside the path
        #("http://example.com/a/b/c", "HTTP://EXaMPLE.COM/a/d/../b/e/../c"),
        // more ".." segments than the path has levels
        #(
          "http://example.com/a/b/c",
          "HTTP://EXaMPLE.COM/a/../../../../a/b/e/../c",
        ),
      ]
      |> list.each(fn(pair) {
        let #(left, right) = pair
        let uri1 = uri.parse(left) |> should.be_ok
        let uri2 = uri.parse(right) |> should.be_ok
        uri.are_equivalent(uri1, uri2) |> should.be_true
      })
    }),
  ])
}
/// Tests for `uri.percent_encode` and `uri.percent_decode`, driven by
/// `percent_codec_fixtures`: encoding the first element of each pair must
/// give the second, and decoding the second must give back the first.
pub fn percent_encode_tests() {
  describe("percent encoding", [
    it("encoding", fn() {
      // `list.each` runs the assertions for their effect and returns Nil.
      use #(decoded, encoded) <- list.each(percent_codec_fixtures)
      uri.percent_encode(decoded)
      |> should.equal(encoded)
    }),
    it("decoding", fn() {
      use #(decoded, encoded) <- list.each(percent_codec_fixtures)
      uri.percent_decode(encoded)
      |> should.equal(Ok(decoded))
    }),
  ])
}
// Fixtures shared by the percent encoding and decoding tests.
// Each pair is #(plain text, percent-encoded form): encoding the first
// element must produce the second, and decoding the second must produce the
// first.
const percent_codec_fixtures = [
#(" ", "%20"),
#(",", "%2C"),
#(";", "%3B"),
#(":", "%3A"),
#("!", "!"),
#("?", "%3F"),
#("'", "'"),
#("(", "("),
#(")", ")"),
#("[", "%5B"),
#("@", "%40"),
#("/", "%2F"),
#("\\", "%5C"),
#("&", "%26"),
#("#", "%23"),
#("=", "%3D"),
#("~", "~"),
#("ñ", "%C3%B1"),
#("-", "-"),
#("_", "_"),
#(".", "."),
#("*", "*"),
#("+", "+"),
#("100% great+fun", "100%25%20great+fun"),
]
// gleeunit test functions end in `_test`
// pub fn uri_test() {
// match("uri:")