feat: Added port/scheme normalisation

This commit is contained in:
2025-09-08 13:25:03 +01:00
parent 381b3a9430
commit 62943e11e7
3 changed files with 137 additions and 39 deletions

View File

@@ -1,7 +1,7 @@
import gleam/bool import gleam/bool
import gleam/int import gleam/int
import gleam/list import gleam/list
import gleam/option.{None, Some} import gleam/option.{type Option, None, Some}
import gleam/result import gleam/result
import gleam/string import gleam/string
import splitter.{type Splitter} import splitter.{type Splitter}
@@ -15,9 +15,10 @@ pub const scheme_port = [
#("wss", 443), #("wss", 443),
] ]
/// Looks up `scheme` in the `scheme_port` table and yields the port paired
/// with the first matching entry.
/// The rows below are a side-by-side diff: the left column returns the
/// `Result(Int, Nil)` produced by `list.find`/`result.map`; the right column
/// additionally pipes it through `option.from_result` and returns `Option(Int)`.
pub fn get_port_for_scheme(scheme: String) -> Result(Int, Nil) { pub fn get_port_for_scheme(scheme: String) -> Option(Int) {
list.find(scheme_port, fn(sp) { sp.0 == scheme }) list.find(scheme_port, fn(sp) { sp.0 == scheme })
|> result.map(fn(sp) { sp.1 }) |> result.map(fn(sp) { sp.1 })
|> option.from_result
} }
pub fn merge(base: Uri, relative: Uri) -> Result(Uri, Nil) { pub fn merge(base: Uri, relative: Uri) -> Result(Uri, Nil) {
@@ -86,7 +87,7 @@ pub fn normalise(uri: Uri) -> Uri {
let percent_normaliser = normalise_percent(percent_splitter, _) let percent_normaliser = normalise_percent(percent_splitter, _)
let scheme = uri.scheme |> option.map(string.lowercase) let scheme = uri.scheme |> option.map(string.lowercase)
let userinfo = uri.userinfo |> option.map(percent_normaliser) let userinfo = uri.userinfo |> option.map(percent_normaliser)
let port = uri.port let port = uri.port |> scheme_normalisation(scheme)
let host = let host =
uri.host |> option.map(string.lowercase) |> option.map(percent_normaliser) uri.host |> option.map(string.lowercase) |> option.map(percent_normaliser)
let path = uri.path |> percent_normaliser |> remove_dot_segments let path = uri.path |> percent_normaliser |> remove_dot_segments
@@ -96,6 +97,21 @@ pub fn normalise(uri: Uri) -> Uri {
Uri(scheme, userinfo, host, port, path, query, fragment) Uri(scheme, userinfo, host, port, path, query, fragment)
} }
/// Drops an explicit port when it is the default port for the scheme.
///
/// Returns `None` when both a scheme and a port are present and
/// `get_port_for_scheme` reports exactly that port for the scheme; in every
/// other case (no scheme, no port, unknown scheme, non-default port) the
/// given `port` is returned unchanged.
fn scheme_normalisation(
  port: Option(Int),
  scheme: Option(String),
) -> Option(Int) {
  case port, scheme {
    Some(_), Some(name) ->
      case get_port_for_scheme(name) {
        // The lookup result is an Option, so it is compared with the whole
        // `port` value rather than with the unwrapped integer.
        default_port if default_port == port -> None
        _ -> port
      }
    // Nothing to compare against: leave the port as it was supplied.
    _, _ -> port
  }
}
/// Entry point for dot-segment removal: hands `path` to
/// `do_remove_dot_segments` together with an empty string as the second
/// argument (presumably the initial accumulator; confirm against the helper).
fn remove_dot_segments(path: String) -> String { fn remove_dot_segments(path: String) -> String {
do_remove_dot_segments(path, "") do_remove_dot_segments(path, "")
} }
@@ -258,6 +274,7 @@ fn do_percent_decode(
_ -> { _ -> {
case int.bitwise_and(char, 224) { case int.bitwise_and(char, 224) {
192 -> { 192 -> {
"2bytes" |> echo
use #(char, rest) <- result.try(decode_2byte_utf(hd1 <> hd2, rest)) use #(char, rest) <- result.try(decode_2byte_utf(hd1 <> hd2, rest))
do_percent_decode(splitter, rest, acc <> before <> char) do_percent_decode(splitter, rest, acc <> before <> char)
@@ -295,6 +312,43 @@ fn do_percent_decode(
} }
} }
/// Decodes a percent-encoded two-byte UTF-8 sequence into a single character.
///
/// `first_byte` is the hex text of the lead byte, which the caller has
/// already consumed; `rest` must start with the `%XX` escape of the second
/// byte. On success returns the decoded character and the input that follows
/// the escape.
///
/// Returns `Error(Nil)` when `rest` does not start with `%`, when either hex
/// digit fails to parse, when the second byte is rejected by
/// `within_byte_range`, when the sequence is an overlong encoding (a code
/// point below 0x80, which UTF-8 requires to be written as one byte), or when
/// the value is not a valid code point.
pub fn decode_2byte_utf(
  first_byte: String,
  rest: String,
) -> Result(#(String, String), Nil) {
  // The second byte must itself be percent-escaped.
  use rest <- result.try(case rest {
    "%" <> rest -> Ok(rest)
    _ -> Error(Nil)
  })
  use #(hd3, rest) <- result.try(parse_hex_digit(rest))
  use <- bool.guard(when: !within_byte_range(hd3), return: Error(Nil))
  use #(hd4, rest) <- result.try(parse_hex_digit(rest))
  use bytes <- result.try(int.base_parse(first_byte <> hd3 <> hd4, 16))
  // Layout of the 16 bits: 110xxxyy 10yyzzzz. The two marker fields are
  // discarded; x, y1, y2 and z carry the 11 payload bits.
  let assert <<
    _:size(3),
    x:size(3),
    y1:size(2),
    _:size(2),
    y2:size(2),
    z:size(4),
  >> = <<bytes:size(16)>>
  // Re-pack the payload bits behind five zero bits to read the code point.
  let assert <<i:size(16)>> = <<
    0:size(5),
    x:size(3),
    y1:size(2),
    y2:size(2),
    z:size(4),
  >>
  // Lead bytes C0 and C1 produce values below 0x80: overlong forms such as
  // "%C0%80" must be rejected rather than decoded to an ASCII character.
  use <- bool.guard(when: i < 0x80, return: Error(Nil))
  use res <- result.try(string.utf_codepoint(i))
  Ok(#(string.from_utf_codepoints([res]), rest))
}
pub fn decode_3byte_utf( pub fn decode_3byte_utf(
first_byte: String, first_byte: String,
rest: String, rest: String,
@@ -304,12 +358,18 @@ pub fn decode_3byte_utf(
_ -> Error(Nil) _ -> Error(Nil)
}) })
use #(hd3, rest) <- result.try(parse_hex_digit(rest)) use #(hd3, rest) <- result.try(parse_hex_digit(rest))
use <- bool.guard(when: !within_byte_range(hd3), return: Error(Nil))
use #(hd4, rest) <- result.try(parse_hex_digit(rest)) use #(hd4, rest) <- result.try(parse_hex_digit(rest))
use rest <- result.try(case rest { use rest <- result.try(case rest {
"%" <> rest -> Ok(rest) "%" <> rest -> Ok(rest)
_ -> Error(Nil) _ -> Error(Nil)
}) })
use #(hd5, rest) <- result.try(parse_hex_digit(rest)) use #(hd5, rest) <- result.try(parse_hex_digit(rest))
use <- bool.guard(when: !within_byte_range(hd5), return: Error(Nil))
use #(hd6, rest) <- result.try(parse_hex_digit(rest)) use #(hd6, rest) <- result.try(parse_hex_digit(rest))
use bytes <- result.try(int.base_parse( use bytes <- result.try(int.base_parse(
@@ -340,39 +400,6 @@ pub fn decode_3byte_utf(
Ok(#(string.from_utf_codepoints([res]), rest)) Ok(#(string.from_utf_codepoints([res]), rest))
} }
/// Decodes a percent-encoded two-byte UTF-8 sequence into a single character.
///
/// `first_byte` is the hex text of the lead byte; `rest` must start with the
/// `%XX` escape of the second byte. Returns the decoded character and the
/// remaining input, or `Error(Nil)` when the `%` is missing, a hex digit
/// fails to parse, or the value is not a valid code point.
/// This copy performs no `within_byte_range` check on the second byte.
pub fn decode_2byte_utf(
first_byte: String,
rest: String,
) -> Result(#(String, String), Nil) {
// The second byte must itself be percent-escaped.
use rest <- result.try(case rest {
"%" <> rest -> Ok(rest)
_ -> Error(Nil)
})
use #(hd3, rest) <- result.try(parse_hex_digit(rest))
use #(hd4, rest) <- result.try(parse_hex_digit(rest))
use bytes <- result.try(int.base_parse(first_byte <> hd3 <> hd4, 16))
// Split the 16 bits as 3+3+2 and 2+2+4; the two discarded fields sit where
// UTF-8 keeps its 110 / 10 marker bits, and their values are not checked.
let assert <<
_:size(3),
x:size(3),
y1:size(2),
_:size(2),
y2:size(2),
z:size(4),
>> = <<bytes:size(16)>>
// Re-pack the 11 payload bits behind five zero bits to read the code point.
let assert <<i:size(16)>> = <<
0:size(5),
x:size(3),
y1:size(2),
y2:size(2),
z:size(4),
>>
use res <- result.try(string.utf_codepoint(i))
Ok(#(string.from_utf_codepoints([res]), rest))
}
fn decode_4byte_utf( fn decode_4byte_utf(
first_byte: String, first_byte: String,
rest: String, rest: String,
@@ -382,18 +409,27 @@ fn decode_4byte_utf(
_ -> Error(Nil) _ -> Error(Nil)
}) })
use #(hd3, rest) <- result.try(parse_hex_digit(rest)) use #(hd3, rest) <- result.try(parse_hex_digit(rest))
use <- bool.guard(when: !within_byte_range(hd3), return: Error(Nil))
use #(hd4, rest) <- result.try(parse_hex_digit(rest)) use #(hd4, rest) <- result.try(parse_hex_digit(rest))
use rest <- result.try(case rest { use rest <- result.try(case rest {
"%" <> rest -> Ok(rest) "%" <> rest -> Ok(rest)
_ -> Error(Nil) _ -> Error(Nil)
}) })
use #(hd5, rest) <- result.try(parse_hex_digit(rest)) use #(hd5, rest) <- result.try(parse_hex_digit(rest))
use <- bool.guard(when: !within_byte_range(hd5), return: Error(Nil))
use #(hd6, rest) <- result.try(parse_hex_digit(rest)) use #(hd6, rest) <- result.try(parse_hex_digit(rest))
use rest <- result.try(case rest { use rest <- result.try(case rest {
"%" <> rest -> Ok(rest) "%" <> rest -> Ok(rest)
_ -> Error(Nil) _ -> Error(Nil)
}) })
use #(hd7, rest) <- result.try(parse_hex_digit(rest)) use #(hd7, rest) <- result.try(parse_hex_digit(rest))
use <- bool.guard(when: !within_byte_range(hd7), return: Error(Nil))
use #(hd8, rest) <- result.try(parse_hex_digit(rest)) use #(hd8, rest) <- result.try(parse_hex_digit(rest))
use bytes <- result.try(int.base_parse( use bytes <- result.try(int.base_parse(
@@ -432,6 +468,13 @@ fn decode_4byte_utf(
Ok(#(string.from_utf_codepoints([res]), rest)) Ok(#(string.from_utf_codepoints([res]), rest))
} }
/// Reports whether `str`, the high hex digit of a percent-encoded byte, can
/// belong to a UTF-8 continuation byte (bit pattern `10xxxxxx`, 0x80–0xBF).
///
/// Only `8`, `9`, `A` and `B` (in either case) give the required top bits.
/// Digits `0`–`7` have the top bit clear, and `C`–`F` start a lead byte, so
/// both groups, like any other string, yield `False`.
fn within_byte_range(str: String) -> Bool {
  case str {
    "8" | "9" | "a" | "A" | "b" | "B" -> True
    _ -> False
  }
}
pub fn do_percent_encode(str: String) -> String { pub fn do_percent_encode(str: String) -> String {
string.to_utf_codepoints(str) string.to_utf_codepoints(str)
|> list.map(string.utf_codepoint_to_int) |> list.map(string.utf_codepoint_to_int)

View File

@@ -1,13 +1,11 @@
import gleam/bool import gleam/bool
import gleam/int import gleam/int
import gleam/list import gleam/list
import gleam/option.{None, Some} import gleam/option.{Some}
import gleam/result
import gleam/string import gleam/string
import gleam/uri import gleam/uri
import internal/parser import internal/parser
import internal/utils import internal/utils
import splitter
import types.{type Uri, Uri} import types.{type Uri, Uri}
pub fn parse(uri: String) -> Result(Uri, Nil) { pub fn parse(uri: String) -> Result(Uri, Nil) {

View File

@@ -1006,6 +1006,54 @@ pub fn normalise_tests() {
|> uri.normalise |> uri.normalise
|> should.equal(Uri(..empty_uri, path: "mid/6")) |> should.equal(Uri(..empty_uri, path: "mid/6"))
}), }),
it("normalise ports", fn() {
uri.parse("http://example.com:80/test")
|> should.be_ok
|> uri.normalise
|> should.equal(
Uri(
..empty_uri,
scheme: Some("http"),
host: Some("example.com"),
path: "/test",
),
)
uri.parse("https://example.com:443/test")
|> should.be_ok
|> uri.normalise
|> should.equal(
Uri(
..empty_uri,
scheme: Some("https"),
host: Some("example.com"),
path: "/test",
),
)
uri.parse("http://example.com:8080/test")
|> should.be_ok
|> uri.normalise
|> should.equal(
Uri(
..empty_uri,
scheme: Some("http"),
host: Some("example.com"),
port: Some(8080),
path: "/test",
),
)
uri.parse("https://example.com:8043/test")
|> should.be_ok
|> uri.normalise
|> should.equal(
Uri(
..empty_uri,
scheme: Some("https"),
host: Some("example.com"),
port: Some(8043),
path: "/test",
),
)
}),
it("abnormal examples", fn() { it("abnormal examples", fn() {
let base = uri.parse("http://a/b/c/d;p?q") |> should.be_ok let base = uri.parse("http://a/b/c/d;p?q") |> should.be_ok
@@ -1202,6 +1250,15 @@ pub fn percent_encode_tests() {
}) })
Nil Nil
}), }),
it("fail decoding", fn() {
uri.percent_decode("%C3test") |> should.be_error
uri.percent_decode("%C3%01test") |> should.be_error
uri.percent_decode("%E2%82%01test") |> should.be_error
uri.percent_decode("%E2%01%ACtest") |> should.be_error
uri.percent_decode("%F0%90%80%01test") |> should.be_error
uri.percent_decode("%F0%90%01%85test") |> should.be_error
uri.percent_decode("%F0%01%80%85test") |> should.be_error
}),
]) ])
} }