From 62943e11e7a08e27dc244fda5bf89a144d152cc6 Mon Sep 17 00:00:00 2001
From: Gareth Pendleton
Date: Mon, 8 Sep 2025 13:25:03 +0100
Subject: [PATCH] feat: Added port/scheme normalisation

Normalising a URI now drops a port that equals the default port of its
scheme, so `http://example.com:80/` and `http://example.com/` compare
equal after normalisation.

Percent-decoding of multi-byte UTF-8 sequences now fails when a
continuation byte is outside 0x80-0xBF instead of silently decoding it.
---
Reviewer notes (ignored by `git am`):
 - Removed the leftover `"2bytes" |> echo` debug statement from
   do_percent_decode; that hunk is gone and later hunk offsets shifted.
 - within_byte_range now accepts only the high digits 8, 9, A, B, so
   bytes 0xC0-0xFF are no longer taken as continuation bytes.
 - The bit-array literals `<<bytes:size(16)>>` and `<<i:size(16)>>`, the
   indentation and the blank context lines were reconstructed from a
   whitespace-collapsed copy; verify them against the tree, or apply
   with `git apply --ignore-whitespace`.
 - Blob hashes on the `index` lines predate these changes.

 src/internal/utils.gleam | 126 ++++++++++++++++++++++++++++-----------
 src/uri.gleam            |   4 +-
 test/uri_test.gleam      |  58 ++++++++++++++++++
 3 files changed, 149 insertions(+), 39 deletions(-)

diff --git a/src/internal/utils.gleam b/src/internal/utils.gleam
index 96da75f..f1ec041 100644
--- a/src/internal/utils.gleam
+++ b/src/internal/utils.gleam
@@ -1,7 +1,7 @@
 import gleam/bool
 import gleam/int
 import gleam/list
-import gleam/option.{None, Some}
+import gleam/option.{type Option, None, Some}
 import gleam/result
 import gleam/string
 import splitter.{type Splitter}
@@ -15,9 +15,12 @@ pub const scheme_port = [
   #("wss", 443),
 ]
 
-pub fn get_port_for_scheme(scheme: String) -> Result(Int, Nil) {
+/// Looks up the default port registered for `scheme` in `scheme_port`.
+/// Returns `None` when the scheme has no entry.
+pub fn get_port_for_scheme(scheme: String) -> Option(Int) {
   list.find(scheme_port, fn(sp) { sp.0 == scheme })
   |> result.map(fn(sp) { sp.1 })
+  |> option.from_result
 }
 
 pub fn merge(base: Uri, relative: Uri) -> Result(Uri, Nil) {
@@ -86,7 +89,7 @@ pub fn normalise(uri: Uri) -> Uri {
   let percent_normaliser = normalise_percent(percent_splitter, _)
   let scheme = uri.scheme |> option.map(string.lowercase)
   let userinfo = uri.userinfo |> option.map(percent_normaliser)
-  let port = uri.port
+  let port = uri.port |> scheme_normalisation(scheme)
   let host =
     uri.host |> option.map(string.lowercase) |> option.map(percent_normaliser)
   let path = uri.path |> percent_normaliser |> remove_dot_segments
@@ -96,6 +99,24 @@ pub fn normalise(uri: Uri) -> Uri {
   Uri(scheme, userinfo, host, port, path, query, fragment)
 }
 
+/// Drops a port that equals the default port of `scheme`, so that e.g.
+/// `http://example.com:80` and `http://example.com` normalise identically.
+/// Any other port, or a URI without a scheme or port, is returned unchanged.
+fn scheme_normalisation(
+  port: Option(Int),
+  scheme: Option(String),
+) -> Option(Int) {
+  case scheme, port {
+    Some(scheme), Some(_) -> {
+      case get_port_for_scheme(scheme) == port {
+        True -> None
+        False -> port
+      }
+    }
+    _, _ -> port
+  }
+}
+
 fn remove_dot_segments(path: String) -> String {
   do_remove_dot_segments(path, "")
 }
@@ -295,6 +316,47 @@ fn do_percent_decode(
   }
 }
 
+/// Decodes the second byte of a percent-encoded 2-byte UTF-8 sequence.
+/// `first_byte` holds the two hex digits of the lead byte and `rest` must
+/// start with the `%XX` continuation byte. Returns the decoded character
+/// together with the remaining input, or `Error(Nil)` if it is malformed.
+pub fn decode_2byte_utf(
+  first_byte: String,
+  rest: String,
+) -> Result(#(String, String), Nil) {
+  use rest <- result.try(case rest {
+    "%" <> rest -> Ok(rest)
+    _ -> Error(Nil)
+  })
+  use #(hd3, rest) <- result.try(parse_hex_digit(rest))
+
+  use <- bool.guard(when: !within_byte_range(hd3), return: Error(Nil))
+
+  use #(hd4, rest) <- result.try(parse_hex_digit(rest))
+
+  use bytes <- result.try(int.base_parse(first_byte <> hd3 <> hd4, 16))
+
+  let assert <<
+    _:size(3),
+    x:size(3),
+    y1:size(2),
+    _:size(2),
+    y2:size(2),
+    z:size(4),
+  >> = <<bytes:size(16)>>
+  let assert <<i:size(16)>> = <<
+    0:size(5),
+    x:size(3),
+    y1:size(2),
+    y2:size(2),
+    z:size(4),
+  >>
+
+  use res <- result.try(string.utf_codepoint(i))
+
+  Ok(#(string.from_utf_codepoints([res]), rest))
+}
+
 pub fn decode_3byte_utf(
   first_byte: String,
   rest: String,
@@ -304,12 +366,18 @@ pub fn decode_3byte_utf(
     _ -> Error(Nil)
   })
   use #(hd3, rest) <- result.try(parse_hex_digit(rest))
+
+  use <- bool.guard(when: !within_byte_range(hd3), return: Error(Nil))
+
   use #(hd4, rest) <- result.try(parse_hex_digit(rest))
   use rest <- result.try(case rest {
     "%" <> rest -> Ok(rest)
     _ -> Error(Nil)
   })
   use #(hd5, rest) <- result.try(parse_hex_digit(rest))
+
+  use <- bool.guard(when: !within_byte_range(hd5), return: Error(Nil))
+
   use #(hd6, rest) <- result.try(parse_hex_digit(rest))
 
   use bytes <- result.try(int.base_parse(
@@ -340,39 +408,6 @@ pub fn decode_3byte_utf(
   Ok(#(string.from_utf_codepoints([res]), rest))
 }
 
-pub fn decode_2byte_utf(
-  first_byte: String,
-  rest: String,
-) -> Result(#(String, String), Nil) {
-  use rest <- result.try(case rest {
-    "%" <> rest -> Ok(rest)
-    _ -> Error(Nil)
-  })
-  use #(hd3, rest) <- result.try(parse_hex_digit(rest))
-  use #(hd4, rest) <- result.try(parse_hex_digit(rest))
-
-  use bytes <- result.try(int.base_parse(first_byte <> hd3 <> hd4, 16))
-  let assert <<
-    _:size(3),
-    x:size(3),
-    y1:size(2),
-    _:size(2),
-    y2:size(2),
-    z:size(4),
-  >> = <<bytes:size(16)>>
-  let assert <<i:size(16)>> = <<
-    0:size(5),
-    x:size(3),
-    y1:size(2),
-    y2:size(2),
-    z:size(4),
-  >>
-
-  use res <- result.try(string.utf_codepoint(i))
-
-  Ok(#(string.from_utf_codepoints([res]), rest))
-}
-
 fn decode_4byte_utf(
   first_byte: String,
   rest: String,
@@ -382,18 +417,27 @@ fn decode_4byte_utf(
     _ -> Error(Nil)
   })
   use #(hd3, rest) <- result.try(parse_hex_digit(rest))
+
+  use <- bool.guard(when: !within_byte_range(hd3), return: Error(Nil))
+
   use #(hd4, rest) <- result.try(parse_hex_digit(rest))
   use rest <- result.try(case rest {
     "%" <> rest -> Ok(rest)
     _ -> Error(Nil)
   })
   use #(hd5, rest) <- result.try(parse_hex_digit(rest))
+
+  use <- bool.guard(when: !within_byte_range(hd5), return: Error(Nil))
+
   use #(hd6, rest) <- result.try(parse_hex_digit(rest))
   use rest <- result.try(case rest {
     "%" <> rest -> Ok(rest)
     _ -> Error(Nil)
   })
   use #(hd7, rest) <- result.try(parse_hex_digit(rest))
+
+  use <- bool.guard(when: !within_byte_range(hd7), return: Error(Nil))
+
   use #(hd8, rest) <- result.try(parse_hex_digit(rest))
 
   use bytes <- result.try(int.base_parse(
@@ -432,6 +476,16 @@ fn decode_4byte_utf(
   Ok(#(string.from_utf_codepoints([res]), rest))
 }
 
+/// Reports whether `str`, the high hex digit of a percent-encoded byte, can
+/// start a UTF-8 continuation byte (0x80-0xBF). Digits C-F are rejected too,
+/// since 0xC0-0xFF are lead or invalid bytes rather than continuations.
+fn within_byte_range(str: String) {
+  case str {
+    "8" | "9" | "A" | "B" | "a" | "b" -> True
+    _ -> False
+  }
+}
+
 pub fn do_percent_encode(str: String) -> String {
   string.to_utf_codepoints(str)
   |> list.map(string.utf_codepoint_to_int)
diff --git a/src/uri.gleam b/src/uri.gleam
index 76eb580..a64b081 100644
--- a/src/uri.gleam
+++ b/src/uri.gleam
@@ -1,13 +1,11 @@
 import gleam/bool
 import gleam/int
 import gleam/list
-import gleam/option.{None, Some}
-import gleam/result
+import gleam/option.{Some}
 import gleam/string
 import gleam/uri
 import internal/parser
 import internal/utils
-import splitter
 import types.{type Uri, Uri}
 
 pub fn parse(uri: String) -> Result(Uri, Nil) {
diff --git a/test/uri_test.gleam b/test/uri_test.gleam
index 4a0893c..8ac9206 100644
--- a/test/uri_test.gleam
+++ b/test/uri_test.gleam
@@ -1006,6 +1006,54 @@ pub fn normalise_tests() {
       |> uri.normalise
       |> should.equal(Uri(..empty_uri, path: "mid/6"))
     }),
+    it("normalise ports", fn() {
+      uri.parse("http://example.com:80/test")
+      |> should.be_ok
+      |> uri.normalise
+      |> should.equal(
+        Uri(
+          ..empty_uri,
+          scheme: Some("http"),
+          host: Some("example.com"),
+          path: "/test",
+        ),
+      )
+      uri.parse("https://example.com:443/test")
+      |> should.be_ok
+      |> uri.normalise
+      |> should.equal(
+        Uri(
+          ..empty_uri,
+          scheme: Some("https"),
+          host: Some("example.com"),
+          path: "/test",
+        ),
+      )
+      uri.parse("http://example.com:8080/test")
+      |> should.be_ok
+      |> uri.normalise
+      |> should.equal(
+        Uri(
+          ..empty_uri,
+          scheme: Some("http"),
+          host: Some("example.com"),
+          port: Some(8080),
+          path: "/test",
+        ),
+      )
+      uri.parse("https://example.com:8043/test")
+      |> should.be_ok
+      |> uri.normalise
+      |> should.equal(
+        Uri(
+          ..empty_uri,
+          scheme: Some("https"),
+          host: Some("example.com"),
+          port: Some(8043),
+          path: "/test",
+        ),
+      )
+    }),
     it("abnormal examples", fn() {
       let base = uri.parse("http://a/b/c/d;p?q") |> should.be_ok
 
@@ -1202,6 +1250,16 @@ pub fn percent_encode_tests() {
       })
       Nil
     }),
+    it("fail decoding", fn() {
+      uri.percent_decode("%C3test") |> should.be_error
+      uri.percent_decode("%C3%01test") |> should.be_error
+      uri.percent_decode("%C3%C3test") |> should.be_error
+      uri.percent_decode("%E2%82%01test") |> should.be_error
+      uri.percent_decode("%E2%01%ACtest") |> should.be_error
+      uri.percent_decode("%F0%90%80%01test") |> should.be_error
+      uri.percent_decode("%F0%90%01%85test") |> should.be_error
+      uri.percent_decode("%F0%01%80%85test") |> should.be_error
+    }),
   ])
 }
 