diff --git a/src/internal/parser.gleam b/src/internal/parser.gleam index 81db22b..718e52c 100644 --- a/src/internal/parser.gleam +++ b/src/internal/parser.gleam @@ -4,6 +4,8 @@ import gleam/list.{Continue, Stop} import gleam/option.{None, Some} import gleam/result import gleam/string +import internal/utils +import splitter import types.{type Uri, Uri, empty_uri} @@ -511,8 +513,8 @@ fn do_parse_reg_name(str: String, reg_name: String) { fn parse_pct_encoded(str: String) { case str { "%" <> rest -> { - use #(hex1, rest) <- result.try(parse_hex_digit(rest)) - use #(hex2, rest) <- result.try(parse_hex_digit(rest)) + use #(hex1, rest) <- result.try(utils.parse_hex_digit(rest)) + use #(hex2, rest) <- result.try(utils.parse_hex_digit(rest)) Ok(#("%" <> hex1 <> hex2, rest)) } @@ -689,35 +691,7 @@ fn parse_min_max(str, min, max, parse_fn) { } fn parse_hex_digits(str, min, max) { - parse_min_max(str, min, max, parse_hex_digit) -} - -pub fn parse_hex_digit(str) { - case str { - "0" as l <> rest - | "1" as l <> rest - | "2" as l <> rest - | "3" as l <> rest - | "4" as l <> rest - | "5" as l <> rest - | "6" as l <> rest - | "7" as l <> rest - | "8" as l <> rest - | "9" as l <> rest - | "a" as l <> rest - | "b" as l <> rest - | "c" as l <> rest - | "d" as l <> rest - | "e" as l <> rest - | "f" as l <> rest - | "A" as l <> rest - | "B" as l <> rest - | "C" as l <> rest - | "D" as l <> rest - | "E" as l <> rest - | "F" as l <> rest -> Ok(#(l, rest)) - _ -> Error(Nil) - } + parse_min_max(str, min, max, utils.parse_hex_digit) } fn parse_digit(str: String) -> Result(#(String, String), Nil) { @@ -848,3 +822,41 @@ fn combine_uris(uris: List(Uri)) -> Uri { } }) } + +pub fn parse_query_parts(query: String) -> Result(List(#(String, String)), Nil) { + let splitter = splitter.new(["&"]) + + do_parse_query_parts(splitter, query, []) +} + +fn do_parse_query_parts( + splitter: splitter.Splitter, + query: String, + acc: List(#(String, String)), +) -> Result(List(#(String, String)), Nil) { + case splitter.split(splitter, query) { + #("", _, "") -> Ok(list.reverse(acc)) + #("", _, rest) -> do_parse_query_parts(splitter, rest, acc) + #(pair, _, rest) -> { + use pair <- result.try(do_parse_query_pair(pair)) + + let acc = [pair, ..acc] + + case rest { + "" -> Ok(list.reverse(acc)) + _ -> do_parse_query_parts(splitter, rest, acc) + } + } + } +} + +fn do_parse_query_pair(pair: String) -> Result(#(String, String), Nil) { + let #(key, val) = case string.split_once(pair, "=") { + Error(_) -> #(pair, "") + Ok(p) -> p + } + use key <- result.try(utils.percent_decode(string.replace(key, "+", " "))) + use val <- result.try(utils.percent_decode(string.replace(val, "+", " "))) + + Ok(#(key, val)) +} diff --git a/src/internal/utils.gleam b/src/internal/utils.gleam index 68bd5e1..96da75f 100644 --- a/src/internal/utils.gleam +++ b/src/internal/utils.gleam @@ -4,10 +4,22 @@ import gleam/list import gleam/option.{None, Some} import gleam/result import gleam/string -import internal/parser import splitter.{type Splitter} import types.{type Uri, Uri} +pub const scheme_port = [ + #("http", 80), + #("https", 443), + #("ftp", 21), + #("ws", 80), + #("wss", 443), +] + +pub fn get_port_for_scheme(scheme: String) -> Result(Int, Nil) { + list.find(scheme_port, fn(sp) { sp.0 == scheme }) + |> result.map(fn(sp) { sp.1 }) +} + pub fn merge(base: Uri, relative: Uri) -> Result(Uri, Nil) { use <- bool.guard(when: base.scheme == None, return: Error(Nil)) let uri = case relative.scheme { @@ -133,9 +145,9 @@ fn do_normalise_percent( case after { "" -> res <> before _ -> { - let #(pc_val, rest) = case parser.parse_hex_digit(after) { + let #(pc_val, rest) = case parse_hex_digit(after) { Ok(#(pc1, rest)) -> { - case parser.parse_hex_digit(rest) { + case parse_hex_digit(rest) { Ok(#(pc2, rest)) -> { let hex = pc1 <> pc2 let v = unescape_percent(hex) @@ -171,6 +183,34 @@ fn unescape_percent(str: String) -> String { } } +pub fn parse_hex_digit(str) { + case str { + "0" as l <> rest + | "1" as l <> rest + | "2" as l <> rest + | "3" as l <> rest + | "4" as l <> rest + | "5" as l <> rest + | "6" as l <> rest + | "7" as l <> rest + | "8" as l <> rest + | "9" as l <> rest + | "a" as l <> rest + | "b" as l <> rest + | "c" as l <> rest + | "d" as l <> rest + | "e" as l <> rest + | "f" as l <> rest + | "A" as l <> rest + | "B" as l <> rest + | "C" as l <> rest + | "D" as l <> rest + | "E" as l <> rest + | "F" as l <> rest -> Ok(#(l, rest)) + _ -> Error(Nil) + } +} + fn encoding_not_needed(i: Int) -> Bool { // $-_.+!*'() case i { @@ -202,8 +242,8 @@ fn do_percent_decode( case splitter.split(splitter, str) { #(before, "", "") -> Ok(acc <> before) #(before, "%", after) -> { - use #(hd1, rest) <- result.try(parser.parse_hex_digit(after)) - use #(hd2, rest) <- result.try(parser.parse_hex_digit(rest)) + use #(hd1, rest) <- result.try(parse_hex_digit(after)) + use #(hd2, rest) <- result.try(parse_hex_digit(rest)) use char <- result.try(int.base_parse(hd1 <> hd2, 16)) case int.bitwise_and(char, 128) { @@ -263,14 +303,14 @@ pub fn decode_3byte_utf( "%" <> rest -> Ok(rest) _ -> Error(Nil) }) - use #(hd3, rest) <- result.try(parser.parse_hex_digit(rest)) - use #(hd4, rest) <- result.try(parser.parse_hex_digit(rest)) + use #(hd3, rest) <- result.try(parse_hex_digit(rest)) + use #(hd4, rest) <- result.try(parse_hex_digit(rest)) use rest <- result.try(case rest { "%" <> rest -> Ok(rest) _ -> Error(Nil) }) - use #(hd5, rest) <- result.try(parser.parse_hex_digit(rest)) - use #(hd6, rest) <- result.try(parser.parse_hex_digit(rest)) + use #(hd5, rest) <- result.try(parse_hex_digit(rest)) + use #(hd6, rest) <- result.try(parse_hex_digit(rest)) use bytes <- result.try(int.base_parse( first_byte <> hd3 <> hd4 <> hd5 <> hd6, @@ -308,8 +348,8 @@ pub fn decode_2byte_utf( "%" <> rest -> Ok(rest) _ -> Error(Nil) }) - use #(hd3, rest) <- result.try(parser.parse_hex_digit(rest)) - use #(hd4, rest) <- result.try(parser.parse_hex_digit(rest)) + use #(hd3, rest) <- result.try(parse_hex_digit(rest)) + use #(hd4, rest) <- result.try(parse_hex_digit(rest)) use bytes <- result.try(int.base_parse(first_byte <> hd3 <> hd4, 16)) let assert << @@ -341,20 +381,20 @@ fn decode_4byte_utf( "%" <> rest -> Ok(rest) _ -> Error(Nil) }) - use #(hd3, rest) <- result.try(parser.parse_hex_digit(rest)) - use #(hd4, rest) <- result.try(parser.parse_hex_digit(rest)) + use #(hd3, rest) <- result.try(parse_hex_digit(rest)) + use #(hd4, rest) <- result.try(parse_hex_digit(rest)) use rest <- result.try(case rest { "%" <> rest -> Ok(rest) _ -> Error(Nil) }) - use #(hd5, rest) <- result.try(parser.parse_hex_digit(rest)) - use #(hd6, rest) <- result.try(parser.parse_hex_digit(rest)) + use #(hd5, rest) <- result.try(parse_hex_digit(rest)) + use #(hd6, rest) <- result.try(parse_hex_digit(rest)) use rest <- result.try(case rest { "%" <> rest -> Ok(rest) _ -> Error(Nil) }) - use #(hd7, rest) <- result.try(parser.parse_hex_digit(rest)) - use #(hd8, rest) <- result.try(parser.parse_hex_digit(rest)) + use #(hd7, rest) <- result.try(parse_hex_digit(rest)) + use #(hd8, rest) <- result.try(parse_hex_digit(rest)) use bytes <- result.try(int.base_parse( first_byte <> hd3 <> hd4 <> hd5 <> hd6 <> hd7 <> hd8, diff --git a/src/uri.gleam b/src/uri.gleam index c934f3e..76eb580 100644 --- a/src/uri.gleam +++ b/src/uri.gleam @@ -1,9 +1,13 @@ import gleam/bool import gleam/int +import gleam/list import gleam/option.{None, Some} +import gleam/result +import gleam/string import gleam/uri import internal/parser import internal/utils +import splitter import types.{type Uri, Uri} pub fn parse(uri: String) -> Result(Uri, Nil) { @@ -104,11 +108,16 @@ pub fn percent_encode(value: String) -> String { } pub fn query_to_string(query: List(#(String, String))) -> String { - todo + list.map(query, fn(q) { + [utils.do_percent_encode(q.0), "=", utils.do_percent_encode(q.1)] + }) + |> list.intersperse(["&"]) + |> list.flatten + |> string.concat } pub fn parse_query(query: String) -> Result(List(#(String, String)), Nil) { - todo + parser.parse_query_parts(query) } pub fn origin(uri: Uri) -> Result(String, Nil) { diff --git a/test/uri_test.gleam b/test/uri_test.gleam index d44eafd..4a0893c 100644 --- a/test/uri_test.gleam +++ b/test/uri_test.gleam @@ -341,7 +341,7 @@ pub fn parse_path_tests() { ]) } -pub fn parse_query_tests() { +pub fn parse_query_part_tests() { describe("query parsing", [ it("simple parse", fn() { uri.parse("foo:?name=ferret") @@ -553,7 +553,7 @@ pub fn parse_fragment_tests() { ]) } -fn parse_special_tests() { +pub fn parse_special_tests() { describe("special parsing", [ it("special 1", fn() { uri.parse("//?") @@ -731,9 +731,9 @@ fn parse_special_tests() { uri.parse("#") |> should.equal(Ok(Uri(..types.empty_uri, path: "", fragment: Some("")))) uri.parse("##") - |> should.equal(Ok(Uri(..types.empty_uri, path: "", fragment: Some("#")))) + |> should.be_error uri.parse("###") - |> should.equal(Ok(Uri(..types.empty_uri, path: "", fragment: Some("##")))) + |> should.be_error }), it("special 2", fn() { uri.parse("a://:1/") @@ -781,9 +781,7 @@ fn parse_special_tests() { Uri(..types.empty_uri, host: Some("localhost"), path: "/"), )) uri.parse("//:") - |> should.equal(Ok( - Uri(..types.empty_uri, host: Some("localhost"), path: ""), - )) + |> should.equal(Ok(Uri(..types.empty_uri, host: Some(""), path: ""))) }), ]) } @@ -1157,29 +1155,6 @@ pub fn equivalence_tests() { ]) } -pub fn percent_encode_tests() { - describe("percent encoding", [ - it("encoding", fn() { - percent_codec_fixtures - |> list.map(fn(t) { - let #(a, b) = t - uri.percent_encode(a) - |> should.equal(b) - }) - Nil - }), - it("decoding", fn() { - percent_codec_fixtures - |> list.map(fn(t) { - let #(a, b) = t - uri.percent_decode(b) - |> should.equal(Ok(a)) - }) - Nil - }), - ]) -} - const percent_codec_fixtures = [ #(" ", "%20"), #(",", "%2C"), @@ -1206,6 +1181,85 @@ const percent_codec_fixtures = [ #("+", "+"), #("100% great+fun", "100%25%20great+fun"), ] + +pub fn percent_encode_tests() { + describe("percent encoding", [ + it("encoding", fn() { + percent_codec_fixtures + |> list.map(fn(t) { + let #(a, b) = t + uri.percent_encode(a) + |> should.equal(b) + }) + Nil + }), + it("decoding", fn() { + percent_codec_fixtures + |> list.map(fn(t) { + let #(a, b) = t + uri.percent_decode(b) + |> should.equal(Ok(a)) + }) + Nil + }), + ]) +} + +pub fn parse_query_tests() { + describe("parse_query", [ + it("basic parse", fn() { + uri.parse_query("el1=123&el2=321") + |> should.be_ok + |> should.equal([#("el1", "123"), #("el2", "321")]) + uri.parse_query("el%201=123&el+2=321") + |> should.be_ok + |> should.equal([#("el 1", "123"), #("el 2", "321")]) + uri.parse_query("el%E2%82%AC1=12%CE%A33&el%F0%90%80%852=321") + |> should.be_ok + |> should.equal([#("el€1", "12Σ3"), #("el𐀅2", "321")]) + }), + it("empty parts", fn() { + uri.parse_query("el1=123&&el2=321") + |> should.be_ok + |> should.equal([#("el1", "123"), #("el2", "321")]) + uri.parse_query("el1=&el2=") + |> should.be_ok + |> should.equal([#("el1", ""), #("el2", "")]) + uri.parse_query("") + |> should.be_ok + |> should.equal([]) + uri.parse_query("=123&el2=321") + |> should.be_ok + |> should.equal([#("", "123"), #("el2", "321")]) + uri.parse_query("=&el2=321") + |> should.be_ok + |> should.equal([#("", ""), #("el2", "321")]) + }), + ]) +} + +pub fn query_to_string_tests() { + describe("query_to_string", [ + it("basic query", fn() { + uri.query_to_string([#("el1", "123"), #("el2", "321")]) + |> should.equal("el1=123&el2=321") + uri.query_to_string([#("el 1", "123"), #("el 2", "321")]) + |> should.equal("el%201=123&el%202=321") + uri.query_to_string([#("el€1", "12Σ3"), #("el𐀅2", "321")]) + |> should.equal("el%E2%82%AC1=12%CE%A33&el%F0%90%80%852=321") + }), + it("empty parts", fn() { + uri.query_to_string([#("el1", ""), #("el2", "")]) + |> should.equal("el1=&el2=") + uri.query_to_string([]) + |> should.equal("") + uri.query_to_string([#("", "123"), #("el2", "321")]) + |> should.equal("=123&el2=321") + uri.query_to_string([#("", ""), #("el2", "321")]) + |> should.equal("=&el2=321") + }), + ]) +} // gleeunit test functions end in `_test` // pub fn uri_test() { // match("uri:")