diff --git a/rebar.config.script b/rebar.config.script index dde1e526..5a22cb8f 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -344,6 +344,7 @@ Funs = [{application, get_supervisor, 1}, {unicode_util, titlecase, 1}, {unicode_util, uppercase, 1}, {unicode_util, whitespace, 0}, + {uri_string, allowed_characters, 0}, {uri_string, compose_query, 1}, {uri_string, compose_query, 2}, {uri_string, dissect_query, 1}, @@ -352,8 +353,14 @@ Funs = [{application, get_supervisor, 1}, {uri_string, normalize, 1}, {uri_string, normalize, 2}, {uri_string, parse, 1}, + {uri_string, percent_decode, 1}, + {uri_string, quote, 1}, + {uri_string, quote, 2}, {uri_string, recompose, 1}, + {uri_string, resolve, 2}, + {uri_string, resolve, 3}, {uri_string, transcode, 2}, + {uri_string, unquote, 1}, {user, interfaces, 1}, {zlib, adler32, 2}, {zlib, adler32, 3}, diff --git a/src/otpbp_pt.erl b/src/otpbp_pt.erl index 0c9caef7..04b776b6 100644 --- a/src/otpbp_pt.erl +++ b/src/otpbp_pt.erl @@ -234,9 +234,13 @@ {{unicode_util, [casefold, cp, gc, get_case, is_whitespace, lookup, lowercase, nfc, nfd, nfkc, nfkd, titlecase,uppercase], 1}, otpbp_unicode_util}, - {{uri_string, [compose_query, normalize], [1, 2]}, otpbp_uri_string}, - {{uri_string, [dissect_query, is_host, is_path, parse, recompose], 1}, otpbp_uri_string}, + {{uri_string, allowed_characters, 0}, otpbp_uri_string}, + {{uri_string, [is_host, is_path], 1}, otpbp_uri_string}, + {{uri_string, [dissect_query, parse, percent_decode, recompose, unquote], 1}, + otpbp_uri_string}, + {{uri_string, [compose_query, normalize, quote], [1, 2]}, otpbp_uri_string}, {{uri_string, transcode, 2}, otpbp_uri_string}, + {{uri_string, resolve, [2, 3]}, otpbp_uri_string}, {{user, interfaces, 1}, otpbp_user}, {{zlib, [adler32, crc32], [2, 3]}, otpbp_zlib}, {{zlib, [adler32_combine, crc32_combine], 4}, otpbp_zlib}, diff --git a/src/otpbp_uri_string.erl b/src/otpbp_uri_string.erl index 05d5d5f2..b0b64db2 100644 --- a/src/otpbp_uri_string.erl +++ 
b/src/otpbp_uri_string.erl @@ -1,1496 +1,928 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2017-2018. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% -%% -%% [RFC 3986, Chapter 2.2. Reserved Characters] -%% -%% reserved = gen-delims / sub-delims -%% -%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" -%% -%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -%% / "*" / "+" / "," / ";" / "=" -%% -%% -%% [RFC 3986, Chapter 2.3. Unreserved Characters] -%% -%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -%% -%% -%% [RFC 3986, Chapter 3. Syntax Components] -%% -%% The generic URI syntax consists of a hierarchical sequence of -%% components referred to as the scheme, authority, path, query, and -%% fragment. -%% -%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] -%% -%% hier-part = "//" authority path-abempty -%% / path-absolute -%% / path-rootless -%% / path-empty -%% -%% The scheme and path components are required, though the path may be -%% empty (no characters). When authority is present, the path must -%% either be empty or begin with a slash ("/") character. When -%% authority is not present, the path cannot begin with two slash -%% characters ("//"). These restrictions result in five different ABNF -%% rules for a path (Section 3.3), only one of which will match any -%% given URI reference. 
-%% -%% The following are two example URIs and their component parts: -%% -%% foo://example.com:8042/over/there?name=ferret#nose -%% \_/ \______________/\_________/ \_________/ \__/ -%% | | | | | -%% scheme authority path query fragment -%% | _____________________|__ -%% / \ / \ -%% urn:example:animal:ferret:nose -%% -%% -%% [RFC 3986, Chapter 3.1. Scheme] -%% -%% Each URI begins with a scheme name that refers to a specification for -%% assigning identifiers within that scheme. -%% -%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) -%% -%% -%% [RFC 3986, Chapter 3.2. Authority] -%% -%% Many URI schemes include a hierarchical element for a naming -%% authority so that governance of the name space defined by the -%% remainder of the URI is delegated to that authority (which may, in -%% turn, delegate it further). -%% -%% authority = [ userinfo "@" ] host [ ":" port ] -%% -%% -%% [RFC 3986, Chapter 3.2.1. User Information] -%% -%% The userinfo subcomponent may consist of a user name and, optionally, -%% scheme-specific information about how to gain authorization to access -%% the resource. The user information, if present, is followed by a -%% commercial at-sign ("@") that delimits it from the host. -%% -%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) -%% -%% -%% [RFC 3986, Chapter 3.2.2. Host] -%% -%% The host subcomponent of authority is identified by an IP literal -%% encapsulated within square brackets, an IPv4 address in dotted- -%% decimal form, or a registered name. -%% -%% host = IP-literal / IPv4address / reg-name -%% -%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" -%% -%% IPvFuture = "v" 1*HEXDIG "." 
1*( unreserved / sub-delims / ":" ) -%% -%% IPv6address = 6( h16 ":" ) ls32 -%% / "::" 5( h16 ":" ) ls32 -%% / [ h16 ] "::" 4( h16 ":" ) ls32 -%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 -%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 -%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 -%% / [ *4( h16 ":" ) h16 ] "::" ls32 -%% / [ *5( h16 ":" ) h16 ] "::" h16 -%% / [ *6( h16 ":" ) h16 ] "::" -%% -%% ls32 = ( h16 ":" h16 ) / IPv4address -%% ; least-significant 32 bits of address -%% -%% h16 = 1*4HEXDIG -%% ; 16 bits of address represented in hexadecimal -%% -%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet -%% -%% dec-octet = DIGIT ; 0-9 -%% / %x31-39 DIGIT ; 10-99 -%% / "1" 2DIGIT ; 100-199 -%% / "2" %x30-34 DIGIT ; 200-249 -%% / "25" %x30-35 ; 250-255 -%% -%% reg-name = *( unreserved / pct-encoded / sub-delims ) -%% -%% -%% [RFC 3986, Chapter 3.2.2. Port] -%% -%% The port subcomponent of authority is designated by an optional port -%% number in decimal following the host and delimited from it by a -%% single colon (":") character. -%% -%% port = *DIGIT -%% -%% -%% [RFC 3986, Chapter 3.3. Path] -%% -%% The path component contains data, usually organized in hierarchical -%% form, that, along with data in the non-hierarchical query component -%% (Section 3.4), serves to identify a resource within the scope of the -%% URI's scheme and naming authority (if any). The path is terminated -%% by the first question mark ("?") or number sign ("#") character, or -%% by the end of the URI. 
-%% -%% path = path-abempty ; begins with "/" or is empty -%% / path-absolute ; begins with "/" but not "//" -%% / path-noscheme ; begins with a non-colon segment -%% / path-rootless ; begins with a segment -%% / path-empty ; zero characters -%% -%% path-abempty = *( "/" segment ) -%% path-absolute = "/" [ segment-nz *( "/" segment ) ] -%% path-noscheme = segment-nz-nc *( "/" segment ) -%% path-rootless = segment-nz *( "/" segment ) -%% path-empty = 0 -%% segment = *pchar -%% segment-nz = 1*pchar -%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) -%% ; non-zero-length segment without any colon ":" -%% -%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -%% -%% -%% [RFC 3986, Chapter 3.4. Query] -%% -%% The query component contains non-hierarchical data that, along with -%% data in the path component (Section 3.3), serves to identify a -%% resource within the scope of the URI's scheme and naming authority -%% (if any). The query component is indicated by the first question -%% mark ("?") character and terminated by a number sign ("#") character -%% or by the end of the URI. -%% -%% query = *( pchar / "/" / "?" ) -%% -%% -%% [RFC 3986, Chapter 3.5. Fragment] -%% -%% The fragment identifier component of a URI allows indirect -%% identification of a secondary resource by reference to a primary -%% resource and additional identifying information. -%% -%% fragment = *( pchar / "/" / "?" ) -%% -%% -%% [RFC 3986, Chapter 4.1. URI Reference] -%% -%% URI-reference is used to denote the most common usage of a resource -%% identifier. -%% -%% URI-reference = URI / relative-ref -%% -%% -%% [RFC 3986, Chapter 4.2. Relative Reference] -%% -%% A relative reference takes advantage of the hierarchical syntax -%% (Section 1.2.3) to express a URI reference relative to the name space -%% of another hierarchical URI. -%% -%% relative-ref = relative-part [ "?" 
query ] [ "#" fragment ] -%% -%% relative-part = "//" authority path-abempty -%% / path-absolute -%% / path-noscheme -%% / path-empty -%% -%% -%% [RFC 3986, Chapter 4.3. Absolute URI] -%% -%% Some protocol elements allow only the absolute form of a URI without -%% a fragment identifier. For example, defining a base URI for later -%% use by relative references calls for an absolute-URI syntax rule that -%% does not allow a fragment. -%% -%% absolute-URI = scheme ":" hier-part [ "?" query ] -%% -module(otpbp_uri_string). --ifndef(HAVE_uri_string__parse_1). -%%------------------------------------------------------------------------- -%% External API -%%------------------------------------------------------------------------- --export([compose_query/1, compose_query/2, - dissect_query/1, normalize/1, normalize/2, parse/1, - recompose/1, transcode/2]). --export_type([error/0, uri_map/0, uri_string/0]). - +-ifndef(OTP_RELEASE). +-compile({parse_transform, otpbp_pt}). +-endif. -%%------------------------------------------------------------------------- -%% Internal API -%%------------------------------------------------------------------------- --export([is_host/1, is_path/1]). % suppress warnings +-ifndef(HAVE_uri_string__is_host_1). +% OTP 21.0 +-export([is_host/1]). +-endif. +-ifndef(HAVE_uri_string__is_path_1). +% OTP 21.0 +-export([is_path/1]). +-endif. +-ifndef(HAVE_uri_string__compose_query_1). +% OTP 21.0 +-export([compose_query/1]). +-endif. +-ifndef(HAVE_uri_string__compose_query_2). +% OTP 21.0 +-export([compose_query/2]). +-endif. +-ifndef(HAVE_uri_string__dissect_query_1). +% OTP 21.0 +-export([dissect_query/1]). +-endif. +-ifndef(HAVE_uri_string__normalize_1). +% OTP 21.0 +-export([normalize/1]). +-endif. +-ifndef(HAVE_uri_string__normalize_2). +% OTP 21.0 +-export([normalize/2]). +-endif. +-ifndef(HAVE_uri_string__parse_1). +% OTP 21.0 +-export([parse/1]). +-endif. +-ifndef(HAVE_uri_string__recompose_1). +% OTP 21.0 +-export([recompose/1]). +-endif. 
+-ifndef(HAVE_uri_string__transcode_2). +% OTP 21.0 +-export([transcode/2]). +-endif. +-ifndef(HAVE_uri_string__resolve_2). +% OTP 22.3 +-export([resolve/2]). +-endif. +-ifndef(HAVE_uri_string__resolve_3). +% OTP 22.3 +-export([resolve/3]). +-endif. +-ifndef(HAVE_uri_string__allowed_characters_0). +% OTP 23.2 +-export([allowed_characters/0]). +-endif. +-ifndef(HAVE_uri_string__percent_decode_1). +% OTP 23.2 +-export([percent_decode/1]). +-endif. +-ifndef(HAVE_uri_string__quote_1). +% OTP 25.0 +-export([quote/1]). +-endif. +-ifndef(HAVE_uri_string__quote_2). +% OTP 25.0 +-export([quote/2]). +-endif. +-ifndef(HAVE_uri_string__unquote_1). +% OTP 25.0 +-export([unquote/1]). +-endif. +-ifndef(HAVE_uri_string__compose_query_1). +-ifdef(HAVE_uri_string__compose_query_2). +-import(uri_string, [compose_query/2]). +-endif. +-endif. +-ifndef(HAVE_uri_string__normalize_1). +-ifdef(HAVE_uri_string__normalize_2). +-import(uri_string, [normalize/2]). +-endif. +-endif. +-ifndef(HAVE_uri_string__normalize_2). +-ifndef(NEED_parse_1). +-define(NEED_parse_1, true). +-endif. +-ifndef(NEED_recompose_1). +-define(NEED_recompose_1, true). +-endif. +-endif. +-ifndef(HAVE_uri_string__resolve_2). +-ifdef(HAVE_uri_string__resolve_3). +-import(uri_string, [resolve/3]). +-endif. +-endif. +-ifndef(HAVE_uri_string__resolve_3). +-ifndef(NEED_parse_1). +-define(NEED_parse_1, true). +-endif. +-ifndef(NEED_recompose_1). +-define(NEED_recompose_1, true). +-endif. +-endif. +-ifndef(HAVE_uri_string__percent_decode_1). +-ifdef(HAVE_uri_string__unquote_1). +-import(uri_string, [unquote/1]). +-endif. +-endif. +-ifdef(NEED_recompose_1). +-ifdef(HAVE_uri_string__recompose_1). +-import(uri_string, [recompose/1]). +-endif. +-endif. +-ifdef(NEED_parse_1). +-ifdef(HAVE_uri_string__parse_1). +-import(uri_string, [parse/1]). +-endif. +-endif. 
-%%------------------------------------------------------------------------- -%% Macros -%%------------------------------------------------------------------------- --define(CHAR(Char), <>). --define(STRING_EMPTY, <<>>). --define(STRING(MatchStr), <>). --define(STRING_REST(MatchStr, Rest), <>). +-ifdef(HAVE_uri_string__is_host_1). +-define(fun_is_host_1, fun uri_string:is_host/1). +-else. +-define(fun_is_host_1, fun is_host/1). +-endif. +-ifdef(HAVE_uri_string__is_path_1). +-define(fun_is_path_1, fun uri_string:is_path/1). +-else. +-define(fun_is_path_1, fun is_path/1). +-endif. -define(DEC2HEX(X), - if ((X) >= 0) andalso ((X) =< 9) -> (X) + $0; - ((X) >= 10) andalso ((X) =< 15) -> (X) + $A - 10 + if + X =< 9 -> X + $0; + true -> X + ($A - 16#A) end). -define(HEX2DEC(X), - if ((X) >= $0) andalso ((X) =< $9) -> (X) - $0; - ((X) >= $A) andalso ((X) =< $F) -> (X) - $A + 10; - ((X) >= $a) andalso ((X) =< $f) -> (X) - $a + 10 + if + X =< $9 -> X - $0; + X =< $F -> X - ($A - 16#A); + true -> X - ($a - 16#A) end). - -%%%========================================================================= -%%% API -%%%========================================================================= - -%%------------------------------------------------------------------------- -%% URI compliant with RFC 3986 -%% ASCII %x21 - %x7A ("!" - "z") except -%% %x34 " double quote -%% %x60 < less than -%% %x62 > greater than -%% %x92 \ backslash -%% %x94 ^ caret / circumflex -%% %x96 ` grave / accent -%%------------------------------------------------------------------------- -type uri_string() :: iodata(). -type error() :: {error, atom(), term()}. +-type uri_map() :: #{fragment => unicode:chardata(), + host => unicode:chardata(), + path => unicode:chardata(), + port => non_neg_integer() | undefined, + query => unicode:chardata(), + scheme => unicode:chardata(), + userinfo => unicode:chardata()}. +-export_type([error/0, uri_map/0, uri_string/0]). + +-ifndef(HAVE_uri_string__is_host_1). 
+is_host(C) -> C =:= $: orelse is_unreserved(C) orelse is_sub_delim(C). + +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-ifndef(NEED_is_sub_delim_1). +-define(NEED_is_sub_delim_1, true). +-endif. +-endif. +-ifndef(HAVE_uri_string__is_path_1). +is_path(C) -> C =:= $/ orelse is_pchar(C). -%%------------------------------------------------------------------------- -%% RFC 3986, Chapter 3. Syntax Components -%%------------------------------------------------------------------------- --type uri_map() :: - #{fragment => unicode:chardata(), - host => unicode:chardata(), - path => unicode:chardata(), - port => non_neg_integer() | undefined, - query => unicode:chardata(), - scheme => unicode:chardata(), - userinfo => unicode:chardata()} | #{}. - - -%%------------------------------------------------------------------------- -%% Normalize URIs -%%------------------------------------------------------------------------- --spec normalize(URI) -> NormalizedURI when - URI :: uri_string() | uri_map(), - NormalizedURI :: uri_string() - | error(). -normalize(URIMap) -> - try normalize(URIMap, []) +-ifndef(NEED_is_pchar_1). +-define(NEED_is_pchar_1, true). +-endif. +-endif. + +-ifndef(HAVE_uri_string__compose_query_1). +compose_query(List) -> compose_query(List, [{encoding, utf8}]). +-endif. + +-ifndef(HAVE_uri_string__compose_query_2). +compose_query([], _Options) -> []; +compose_query(List, Options) -> + try + compose_query(List, Options, false, <<>>) catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} + throw:{error, _Atom, _RestData} = E -> E end. 
+compose_query([{Key, true}|Rest], Options, IsList, Acc) -> + compose_query(Rest, Options, IsList orelse is_list(Key), + <>); +compose_query([{Key, Value}|Rest], Options, IsList, Acc) -> + compose_query(Rest, Options, IsList orelse is_list(Key) orelse is_list(Value), + <>); +compose_query([], _Options, true, Acc) -> convert_to_list(Acc, utf8); +compose_query([], _Options, false, Acc) -> Acc. --spec normalize(URI, Options) -> NormalizedURI when - URI :: uri_string() | uri_map(), - Options :: [return_map], - NormalizedURI :: uri_string() | uri_map() - | error(). -normalize(URIMap, []) when is_map(URIMap) -> - recompose(normalize_map(URIMap)); -normalize(URIMap, [return_map]) when is_map(URIMap) -> - normalize_map(URIMap); -normalize(URIString, []) -> - case parse(URIString) of - Value when is_map(Value) -> - recompose(normalize_map(Value)); - Error -> - Error - end; -normalize(URIString, [return_map]) -> - case parse(URIString) of - Value when is_map(Value) -> - normalize_map(Value); - Error -> - Error - end. +form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) -> + html5_byte_encode(base10_encode(convert_to_binary(Cs, utf8, utf8))); +form_urlencode(Cs, [{encoding, latin1}]) when is_binary(Cs) -> html5_byte_encode(base10_encode(Cs)); +form_urlencode(Cs, [{encoding, Enc}]) when is_list(Cs), Enc =:= utf8 orelse Enc =:= unicode -> + html5_byte_encode(convert_to_binary(Cs, utf8, Enc)); +form_urlencode(Cs, [{encoding, Enc}]) when is_binary(Cs), Enc =:= utf8 orelse Enc =:= unicode -> html5_byte_encode(Cs); +form_urlencode(Cs, [{encoding, Enc}]) when is_list(Cs); is_binary(Cs) -> throw({error, invalid_encoding, Enc}); +form_urlencode(Cs, _) -> throw({error, invalid_input, Cs}). + +-ifndef(NEED_convert_to_list_2). +-define(NEED_convert_to_list_2, true). +-endif. +-ifndef(NEED_convert_to_binary_3). +-define(NEED_convert_to_binary_3, true). +-endif. +base10_encode(Cs) -> base10_encode(Cs, <<>>). 
-%%------------------------------------------------------------------------- -%% Parse URIs -%%------------------------------------------------------------------------- --spec parse(URIString) -> URIMap when - URIString :: uri_string(), - URIMap :: uri_map() - | error(). -parse(URIString) when is_binary(URIString) -> - try parse_uri_reference(URIString, #{}) +base10_encode(<<>>, Acc) -> Acc; +base10_encode(<>, Acc) when H > 255 -> + base10_encode(T, <>); +base10_encode(<>, Acc) -> base10_encode(T, <>). + +html5_byte_encode(B) -> html5_byte_encode(B, <<>>). + +html5_byte_encode(<<>>, Acc) -> Acc; +html5_byte_encode(<<$\s, T/binary>>, Acc) -> html5_byte_encode(T, <>); +html5_byte_encode(<>, Acc) -> + html5_byte_encode(T, + case is_url_char(H) of + true -> <>; + false -> <> + end); +html5_byte_encode(H, _Acc) -> throw({error, invalid_input, H}). + +get_separator([]) -> <<>>; +get_separator(_L) -> <<"&">>. + +-compile({inline, is_url_char/1}). +is_url_char(C) -> + C =:= 16#2A orelse C =:= 16#2D orelse C =:= 16#2E orelse C =:= 16#5F orelse + C >= 16#30 andalso C =< 16#39 orelse C >= 16#41 andalso C =< 16#5A orelse C >= 16#61 andalso C =< 16#7A. +-endif. + +-ifndef(HAVE_uri_string__dissect_query_1). +dissect_query(QueryString) when QueryString =:= <<>>; QueryString =:= [] -> []; +dissect_query(QueryString) when is_list(QueryString) -> + try + dissect_query_key(convert_to_binary(QueryString, utf8, utf8), true, [], <<>>, <<>>) catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} + throw:{error, _Atom, _RestData} = E -> E end; -parse(URIString) when is_list(URIString) -> +dissect_query(QueryString) -> try - Binary = unicode:characters_to_binary(URIString), - Map = parse_uri_reference(Binary, #{}), - convert_mapfields_to_list(Map) + dissect_query_key(QueryString, false, [], <<>>, <<>>) catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} + throw:{error, _Atom, _RestData} = E -> E end. +-ifndef(NEED_convert_to_binary_3). 
+-define(NEED_convert_to_binary_3, true). +-endif. -%%------------------------------------------------------------------------- -%% Recompose URIs -%%------------------------------------------------------------------------- --spec recompose(URIMap) -> URIString when - URIMap :: uri_map(), - URIString :: uri_string() - | error(). -recompose(Map) -> - case is_valid_map(Map) of - false -> - {error, invalid_map, Map}; - true -> - try - T0 = update_scheme(Map, empty), - T1 = update_userinfo(Map, T0), - T2 = update_host(Map, T1), - T3 = update_port(Map, T2), - T4 = update_path(Map, T3), - T5 = update_query(Map, T4), - update_fragment(Map, T5) - catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} - end - end. +dissect_query_key(<<$=, T/binary>>, IsList, Acc, Key, Value) -> dissect_query_value(T, IsList, Acc, Key, Value); +dissect_query_key(<<"&#", T/binary>>, IsList, Acc, Key, Value) -> + dissect_query_key(T, IsList, Acc, <>, Value); +dissect_query_key(<<$&, _/binary>> = T, IsList, Acc, Key, <<>>) -> dissect_query_value(T, IsList, Acc, Key, true); +dissect_query_key(<>, IsList, Acc, Key, Value) -> + dissect_query_key(T, IsList, Acc, <>, Value); +dissect_query_key(<<>>, IsList, Acc, Key, <<>>) -> dissect_query_value(<<>>, IsList, Acc, Key, true). + +dissect_query_value(<<$&, T/binary>>, IsList, Acc, Key, Value) -> + dissect_query_key(T, IsList, [{form_urldecode(IsList, Key), form_urldecode(IsList, Value)}|Acc], <<>>, <<>>); +dissect_query_value(<>, IsList, Acc, Key, Value) -> + dissect_query_value(T, IsList, Acc, Key, <>); +dissect_query_value(<<>>, IsList, Acc, Key, Value) -> + lists:reverse([{form_urldecode(IsList, Key), form_urldecode(IsList, Value)}|Acc]). 
+ +form_urldecode(_, true) -> true; +form_urldecode(true, B) -> convert_to_list(base10_decode(form_urldecode(B, <<>>)), utf8); +form_urldecode(false, B) -> base10_decode(form_urldecode(B, <<>>)); +form_urldecode(<<>>, Acc) -> Acc; +form_urldecode(<<$+, T/binary>>, Acc) -> form_urldecode(T, <>); +form_urldecode(<<$%, C0, C1, T/binary>> = B, Acc) -> + is_hex_digit(C0) andalso is_hex_digit(C1) orelse throw({error, invalid_percent_encoding, convert_to_list(B, utf8)}), + form_urldecode(T, <>); +form_urldecode(<>, Acc) -> form_urldecode(T, <>); +form_urldecode(<>, _Acc) -> throw({error, invalid_character, [H]}). + +base10_decode(Cs) -> base10_decode(Cs, <<>>). + +base10_decode(<<"&#",T/binary>>, Acc) -> base10_decode_unicode(T, Acc); +base10_decode(<>, Acc) -> base10_decode(T, <>); +base10_decode(<>, _) -> throw({error, invalid_input, [H]}); +base10_decode(<<>>, Acc) -> Acc. + +base10_decode_unicode(B, Acc) -> base10_decode_unicode(B, 0, Acc). + +base10_decode_unicode(<>, Codepoint, Acc) when H >= $0, H =< $9 -> + base10_decode_unicode(T, Codepoint * 10 + (H - $0), Acc); +base10_decode_unicode(<<$;, T/binary>>, Codepoint, Acc) -> base10_decode(T, <>); +base10_decode_unicode(<>, _, _) -> throw({error, invalid_input, [H]}). + +-ifndef(NEED_convert_to_list_2). +-define(NEED_convert_to_list_2, true). +-endif. +-ifndef(NEED_is_hex_digit_1). +-define(NEED_is_hex_digit_1, true). +-endif. +-endif. +-ifndef(HAVE_uri_string__normalize_1). +normalize(URIMap) -> normalize(URIMap, []). +-endif. -%%------------------------------------------------------------------------- -%% Transcode URIs -%%------------------------------------------------------------------------- --spec transcode(URIString, Options) -> Result when - URIString :: uri_string(), - Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}], - Result :: uri_string() - | error(). -transcode(URIString, Options) when is_binary(URIString) -> +-ifndef(HAVE_uri_string__normalize_2). 
+normalize(URIMap, []) when is_map(URIMap) -> try - InEnc = proplists:get_value(in_encoding, Options, utf8), - OutEnc = proplists:get_value(out_encoding, Options, utf8), - List = convert_to_list(URIString, InEnc), - Output = transcode(List, [], InEnc, OutEnc), - convert_to_binary(Output, utf8, OutEnc) + recompose(normalize_map(URIMap)) catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} + throw:{error, _Atom, _RestData} = E -> E end; -transcode(URIString, Options) when is_list(URIString) -> - InEnc = proplists:get_value(in_encoding, Options, utf8), - OutEnc = proplists:get_value(out_encoding, Options, utf8), - Flattened = flatten_list(URIString, InEnc), - try transcode(Flattened, [], InEnc, OutEnc) +normalize(URIMap, [return_map]) when is_map(URIMap) -> + try + normalize_map(URIMap) catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} + throw:{error, _Atom, _RestData} = E -> E + end; +normalize(URIString, []) -> + case parse(URIString) of + Value when is_map(Value) -> + try + recompose(normalize_map(Value)) + catch + throw:{error, _Atom, _RestData} = E -> E + end; + Error -> Error + end; +normalize(URIString, [return_map]) -> + case parse(URIString) of + Value when is_map(Value) -> + try + normalize_map(Value) + catch + throw:{error, _Atom, _RestData} = E -> E + end; + Error -> Error end. +normalize_map(URIMap) -> + lists:foldr(fun(F, A) -> F(A) end, URIMap, + [fun normalize_path_segment/1, fun normalize_scheme_based/1, fun normalize_undefined_port/1, + fun normalize_percent_encoding/1, fun normalize_case/1]). -%%------------------------------------------------------------------------- -%% Functions for working with the query part of a URI as a list -%% of key/value pairs. 
-%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8 -%% HTML 5.0 - 4.10.22.6 URL-encoded form data - non UTF-8 -%%------------------------------------------------------------------------- - -%%------------------------------------------------------------------------- -%% Compose urlencoded query string from a list of unescaped key/value pairs. -%% (application/x-www-form-urlencoded encoding algorithm) -%%------------------------------------------------------------------------- --spec compose_query(QueryList) -> QueryString when - QueryList :: [{unicode:chardata(), unicode:chardata() | true}], - QueryString :: uri_string() - | error(). -compose_query(List) -> - compose_query(List, [{encoding, utf8}]). - - --spec compose_query(QueryList, Options) -> QueryString when - QueryList :: [{unicode:chardata(), unicode:chardata() | true}], - Options :: [{encoding, atom()}], - QueryString :: uri_string() - | error(). -compose_query([],_Options) -> - []; -compose_query(List, Options) -> - try compose_query(List, Options, false, <<>>) - catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} - end. 
-%% -compose_query([{Key,true}|Rest], Options, IsList, Acc) -> - Separator = get_separator(Rest), - K = form_urlencode(Key, Options), - IsListNew = IsList orelse is_list(Key), - compose_query(Rest, Options, IsListNew, <>); -compose_query([{Key,Value}|Rest], Options, IsList, Acc) -> - Separator = get_separator(Rest), - K = form_urlencode(Key, Options), - V = form_urlencode(Value, Options), - IsListNew = IsList orelse is_list(Key) orelse is_list(Value), - compose_query(Rest, Options, IsListNew, <> -> + normalize_http(Map, Port, Path); +normalize_scheme_based(Map, Scheme, Port, Path) when Scheme =:= "https"; Scheme =:= <<"https">> -> + normalize_https(Map, Port, Path); +normalize_scheme_based(Map, Scheme, Port, _Path) when Scheme =:= "ftp"; Scheme =:= <<"ftp">> -> + normalize_ftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) when Scheme =:= "ssh"; Scheme =:= <<"ssh">> -> + normalize_ssh_sftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) when Scheme =:= "sftp"; Scheme =:= <<"sftp">> -> + normalize_ssh_sftp(Map, Port); +normalize_scheme_based(Map, Scheme, Port, _Path) when Scheme =:= "tftp"; Scheme =:= <<"tftp">> -> + normalize_tftp(Map, Port); +normalize_scheme_based(Map, _, _, _) -> Map. + +-compile({inline, normalize_http/3}). +normalize_http(Map, Port, Path) -> normalize_http_path(normalize_default_port(Map, Port, 80), Path). + +-compile({inline, normalize_https/3}). +normalize_https(Map, Port, Path) -> normalize_http_path(normalize_default_port(Map, Port, 443), Path). + +-compile({inline, normalize_ftp/2}). +normalize_ftp(Map, Port) -> normalize_default_port(Map, Port, 21). + +normalize_ssh_sftp(Map, Port) -> normalize_default_port(Map, Port, 22). + +-compile({inline, normalize_tftp/2}). +normalize_tftp(Map, Port) -> normalize_default_port(Map, Port, 69). + +normalize_default_port(Map, Port, Port) -> maps:remove(port, Map); +normalize_default_port(Map, _Port, _Default) -> Map. + +-compile({inline, normalize_undefined_port/1}). 
+normalize_undefined_port(#{port := undefined} = Map) -> maps:remove(port, Map); +normalize_undefined_port(#{} = Map) -> Map. + +normalize_http_path(Map, "") -> Map#{path => "/"}; +normalize_http_path(Map, <<>>) -> Map#{path => <<$/>>}; +normalize_http_path(Map, _Path) -> Map. + +-compile({inline, decode/1}). +decode(Cs) -> decode(Cs, <<>>). + +decode(L, Acc) when is_list(L) -> unicode:characters_to_list(decode(unicode:characters_to_binary(L), Acc)); +decode(<<$%, C0, C1, Cs/binary>>, Acc) -> + case is_hex_digit(C0) andalso is_hex_digit(C1) of + true -> + B = ?HEX2DEC(C0) * 16 + ?HEX2DEC(C1), + decode(Cs, + case is_unreserved(B) of + false -> + %% [2.2] Characters in the reserved set are protected from normalization. + %% [2.1] For consistency, URI producers and normalizers should + %% use uppercase hexadecimal digits for all percent-encodings. + <>; + true -> <> + end); + false -> throw({error, invalid_percent_encoding, <<$%, C0, C1>>}) + end; +decode(<>, Acc) -> decode(Cs, <>); +decode(<<>>, Acc) -> check_utf8(Acc). + +-ifndef(NEED_convert_to_binary_3). +-define(NEED_convert_to_binary_3, true). +-endif. +-ifndef(NEED_convert_to_list_2). +-define(NEED_convert_to_list_2, true). +-endif. +-ifndef(NEED_normalize_path_segment_1). +-define(NEED_normalize_path_segment_1, true). +-endif. +-ifndef(NEED_is_hex_digit_1). +-define(NEED_is_hex_digit_1, true). +-endif. +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-ifndef(NEED_check_utf8_1). +-define(NEED_check_utf8_1, true). +-endif. + +hex_to_upper(H) when H >= $a, H =< $f -> H - ($a - $A); +hex_to_upper(H) when H >= $0, H =< $9; H >= $A, H =< $F-> H; +hex_to_upper(H) -> throw({error, invalid_input, H}). +-endif. + +-ifndef(HAVE_uri_string__parse_1). 
+parse(URIString) when is_binary(URIString) -> try - B = convert_to_binary(QueryString, utf8, utf8), - dissect_query_key(B, true, [], <<>>, <<>>) + parse_uri_reference(URIString, #{}) catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} + throw:{error, _Atom, _RestData} = E -> E end; -dissect_query(QueryString) -> - try dissect_query_key(QueryString, false, [], <<>>, <<>>) +parse(URIString) when is_list(URIString) -> + try + convert_mapfields_to_list(parse_uri_reference(unicode:characters_to_binary(URIString), #{})) catch - throw:{error, Atom, RestData} -> {error, Atom, RestData} + throw:{error, _Atom, _RestData} = E -> E end. - -%%%======================================================================== -%%% Internal functions -%%%======================================================================== - -%%------------------------------------------------------------------------- -%% Converts Map fields to lists -%%------------------------------------------------------------------------- +-compile({inline, convert_mapfields_to_list/1}). convert_mapfields_to_list(Map) -> - Fun = fun (_, V) when is_binary(V) -> unicode:characters_to_list(V); - (_, V) -> V end, - maps:map(Fun, Map). - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 4.1. URI Reference] -%% -%% URI-reference is used to denote the most common usage of a resource -%% identifier. -%% -%% URI-reference = URI / relative-ref -%%------------------------------------------------------------------------- --spec parse_uri_reference(binary(), uri_map()) -> uri_map(). + maps:map(fun(_, V) when is_binary(V) -> unicode:characters_to_list(V); + (_, V) -> V + end, + Map). 
+ parse_uri_reference(<<>>, _) -> #{path => <<>>}; parse_uri_reference(URIString, URI) -> - try parse_scheme_start(URIString, URI) + try + parse_scheme_start(URIString, URI) catch - throw:{_,_,_} -> - parse_relative_part(URIString, URI) + throw:{_, _, _} -> parse_relative_part(URIString, URI) end. - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 4.2. Relative Reference] -%% -%% A relative reference takes advantage of the hierarchical syntax -%% (Section 1.2.3) to express a URI reference relative to the name space -%% of another hierarchical URI. -%% -%% relative-ref = relative-part [ "?" query ] [ "#" fragment ] -%% -%% relative-part = "//" authority path-abempty -%% / path-absolute -%% / path-noscheme -%% / path-empty -%%------------------------------------------------------------------------- --spec parse_relative_part(binary(), uri_map()) -> uri_map(). -parse_relative_part(?STRING_REST("//", Rest), URI) -> +-compile({inline, parse_relative_part/2}). 
+parse_relative_part(<<"//", Rest/binary>>, URI) -> %% Parse userinfo - "//" is NOT part of authority try parse_userinfo(Rest, URI) of {T, URI1} -> - Userinfo = calculate_parsed_userinfo(Rest, T), - URI2 = maybe_add_path(URI1), - URI2#{userinfo => Userinfo} + maps:put(userinfo, calculate_parsed_userinfo(Rest, T), maybe_add_path(URI1)) catch - throw:{_,_,_} -> + throw:{_, _, _} -> {T, URI1} = parse_host(Rest, URI), - Host = calculate_parsed_host_port(Rest, T), - URI2 = maybe_add_path(URI1), - URI2#{host => remove_brackets(Host)} + maps:put(host, remove_brackets(calculate_parsed_host_port(Rest, T)), maybe_add_path(URI1)) end; -parse_relative_part(?STRING_REST($/, Rest), URI) -> +parse_relative_part(<<$/, Rest/binary>>, URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute - Path = calculate_parsed_part(Rest, T), - URI1#{path => ?STRING_REST($/, Path)}; -parse_relative_part(?STRING_REST($?, Rest), URI) -> + URI1#{path => <<$/, (calculate_parsed_part(Rest, T))/binary>>}; +parse_relative_part(<<$?, Rest/binary>>, URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query_fragment(Rest, T), - URI2 = maybe_add_path(URI1), - URI2#{query => Query}; -parse_relative_part(?STRING_REST($#, Rest), URI) -> + maps:put(query, calculate_parsed_query_fragment(Rest, T), maybe_add_path(URI1)); +parse_relative_part(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_query_fragment(Rest, T), - URI2 = maybe_add_path(URI1), - URI2#{fragment => Fragment}; -parse_relative_part(?STRING_REST(Char, Rest), URI) -> - case is_segment_nz_nc(Char) of - true -> - {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme - Path = calculate_parsed_part(Rest, T), - URI1#{path => ?STRING_REST(Char, Path)}; - false -> throw({error,invalid_uri,[Char]}) - end. - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 3.3. 
Path] -%% -%% The path component contains data, usually organized in hierarchical -%% form, that, along with data in the non-hierarchical query component -%% (Section 3.4), serves to identify a resource within the scope of the -%% URI's scheme and naming authority (if any). The path is terminated -%% by the first question mark ("?") or number sign ("#") character, or -%% by the end of the URI. -%% -%% path = path-abempty ; begins with "/" or is empty -%% / path-absolute ; begins with "/" but not "//" -%% / path-noscheme ; begins with a non-colon segment -%% / path-rootless ; begins with a segment -%% / path-empty ; zero characters -%% -%% path-abempty = *( "/" segment ) -%% path-absolute = "/" [ segment-nz *( "/" segment ) ] -%% path-noscheme = segment-nz-nc *( "/" segment ) -%% path-rootless = segment-nz *( "/" segment ) -%% path-empty = 0 -%% segment = *pchar -%% segment-nz = 1*pchar -%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) -%% ; non-zero-length segment without any colon ":" -%% -%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -%%------------------------------------------------------------------------- - -%%------------------------------------------------------------------------- -%% path-abempty -%%------------------------------------------------------------------------- --spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}. -parse_segment(?STRING_REST($/, Rest), URI) -> - parse_segment(Rest, URI); % segment -parse_segment(?STRING_REST($?, Rest), URI) -> + maps:put(fragment, calculate_parsed_query_fragment(Rest, T), maybe_add_path(URI1)); +parse_relative_part(<>, URI) -> + is_segment_nz_nc(Char) orelse throw({error, invalid_uri, [Char]}), + {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme + URI1#{path => <>}. 
+ +parse_segment(<<$/, Rest/binary>>, URI) -> parse_segment(Rest, URI); % segment +parse_segment(<<$?, Rest/binary>>, URI) -> {T, URI1} = parse_query(Rest, URI), % ?query - Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => Query}}; -parse_segment(?STRING_REST($#, Rest), URI) -> + {Rest, URI1#{query => calculate_parsed_query_fragment(Rest, T)}}; +parse_segment(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_segment(?STRING_REST(Char, Rest), URI) -> - case is_pchar(Char) of - true -> parse_segment(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_segment(?STRING_EMPTY, URI) -> - {?STRING_EMPTY, URI}. - - -%%------------------------------------------------------------------------- -%% path-noscheme -%%------------------------------------------------------------------------- --spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}. -parse_segment_nz_nc(?STRING_REST($/, Rest), URI) -> - parse_segment(Rest, URI); % segment -parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> + {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_segment(<>, URI) -> + is_pchar(Char) orelse throw({error, invalid_uri, [Char]}), + parse_segment(Rest, URI); +parse_segment(<<>>, URI) -> {<<>>, URI}. 
+ +parse_segment_nz_nc(<<$/, Rest/binary>>, URI) -> parse_segment(Rest, URI); % segment +parse_segment_nz_nc(<<$?, Rest/binary>>, URI) -> {T, URI1} = parse_query(Rest, URI), % ?query - Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => Query}}; -parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> + {Rest, URI1#{query => calculate_parsed_query_fragment(Rest, T)}}; +parse_segment_nz_nc(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> - case is_segment_nz_nc(Char) of - true -> parse_segment_nz_nc(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_segment_nz_nc(?STRING_EMPTY, URI) -> - {?STRING_EMPTY, URI}. - - -%% Check if char is pchar. --spec is_pchar(char()) -> boolean(). -is_pchar($%) -> true; % pct-encoded -is_pchar($:) -> true; -is_pchar($@) -> true; -is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). - -%% Check if char is segment_nz_nc. --spec is_segment_nz_nc(char()) -> boolean(). -is_segment_nz_nc($%) -> true; % pct-encoded -is_segment_nz_nc($@) -> true; -is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 3.1. Scheme] -%% -%% Each URI begins with a scheme name that refers to a specification for -%% assigning identifiers within that scheme. -%% -%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) -%%------------------------------------------------------------------------- --spec parse_scheme_start(binary(), uri_map()) -> uri_map(). 
-parse_scheme_start(?STRING_REST(Char, Rest), URI) -> - case is_alpha(Char) of - true -> {T, URI1} = parse_scheme(Rest, URI), - Scheme = calculate_parsed_scheme(Rest, T), - URI2 = maybe_add_path(URI1), - URI2#{scheme => ?STRING_REST(Char, Scheme)}; - false -> throw({error,invalid_uri,[Char]}) - end. + {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_segment_nz_nc(<>, URI) -> + is_segment_nz_nc(Char) orelse throw({error, invalid_uri, [Char]}), + parse_segment_nz_nc(Rest, URI); +parse_segment_nz_nc(<<>>, URI) -> {<<>>, URI}. -%% Add path component if it missing after parsing the URI. -%% According to the URI specification there is always a -%% path component in every URI-reference and it can be -%% empty. -maybe_add_path(Map) -> - case maps:is_key(path, Map) of - false -> - Map#{path => <<>>}; - _Else -> - Map - end. +is_segment_nz_nc(C) -> C =:= $% orelse C =:= $@ orelse is_unreserved(C) orelse is_sub_delim(C). +-compile({inline, parse_scheme_start/2}). +parse_scheme_start(<>, URI) -> + is_alpha(Char) orelse throw({error, invalid_uri, [Char]}), + {T, URI1} = parse_scheme(Rest, URI), + maps:put(scheme, <>, maybe_add_path(URI1)). +maybe_add_path(Map) -> maps:merge(#{path => <<>>}, Map). --spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}. -parse_scheme(?STRING_REST($:, Rest), URI) -> +parse_scheme(<<$:, Rest/binary>>, URI) -> {_, URI1} = parse_hier(Rest, URI), {Rest, URI1}; -parse_scheme(?STRING_REST(Char, Rest), URI) -> - case is_scheme(Char) of - true -> parse_scheme(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_scheme(?STRING_EMPTY, _URI) -> - throw({error,invalid_uri,<<>>}). - +parse_scheme(<>, URI) -> + is_scheme(Char) orelse throw({error, invalid_uri, [Char]}), + parse_scheme(Rest, URI); +parse_scheme(<<>>, _URI) -> throw({error,invalid_uri,<<>>}). -%% Check if char is allowed in scheme --spec is_scheme(char()) -> boolean(). -is_scheme($+) -> true; -is_scheme($-) -> true; -is_scheme($.) 
-> true; -is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char). - - -%%------------------------------------------------------------------------- -%% hier-part = "//" authority path-abempty -%% / path-absolute -%% / path-rootless -%% / path-empty -%%------------------------------------------------------------------------- --spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}. -parse_hier(?STRING_REST("//", Rest), URI) -> +-compile({inline, parse_hier/2}). +parse_hier(<<"//", Rest/binary>>, URI) -> % Parse userinfo - "//" is NOT part of authority try parse_userinfo(Rest, URI) of - {T, URI1} -> - Userinfo = calculate_parsed_userinfo(Rest, T), - {Rest, URI1#{userinfo => Userinfo}} + {T, URI1} -> {Rest, URI1#{userinfo => calculate_parsed_userinfo(Rest, T)}} catch - throw:{_,_,_} -> + throw:{_, _, _} -> {T, URI1} = parse_host(Rest, URI), - Host = calculate_parsed_host_port(Rest, T), - {Rest, URI1#{host => remove_brackets(Host)}} + {Rest, URI1#{host => remove_brackets(calculate_parsed_host_port(Rest, T))}} end; -parse_hier(?STRING_REST($/, Rest), URI) -> +parse_hier(<<$/, Rest/binary>>, URI) -> {T, URI1} = parse_segment(Rest, URI), % path-absolute - Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; -parse_hier(?STRING_REST($?, Rest), URI) -> + {Rest, URI1#{path => <<$/, (calculate_parsed_part(Rest, T))/binary>>}}; +parse_hier(<<$?, Rest/binary>>, URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => Query}}; -parse_hier(?STRING_REST($#, Rest), URI) -> + {Rest, URI1#{query => calculate_parsed_query_fragment(Rest, T)}}; +parse_hier(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless - case is_pchar(Char) of - true -> % segment_nz - {T, URI1} = 
parse_segment(Rest, URI), - Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => ?STRING_REST(Char, Path)}}; - false -> throw({error,invalid_uri,[Char]}) - end; -parse_hier(?STRING_EMPTY, URI) -> - {<<>>, URI}. - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 3.2. Authority] -%% -%% Many URI schemes include a hierarchical element for a naming -%% authority so that governance of the name space defined by the -%% remainder of the URI is delegated to that authority (which may, in -%% turn, delegate it further). -%% -%% The authority component is preceded by a double slash ("//") and is -%% terminated by the next slash ("/"), question mark ("?"), or number -%% sign ("#") character, or by the end of the URI. -%% -%% authority = [ userinfo "@" ] host [ ":" port ] -%% -%% -%% [RFC 3986, Chapter 3.2.1. User Information] -%% -%% The userinfo subcomponent may consist of a user name and, optionally, -%% scheme-specific information about how to gain authorization to access -%% the resource. The user information, if present, is followed by a -%% commercial at-sign ("@") that delimits it from the host. -%% -%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) -%%------------------------------------------------------------------------- --spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}. -parse_userinfo(?CHAR($@), URI) -> - {?STRING_EMPTY, URI#{host => <<>>}}; -parse_userinfo(?STRING_REST($@, Rest), URI) -> + {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_hier(<>, URI) -> % path-rootless + is_pchar(Char) orelse throw({error, invalid_uri, [Char]}), + % segment_nz + {T, URI1} = parse_segment(Rest, URI), + {Rest, URI1#{path => <>}}; +parse_hier(<<>>, URI) -> {<<>>, URI}. 
+ +parse_userinfo(<<$@>>, URI) -> {<<>>, URI#{host => <<>>}}; +parse_userinfo(<<$@, Rest/binary>>, URI) -> {T, URI1} = parse_host(Rest, URI), - Host = calculate_parsed_host_port(Rest, T), - {Rest, URI1#{host => remove_brackets(Host)}}; -parse_userinfo(?STRING_REST(Char, Rest), URI) -> - case is_userinfo(Char) of - true -> parse_userinfo(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_userinfo(?STRING_EMPTY, _URI) -> + {Rest, URI1#{host => remove_brackets(calculate_parsed_host_port(Rest, T))}}; +parse_userinfo(<>, URI) -> + is_userinfo(Char) orelse throw({error, invalid_uri, [Char]}), + parse_userinfo(Rest, URI); +parse_userinfo(<<>>, _URI) -> %% URI cannot end in userinfo state throw({error,invalid_uri,<<>>}). - -%% Check if char is allowed in userinfo --spec is_userinfo(char()) -> boolean(). -is_userinfo($%) -> true; % pct-encoded -is_userinfo($:) -> true; -is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 3.2.2. Host] -%% -%% The host subcomponent of authority is identified by an IP literal -%% encapsulated within square brackets, an IPv4 address in dotted- -%% decimal form, or a registered name. -%% -%% host = IP-literal / IPv4address / reg-name -%% -%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" -%% -%% IPvFuture = "v" 1*HEXDIG "." 
1*( unreserved / sub-delims / ":" ) -%% -%% IPv6address = 6( h16 ":" ) ls32 -%% / "::" 5( h16 ":" ) ls32 -%% / [ h16 ] "::" 4( h16 ":" ) ls32 -%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 -%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 -%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 -%% / [ *4( h16 ":" ) h16 ] "::" ls32 -%% / [ *5( h16 ":" ) h16 ] "::" h16 -%% / [ *6( h16 ":" ) h16 ] "::" -%% -%% ls32 = ( h16 ":" h16 ) / IPv4address -%% ; least-significant 32 bits of address -%% -%% h16 = 1*4HEXDIG -%% ; 16 bits of address represented in hexadecimal -%% -%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet -%% -%% dec-octet = DIGIT ; 0-9 -%% / %x31-39 DIGIT ; 10-99 -%% / "1" 2DIGIT ; 100-199 -%% / "2" %x30-34 DIGIT ; 200-249 -%% / "25" %x30-35 ; 250-255 -%% -%% reg-name = *( unreserved / pct-encoded / sub-delims ) -%%------------------------------------------------------------------------- --spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. -parse_host(?STRING_REST($:, Rest), URI) -> +parse_host(<<$:, Rest/binary>>, URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_host_port(Rest, T), - Port = get_port(H), - {Rest, URI1#{port => Port}}; -parse_host(?STRING_REST($/, Rest), URI) -> + {Rest, URI1#{port => get_port(calculate_parsed_host_port(Rest, T))}}; +parse_host(<<$/, Rest/binary>>, URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty - Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; -parse_host(?STRING_REST($?, Rest), URI) -> + {Rest, URI1#{path => <<$/, (calculate_parsed_part(Rest, T))/binary>>}}; +parse_host(<<$?, Rest/binary>>, URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => Query}}; -parse_host(?STRING_REST($[, Rest), URI) -> - parse_ipv6_bin(Rest, [], URI); -parse_host(?STRING_REST($#, Rest), URI) -> + {Rest, URI1#{query => calculate_parsed_query_fragment(Rest, T)}}; 
+parse_host(<<$[, Rest/binary>>, URI) -> parse_ipv6_bin(Rest, [], URI); +parse_host(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_host(?STRING_REST(Char, Rest), URI) -> + {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_host(<>, URI) -> case is_digit(Char) of true -> - try parse_ipv4_bin(Rest, [Char], URI) + try + parse_ipv4_bin(Rest, [Char], URI) catch - throw:{_,_,_} -> - parse_reg_name(?STRING_REST(Char, Rest), URI) + throw:{_, _, _} -> parse_reg_name(<>, URI) end; - false -> parse_reg_name(?STRING_REST(Char, Rest), URI) + false -> parse_reg_name(<>, URI) end; -parse_host(?STRING_EMPTY, URI) -> - {?STRING_EMPTY, URI}. +parse_host(<<>>, URI) -> {<<>>, URI}. - --spec parse_reg_name(binary(), uri_map()) -> {binary(), uri_map()}. -parse_reg_name(?STRING_REST($:, Rest), URI) -> +parse_reg_name(<<$:, Rest/binary>>, URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_host_port(Rest, T), - Port = get_port(H), - {Rest, URI1#{port => Port}}; -parse_reg_name(?STRING_REST($/, Rest), URI) -> + {Rest, URI1#{port => get_port(calculate_parsed_host_port(Rest, T))}}; +parse_reg_name(<<$/, Rest/binary>>, URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty - Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; -parse_reg_name(?STRING_REST($?, Rest), URI) -> + {Rest, URI1#{path => <<$/, (calculate_parsed_part(Rest, T))/binary>>}}; +parse_reg_name(<<$?, Rest/binary>>, URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => Query}}; -parse_reg_name(?STRING_REST($#, Rest), URI) -> + {Rest, URI1#{query => calculate_parsed_query_fragment(Rest, T)}}; +parse_reg_name(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = 
calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_reg_name(?STRING_REST(Char, Rest), URI) -> - case is_reg_name(Char) of - true -> parse_reg_name(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_reg_name(?STRING_EMPTY, URI) -> - {?STRING_EMPTY, URI}. - -%% Check if char is allowed in reg-name --spec is_reg_name(char()) -> boolean(). -is_reg_name($%) -> true; -is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). - - --spec parse_ipv4_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. -parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> - _ = validate_ipv4_address(lists:reverse(Acc)), + {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_reg_name(<>, URI) -> + is_reg_name(Char) orelse throw({error, invalid_uri, [Char]}), + parse_reg_name(Rest, URI); +parse_reg_name(<<>>, URI) -> {<<>>, URI}. + +parse_ipv4_bin(<<$:, Rest/binary>>, Acc, URI) -> + validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_host_port(Rest, T), - Port = get_port(H), - {Rest, URI1#{port => Port}}; -parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> - _ = validate_ipv4_address(lists:reverse(Acc)), + {Rest, URI1#{port => get_port(calculate_parsed_host_port(Rest, T))}}; +parse_ipv4_bin(<<$/, Rest/binary>>, Acc, URI) -> + validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_segment(Rest, URI), % path-abempty - Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; -parse_ipv4_bin(?STRING_REST($?, Rest), Acc, URI) -> - _ = validate_ipv4_address(lists:reverse(Acc)), + {Rest, URI1#{path => <<$/, (calculate_parsed_part(Rest, T))/binary>>}}; +parse_ipv4_bin(<<$?, Rest/binary>>, Acc, URI) -> + validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => Query}}; -parse_ipv4_bin(?STRING_REST($#, Rest), 
Acc, URI) -> - _ = validate_ipv4_address(lists:reverse(Acc)), + {Rest, URI1#{query => calculate_parsed_query_fragment(Rest, T)}}; +parse_ipv4_bin(<<$#, Rest/binary>>, Acc, URI) -> + validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_ipv4_bin(?STRING_REST(Char, Rest), Acc, URI) -> - case is_ipv4(Char) of - true -> parse_ipv4_bin(Rest, [Char|Acc], URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_ipv4_bin(?STRING_EMPTY, Acc, URI) -> - _ = validate_ipv4_address(lists:reverse(Acc)), - {?STRING_EMPTY, URI}. - - -%% Check if char is allowed in IPv4 addresses --spec is_ipv4(char()) -> boolean(). -is_ipv4($.) -> true; -is_ipv4(Char) -> is_digit(Char). + {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_ipv4_bin(<>, Acc, URI) -> + is_ipv4(Char) orelse throw({error, invalid_uri, [Char]}), + parse_ipv4_bin(Rest, [Char|Acc], URI); +parse_ipv4_bin(<<>>, Acc, URI) -> + validate_ipv4_address(lists:reverse(Acc)), + {<<>>, URI}. --spec validate_ipv4_address(list()) -> list(). validate_ipv4_address(Addr) -> case inet:parse_ipv4strict_address(Addr) of {ok, _} -> Addr; - {error, _} -> throw({error,invalid_uri,Addr}) + {error, _} -> throw({error, invalid_uri, Addr}) end. - --spec parse_ipv6_bin(binary(), list(), uri_map()) -> {binary(), uri_map()}. -parse_ipv6_bin(?STRING_REST($], Rest), Acc, URI) -> - _ = validate_ipv6_address(lists:reverse(Acc)), +parse_ipv6_bin(<<$], Rest/binary>>, Acc, URI) -> + validate_ipv6_address(lists:reverse(Acc)), parse_ipv6_bin_end(Rest, URI); -parse_ipv6_bin(?STRING_REST(Char, Rest), Acc, URI) -> - case is_ipv6(Char) of - true -> parse_ipv6_bin(Rest, [Char|Acc], URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_ipv6_bin(?STRING_EMPTY, _Acc, _URI) -> - throw({error,invalid_uri,<<>>}). 
- -%% Check if char is allowed in IPv6 addresses --spec is_ipv6(char()) -> boolean(). -is_ipv6($:) -> true; -is_ipv6($.) -> true; -is_ipv6(Char) -> is_hex_digit(Char). - +parse_ipv6_bin(<>, Acc, URI) -> + is_ipv6(Char) orelse throw({error, invalid_uri, [Char]}), + parse_ipv6_bin(Rest, [Char|Acc], URI); +parse_ipv6_bin(<<>>, _Acc, _URI) -> throw({error, invalid_uri, <<>>}). --spec parse_ipv6_bin_end(binary(), uri_map()) -> {binary(), uri_map()}. -parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> +parse_ipv6_bin_end(<<$:, Rest/binary>>, URI) -> {T, URI1} = parse_port(Rest, URI), - H = calculate_parsed_host_port(Rest, T), - Port = get_port(H), - {Rest, URI1#{port => Port}}; -parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> + {Rest, URI1#{port => get_port(calculate_parsed_host_port(Rest, T))}}; +parse_ipv6_bin_end(<<$/, Rest/binary>>, URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty - Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; -parse_ipv6_bin_end(?STRING_REST($?, Rest), URI) -> + {Rest, URI1#{path => <<$/, (calculate_parsed_part(Rest, T))/binary>>}}; +parse_ipv6_bin_end(<<$?, Rest/binary>>, URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => Query}}; -parse_ipv6_bin_end(?STRING_REST($#, Rest), URI) -> + {Rest, URI1#{query => calculate_parsed_query_fragment(Rest, T)}}; +parse_ipv6_bin_end(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_ipv6_bin_end(?STRING_REST(Char, Rest), URI) -> - case is_ipv6(Char) of - true -> parse_ipv6_bin_end(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_ipv6_bin_end(?STRING_EMPTY, URI) -> - {?STRING_EMPTY, URI}. 
+ {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_ipv6_bin_end(<>, URI) -> + is_ipv6(Char) orelse throw({error, invalid_uri, [Char]}), + parse_ipv6_bin_end(Rest, URI); +parse_ipv6_bin_end(<<>>, URI) -> {<<>>, URI}. --spec validate_ipv6_address(list()) -> list(). +-compile({inline, validate_ipv6_address/1}). validate_ipv6_address(Addr) -> case inet:parse_ipv6strict_address(Addr) of {ok, _} -> Addr; - {error, _} -> throw({error,invalid_uri,Addr}) + {error, _} -> throw({error, invalid_uri, Addr}) end. - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 3.2.2. Port] -%% -%% The port subcomponent of authority is designated by an optional port -%% number in decimal following the host and delimited from it by a -%% single colon (":") character. -%% -%% port = *DIGIT -%%------------------------------------------------------------------------- --spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}. -parse_port(?STRING_REST($/, Rest), URI) -> +parse_port(<<$/, Rest/binary>>, URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty - Path = calculate_parsed_part(Rest, T), - {Rest, URI1#{path => ?STRING_REST($/, Path)}}; -parse_port(?STRING_REST($?, Rest), URI) -> + {Rest, URI1#{path => <<$/, (calculate_parsed_part(Rest, T))/binary>>}}; +parse_port(<<$?, Rest/binary>>, URI) -> {T, URI1} = parse_query(Rest, URI), % path-empty ?query - Query = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{query => Query}}; -parse_port(?STRING_REST($#, Rest), URI) -> + {Rest, URI1#{query => calculate_parsed_query_fragment(Rest, T)}}; +parse_port(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), % path-empty - Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_port(?STRING_REST(Char, Rest), URI) -> - case is_digit(Char) of - true -> parse_port(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_port(?STRING_EMPTY, 
URI) -> - {?STRING_EMPTY, URI}. - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 3.4. Query] -%% -%% The query component contains non-hierarchical data that, along with -%% data in the path component (Section 3.3), serves to identify a -%% resource within the scope of the URI's scheme and naming authority -%% (if any). The query component is indicated by the first question -%% mark ("?") character and terminated by a number sign ("#") character -%% or by the end of the URI. -%% -%% query = *( pchar / "/" / "?" ) -%%------------------------------------------------------------------------- --spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}. -parse_query(?STRING_REST($#, Rest), URI) -> + {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_port(<>, URI) -> + is_digit(Char) orelse throw({error, invalid_uri, [Char]}), + parse_port(Rest, URI); +parse_port(<<>>, URI) -> {<<>>, URI}. + +parse_query(<<$#, Rest/binary>>, URI) -> {T, URI1} = parse_fragment(Rest, URI), - Fragment = calculate_parsed_query_fragment(Rest, T), - {Rest, URI1#{fragment => Fragment}}; -parse_query(?STRING_REST(Char, Rest), URI) -> - case is_query(Char) of - true -> parse_query(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_query(?STRING_EMPTY, URI) -> - {?STRING_EMPTY, URI}. - - -%% Check if char is allowed in query --spec is_query(char()) -> boolean(). -is_query($/) -> true; -is_query($?) -> true; -is_query(Char) -> is_pchar(Char). - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 3.5. Fragment] -%% -%% The fragment identifier component of a URI allows indirect -%% identification of a secondary resource by reference to a primary -%% resource and additional identifying information. -%% -%% fragment = *( pchar / "/" / "?" 
) -%%------------------------------------------------------------------------- --spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}. -parse_fragment(?STRING_REST(Char, Rest), URI) -> - case is_fragment(Char) of - true -> parse_fragment(Rest, URI); - false -> throw({error,invalid_uri,[Char]}) - end; -parse_fragment(?STRING_EMPTY, URI) -> - {?STRING_EMPTY, URI}. - - -%% Check if char is allowed in fragment --spec is_fragment(char()) -> boolean(). -is_fragment($/) -> true; -is_fragment($?) -> true; -is_fragment(Char) -> is_pchar(Char). - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 2.2. Reserved Characters] -%% -%% reserved = gen-delims / sub-delims -%% -%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" -%% -%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -%% / "*" / "+" / "," / ";" / "=" -%% -%%------------------------------------------------------------------------- - -%% Return true if input char is reserved. --spec is_reserved(char()) -> boolean(). -is_reserved($:) -> true; -is_reserved($/) -> true; -is_reserved($?) -> true; -is_reserved($#) -> true; -is_reserved($[) -> true; -is_reserved($]) -> true; -is_reserved($@) -> true; - -is_reserved($!) -> true; -is_reserved($$) -> true; -is_reserved($&) -> true; -is_reserved($') -> true; -is_reserved($() -> true; -is_reserved($)) -> true; - -is_reserved($*) -> true; -is_reserved($+) -> true; -is_reserved($,) -> true; -is_reserved($;) -> true; -is_reserved($=) -> true; -is_reserved(_) -> false. - - -%% Check if char is sub-delim. --spec is_sub_delim(char()) -> boolean(). -is_sub_delim($!) -> true; -is_sub_delim($$) -> true; -is_sub_delim($&) -> true; -is_sub_delim($') -> true; -is_sub_delim($() -> true; -is_sub_delim($)) -> true; - -is_sub_delim($*) -> true; -is_sub_delim($+) -> true; -is_sub_delim($,) -> true; -is_sub_delim($;) -> true; -is_sub_delim($=) -> true; -is_sub_delim(_) -> false. 
- - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 2.3. Unreserved Characters] -%% -%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -%% -%%------------------------------------------------------------------------- --spec is_unreserved(char()) -> boolean(). -is_unreserved($-) -> true; -is_unreserved($.) -> true; -is_unreserved($_) -> true; -is_unreserved($~) -> true; -is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char). - --spec is_alpha(char()) -> boolean(). -is_alpha(C) - when $A =< C, C =< $Z; - $a =< C, C =< $z -> true; -is_alpha(_) -> false. - --spec is_digit(char()) -> boolean(). -is_digit(C) - when $0 =< C, C =< $9 -> true; -is_digit(_) -> false. - --spec is_hex_digit(char()) -> boolean(). -is_hex_digit(C) - when $0 =< C, C =< $9;$a =< C, C =< $f;$A =< C, C =< $F -> true; -is_hex_digit(_) -> false. - - -%% Remove enclosing brackets from binary --spec remove_brackets(binary()) -> binary(). -remove_brackets(<<$[/utf8, Rest/binary>>) -> - {H,T} = split_binary(Rest, byte_size(Rest) - 1), - case T =:= <<$]/utf8>> of - true -> H; - false -> Rest + {Rest, URI1#{fragment => calculate_parsed_query_fragment(Rest, T)}}; +parse_query(<>, URI) -> + is_query(Char) orelse throw({error, invalid_uri, [Char]}), + parse_query(Rest, URI); +parse_query(<<>>, URI) -> {<<>>, URI}. + +parse_fragment(<>, URI) -> + is_fragment(Char) orelse throw({error, invalid_uri, [Char]}), + parse_fragment(Rest, URI); +parse_fragment(<<>>, URI) -> {<<>>, URI}. + +-ifndef(NEED_is_pchar_1). +-define(NEED_is_pchar_1, true). +-endif. +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-ifndef(NEED_is_sub_delim_1). +-define(NEED_is_sub_delim_1, true). +-endif. +-ifndef(NEED_is_alpha_1). +-define(NEED_is_alpha_1, true). +-endif. +-ifndef(NEED_is_scheme_1). +-define(NEED_is_scheme_1, true). +-endif. +-ifndef(NEED_is_userinfo_1). +-define(NEED_is_userinfo_1, true). +-endif. +-ifndef(NEED_is_digit_1). 
+-define(NEED_is_digit_1, true). +-endif. +-ifndef(NEED_is_reg_name_1). +-define(NEED_is_reg_name_1, true). +-endif. +-ifndef(NEED_is_ipv4_1). +-define(NEED_is_ipv4_1, true). +-endif. +-ifndef(NEED_is_ipv6_1). +-define(NEED_is_ipv6_1, true). +-endif. +-ifndef(NEED_is_query_1). +-define(NEED_is_query_1, true). +-endif. +-ifndef(NEED_is_fragment_1). +-define(NEED_is_fragment_1, true). +-endif. + +remove_brackets(<<$[, Rest/binary>>) -> + S = byte_size(Rest) - 1, + case Rest of + <> -> R; + _ -> Rest end; remove_brackets(Addr) -> Addr. +-compile({inline, calculate_parsed_scheme/2}). +calculate_parsed_scheme(Input, <<>>) -> strip_last_char(Input, ":"); +calculate_parsed_scheme(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). -%%------------------------------------------------------------------------- -%% Helper functions for calculating the parsed binary. -%%------------------------------------------------------------------------- --spec calculate_parsed_scheme(binary(), binary()) -> binary(). -calculate_parsed_scheme(Input, <<>>) -> - strip_last_char(Input, [$:]); -calculate_parsed_scheme(Input, Unparsed) -> - get_parsed_binary(Input, Unparsed). +calculate_parsed_part(Input, <<>>) -> strip_last_char(Input, "?#"); +calculate_parsed_part(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). +calculate_parsed_userinfo(Input, <<>>) -> strip_last_char(Input, "?#@"); +calculate_parsed_userinfo(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). --spec calculate_parsed_part(binary(), binary()) -> binary(). -calculate_parsed_part(Input, <<>>) -> - strip_last_char(Input, [$?,$#]); -calculate_parsed_part(Input, Unparsed) -> - get_parsed_binary(Input, Unparsed). +calculate_parsed_host_port(Input, <<>>) -> strip_last_char(Input, ":?#/"); +calculate_parsed_host_port(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). 
+calculate_parsed_query_fragment(Input, <<>>) -> strip_last_char(Input, "#"); +calculate_parsed_query_fragment(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). --spec calculate_parsed_userinfo(binary(), binary()) -> binary(). -calculate_parsed_userinfo(Input, <<>>) -> - strip_last_char(Input, [$?,$#,$@]); -calculate_parsed_userinfo(Input, Unparsed) -> - get_parsed_binary(Input, Unparsed). +get_port(<<>>) -> undefined; +get_port(B) -> + try + binary_to_integer(B) + catch + error:badarg -> throw({error, invalid_uri, B}) + end. +strip_last_char(<<>>, _) -> <<>>; +strip_last_char(Input, L) -> + S = byte_size(Input) - 1, + <> = Input, + case lists:member(C, L) of + true -> H; + _false -> Input + end. --spec calculate_parsed_host_port(binary(), binary()) -> binary(). -calculate_parsed_host_port(Input, <<>>) -> - strip_last_char(Input, [$:,$?,$#,$/]); -calculate_parsed_host_port(Input, Unparsed) -> - get_parsed_binary(Input, Unparsed). +get_parsed_binary(Input, Unparsed) -> binary:part(Input, 0, byte_size(Input) - byte_size(Unparsed) - 1). +-endif. +-ifndef(HAVE_uri_string__recompose_1). +recompose(Map) -> + case is_valid_map(Map) of + false -> {error, invalid_map, Map}; + true -> + try + lists:foldl(fun(F, A) -> F(Map, A) end, empty, + [fun update_scheme/2, fun update_userinfo/2, fun update_host/2, + fun update_port/2, fun update_path/2, fun update_query/2, fun update_fragment/2]) + catch + throw:{error, _Atom, _RestData} = E -> E + end + end. -calculate_parsed_query_fragment(Input, <<>>) -> - strip_last_char(Input, [$#]); -calculate_parsed_query_fragment(Input, Unparsed) -> - get_parsed_binary(Input, Unparsed). +-compile({inline, is_valid_map/1}). +is_valid_map(#{path := Path} = Map) -> + (starts_with_two_slash(Path) orelse maps:is_key(userinfo, Map) orelse maps:is_key(port, Map)) andalso + is_valid_map_host(Map) orelse + all_fields_valid(Map); +is_valid_map(#{}) -> false. + +-compile({inline, starts_with_two_slash/1}). 
+starts_with_two_slash("//" ++ _) -> true; +starts_with_two_slash(<<"//", _/binary>>) -> true; +starts_with_two_slash(_) -> false. +-compile({inline, is_valid_map_host/1}). +is_valid_map_host(Map) -> maps:is_key(host, Map) andalso all_fields_valid(Map). -get_port(<<>>) -> - undefined; -get_port(B) -> - try binary_to_integer(B) - catch - error:badarg -> - throw({error, invalid_uri, B}) - end. +all_fields_valid(Map) -> maps:keys(Map) -- [scheme, userinfo, host, port, path, query, fragment] =:= []. +update_scheme(#{scheme := Scheme}, _) -> add_colon_postfix(encode_scheme(Scheme)); +update_scheme(#{}, _) -> empty. -%% Strip last char if it is in list -%% -%% This function is optimized for speed: parse/1 is about 10% faster than -%% with an alternative implementation based on lists and sets. -strip_last_char(<<>>, _) -> <<>>; -strip_last_char(Input, [C0]) -> - case binary:last(Input) of - C0 -> - init_binary(Input); - _Else -> - Input - end; -strip_last_char(Input, [C0,C1]) -> - case binary:last(Input) of - C0 -> - init_binary(Input); - C1 -> - init_binary(Input); - _Else -> - Input - end; -strip_last_char(Input, [C0,C1,C2]) -> - case binary:last(Input) of - C0 -> - init_binary(Input); - C1 -> - init_binary(Input); - C2 -> - init_binary(Input); - _Else -> - Input - end; -strip_last_char(Input, [C0,C1,C2,C3]) -> - case binary:last(Input) of - C0 -> - init_binary(Input); - C1 -> - init_binary(Input); - C2 -> - init_binary(Input); - C3 -> - init_binary(Input); - _Else -> - Input - end. +update_userinfo(#{userinfo := Userinfo}, empty) -> add_auth_prefix(encode_userinfo(Userinfo)); +update_userinfo(#{userinfo := Userinfo}, URI) -> concat(URI, add_auth_prefix(encode_userinfo(Userinfo))); +update_userinfo(#{}, empty) -> empty; +update_userinfo(#{}, URI) -> URI. +update_host(#{host := Host}, empty) -> add_auth_prefix(encode_host(Host)); +update_host(#{host := Host} = Map, URI) -> concat(URI, add_host_prefix(Map, encode_host(Host))); +update_host(#{}, URI) -> URI. 
-%% Get parsed binary -get_parsed_binary(Input, Unparsed) -> - {First, _} = split_binary(Input, byte_size(Input) - byte_size_exl_head(Unparsed)), - First. - - -%% Return all bytes of the binary except the last one. The binary must be non-empty. -init_binary(B) -> - {Init, _} = - split_binary(B, byte_size(B) - 1), - Init. - - -%% Returns the size of a binary exluding the first element. -%% Used in calls to split_binary(). --spec byte_size_exl_head(binary()) -> number(). -byte_size_exl_head(<<>>) -> 0; -byte_size_exl_head(Binary) -> byte_size(Binary) + 1. - - -%%------------------------------------------------------------------------- -%% [RFC 3986, Chapter 2.1. Percent-Encoding] -%% -%% A percent-encoding mechanism is used to represent a data octet in a -%% component when that octet's corresponding character is outside the -%% allowed set or is being used as a delimiter of, or within, the -%% component. A percent-encoded octet is encoded as a character -%% triplet, consisting of the percent character "%" followed by the two -%% hexadecimal digits representing that octet's numeric value. For -%% example, "%20" is the percent-encoding for the binary octet -%% "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space -%% character (SP). Section 2.4 describes when percent-encoding and -%% decoding is applied. -%% -%% pct-encoded = "%" HEXDIG HEXDIG -%%------------------------------------------------------------------------- - -%%------------------------------------------------------------------------- -%% Percent-encode -%%------------------------------------------------------------------------- - -%% Only validates as scheme cannot have percent-encoded characters --spec encode_scheme(list()|binary()) -> list() | binary(). 
-encode_scheme([]) -> - throw({error,invalid_scheme,""}); -encode_scheme(<<>>) -> - throw({error,invalid_scheme,<<>>}); -encode_scheme(Scheme) -> - case validate_scheme(Scheme) of - true -> Scheme; - false -> throw({error,invalid_scheme,Scheme}) - end. +update_port(#{port := undefined}, URI) -> concat(URI, <<$:>>); +update_port(#{port := Port}, URI) -> concat(URI, add_colon(encode_port(Port))); +update_port(#{}, URI) -> URI. --spec encode_userinfo(list()|binary()) -> list() | binary(). -encode_userinfo(Cs) -> - encode(Cs, fun is_userinfo/1). +update_path(#{path := Path}, empty) -> encode_path(Path); +update_path(#{host := _, path := Path}, URI) -> concat(URI, encode_path(make_path_absolute(maybe_flatten_list(Path)))); +update_path(#{path := Path}, URI) -> concat(URI, encode_path(Path)); +update_path(#{}, URI) -> URI. --spec encode_host(list()|binary()) -> list() | binary(). -encode_host(Cs) -> - case classify_host(Cs) of - regname -> Cs; - ipv4 -> Cs; - ipv6 -> bracket_ipv6(Cs); - other -> encode(Cs, fun is_reg_name/1) - end. +update_query(#{query := Query}, empty) -> encode_query(Query); +update_query(#{query := Query}, URI) -> concat(URI, add_question_mark(encode_query(Query))); +update_query(#{}, URI) -> URI. --spec encode_path(list()|binary()) -> list() | binary(). -encode_path(Cs) -> - encode(Cs, fun is_path/1). - --spec encode_query(list()|binary()) -> list() | binary(). -encode_query(Cs) -> - encode(Cs, fun is_query/1). - --spec encode_fragment(list()|binary()) -> list() | binary(). -encode_fragment(Cs) -> - encode(Cs, fun is_fragment/1). - -%%------------------------------------------------------------------------- -%% Helper funtions for percent-decode -%%------------------------------------------------------------------------- - --spec decode(list()|binary()) -> list() | binary(). -decode(Cs) -> - decode(Cs, <<>>). 
-%% -decode(L, Acc) when is_list(L) -> - B0 = unicode:characters_to_binary(L), - B1 = decode(B0, Acc), - unicode:characters_to_list(B1); -decode(<<$%,C0,C1,Cs/binary>>, Acc) -> - case is_hex_digit(C0) andalso is_hex_digit(C1) of - true -> - B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), - case is_reserved(B) of - true -> - %% [2.2] Characters in the reserved set are protected from - %% normalization. - %% [2.1] For consistency, URI producers and normalizers should - %% use uppercase hexadecimal digits for all percent- - %% encodings. - H0 = hex_to_upper(C0), - H1 = hex_to_upper(C1), - decode(Cs, <>); - false -> - decode(Cs, <>) - end; - false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>}) - end; -decode(<>, Acc) -> - decode(Cs, <>); -decode(<<>>, Acc) -> - check_utf8(Acc). +update_fragment(#{fragment := Fragment}, empty) -> add_hashmark(encode_fragment(Fragment)); +update_fragment(#{fragment := Fragment}, URI) -> concat(URI, add_hashmark(encode_fragment(Fragment))); +update_fragment(#{}, empty) -> ""; +update_fragment(#{}, URI) -> URI. -%% Returns Cs if it is utf8 encoded. -check_utf8(Cs) -> - case unicode:characters_to_list(Cs) of - {incomplete,_,_} -> - throw({error,invalid_utf8,Cs}); - {error,_,_} -> - throw({error,invalid_utf8,Cs}); - _ -> Cs - end. +-compile({inline, add_colon_postfix/1}). +add_colon_postfix(Comp) when is_binary(Comp) -> <>; +add_colon_postfix(Comp) when is_list(Comp) -> Comp ++ ":". -%% Convert hex digit to uppercase form -hex_to_upper(H) when $a =< H, H =< $f -> - H - 32; -hex_to_upper(H) when $0 =< H, H =< $9;$A =< H, H =< $F-> - H; -hex_to_upper(H) -> - throw({error,invalid_input, H}). - -%% Check if char is allowed in host --spec is_host(char()) -> boolean(). -is_host($:) -> true; -is_host(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). - -%% Check if char is allowed in path --spec is_path(char()) -> boolean(). -is_path($/) -> true; -is_path(Char) -> is_pchar(Char). 
- - -%%------------------------------------------------------------------------- -%% Helper functions for percent-encode -%%------------------------------------------------------------------------- --spec encode(list()|binary(), fun()) -> list() | binary(). -encode(Component, Fun) when is_list(Component) -> - B = unicode:characters_to_binary(Component), - unicode:characters_to_list(encode(B, Fun, <<>>)); -encode(Component, Fun) when is_binary(Component) -> - encode(Component, Fun, <<>>). -%% -encode(<>, Fun, Acc) -> - C = encode_codepoint_binary(Char, Fun), - encode(Rest, Fun, <>); -encode(<>, _Fun, _Acc) -> - throw({error,invalid_input,<>}); -encode(<<>>, _Fun, Acc) -> - Acc. +add_auth_prefix(Comp) -> add_double_slash(Comp). +-compile({inline, add_host_prefix/2}). +add_host_prefix(#{userinfo := _}, Host) -> add_char(Host, $@); +add_host_prefix(#{}, Host) -> add_double_slash(Host). --spec encode_codepoint_binary(integer(), fun()) -> binary(). -encode_codepoint_binary(C, Fun) -> - case Fun(C) of - false -> percent_encode_binary(C); - true -> <> - end. +-compile({inline, add_colon/1}). +add_colon(Comp) when is_binary(Comp) -> <<$:, Comp/binary>>. +-compile({inline, add_question_mark/1}). +add_question_mark(Comp) -> add_char(Comp, $?). --spec percent_encode_binary(integer()) -> binary(). -percent_encode_binary(Code) -> - percent_encode_binary(<>, <<>>). +add_hashmark(Comp) -> add_char(Comp, $#). +add_char(Comp, C) when is_binary(Comp) -> <>; +add_char(Comp, C) when is_list(Comp) -> [C|Comp]. -percent_encode_binary(<>, Acc) -> - percent_encode_binary(Rest, <>); -percent_encode_binary(<<>>, Acc) -> - Acc. +add_double_slash(Comp) when is_binary(Comp) -> <<"//", Comp/binary>>; +add_double_slash(Comp) when is_list(Comp) -> [$/, $/|Comp]. +-compile({inline, encode_scheme/1}). +encode_scheme(Scheme) -> + Scheme =/= "" andalso Scheme =/= <<>> andalso validate_scheme(Scheme) orelse throw({error, invalid_scheme, Scheme}), + Scheme. 
-%%------------------------------------------------------------------------- -%%------------------------------------------------------------------------- -validate_scheme([]) -> true; -validate_scheme([H|T]) -> - case is_scheme(H) of - true -> validate_scheme(T); - false -> false - end; -validate_scheme(<<>>) -> true; -validate_scheme(<>) -> - case is_scheme(H) of - true -> validate_scheme(Rest); - false -> false +encode_userinfo(Cs) -> encode(Cs, fun is_userinfo/1). + +encode_host(Cs) -> + case classify_host(Cs) of + ipv6 -> bracket_ipv6(Cs); + other -> encode(Cs, fun is_reg_name/1); + C when C =:= regname; C =:= ipv4 -> Cs end. +-compile({inline, encode_port/1}). +encode_port(Port) -> integer_to_binary(Port). + +encode_path(Cs) -> encode(Cs, ?fun_is_path_1). + +encode_query(Cs) -> encode(Cs, fun is_query/1). + +encode_fragment(Cs) -> encode(Cs, fun is_fragment/1). -%%------------------------------------------------------------------------- -%% Classifies hostname into the following categories: -%% regname, ipv4 - address does not contain reserved characters to be -%% percent-encoded -%% ipv6 - address does not contain reserved characters but it shall be -%% encolsed in brackets -%% other - address shall be percent-encoded -%%------------------------------------------------------------------------- +validate_scheme([H|T]) -> is_scheme(H) andalso validate_scheme(T); +validate_scheme(<>) -> is_scheme(H) andalso validate_scheme(Rest); +validate_scheme(S) when S =:= []; S =:= <<>> -> true. + +-compile({inline, classify_host/1}). classify_host([]) -> other; -classify_host(Addr) when is_binary(Addr) -> - A = unicode:characters_to_list(Addr), - classify_host_ipv6(A); -classify_host(Addr) -> - classify_host_ipv6(Addr). +classify_host(Addr) when is_binary(Addr) -> classify_host_ipv6(unicode:characters_to_list(Addr)); +classify_host(Addr) -> classify_host_ipv6(Addr). 
classify_host_ipv6(Addr) -> case is_ipv6_address(Addr) of @@ -1498,6 +930,7 @@ classify_host_ipv6(Addr) -> false -> classify_host_ipv4(Addr) end. +-compile({inline, classify_host_ipv4/1}). classify_host_ipv4(Addr) -> case is_ipv4_address(Addr) of true -> ipv4; @@ -1511,675 +944,500 @@ classify_host_regname([H|T]) -> false -> other end. +-compile({inline, is_ipv4_address/1}). is_ipv4_address(Addr) -> case inet:parse_ipv4strict_address(Addr) of {ok, _} -> true; {error, _} -> false end. +-compile({inline, is_ipv6_address/1}). is_ipv6_address(Addr) -> case inet:parse_ipv6strict_address(Addr) of {ok, _} -> true; {error, _} -> false end. -bracket_ipv6(Addr) when is_binary(Addr) -> - concat(<<$[,Addr/binary>>,<<$]>>); -bracket_ipv6(Addr) when is_list(Addr) -> - [$[|Addr] ++ "]". - - -%%------------------------------------------------------------------------- -%% Helper funtions for recompose -%%------------------------------------------------------------------------- - -%%------------------------------------------------------------------------- -%% Checks if input Map has valid combination of fields that can be -%% recomposed into a URI. -%% -%% The implementation is based on a decision tree that fulfills the -%% following rules: -%% - 'path' shall always be present in the input map -%% URI = scheme ":" hier-part [ "?" 
query ] [ "#" fragment ] -%% hier-part = "//" authority path-abempty -%% / path-absolute -%% / path-rootless -%% / path-empty -%% - 'host' shall be present in the input map when 'path' starts with -%% two slashes ("//") -%% path = path-abempty ; begins with "/" or is empty -%% / path-absolute ; begins with "/" but not "//" -%% / path-noscheme ; begins with a non-colon segment -%% / path-rootless ; begins with a segment -%% / path-empty ; zero characters -%% path-abempty = *( "/" segment ) -%% segment = *pchar -%% - 'host' shall be present if userinfo or port is present in input map -%% authority = [ userinfo "@" ] host [ ":" port ] -%% - All fields shall be valid (scheme, userinfo, host, port, path, query -%% or fragment). -%%------------------------------------------------------------------------- -is_valid_map(#{path := Path} = Map) -> - ((starts_with_two_slash(Path) andalso is_valid_map_host(Map)) - orelse - (maps:is_key(userinfo, Map) andalso is_valid_map_host(Map)) - orelse - (maps:is_key(port, Map) andalso is_valid_map_host(Map)) - orelse - all_fields_valid(Map)); -is_valid_map(#{}) -> - false. - - -is_valid_map_host(Map) -> - maps:is_key(host, Map) andalso all_fields_valid(Map). - - -all_fields_valid(Map) -> - Fun = fun(scheme, _, Acc) -> Acc; - (userinfo, _, Acc) -> Acc; - (host, _, Acc) -> Acc; - (port, _, Acc) -> Acc; - (path, _, Acc) -> Acc; - (query, _, Acc) -> Acc; - (fragment, _, Acc) -> Acc; - (_, _, _) -> false - end, - maps:fold(Fun, true, Map). +-compile({inline, bracket_ipv6/1}). +bracket_ipv6(Addr) when is_binary(Addr) -> <<$[, Addr/binary, $]>>; +bracket_ipv6(Addr) when is_list(Addr) -> [$[|Addr ++ "]"]. +-compile({inline, make_path_absolute/1}). +make_path_absolute(Path) when Path =:= <<>>; Path =:= "" -> Path; +make_path_absolute(<<"/", _/binary>> = Path) -> Path; +make_path_absolute([$/|_] = Path) -> Path; +make_path_absolute(Path) -> add_char(Path, $/). 
-starts_with_two_slash([$/,$/|_]) -> - true; -starts_with_two_slash(?STRING_REST("//", _)) -> - true; -starts_with_two_slash(_) -> false. +-compile({inline, maybe_flatten_list/1}). +maybe_flatten_list(Path) when is_binary(Path) -> Path; +maybe_flatten_list(Path) -> unicode:characters_to_list(Path). +concat(A, B) when is_binary(A), is_binary(B) -> <>; +concat(A, B) when is_binary(A), is_list(B) -> unicode:characters_to_list(A) ++ B; +concat(A, B) when is_list(A), is_binary(B) -> A ++ unicode:characters_to_list(B); +concat(A, B) when is_list(A) -> A ++ B. -update_scheme(#{scheme := Scheme}, _) -> - add_colon_postfix(encode_scheme(Scheme)); -update_scheme(#{}, _) -> - empty. - - -update_userinfo(#{userinfo := Userinfo}, empty) -> - add_auth_prefix(encode_userinfo(Userinfo)); -update_userinfo(#{userinfo := Userinfo}, URI) -> - concat(URI,add_auth_prefix(encode_userinfo(Userinfo))); -update_userinfo(#{}, empty) -> - empty; -update_userinfo(#{}, URI) -> - URI. - - -update_host(#{host := Host}, empty) -> - add_auth_prefix(encode_host(Host)); -update_host(#{host := Host} = Map, URI) -> - concat(URI,add_host_prefix(Map, encode_host(Host))); -update_host(#{}, empty) -> - empty; -update_host(#{}, URI) -> - URI. - - -%% URI cannot be empty for ports. E.g. ":8080" is not a valid URI -update_port(#{port := undefined}, URI) -> - concat(URI, <<":">>); -update_port(#{port := Port}, URI) -> - concat(URI,add_colon(encode_port(Port))); -update_port(#{}, URI) -> - URI. - - -update_path(#{path := Path}, empty) -> - encode_path(Path); -update_path(#{host := _, path := Path0}, URI) -> - %% When host is present in a URI the path must begin with "/" or be empty. - Path = make_path_absolute(Path0), - concat(URI,encode_path(Path)); -update_path(#{path := Path}, URI) -> - concat(URI,encode_path(Path)); -update_path(#{}, empty) -> - empty; -update_path(#{}, URI) -> - URI. 
- - -update_query(#{query := Query}, empty) -> - encode_query(Query); -update_query(#{query := Query}, URI) -> - concat(URI,add_question_mark(encode_query(Query))); -update_query(#{}, empty) -> - empty; -update_query(#{}, URI) -> - URI. - - -update_fragment(#{fragment := Fragment}, empty) -> - add_hashmark(encode_fragment(Fragment)); -update_fragment(#{fragment := Fragment}, URI) -> - concat(URI,add_hashmark(encode_fragment(Fragment))); -update_fragment(#{}, empty) -> - ""; -update_fragment(#{}, URI) -> - URI. - -%%------------------------------------------------------------------------- -%% Concatenates its arguments that can be lists and binaries. -%% The result is a list if at least one of its argument is a list and -%% binary otherwise. -%%------------------------------------------------------------------------- -concat(A, B) when is_binary(A), is_binary(B) -> - <>; -concat(A, B) when is_binary(A), is_list(B) -> - unicode:characters_to_list(A) ++ B; -concat(A, B) when is_list(A) -> - A ++ maybe_to_list(B). - -add_hashmark(Comp) when is_binary(Comp) -> - <<$#, Comp/binary>>; -add_hashmark(Comp) when is_list(Comp) -> - [$#|Comp]. - -add_question_mark(Comp) when is_binary(Comp) -> - <<$?, Comp/binary>>; -add_question_mark(Comp) when is_list(Comp) -> - [$?|Comp]. - -add_colon(Comp) when is_binary(Comp) -> - <<$:, Comp/binary>>. - -add_colon_postfix(Comp) when is_binary(Comp) -> - <>; -add_colon_postfix(Comp) when is_list(Comp) -> - Comp ++ ":". - -add_auth_prefix(Comp) when is_binary(Comp) -> - <<"//", Comp/binary>>; -add_auth_prefix(Comp) when is_list(Comp) -> - [$/,$/|Comp]. - -add_host_prefix(#{userinfo := _}, Host) when is_binary(Host) -> - <<$@,Host/binary>>; -add_host_prefix(#{}, Host) when is_binary(Host) -> - <<"//",Host/binary>>; -add_host_prefix(#{userinfo := _}, Host) when is_list(Host) -> - [$@|Host]; -add_host_prefix(#{}, Host) when is_list(Host) -> - [$/,$/|Host]. 
- -maybe_to_list(Comp) when is_binary(Comp) -> unicode:characters_to_list(Comp); -maybe_to_list(Comp) -> Comp. - -encode_port(Port) -> - integer_to_binary(Port). - -%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] -%% -%% hier-part = "//" authority path-abempty -%% / path-absolute -%% / path-rootless -%% / path-empty -%% -%% path = path-abempty ; begins with "/" or is empty -%% / path-absolute ; begins with "/" but not "//" -%% / path-noscheme ; begins with a non-colon segment -%% / path-rootless ; begins with a segment -%% / path-empty ; zero characters -make_path_absolute(<<>>) -> - <<>>; -make_path_absolute("") -> - ""; -make_path_absolute(<<"/",_/binary>> = Path) -> - Path; -make_path_absolute([$/|_] = Path) -> - Path; -make_path_absolute(Path) when is_binary(Path) -> - concat(<<$/>>, Path); -make_path_absolute(Path) when is_list(Path) -> - concat("/", Path). - -%%------------------------------------------------------------------------- -%% Helper functions for transcode -%%------------------------------------------------------------------------- - -%%------------------------------------------------------------------------- -%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>). -%% 1. Convert (transcode/2) input to list form (list of unicode codepoints) -%% "x%00%00%00%F6" -%% 2. Accumulate characters until percent-encoded segment (transcode/4). -%% Acc = "x" -%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4) -%% <<0,0,0,246>> -%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8): -%% <<195,182>> -%% 5. Percent-encode out-encoded binary: -%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>> -%% 6. Convert binary to list form, reverse it and append the accumulator -%% "6B%3C%" + "x" -%% 7. 
Reverse Acc and return it -%%------------------------------------------------------------------------- -transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) -> - transcode_pct(L, Acc, <<>>, InEnc, OutEnc); -transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) -> - transcode(L, Acc, [], InEnc, OutEnc). -%% -transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) -> - transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding); -transcode([C|Rest], Acc, List, InEncoding, OutEncoding) -> - transcode(Rest, Acc, [C|List], InEncoding, OutEncoding); -transcode([], Acc, List, _InEncoding, _OutEncoding) -> - lists:reverse(List ++ Acc). - - -%% Transcode percent-encoded segment -transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) -> - case is_hex_digit(C0) andalso is_hex_digit(C1) of - true -> - Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1), - transcode_pct(Rest, Acc, <>, InEncoding, OutEncoding); - false -> throw({error, invalid_percent_encoding,L}) +-ifndef(NEED_is_reg_name_1). +-define(NEED_is_reg_name_1, true). +-endif. +-ifndef(NEED_encode_2). +-define(NEED_encode_2, true). +-endif. +-ifndef(NEED_is_scheme_1). +-define(NEED_is_scheme_1, true). +-endif. +-ifndef(NEED_is_userinfo_1). +-define(NEED_is_userinfo_1, true). +-endif. +-ifndef(NEED_is_query_1). +-define(NEED_is_query_1, true). +-endif. +-ifndef(NEED_is_fragment_1). +-define(NEED_is_fragment_1, true). +-endif. +-endif. + +-ifndef(HAVE_uri_string__transcode_2). 
+transcode(URIString, Options) when is_binary(URIString) -> + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + try + convert_to_binary(transcode(convert_to_list(URIString, InEnc), InEnc, OutEnc), utf8, OutEnc) + catch + throw:{error, _Atom, _RestData} = E -> E end; -transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) -> - OutBinary = convert_to_binary(B, InEncoding, OutEncoding), - PctEncUtf8 = percent_encode_segment(OutBinary), - Out = lists:reverse(convert_to_list(PctEncUtf8, utf8)), - transcode(L, Out ++ Acc, [], InEncoding, OutEncoding); -transcode_pct([], Acc, B, InEncoding, OutEncoding) -> - OutBinary = convert_to_binary(B, InEncoding, OutEncoding), - PctEncUtf8 = percent_encode_segment(OutBinary), - Out = convert_to_list(PctEncUtf8, utf8), - lists:reverse(Acc) ++ Out. - - -%% Convert to binary -convert_to_binary(Binary, InEncoding, OutEncoding) -> - case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of - {error, _List, RestData} -> - throw({error, invalid_input, RestData}); - {incomplete, _List, RestData} -> - throw({error, invalid_input, RestData}); - Result -> - Result +transcode(URIString, Options) when is_list(URIString) -> + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + Flattened = flatten_list(URIString, InEnc), + try + transcode(Flattened, InEnc, OutEnc) + catch + throw:{error, _Atom, _RestData} = E -> E end. +transcode([$%, _C0, _C1|_Rest] = L, InEnc, OutEnc) -> transcode_pct(L, InEnc, OutEnc, [], <<>>); +transcode([_C|_Rest] = L, InEnc, OutEnc) -> transcode(L, InEnc, OutEnc, [], []). -%% Convert to list -convert_to_list(Binary, InEncoding) -> - case unicode:characters_to_list(Binary, InEncoding) of - {error, _List, RestData} -> - throw({error, invalid_input, RestData}); - {incomplete, _List, RestData} -> - throw({error, invalid_input, RestData}); - Result -> - Result - end. 
+transcode([$%, _C0, _C1|_Rest] = L, InEnc, OutEnc, Acc, List) -> transcode_pct(L, InEnc, OutEnc, List ++ Acc, <<>>); +transcode([C|Rest], InEnc, OutEnc, Acc, List) -> transcode(Rest, InEnc, OutEnc, Acc, [C|List]); +transcode([], _InEnc, _OutEnc, Acc, List) -> lists:reverse(List ++ Acc). + +transcode_pct([$%, C0, C1|Rest] = L, InEnc, OutEnc, Acc, B) -> + is_hex_digit(C0) andalso is_hex_digit(C1) orelse throw({error, invalid_percent_encoding, L}), + transcode_pct(Rest, InEnc, OutEnc, Acc, <>); +transcode_pct([_C|_Rest] = L, InEnc, OutEnc, Acc, B) -> + transcode(L, InEnc, OutEnc, + lists:reverse(convert_to_list(percent_encode_segment(convert_to_binary(B, InEnc, OutEnc)), utf8), Acc), + []); +transcode_pct([], InEnc, OutEnc, Acc, B) -> + lists:reverse(Acc, convert_to_list(percent_encode_segment(convert_to_binary(B, InEnc, OutEnc)), utf8)). + +percent_encode_segment(Segment) -> percent_encode_binary(Segment, <<>>). +-compile({inline, flatten_list/2}). +flatten_list([], _) -> []; +flatten_list(L, InEnc) -> flatten_list(L, InEnc, []). -%% Flatten input list -flatten_list([], _) -> - []; -flatten_list(L, InEnc) -> - flatten_list(L, InEnc, []). -%% flatten_list([H|T], InEnc, Acc) when is_binary(H) -> - L = convert_to_list(H, InEnc), - flatten_list(T, InEnc, lists:reverse(L) ++ Acc); -flatten_list([H|T], InEnc, Acc) when is_list(H) -> - flatten_list(H ++ T, InEnc, Acc); -flatten_list([H|T], InEnc, Acc) -> - flatten_list(T, InEnc, [H|Acc]); -flatten_list([], _InEnc, Acc) -> - lists:reverse(Acc); -flatten_list(Arg, _, _) -> - throw({error, invalid_input, Arg}). + flatten_list(T, InEnc, lists:reverse(convert_to_list(H, InEnc), Acc)); +flatten_list([H|T], InEnc, Acc) when is_list(H) -> flatten_list(H ++ T, InEnc, Acc); +flatten_list([H|T], InEnc, Acc) -> flatten_list(T, InEnc, [H|Acc]); +flatten_list([], _InEnc, Acc) -> lists:reverse(Acc); +flatten_list(Arg, _, _) -> throw({error, invalid_input, Arg}). + +-ifndef(NEED_percent_encode_binary_2). 
+-define(NEED_percent_encode_binary_2, true). +-endif. +-ifndef(NEED_is_hex_digit_1). +-define(NEED_is_hex_digit_1, true). +-endif. +-ifndef(NEED_convert_to_binary_3). +-define(NEED_convert_to_binary_3, true). +-endif. +-ifndef(NEED_convert_to_list_2). +-define(NEED_convert_to_list_2, true). +-endif. +-endif. +-ifndef(HAVE_uri_string__allowed_characters_0). +allowed_characters() -> + Input = lists:seq(0, 127), + lists:keymap(fun(F) -> lists:filter(F, Input) end, 1, + [{scheme, fun is_scheme/1}, + {userinfo, fun is_userinfo/1}, + {host, ?fun_is_host_1}, + {ipv4, fun is_ipv4/1}, + {ipv6, fun is_ipv6/1}, + {regname, fun is_reg_name/1}, + {path, ?fun_is_path_1}, + {query, fun is_query/1}, + {fragment, fun is_fragment/1}, + {reserved, fun is_reserved/1}, + {unreserved, fun is_unreserved/1}]). + +-ifndef(NEED_is_scheme_1). +-define(NEED_is_scheme_1, true). +-endif. +-ifndef(NEED_is_userinfo_1). +-define(NEED_is_userinfo_1, true). +-endif. +-ifndef(NEED_is_ipv4_1). +-define(NEED_is_ipv4_1, true). +-endif. +-ifndef(NEED_is_ipv6_1). +-define(NEED_is_ipv6_1, true). +-endif. +-ifndef(NEED_is_reg_name_1). +-define(NEED_is_reg_name_1, true). +-endif. +-ifndef(NEED_is_query_1). +-define(NEED_is_query_1, true). +-endif. +-ifndef(NEED_is_fragment_1). +-define(NEED_is_fragment_1, true). +-endif. +-ifndef(NEED_is_reserved_1). +-define(NEED_is_reserved_1, true). +-endif. +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-endif. -percent_encode_segment(Segment) -> - percent_encode_binary(Segment, <<>>). +-ifndef(HAVE_uri_string__percent_decode_1). 
+percent_decode(URIMap) when is_map(URIMap)-> + Fun = fun(K, V) when K =:= userinfo; K =:= host; K =:= path; K =:= query; K =:= fragment -> + case unquote(V) of + {error, Reason, Input} -> throw({error, {invalid, {K, {Reason, Input}}}}); + Else -> Else + end; + %% Handle port and scheme + (_, V) -> V + end, + try + maps:map(Fun, URIMap) + catch + throw:Return -> Return + end; +percent_decode(URI) when is_list(URI); is_binary(URI) -> unquote(URI). +-endif. +-ifndef(HAVE_uri_string__quote_1). +quote(D) -> encode(D, fun is_unreserved/1). -%%------------------------------------------------------------------------- -%% Helper functions for compose_query -%%------------------------------------------------------------------------- +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-ifndef(NEED_encode_2). +-define(NEED_encode_2, true). +-endif. +-endif. -%% Returns separator to be used between key-value pairs -get_separator(L) when length(L) =:= 0 -> - <<>>; -get_separator(_L) -> - <<"&">>. +-ifndef(HAVE_uri_string__quote_2). +quote(D, Safe) -> encode(D, fun(C) -> is_unreserved(C) orelse lists:member(C, Safe) end). +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-ifndef(NEED_encode_2). +-define(NEED_encode_2, true). +-endif. +-endif. 
-%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8 -%% HTML 5.0 - 4.10.22.6 URL-encoded form data - encoding (non UTF-8) -form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) -> - B = convert_to_binary(Cs, utf8, utf8), - html5_byte_encode(base10_encode(B)); -form_urlencode(Cs, [{encoding, latin1}]) when is_binary(Cs) -> - html5_byte_encode(base10_encode(Cs)); -form_urlencode(Cs, [{encoding, Encoding}]) - when is_list(Cs), Encoding =:= utf8; Encoding =:= unicode -> - B = convert_to_binary(Cs, utf8, Encoding), - html5_byte_encode(B); -form_urlencode(Cs, [{encoding, Encoding}]) - when is_binary(Cs), Encoding =:= utf8; Encoding =:= unicode -> - html5_byte_encode(Cs); -form_urlencode(Cs, [{encoding, Encoding}]) when is_list(Cs); is_binary(Cs) -> - throw({error,invalid_encoding, Encoding}); -form_urlencode(Cs, _) -> - throw({error,invalid_input, Cs}). - - -%% For each character in the entry's name and value that cannot be expressed using -%% the selected character encoding, replace the character by a string consisting of -%% a U+0026 AMPERSAND character (&), a "#" (U+0023) character, one or more ASCII -%% digits representing the Unicode code point of the character in base ten, and -%% finally a ";" (U+003B) character. -base10_encode(Cs) -> - base10_encode(Cs, <<>>). -%% -base10_encode(<<>>, Acc) -> - Acc; -base10_encode(<>, Acc) when H > 255 -> - Base10 = convert_to_binary(integer_to_list(H,10), utf8, utf8), - base10_encode(T, <>); -base10_encode(<>, Acc) -> - base10_encode(T, <>). - - -html5_byte_encode(B) -> - html5_byte_encode(B, <<>>). -%% -html5_byte_encode(<<>>, Acc) -> - Acc; -html5_byte_encode(<<$ ,T/binary>>, Acc) -> - html5_byte_encode(T, <>); -html5_byte_encode(<>, Acc) -> - case is_url_char(H) of - true -> - html5_byte_encode(T, <>); - false -> - <> = <>, - html5_byte_encode(T, <>) - end; -html5_byte_encode(H, _Acc) -> - throw({error,invalid_input, H}). 
- - -%% Return true if input char can appear in form-urlencoded string -%% Allowed chararacters: -%% 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, -%% 0x5F, 0x61 to 0x7A -is_url_char(C) - when C =:= 16#2A; C =:= 16#2D; - C =:= 16#2E; C =:= 16#5F; - 16#30 =< C, C =< 16#39; - 16#41 =< C, C =< 16#5A; - 16#61 =< C, C =< 16#7A -> true; -is_url_char(_) -> false. - - -%%------------------------------------------------------------------------- -%% Helper functions for dissect_query -%%------------------------------------------------------------------------- -dissect_query_key(<<$=,T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_value(T, IsList, Acc, Key, Value); -dissect_query_key(<<"&#",T/binary>>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, <>, Value); -dissect_query_key(T = <<$&,_/binary>>, IsList, Acc, Key, <<>>) -> - dissect_query_value(T, IsList, Acc, Key, true); -dissect_query_key(<>, IsList, Acc, Key, Value) -> - dissect_query_key(T, IsList, Acc, <>, Value); -dissect_query_key(T = <<>>, IsList, Acc, Key, <<>>) -> - dissect_query_value(T, IsList, Acc, Key, true). - -dissect_query_value(<<$&,T/binary>>, IsList, Acc, Key, Value) -> - K = form_urldecode(IsList, Key), - V = form_urldecode(IsList, Value), - dissect_query_key(T, IsList, [{K,V}|Acc], <<>>, <<>>); -dissect_query_value(<>, IsList, Acc, Key, Value) -> - dissect_query_value(T, IsList, Acc, Key, <>); -dissect_query_value(<<>>, IsList, Acc, Key, Value) -> - K = form_urldecode(IsList, Key), - V = form_urldecode(IsList, Value), - lists:reverse([{K,V}|Acc]). 
- -%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8 -%% HTML 5.0 - 4.10.22.6 URL-encoded form data - decoding (non UTF-8) -form_urldecode(_, true) -> - true; -form_urldecode(true, B) -> - Result = base10_decode(form_urldecode(B, <<>>)), - convert_to_list(Result, utf8); -form_urldecode(false, B) -> - base10_decode(form_urldecode(B, <<>>)); -form_urldecode(<<>>, Acc) -> - Acc; -form_urldecode(<<$+,T/binary>>, Acc) -> - form_urldecode(T, <>); -form_urldecode(<<$%,C0,C1,T/binary>>, Acc) -> - case is_hex_digit(C0) andalso is_hex_digit(C1) of - true -> - V = ?HEX2DEC(C0)*16+?HEX2DEC(C1), - form_urldecode(T, <>); - false -> - L = convert_to_list(<<$%,C0,C1,T/binary>>, utf8), - throw({error, invalid_percent_encoding, L}) +-ifndef(HAVE_uri_string__unquote_1). +unquote(D) -> raw_decode(D, <<>>). + +raw_decode(<<$%, C0, C1, Cs/binary>>, Acc) -> + is_hex_digit(C0) andalso is_hex_digit(C1) orelse throw({error, invalid_percent_encoding, <<$%, C0, C1>>}), + raw_decode(Cs, <>); +raw_decode(<>, Acc) -> raw_decode(Cs, <>); +raw_decode(<<>>, Acc) -> check_utf8(Acc); +raw_decode(L, Acc) when is_list(L) -> + try + unicode:characters_to_list(raw_decode(unicode:characters_to_binary(L), Acc)) + catch + throw:{error, _Atom, _RestData} = E -> E + end. + +-ifndef(NEED_is_hex_digit_1). +-define(NEED_is_hex_digit_1, true). +-endif. +-ifndef(NEED_check_utf8_1). +-define(NEED_check_utf8_1, true). +-endif. +-endif. + +-ifndef(HAVE_uri_string__resolve_2). +resolve(URIMap, BaseURIMap) -> resolve(URIMap, BaseURIMap, []). +-endif. + +-ifndef(HAVE_uri_string__resolve_3). +resolve(URIMap, BaseURIMap, Options) when is_map(URIMap) -> + case resolve_map(URIMap, BaseURIMap) of + TargetURIMap when is_map(TargetURIMap) -> + case Options of + [return_map] -> TargetURIMap; + [] -> recompose(TargetURIMap) + end; + Error -> Error end; -form_urldecode(<>, Acc) -> - form_urldecode(T, <>); -form_urldecode(<>, _Acc) -> - throw({error, invalid_character, [H]}). 
- -base10_decode(Cs) -> - base10_decode(Cs, <<>>). -% -base10_decode(<<>>, Acc) -> - Acc; -base10_decode(<<"&#",T/binary>>, Acc) -> - base10_decode_unicode(T, Acc); -base10_decode(<>, Acc) -> - base10_decode(T,<>); -base10_decode(<>, _) -> - throw({error, invalid_input, [H]}). - - -base10_decode_unicode(B, Acc) -> - base10_decode_unicode(B, 0, Acc). -%% -base10_decode_unicode(<>, Codepoint, Acc) when $0 =< H, H =< $9 -> - Res = Codepoint * 10 + (H - $0), - base10_decode_unicode(T, Res, Acc); -base10_decode_unicode(<<$;,T/binary>>, Codepoint, Acc) -> - base10_decode(T, <>); -base10_decode_unicode(<>, _, _) -> - throw({error, invalid_input, [H]}). - - -%%------------------------------------------------------------------------- -%% Helper functions for normalize -%%------------------------------------------------------------------------- +resolve(URIString, BaseURIMap, Options) -> + case parse(URIString) of + URIMap when is_map(URIMap) -> resolve(URIMap, BaseURIMap, Options); + Error -> Error + end. -normalize_map(URIMap) -> - normalize_path_segment( - normalize_scheme_based( - normalize_percent_encoding( - normalize_case(URIMap)))). +-compile({inline, resolve_map/2}). +resolve_map(#{scheme := _} = URIMap, _) -> normalize_path_segment(URIMap); +resolve_map(URIMap, #{scheme := _} = BaseURIMap) -> resolve_map(URIMap, BaseURIMap, resolve_path_type(URIMap)); +resolve_map(_URIMap, BaseURIMap) when is_map(BaseURIMap) -> {error, invalid_scheme, ""}; +resolve_map(URIMap, BaseURIString) -> + case parse(BaseURIString) of + #{scheme := _} = BaseURIMap-> resolve_map(URIMap, BaseURIMap, resolve_path_type(URIMap)); + BaseURIMap when is_map(BaseURIMap) -> {error, invalid_scheme, ""}; + Error -> Error + end. 
+resolve_map(#{host := _} = URI, #{scheme := Scheme}, _) -> normalize_path_segment(URI#{scheme => Scheme}); +resolve_map(#{query := _} = URI, BaseURI, empty_path) -> + maps:merge(URI, maps:with([scheme, userinfo, host, port, path], BaseURI)); +resolve_map(URI, BaseURI, empty_path) -> + maps:merge(URI, maps:with([scheme, userinfo, host, port, path, query], BaseURI)); +resolve_map(URI, BaseURI, absolute_path) -> + normalize_path_segment(maps:merge(URI, maps:with([scheme, userinfo, host, port], BaseURI))); +resolve_map(#{path := Path} = URI, BaseURI, relative_path) -> + normalize_path_segment(maps:merge(URI#{path => merge_paths(Path, BaseURI)}, + maps:with([scheme, userinfo, host, port], BaseURI))). + +resolve_path_type(URIMap) -> + case iolist_to_binary(maps:get(path, URIMap, <<>>)) of + <<>> -> empty_path; + <<$/, _/bits>> -> absolute_path; + _ -> relative_path + end. -%% 6.2.2.1. Case Normalization -normalize_case(#{scheme := Scheme, host := Host} = Map) -> - Map#{scheme => to_lower(Scheme), - host => to_lower(Host)}; -normalize_case(#{host := Host} = Map) -> - Map#{host => to_lower(Host)}; -normalize_case(#{scheme := Scheme} = Map) -> - Map#{scheme => to_lower(Scheme)}; -normalize_case(#{} = Map) -> - Map. +-compile({inline, merge_paths/2}). +merge_paths(Path, #{path := BasePath0} = BaseURI) -> + case {BaseURI, iolist_size(BasePath0)} of + {#{host := _}, 0} -> merge_paths_absolute(Path); + _ -> + case string:split(BasePath0, <<$/>>, trailing) of + [BasePath, _] when is_binary(Path) -> unicode:characters_to_binary([BasePath, $/, Path]); + [BasePath, _] when is_list(Path) -> unicode:characters_to_list([BasePath, $/, Path]); + [_] -> Path + end + end. +-compile({inline, merge_paths_absolute/1}). +merge_paths_absolute(Path) when is_binary(Path) -> <<$/, Path/binary>>; +merge_paths_absolute(Path) when is_list(Path) -> unicode:characters_to_list([$/, Path]). -%% 6.2.2.2. 
Percent-Encoding Normalization -normalize_percent_encoding(Map) -> - Fun = fun (K,V) when K =:= userinfo; K =:= host; K =:= path; - K =:= query; K =:= fragment -> - decode(V); - %% Handle port and scheme - (_,V) -> - V - end, - maps:map(Fun, Map). +-ifndef(NEED_normalize_path_segment_1). +-define(NEED_normalize_path_segment_1, true). +-endif. +-endif. +-ifdef(NEED_check_utf8_1). +check_utf8(Cs) -> + case unicode:characters_to_list(Cs) of + {E, _, _} when E =:= incomplete; E =:= error -> throw({error, invalid_utf8, Cs}); + _ -> Cs + end. +-endif. -to_lower(Cs) when is_list(Cs) -> - B = convert_to_binary(Cs, utf8, utf8), - convert_to_list(to_lower(B), utf8); -to_lower(Cs) when is_binary(Cs) -> - to_lower(Cs, <<>>). -%% -to_lower(<>, Acc) when $A =< C, C =< $Z -> - to_lower(Cs, <>); -to_lower(<>, Acc) -> - to_lower(Cs, <>); -to_lower(<<>>, Acc) -> - Acc. +-ifdef(NEED_is_userinfo_1). +is_userinfo(C) -> C =:= $% orelse C =:= $: orelse is_unreserved(C) orelse is_sub_delim(C). +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-ifndef(NEED_is_sub_delim_1). +-define(NEED_is_sub_delim_1, true). +-endif. +-endif. -%% 6.2.2.3. Path Segment Normalization -%% 5.2.4. Remove Dot Segments -normalize_path_segment(Map) -> - Path = maps:get(path, Map, undefined), - Map#{path => remove_dot_segments(Path)}. +-ifdef(NEED_is_reg_name_1). +is_reg_name(C) -> C =:= $% orelse is_unreserved(C) orelse is_sub_delim(C). +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-ifndef(NEED_is_sub_delim_1). +-define(NEED_is_sub_delim_1, true). +-endif. +-endif. -remove_dot_segments(Path) when is_binary(Path) -> - remove_dot_segments(Path, <<>>); -remove_dot_segments(Path) when is_list(Path) -> - B = convert_to_binary(Path, utf8, utf8), - B1 = remove_dot_segments(B, <<>>), - convert_to_list(B1, utf8). 
-%% -remove_dot_segments(<<>>, Output) -> - Output; -remove_dot_segments(<<"../",T/binary>>, Output) -> - remove_dot_segments(T, Output); -remove_dot_segments(<<"./",T/binary>>, Output) -> - remove_dot_segments(T, Output); -remove_dot_segments(<<"/./",T/binary>>, Output) -> - remove_dot_segments(<<$/,T/binary>>, Output); -remove_dot_segments(<<"/.">>, Output) -> - remove_dot_segments(<<$/>>, Output); -remove_dot_segments(<<"/../",T/binary>>, Output) -> - Out1 = remove_last_segment(Output), - remove_dot_segments(<<$/,T/binary>>, Out1); -remove_dot_segments(<<"/..">>, Output) -> - Out1 = remove_last_segment(Output), - remove_dot_segments(<<$/>>, Out1); -remove_dot_segments(<<$.>>, Output) -> - remove_dot_segments(<<>>, Output); -remove_dot_segments(<<"..">>, Output) -> - remove_dot_segments(<<>>, Output); -remove_dot_segments(Input, Output) -> - {First, Rest} = first_path_segment(Input), - remove_dot_segments(Rest, <>). - - -first_path_segment(Input) -> - F = first_path_segment(Input, <<>>), - split_binary(Input, byte_size(F)). -%% -first_path_segment(<<$/,T/binary>>, Acc) -> - first_path_segment_end(<>, <>); -first_path_segment(<>, Acc) -> - first_path_segment_end(<>, <>). - - -first_path_segment_end(<<>>, Acc) -> - Acc; -first_path_segment_end(<<$/,_/binary>>, Acc) -> - Acc; -first_path_segment_end(<>, Acc) -> - first_path_segment_end(<>, <>). - - -remove_last_segment(<<>>) -> - <<>>; -remove_last_segment(B) -> - {Init, Last} = split_binary(B, byte_size(B) - 1), - case Last of - <<$/>> -> - Init; - _Char -> - remove_last_segment(Init) - end. +-ifdef(NEED_is_query_1). +is_query(C) -> C =:= $/ orelse C =:= $? orelse is_pchar(C). +-ifndef(NEED_is_pchar_1). +-define(NEED_is_pchar_1, true). +-endif. +-endif. -%% RFC 3986, 6.2.3. Scheme-Based Normalization -normalize_scheme_based(Map) -> - Scheme = maps:get(scheme, Map, undefined), - Port = maps:get(port, Map, undefined), - Path= maps:get(path, Map, undefined), - normalize_scheme_based(Map, Scheme, Port, Path). 
-%% -normalize_scheme_based(Map, Scheme, Port, Path) - when Scheme =:= "http"; Scheme =:= <<"http">> -> - normalize_http(Map, Port, Path); -normalize_scheme_based(Map, Scheme, Port, Path) - when Scheme =:= "https"; Scheme =:= <<"https">> -> - normalize_https(Map, Port, Path); -normalize_scheme_based(Map, Scheme, Port, _Path) - when Scheme =:= "ftp"; Scheme =:= <<"ftp">> -> - normalize_ftp(Map, Port); -normalize_scheme_based(Map, Scheme, Port, _Path) - when Scheme =:= "ssh"; Scheme =:= <<"ssh">> -> - normalize_ssh_sftp(Map, Port); -normalize_scheme_based(Map, Scheme, Port, _Path) - when Scheme =:= "sftp"; Scheme =:= <<"sftp">> -> - normalize_ssh_sftp(Map, Port); -normalize_scheme_based(Map, Scheme, Port, _Path) - when Scheme =:= "tftp"; Scheme =:= <<"tftp">> -> - normalize_tftp(Map, Port); -normalize_scheme_based(Map, _, _, _) -> - Map. +-ifdef(NEED_is_fragment_1). +is_fragment(C) -> C =:= $/ orelse C =:= $? orelse is_pchar(C). + +-ifndef(NEED_is_pchar_1). +-define(NEED_is_pchar_1, true). +-endif. +-endif. + +-ifdef(NEED_is_pchar_1). +is_pchar(C) -> C =:= $% orelse C =:= $: orelse C =:= $@ orelse is_unreserved(C) orelse is_sub_delim(C). + +-ifndef(NEED_is_unreserved_1). +-define(NEED_is_unreserved_1, true). +-endif. +-ifndef(NEED_is_sub_delim_1). +-define(NEED_is_sub_delim_1, true). +-endif. +-endif. + +-ifdef(NEED_is_unreserved_1). +is_unreserved(C) -> C =:= $- orelse C =:= $. orelse C =:= $_ orelse C =:= $~ orelse is_alpha(C) orelse is_digit(C). + +-ifndef(NEED_is_alpha_1). +-define(NEED_is_alpha_1, true). +-endif. +-ifndef(NEED_is_digit_1). +-define(NEED_is_digit_1, true). +-endif. +-endif. +-ifdef(NEED_is_scheme_1). +is_scheme(C) -> C =:= $+ orelse C =:= $- orelse C =:= $. orelse is_alpha(C) orelse is_digit(C). -normalize_http(Map, Port, Path) -> - M1 = normalize_port(Map, Port, 80), - normalize_http_path(M1, Path). +-ifndef(NEED_is_alpha_1). +-define(NEED_is_alpha_1, true). +-endif. +-ifndef(NEED_is_digit_1). +-define(NEED_is_digit_1, true). +-endif. +-endif. 
+-ifdef(NEED_is_ipv4_1).
+%% True when C is a decimal digit or ".".
+is_ipv4(C) -> C =:= $. orelse is_digit(C).
-normalize_https(Map, Port, Path) ->
-    M1 = normalize_port(Map, Port, 443),
-    normalize_http_path(M1, Path).
+-ifndef(NEED_is_digit_1).
+-define(NEED_is_digit_1, true).
+-endif.
+-endif.
+-ifdef(NEED_is_ipv6_1).
+%% True when C is ":", "." or a hexadecimal digit.
+is_ipv6(C) -> C =:= $: orelse C =:= $. orelse is_hex_digit(C).
-normalize_ftp(Map, Port) ->
-    normalize_port(Map, Port, 21).
+-ifndef(NEED_is_hex_digit_1).
+-define(NEED_is_hex_digit_1, true).
+-endif.
+-endif.
+-ifdef(NEED_is_alpha_1).
+%% True when C is an ASCII letter (A-Z or a-z).
+is_alpha(C) -> C >= $A andalso C =< $Z orelse C >= $a andalso C =< $z.
+-endif.
-normalize_ssh_sftp(Map, Port) ->
-    normalize_port(Map, Port, 22).
+-ifdef(NEED_is_digit_1).
+%% True when C is an ASCII decimal digit (0-9).
+is_digit(C) -> C >= $0 andalso C =< $9.
+-endif.
+-ifdef(NEED_is_hex_digit_1).
+%% True when C is an ASCII hexadecimal digit (0-9, a-f, A-F).
+is_hex_digit(C) -> C >= $0 andalso C =< $9 orelse C >= $a andalso C =< $f orelse C >= $A andalso C =< $F.
+-endif.
-normalize_tftp(Map, Port) ->
-    normalize_port(Map, Port, 69).
+-ifdef(NEED_is_sub_delim_1).
+%% RFC 3986, section 2.2:
+%%   sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+is_sub_delim(C) ->
+    C =:= $! orelse C =:= $$ orelse C =:= $& orelse C =:= $' orelse C =:= $( orelse C =:= $) orelse
+    C =:= $* orelse C =:= $+ orelse C =:= $, orelse C =:= $; orelse C =:= $=.
+-endif.
+-ifdef(NEED_is_reserved_1).
+%% RFC 3986, section 2.2: reserved = gen-delims / sub-delims
+%%   gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+%%   sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+%% The sub-delims part must list ";" (it previously repeated ":" instead, so
+%% ";" was wrongly reported as not reserved).
+-spec is_reserved(char()) -> boolean().
+is_reserved(C) ->
+    C =:= $: orelse C =:= $/ orelse C =:= $? orelse C =:= $# orelse C =:= $[ orelse C =:= $] orelse
+    C =:= $@ orelse C =:= $! orelse C =:= $$ orelse C =:= $& orelse C =:= $' orelse C =:= $( orelse
+    C =:= $) orelse C =:= $* orelse C =:= $+ orelse C =:= $, orelse C =:= $; orelse C =:= $=.
+-endif.
-normalize_port(Map, Port, Default) ->
-    case Port of
-        Default ->
-            maps:remove(port, Map);
-        _Else ->
-            Map
+-ifdef(NEED_encode_2).
+%% Percent-encodes Component, keeping the characters for which Fun returns true.
+%% A list is converted to a binary for encoding and the result is converted
+%% back to a list; a binary is encoded directly.
+encode(Component, Fun) when is_list(Component) ->
+    unicode:characters_to_list(encode(unicode:characters_to_binary(Component), Fun, <<>>));
+encode(Component, Fun) when is_binary(Component) -> encode(Component, Fun, <<>>).
+ +encode(<>, Fun, Acc) -> + encode(Rest, Fun, <>); +encode(<<>>, _Fun, Acc) -> Acc; +encode(B, _Fun, _Acc) when is_binary(B) -> throw({error, invalid_input, B}). + +-compile({inline, encode_codepoint_binary/2}). +encode_codepoint_binary(C, Fun) -> + case Fun(C) of + false -> percent_encode_binary(<>, <<>>); + true -> <> end. +-ifndef(NEED_percent_encode_binary_2). +-define(NEED_percent_encode_binary_2, true). +-endif. +-endif. + +-ifdef(NEED_percent_encode_binary_2). +percent_encode_binary(<>, Acc) -> + percent_encode_binary(Rest, <>); +percent_encode_binary(<<>>, Acc) -> Acc. +-endif. + +-ifdef(NEED_normalize_path_segment_1). +normalize_path_segment(Map) -> maps:update_with(path, fun remove_dot_segments/1, Map). -normalize_http_path(Map, Path) -> - case Path of - "" -> - Map#{path => "/"}; - <<>> -> - Map#{path => <<"/">>}; - _Else -> - Map +remove_dot_segments(Path) when is_binary(Path) -> remove_dot_segments(Path, <<>>); +remove_dot_segments(Path) when is_list(Path) -> + convert_to_list(remove_dot_segments(convert_to_binary(Path, utf8, utf8), <<>>), utf8). + +remove_dot_segments(<<>>, Output) -> Output; +remove_dot_segments(<<"../", T/binary>>, Output) -> remove_dot_segments(T, Output); +remove_dot_segments(<<"./", T/binary>>, Output) -> remove_dot_segments(T, Output); +remove_dot_segments(<<"/./", T/binary>>, Output) -> remove_dot_segments(<<$/, T/binary>>, Output); +remove_dot_segments(<<"/.">>, Output) -> remove_dot_segments(<<$/>>, Output); +remove_dot_segments(<<"/../", T/binary>>, Output) -> remove_dot_segments(<<$/, T/binary>>, remove_last_segment(Output)); +remove_dot_segments(<<"/..">>, Output) -> remove_dot_segments(<<$/>>, remove_last_segment(Output)); +remove_dot_segments(B, Output) when B =:= <<$.>>; B =:= <<"..">> -> remove_dot_segments(<<>>, Output); +remove_dot_segments(<> = Input, Output) -> + {First, Rest} = split_binary(Input, byte_size(first_path_segment_end(T, <>))), + remove_dot_segments(Rest, <>). 
+ +remove_last_segment(<<>>) -> <<>>; +remove_last_segment(B) -> + S = byte_size(B) - 1, + case B of + <> -> Init; + <> -> remove_last_segment(Init) + end. + +first_path_segment_end(<<$/, _/binary>>, Acc) -> Acc; +first_path_segment_end(<>, Acc) -> first_path_segment_end(<>, <>); +first_path_segment_end(<<>>, Acc) -> Acc. + +-ifndef(NEED_convert_to_binary_3). +-define(NEED_convert_to_binary_3, true). +-endif. +-ifndef(NEED_convert_to_list_2). +-define(NEED_convert_to_list_2, true). +-endif. +-endif. + +-ifdef(NEED_convert_to_binary_3). +convert_to_binary(Binary, InEncoding, OutEncoding) -> + case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of + {T, _List, RestData} when T =:= error; T =:= incomplete -> throw({error, invalid_input, RestData}); + Result -> Result + end. +-endif. + +-ifdef(NEED_convert_to_list_2). +convert_to_list(Binary, InEncoding) -> + case unicode:characters_to_list(Binary, InEncoding) of + {T, _List, RestData} when T =:= error; T =:= incomplete -> throw({error, invalid_input, RestData}); + Result -> Result end. -endif.