diff options
author | Daniel Stenberg <daniel@haxx.se> | 2016-05-08 15:11:10 +0200 |
---|---|---|
committer | Daniel Stenberg <daniel@haxx.se> | 2016-05-30 23:13:55 +0200 |
commit | 5409e1d793de755c7433336b80b0c8370a359d45 (patch) | |
tree | 45a9791d3bf2aa623e0999c5b13128b9d967f810 | |
parent | ed8b8f2456fc485fa81fb3d3eaef684121bb1aef (diff) |
URL parser: allow URLs to use one, two or three slashes
Mostly in order to support broken web sites that redirect to broken URLs
that are accepted by browsers.
Browsers are typically even more lenient than this, as the WHATWG URL
spec says they should allow an _infinite_ amount. I tested 8000 slashes with
Firefox and it just worked.
Added test cases 1141, 1142 and 1143 to verify the new parser.
Closes #791
-rw-r--r-- | lib/url.c | 30 | ||||
-rw-r--r-- | tests/data/Makefile.inc | 2 | ||||
-rw-r--r-- | tests/data/test1141 | 67 | ||||
-rw-r--r-- | tests/data/test1142 | 62 | ||||
-rw-r--r-- | tests/data/test1143 | 45 |
5 files changed, 200 insertions, 6 deletions
@@ -4141,12 +4141,17 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, } else { /* clear path */ + char slashbuf[4]; path[0]=0; - if(2 > sscanf(data->change.url, - "%15[^\n:]://%[^\n/?]%[^\n]", - protobuf, - conn->host.name, path)) { + rc = sscanf(data->change.url, + "%15[^\n:]:%3[/]%[^\n/?]%[^\n]", + protobuf, slashbuf, conn->host.name, path); + if(2 == rc) { + failf(data, "Bad URL"); + return CURLE_URL_MALFORMAT; + } + if(3 > rc) { /* * The URL was badly formatted, let's try the browser-style _without_ @@ -4197,8 +4202,23 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, *prot_missing = TRUE; /* not given in URL */ } - else + else { + size_t s = strlen(slashbuf); protop = protobuf; + if(s != 2) { + infof(data, "Unwillingly accepted illegal URL using %d slash%s!\n", + s, s>1?"es":""); + + if(data->change.url_alloc) + free(data->change.url); + /* repair the URL to use two slashes */ + data->change.url = aprintf("%s://%s%s", + protobuf, conn->host.name, path); + if(!data->change.url) + return CURLE_OUT_OF_MEMORY; + data->change.url_alloc = TRUE; + } + } } /* We search for '?' 
in the host name (but only on the right side of a diff --git a/tests/data/Makefile.inc b/tests/data/Makefile.inc index e3b5a880f..aa82227a6 100644 --- a/tests/data/Makefile.inc +++ b/tests/data/Makefile.inc @@ -119,7 +119,7 @@ test1104 test1105 test1106 test1107 test1108 test1109 test1110 test1111 \ test1112 test1113 test1114 test1115 test1116 test1117 test1118 test1119 \ test1120 test1121 test1122 test1123 test1124 test1125 test1126 test1127 \ test1128 test1129 test1130 test1131 test1132 test1133 test1134 test1135 \ -test1136 test1137 test1138 test1139 test1140 \ +test1136 test1137 test1138 test1139 test1140 test1141 test1142 test1143 \ \ test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \ test1208 test1209 test1210 test1211 test1212 test1213 test1214 test1215 \ diff --git a/tests/data/test1141 b/tests/data/test1141 new file mode 100644 index 000000000..31c505f66 --- /dev/null +++ b/tests/data/test1141 @@ -0,0 +1,67 @@ +<testcase> +<info> +<keywords> +HTTP +HTTP GET +followlocation +</keywords> +</info> +# Server-side +<reply> +<data> +HTTP/1.1 302 This is a weirdo text message +Connection: close +Location: http:///foo.example.com/want/11410001 + +This server reply is for testing +</data> +<data1> +HTTP/1.1 200 hello +Connection: close +Content-Length: 4 + +hej +</data1> +<datacheck> +HTTP/1.1 302 This is a weirdo text message +Connection: close +Location: http:///foo.example.com/want/11410001 + +HTTP/1.1 200 hello +Connection: close +Content-Length: 4 + +hej +</datacheck> +</reply> + +# Client-side +<client> +<server> +http +</server> + <name> +HTTP redirect to http:/// (three slashes!) + </name> + <command> +%HOSTIP:%HTTPPORT/want/1141 -L -x http://%HOSTIP:%HTTPPORT +</command> +</client> + +# Verify data after the test has been "shot" +<verify> +<strip> +^User-Agent:.* +</strip> +<protocol> +GET http://%HOSTIP:%HTTPPORT/want/1141 HTTP/1.1
+Host: %HOSTIP:%HTTPPORT
+Accept: */*
+
+GET http://foo.example.com/want/11410001 HTTP/1.1
+Host: foo.example.com
+Accept: */*
+
+</protocol> +</verify> +</testcase> diff --git a/tests/data/test1142 b/tests/data/test1142 new file mode 100644 index 000000000..ebb0891b6 --- /dev/null +++ b/tests/data/test1142 @@ -0,0 +1,62 @@ +<testcase> +<info> +<keywords> +HTTP +HTTP GET +followlocation +</keywords> +</info> +# Server-side +<reply> +<data> +HTTP/1.1 302 This is a weirdo text message +Connection: close +Location: http:////foo.example.com/want/11420001 + +This server reply is for testing +</data> +<data1> +HTTP/1.1 200 hello +Connection: close +Content-Length: 4 + +hej +</data1> +<datacheck> +HTTP/1.1 302 This is a weirdo text message +Connection: close +Location: http:////foo.example.com/want/11420001 + +</datacheck> +</reply> + +# Client-side +<client> +<server> +http +</server> + <name> +HTTP redirect to http://// (four slashes!) + </name> + <command> +%HOSTIP:%HTTPPORT/want/1142 -L -x http://%HOSTIP:%HTTPPORT +</command> +</client> + +# Verify data after the test has been "shot" +<verify> +<strip> +^User-Agent:.* +</strip> +<protocol> +GET http://%HOSTIP:%HTTPPORT/want/1142 HTTP/1.1
+Host: %HOSTIP:%HTTPPORT
+Accept: */*
+
+</protocol> +# 3, CURLE_URL_MALFORMAT for the four slashes +<errorcode> +3 +</errorcode> +</verify> +</testcase> diff --git a/tests/data/test1143 b/tests/data/test1143 new file mode 100644 index 000000000..4f2f4435a --- /dev/null +++ b/tests/data/test1143 @@ -0,0 +1,45 @@ +<testcase> +<info> +<keywords> +HTTP +HTTP GET +followlocation +</keywords> +</info> +# Server-side +<reply> +<data> +HTTP/1.1 200 hello +Connection: close +Content-Length: 4 + +hej +</data> +</reply> + +# Client-side +<client> +<server> +http +</server> + <name> +HTTP URL with http:/ (one slash!) + </name> + <command> +http:/%HOSTIP:%HTTPPORT/want/1143 +</command> +</client> + +# Verify data after the test has been "shot" +<verify> +<strip> +^User-Agent:.* +</strip> +<protocol> +GET /want/1143 HTTP/1.1
+Host: %HOSTIP:%HTTPPORT
+Accept: */*
+
+</protocol> +</verify> +</testcase> |