aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2013-06-15 23:47:02 +0200
committerDaniel Stenberg <daniel@haxx.se>2013-06-22 14:15:07 +0200
commit7877619f856a04af0519e92780b1d6674a8ff3f7 (patch)
tree6b1c1f0c5ef5761494be6d5d2851a6c196917898
parentec248b590df3ac2e6873ea9c7507eff8e5044825 (diff)
dotdot: introducing dot file path cleanup
RFC3986 details how a path part passed in as part of a URI should be "cleaned" from dot sequences before getting used. The described algorithm is now implemented in lib/dotdot.c with the accompanied test case in test 1395. Bug: http://curl.haxx.se/bug/view.cgi?id=1200 Reported-by: Alex Vinnik
-rw-r--r--lib/Makefile.inc4
-rw-r--r--lib/dotdot.c170
-rw-r--r--lib/dotdot.h25
-rw-r--r--lib/url.c36
-rw-r--r--tests/data/Makefile.am2
-rw-r--r--tests/data/test123161
-rw-r--r--tests/data/test139526
-rw-r--r--tests/unit/Makefile.inc5
-rw-r--r--tests/unit/unit1395.c87
9 files changed, 402 insertions, 14 deletions
diff --git a/lib/Makefile.inc b/lib/Makefile.inc
index f76e1ec83..4228bf6b8 100644
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
@@ -25,7 +25,7 @@ CSOURCES = file.c timeval.c base64.c hostip.c progress.c formdata.c \
http_proxy.c non-ascii.c asyn-ares.c asyn-thread.c curl_gssapi.c \
curl_ntlm.c curl_ntlm_wb.c curl_ntlm_core.c curl_ntlm_msgs.c \
curl_sasl.c curl_schannel.c curl_multibyte.c curl_darwinssl.c \
- hostcheck.c bundles.c conncache.c pipeline.c
+ hostcheck.c bundles.c conncache.c pipeline.c dotdot.c
HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h \
progress.h formdata.h cookie.h http.h sendf.h ftp.h url.h dict.h \
@@ -44,4 +44,4 @@ HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h \
asyn.h curl_ntlm.h curl_gssapi.h curl_ntlm_wb.h curl_ntlm_core.h \
curl_ntlm_msgs.h curl_sasl.h curl_schannel.h curl_multibyte.h \
curl_darwinssl.h hostcheck.h bundles.h conncache.h curl_setup_once.h \
- multihandle.h setup-vms.h pipeline.h
+ multihandle.h setup-vms.h pipeline.h dotdot.h
diff --git a/lib/dotdot.c b/lib/dotdot.c
new file mode 100644
index 000000000..95b636780
--- /dev/null
+++ b/lib/dotdot.c
@@ -0,0 +1,170 @@
+/***************************************************************************
+ * _ _ ____ _
+ * Project ___| | | | _ \| |
+ * / __| | | | |_) | |
+ * | (__| |_| | _ <| |___
+ * \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#include "curl_setup.h"
+
+#include "dotdot.h"
+
+#include "curl_memory.h"
+/* The last #include file should be: */
+#include "memdebug.h"
+
+/*
+ * "Remove Dot Segments"
+ * http://tools.ietf.org/html/rfc3986#section-5.2.4
+ */
+
+/*
+ * Curl_dedotdotify()
+ *
+ * This function gets a zero-terminated path with dot and dotdot sequences
+ * passed in and strips them off according to the rules in RFC 3986 section
+ * 5.2.5.
+ *
+ * The function handles a query part ('?' + stuff) appended but it expects
+ * that fragments ('#' + stuff) have already been cut off.
+ *
+ * RETURNS
+ *
+ * an allocated dedotdotified output string
+ */
+char *Curl_dedotdotify(char *input)
+{
+ size_t inlen = strlen(input);
+ char *clone;
+ size_t clen = inlen; /* the length of the cloned input */
+ char *out = malloc(inlen+1);
+ char *outp;
+ char *orgclone;
+ char *queryp;
+ if(!out)
+ return NULL; /* out of memory */
+
+ /* get a cloned copy of the input */
+ clone = strdup(input);
+ if(!clone) {
+ free(out);
+ return NULL;
+ }
+ orgclone = clone;
+ outp = out;
+
+ /*
+ * To handle query-parts properly, we must find it and remove it during the
+ * dotdot-operation and then append it again at the end to the output
+ * string.
+ */
+ queryp = strchr(clone, '?');
+ if(queryp)
+ *queryp = 0;
+
+ do {
+
+ /* A. If the input buffer begins with a prefix of "../" or "./", then
+ remove that prefix from the input buffer; otherwise, */
+
+ if(!strncmp("./", clone, 2)) {
+ clone+=2;
+ clen-=2;
+ }
+ else if(!strncmp("../", clone, 3)) {
+ clone+=3;
+ clen-=3;
+ }
+
+ /* B. if the input buffer begins with a prefix of "/./" or "/.", where
+ "." is a complete path segment, then replace that prefix with "/" in
+ the input buffer; otherwise, */
+ else if(!strncmp("/./", clone, 3)) {
+ clone+=2;
+ clen-=2;
+ }
+ else if(!strcmp("/.", clone)) {
+ clone[1]='/';
+ clone++;
+ clen-=1;
+ }
+
+ /* C. if the input buffer begins with a prefix of "/../" or "/..", where
+ ".." is a complete path segment, then replace that prefix with "/" in
+ the input buffer and remove the last segment and its preceding "/" (if
+ any) from the output buffer; otherwise, */
+
+ else if(!strncmp("/../", clone, 4)) {
+ clone+=3;
+ clen-=3;
+ /* remove the last segment from the output buffer */
+ while(outp > out) {
+ outp--;
+ if(*outp == '/')
+ break;
+ }
+ *outp = 0; /* zero-terminate where it stops */
+ }
+ else if(!strcmp("/..", clone)) {
+ clone[2]='/';
+ clone+=2;
+ clen-=2;
+ /* remove the last segment from the output buffer */
+ while(outp > out) {
+ outp--;
+ if(*outp == '/')
+ break;
+ }
+ *outp = 0; /* zero-terminate where it stops */
+ }
+
+ /* D. if the input buffer consists only of "." or "..", then remove
+ that from the input buffer; otherwise, */
+
+ else if(!strcmp(".", clone) || !strcmp("..", clone)) {
+ *clone=0;
+ }
+
+ else {
+ /* E. move the first path segment in the input buffer to the end of
+ the output buffer, including the initial "/" character (if any) and
+ any subsequent characters up to, but not including, the next "/"
+ character or the end of the input buffer. */
+
+ do {
+ *outp++ = *clone++;
+ clen--;
+ } while(*clone && (*clone != '/'));
+ *outp=0;
+ }
+
+ } while(*clone);
+
+ if(queryp) {
+ size_t qlen;
+ /* There was a query part, append that to the output. The 'clone' string
+ may now have been altered so we copy from the original input string
+ from the correct index. */
+ size_t oindex = queryp - orgclone;
+ qlen = strlen(&input[oindex]);
+ memcpy(outp, &input[oindex], qlen+1); /* include the ending zero byte */
+ }
+
+ free(orgclone);
+ return out;
+}
diff --git a/lib/dotdot.h b/lib/dotdot.h
new file mode 100644
index 000000000..c3487c137
--- /dev/null
+++ b/lib/dotdot.h
@@ -0,0 +1,25 @@
+#ifndef HEADER_CURL_DOTDOT_H
+#define HEADER_CURL_DOTDOT_H
+/***************************************************************************
+ * _ _ ____ _
+ * Project ___| | | | _ \| |
+ * / __| | | | |_) | |
+ * | (__| |_| | _ <| |___
+ * \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+char *Curl_dedotdotify(char *input);
+#endif
diff --git a/lib/url.c b/lib/url.c
index 7ba496986..e1c9dffe2 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -124,6 +124,7 @@ int curl_win32_idn_to_ascii(const char *in, char **out);
#include "conncache.h"
#include "multihandle.h"
#include "pipeline.h"
+#include "dotdot.h"
#define _MPRINTF_REPLACE /* use our functions only */
#include <curl/mprintf.h>
@@ -3674,7 +3675,7 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
char protobuf[16];
const char *protop;
CURLcode result;
- bool fix_slash = FALSE;
+ bool rebuild_url = FALSE;
*prot_missing = FALSE;
@@ -3825,14 +3826,14 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
memcpy(path+1, query, hostlen);
path[0]='/'; /* prepend the missing slash */
- fix_slash = TRUE;
+ rebuild_url = TRUE;
*query=0; /* now cut off the hostname at the ? */
}
else if(!path[0]) {
/* if there's no path set, use a single slash */
strcpy(path, "/");
- fix_slash = TRUE;
+ rebuild_url = TRUE;
}
/* If the URL is malformatted (missing a '/' after hostname before path) we
@@ -3845,17 +3846,30 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
is bigger than the path. Use +1 to move the zero byte too. */
memmove(&path[1], path, strlen(path)+1);
path[0] = '/';
- fix_slash = TRUE;
+ rebuild_url = TRUE;
+ }
+ else {
+ /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
+ char *newp = Curl_dedotdotify(path);
+
+ if(strcmp(newp, path)) {
+ rebuild_url = TRUE;
+ free(data->state.pathbuffer);
+ data->state.pathbuffer = newp;
+ data->state.path = newp;
+ path = newp;
+ }
+ else
+ free(newp);
}
-
/*
- * "fix_slash" means that the URL was malformatted so we need to generate an
- * updated version with the new slash inserted at the right place! We need
- * the corrected URL when communicating over HTTP proxy and we don't know at
- * this point if we're using a proxy or not.
+ * "rebuild_url" means that one or more URL components have been modified so
+ * we need to generate an updated full version. We need the corrected URL
+ * when communicating over HTTP proxy and we don't know at this point if
+ * we're using a proxy or not.
*/
- if(fix_slash) {
+ if(rebuild_url) {
char *reurl;
size_t plen = strlen(path); /* new path, should be 1 byte longer than
@@ -3878,6 +3892,8 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
data->change.url_alloc = FALSE;
}
+ infof(data, "Rebuilt URL to: %s\n", reurl);
+
data->change.url = reurl;
data->change.url_alloc = TRUE; /* free this later */
}
diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am
index e96bc9ba1..ecfee4850 100644
--- a/tests/data/Makefile.am
+++ b/tests/data/Makefile.am
@@ -93,7 +93,7 @@ test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \
test1208 test1209 test1210 test1211 test1212 test1213 test1214 test1215 \
test1216 test1217 test1218 test1219 \
test1220 test1221 test1222 test1223 test1224 test1225 test1226 test1227 \
-test1228 test1229 test1230 \
+test1228 test1229 test1230 test1231 \
\
test1300 test1301 test1302 test1303 test1304 test1305 test1306 test1307 \
test1308 test1309 test1310 test1311 test1312 test1313 test1314 test1315 \
diff --git a/tests/data/test1231 b/tests/data/test1231
new file mode 100644
index 000000000..16533a851
--- /dev/null
+++ b/tests/data/test1231
@@ -0,0 +1,61 @@
+<testcase>
+<info>
+<keywords>
+HTTP
+HTTP GET
+dotdot removal
+</keywords>
+</info>
+
+#
+# Server-side
+<reply name="1">
+<data>
+HTTP/1.1 200 OK
+Content-Length: 6
+Connection: close
+
+-foo-
+</data>
+
+<data1>
+HTTP/1.1 200 OK
+Content-Length: 7
+Connection: close
+
+-cool-
+</data1>
+</reply>
+
+#
+# Client-side
+<client>
+<server>
+http
+</server>
+ <name>
+HTTP URL with dotdot removal from path
+ </name>
+ <command>
+http://%HOSTIP:%HTTPPORT/../../hej/but/who/../1231?stupid=me/../1231#soo/../1231 http://%HOSTIP:%HTTPPORT/../../hej/but/who/../12310001#/../12310001
+</command>
+</client>
+
+#
+# Verify data after the test has been "shot"
+<verify>
+<strip>
+^User-Agent:.*
+</strip>
+<protocol>
+GET /hej/but/1231?stupid=me/../1231 HTTP/1.1
+Host: %HOSTIP:%HTTPPORT
+Accept: */*
+
+GET /hej/but/12310001 HTTP/1.1
+Host: %HOSTIP:%HTTPPORT
+Accept: */*
+
+</protocol>
+</verify>
+</testcase>
diff --git a/tests/data/test1395 b/tests/data/test1395
new file mode 100644
index 000000000..967c8d492
--- /dev/null
+++ b/tests/data/test1395
@@ -0,0 +1,26 @@
+<testcase>
+<info>
+<keywords>
+unittest
+</keywords>
+</info>
+
+#
+# Client-side
+<client>
+<server>
+none
+</server>
+<features>
+unittest
+</features>
+ <name>
+Curl_dedotdotify
+ </name>
+<tool>
+unit1395
+</tool>
+
+</client>
+
+</testcase>
diff --git a/tests/unit/Makefile.inc b/tests/unit/Makefile.inc
index 4b3f903e3..4c06fcf86 100644
--- a/tests/unit/Makefile.inc
+++ b/tests/unit/Makefile.inc
@@ -6,7 +6,7 @@ UNITFILES = curlcheck.h \
# These are all unit test programs
UNITPROGS = unit1300 unit1301 unit1302 unit1303 unit1304 unit1305 unit1307 \
- unit1308 unit1309 unit1330 unit1394 unit1396
+ unit1308 unit1309 unit1330 unit1394 unit1395 unit1396
unit1300_SOURCES = unit1300.c $(UNITFILES)
unit1300_CPPFLAGS = $(AM_CPPFLAGS)
@@ -44,5 +44,8 @@ unit1394_LDADD = @LIBMETALINK_LIBS@ $(top_builddir)/lib/libcurl.la @LIBCURL_LIBS
unit1394_LDFLAGS = @LIBMETALINK_LDFLAGS@ $(top_builddir)/src/libcurltool.la
unit1394_LIBS =
+unit1395_SOURCES = unit1395.c $(UNITFILES)
+unit1395_CPPFLAGS = $(AM_CPPFLAGS)
+
unit1396_SOURCES = unit1396.c $(UNITFILES)
unit1396_CPPFLAGS = $(AM_CPPFLAGS)
diff --git a/tests/unit/unit1395.c b/tests/unit/unit1395.c
new file mode 100644
index 000000000..8b0b0a08a
--- /dev/null
+++ b/tests/unit/unit1395.c
@@ -0,0 +1,87 @@
+/***************************************************************************
+ * _ _ ____ _
+ * Project ___| | | | _ \| |
+ * / __| | | | |_) | |
+ * | (__| |_| | _ <| |___
+ * \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+#include "curlcheck.h"
+
+#include "dotdot.h"
+
+#include "memdebug.h"
+
+static CURLcode unit_setup(void)
+{
+ return CURLE_OK;
+}
+
+static void unit_stop(void)
+{
+
+}
+
+struct dotdot {
+ const char *input;
+ const char *output;
+};
+
+UNITTEST_START
+
+ unsigned int i;
+ int fails=0;
+ struct dotdot pairs[] = {
+ { "/a/b/c/./../../g", "/a/g" },
+ { "mid/content=5/../6", "mid/6" },
+ { "/hello/../moo", "/moo" },
+ { "/1/../1", "/1" },
+ { "/1/./1", "/1/1" },
+ { "/1/..", "/" },
+ { "/1/.", "/1/" },
+ { "/1/./..", "/" },
+ { "/1/./../2", "/2" },
+ { "/hello/1/./../2", "/hello/2" },
+ { "test/this", "test/this" },
+ { "test/this/../now", "test/now" },
+ { "/1../moo../foo", "/1../moo../foo"},
+ { "/../../moo", "/moo"},
+ { "/../../moo?andnot/../yay", "/moo?andnot/../yay"},
+ { "/123?foo=/./&bar=/../", "/123?foo=/./&bar=/../"},
+ { "/../moo/..?what", "/?what" },
+ };
+
+ for(i=0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
+ char *out = Curl_dedotdotify((char *)pairs[i].input);
+
+ if(strcmp(out, pairs[i].output)) {
+ fprintf(stderr, "Test %d: '%s' gave '%s' instead of '%s'\n",
+ i, pairs[i].input, out, pairs[i].output);
+ fail("Test case output mismatched");
+ fails++;
+ }
+ else
+ fprintf(stderr, "Test %d: OK\n", i);
+ free(out);
+ }
+
+ return fails;
+
+UNITTEST_STOP
+
+
+
+