From 7877619f856a04af0519e92780b1d6674a8ff3f7 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Sat, 15 Jun 2013 23:47:02 +0200 Subject: dotdot: introducing dot file path cleanup RFC3986 details how a path part passed in as part of a URI should be "cleaned" from dot sequences before getting used. The described algorithm is now implemented in lib/dotdot.c with the accompanied test case in test 1395. Bug: http://curl.haxx.se/bug/view.cgi?id=1200 Reported-by: Alex Vinnik --- lib/Makefile.inc | 4 +- lib/dotdot.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/dotdot.h | 25 +++++++ lib/url.c | 36 +++++++--- tests/data/Makefile.am | 2 +- tests/data/test1231 | 61 +++++++++++++++++ tests/data/test1395 | 26 ++++++++ tests/unit/Makefile.inc | 5 +- tests/unit/unit1395.c | 87 +++++++++++++++++++++++++ 9 files changed, 402 insertions(+), 14 deletions(-) create mode 100644 lib/dotdot.c create mode 100644 lib/dotdot.h create mode 100644 tests/data/test1231 create mode 100644 tests/data/test1395 create mode 100644 tests/unit/unit1395.c diff --git a/lib/Makefile.inc b/lib/Makefile.inc index f76e1ec83..4228bf6b8 100644 --- a/lib/Makefile.inc +++ b/lib/Makefile.inc @@ -25,7 +25,7 @@ CSOURCES = file.c timeval.c base64.c hostip.c progress.c formdata.c \ http_proxy.c non-ascii.c asyn-ares.c asyn-thread.c curl_gssapi.c \ curl_ntlm.c curl_ntlm_wb.c curl_ntlm_core.c curl_ntlm_msgs.c \ curl_sasl.c curl_schannel.c curl_multibyte.c curl_darwinssl.c \ - hostcheck.c bundles.c conncache.c pipeline.c + hostcheck.c bundles.c conncache.c pipeline.c dotdot.c HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h \ progress.h formdata.h cookie.h http.h sendf.h ftp.h url.h dict.h \ @@ -44,4 +44,4 @@ HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h \ asyn.h curl_ntlm.h curl_gssapi.h curl_ntlm_wb.h curl_ntlm_core.h \ curl_ntlm_msgs.h curl_sasl.h curl_schannel.h curl_multibyte.h \ curl_darwinssl.h hostcheck.h bundles.h conncache.h curl_setup_once.h \ - multihandle.h setup-vms.h pipeline.h + multihandle.h setup-vms.h pipeline.h dotdot.h diff --git a/lib/dotdot.c b/lib/dotdot.c new file mode 100644 index 000000000..95b636780 --- /dev/null +++ b/lib/dotdot.c @@ -0,0 +1,170 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2013, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ + +#include "curl_setup.h" + +#include "dotdot.h" + +#include "curl_memory.h" +/* The last #include file should be: */ +#include "memdebug.h" + +/* + * "Remove Dot Segments" + * http://tools.ietf.org/html/rfc3986#section-5.2.4 + */ + +/* + * Curl_dedotdotify() + * + * This function gets a zero-terminated path with dot and dotdot sequences + * passed in and strips them off according to the rules in RFC 3986 section + * 5.2.5. + * + * The function handles a query part ('?' + stuff) appended but it expects + * that fragments ('#' + stuff) have already been cut off. + * + * RETURNS + * + * an allocated dedotdotified output string + */ +char *Curl_dedotdotify(char *input) +{ + size_t inlen = strlen(input); + char *clone; + size_t clen = inlen; /* the length of the cloned input */ + char *out = malloc(inlen+1); + char *outp; + char *orgclone; + char *queryp; + if(!out) + return NULL; /* out of memory */ + + /* get a cloned copy of the input */ + clone = strdup(input); + if(!clone) { + free(out); + return NULL; + } + orgclone = clone; + outp = out; + + /* + * To handle query-parts properly, we must find it and remove it during the + * dotdot-operation and then append it again at the end to the output + * string. + */ + queryp = strchr(clone, '?'); + if(queryp) + *queryp = 0; + + do { + + /* A. If the input buffer begins with a prefix of "../" or "./", then + remove that prefix from the input buffer; otherwise, */ + + if(!strncmp("./", clone, 2)) { + clone+=2; + clen-=2; + } + else if(!strncmp("../", clone, 3)) { + clone+=3; + clen-=3; + } + + /* B. if the input buffer begins with a prefix of "/./" or "/.", where + "." is a complete path segment, then replace that prefix with "/" in + the input buffer; otherwise, */ + else if(!strncmp("/./", clone, 3)) { + clone+=2; + clen-=2; + } + else if(!strcmp("/.", clone)) { + clone[1]='/'; + clone++; + clen-=1; + } + + /* C. if the input buffer begins with a prefix of "/../" or "/..", where + ".." is a complete path segment, then replace that prefix with "/" in + the input buffer and remove the last segment and its preceding "/" (if + any) from the output buffer; otherwise, */ + + else if(!strncmp("/../", clone, 4)) { + clone+=3; + clen-=3; + /* remove the last segment from the output buffer */ + while(outp > out) { + outp--; + if(*outp == '/') + break; + } + *outp = 0; /* zero-terminate where it stops */ + } + else if(!strcmp("/..", clone)) { + clone[2]='/'; + clone+=2; + clen-=2; + /* remove the last segment from the output buffer */ + while(outp > out) { + outp--; + if(*outp == '/') + break; + } + *outp = 0; /* zero-terminate where it stops */ + } + + /* D. if the input buffer consists only of "." or "..", then remove + that from the input buffer; otherwise, */ + + else if(!strcmp(".", clone) || !strcmp("..", clone)) { + *clone=0; + } + + else { + /* E. move the first path segment in the input buffer to the end of + the output buffer, including the initial "/" character (if any) and + any subsequent characters up to, but not including, the next "/" + character or the end of the input buffer. */ + + do { + *outp++ = *clone++; + clen--; + } while(*clone && (*clone != '/')); + *outp=0; + } + + } while(*clone); + + if(queryp) { + size_t qlen; + /* There was a query part, append that to the output. The 'clone' string + may now have been altered so we copy from the original input string + from the correct index. */ + size_t oindex = queryp - orgclone; + qlen = strlen(&input[oindex]); + memcpy(outp, &input[oindex], qlen+1); /* include the ending zero byte */ + } + + free(orgclone); + return out; +} diff --git a/lib/dotdot.h b/lib/dotdot.h new file mode 100644 index 000000000..c3487c137 --- /dev/null +++ b/lib/dotdot.h @@ -0,0 +1,25 @@ +#ifndef HEADER_CURL_DOTDOT_H +#define HEADER_CURL_DOTDOT_H +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2013, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ +char *Curl_dedotdotify(char *input); +#endif diff --git a/lib/url.c b/lib/url.c index 7ba496986..e1c9dffe2 100644 --- a/lib/url.c +++ b/lib/url.c @@ -124,6 +124,7 @@ int curl_win32_idn_to_ascii(const char *in, char **out); #include "conncache.h" #include "multihandle.h" #include "pipeline.h" +#include "dotdot.h" #define _MPRINTF_REPLACE /* use our functions only */ #include @@ -3674,7 +3675,7 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, char protobuf[16]; const char *protop; CURLcode result; - bool fix_slash = FALSE; + bool rebuild_url = FALSE; *prot_missing = FALSE; @@ -3825,14 +3826,14 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, memcpy(path+1, query, hostlen); path[0]='/'; /* prepend the missing slash */ - fix_slash = TRUE; + rebuild_url = TRUE; *query=0; /* now cut off the hostname at the ? */ } else if(!path[0]) { /* if there's no path set, use a single slash */ strcpy(path, "/"); - fix_slash = TRUE; + rebuild_url = TRUE; } /* If the URL is malformatted (missing a '/' after hostname before path) we @@ -3845,17 +3846,30 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, is bigger than the path. Use +1 to move the zero byte too. */ memmove(&path[1], path, strlen(path)+1); path[0] = '/'; - fix_slash = TRUE; + rebuild_url = TRUE; + } + else { + /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */ + char *newp = Curl_dedotdotify(path); + + if(strcmp(newp, path)) { + rebuild_url = TRUE; + free(data->state.pathbuffer); + data->state.pathbuffer = newp; + data->state.path = newp; + path = newp; + } + else + free(newp); } - /* - * "fix_slash" means that the URL was malformatted so we need to generate an - * updated version with the new slash inserted at the right place! We need - * the corrected URL when communicating over HTTP proxy and we don't know at - * this point if we're using a proxy or not. + * "rebuild_url" means that one or more URL components have been modified so + * we need to generate an updated full version. We need the corrected URL + * when communicating over HTTP proxy and we don't know at this point if + * we're using a proxy or not. */ - if(fix_slash) { + if(rebuild_url) { char *reurl; size_t plen = strlen(path); /* new path, should be 1 byte longer than @@ -3878,6 +3892,8 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, data->change.url_alloc = FALSE; } + infof(data, "Rebuilt URL to: %s\n", reurl); + data->change.url = reurl; data->change.url_alloc = TRUE; /* free this later */ } diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am index e96bc9ba1..ecfee4850 100644 --- a/tests/data/Makefile.am +++ b/tests/data/Makefile.am @@ -93,7 +93,7 @@ test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \ test1208 test1209 test1210 test1211 test1212 test1213 test1214 test1215 \ test1216 test1217 test1218 test1219 \ test1220 test1221 test1222 test1223 test1224 test1225 test1226 test1227 \ -test1228 test1229 test1230 \ +test1228 test1229 test1230 test1231 \ \ test1300 test1301 test1302 test1303 test1304 test1305 test1306 test1307 \ test1308 test1309 test1310 test1311 test1312 test1313 test1314 test1315 \ diff --git a/tests/data/test1231 b/tests/data/test1231 new file mode 100644 index 000000000..16533a851 --- /dev/null +++ b/tests/data/test1231 @@ -0,0 +1,61 @@ + + + +HTTP +HTTP GET +dotdot removal + + + +# +# Server-side + + +HTTP/1.1 200 OK +Content-Length: 6 +Connection: close + +-foo- + + + +HTTP/1.1 200 OK +Content-Length: 7 +Connection: close + +-cool- + + + +# +# Client-side + + +http + + +HTTP URL with dotdot removal from path + + +http://%HOSTIP:%HTTPPORT/../../hej/but/who/../1231?stupid=me/../1231#soo/../1231 http://%HOSTIP:%HTTPPORT/../../hej/but/who/../12310001#/../12310001 + + + +# +# Verify data after the test has been "shot" + + +^User-Agent:.* + + +GET /hej/but/1231?stupid=me/../1231 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +Accept: */* + +GET /hej/but/12310001 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +Accept: */* + + + + diff --git a/tests/data/test1395 b/tests/data/test1395 new file mode 100644 index 000000000..967c8d492 --- /dev/null +++ b/tests/data/test1395 @@ -0,0 +1,26 @@ + + + +unittest + + + +# +# Client-side + + +none + + +unittest + + +Curl_dedotdotify + + +unit1395 + + + + + diff --git a/tests/unit/Makefile.inc b/tests/unit/Makefile.inc index 4b3f903e3..4c06fcf86 100644 --- a/tests/unit/Makefile.inc +++ b/tests/unit/Makefile.inc @@ -6,7 +6,7 @@ UNITFILES = curlcheck.h \ # These are all unit test programs UNITPROGS = unit1300 unit1301 unit1302 unit1303 unit1304 unit1305 unit1307 \ - unit1308 unit1309 unit1330 unit1394 unit1396 + unit1308 unit1309 unit1330 unit1394 unit1395 unit1396 unit1300_SOURCES = unit1300.c $(UNITFILES) unit1300_CPPFLAGS = $(AM_CPPFLAGS) @@ -44,5 +44,8 @@ unit1394_LDADD = @LIBMETALINK_LIBS@ $(top_builddir)/lib/libcurl.la @LIBCURL_LIBS unit1394_LDFLAGS = @LIBMETALINK_LDFLAGS@ $(top_builddir)/src/libcurltool.la unit1394_LIBS = +unit1395_SOURCES = unit1395.c $(UNITFILES) +unit1395_CPPFLAGS = $(AM_CPPFLAGS) + unit1396_SOURCES = unit1396.c $(UNITFILES) unit1396_CPPFLAGS = $(AM_CPPFLAGS) diff --git a/tests/unit/unit1395.c b/tests/unit/unit1395.c new file mode 100644 index 000000000..8b0b0a08a --- /dev/null +++ b/tests/unit/unit1395.c @@ -0,0 +1,87 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2013, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ +#include "curlcheck.h" + +#include "dotdot.h" + +#include "memdebug.h" + +static CURLcode unit_setup(void) +{ + return CURLE_OK; +} + +static void unit_stop(void) +{ + +} + +struct dotdot { + const char *input; + const char *output; +}; + +UNITTEST_START + + unsigned int i; + int fails=0; + struct dotdot pairs[] = { + { "/a/b/c/./../../g", "/a/g" }, + { "mid/content=5/../6", "mid/6" }, + { "/hello/../moo", "/moo" }, + { "/1/../1", "/1" }, + { "/1/./1", "/1/1" }, + { "/1/..", "/" }, + { "/1/.", "/1/" }, + { "/1/./..", "/" }, + { "/1/./../2", "/2" }, + { "/hello/1/./../2", "/hello/2" }, + { "test/this", "test/this" }, + { "test/this/../now", "test/now" }, + { "/1../moo../foo", "/1../moo../foo"}, + { "/../../moo", "/moo"}, + { "/../../moo?andnot/../yay", "/moo?andnot/../yay"}, + { "/123?foo=/./&bar=/../", "/123?foo=/./&bar=/../"}, + { "/../moo/..?what", "/?what" }, + }; + + for(i=0; i < sizeof(pairs)/sizeof(pairs[0]); i++) { + char *out = Curl_dedotdotify((char *)pairs[i].input); + + if(strcmp(out, pairs[i].output)) { + fprintf(stderr, "Test %d: '%s' gave '%s' instead of '%s'\n", + i, pairs[i].input, out, pairs[i].output); + fail("Test case output mismatched"); + fails++; + } + else + fprintf(stderr, "Test %d: OK\n", i); + free(out); + } + + return fails; + +UNITTEST_STOP + + + + -- cgit v1.2.3