diff options
author | Daniel Stenberg <daniel@haxx.se> | 2013-06-15 23:47:02 +0200 |
---|---|---|
committer | Daniel Stenberg <daniel@haxx.se> | 2013-06-22 14:15:07 +0200 |
commit | 7877619f856a04af0519e92780b1d6674a8ff3f7 (patch) | |
tree | 6b1c1f0c5ef5761494be6d5d2851a6c196917898 /lib | |
parent | ec248b590df3ac2e6873ea9c7507eff8e5044825 (diff) |
dotdot: introducing dot file path cleanup
RFC3986 details how a path part passed in as part of a URI should be
"cleaned" from dot sequences before getting used. The described
algorithm is now implemented in lib/dotdot.c with the accompanied test
case in test 1395.
Bug: http://curl.haxx.se/bug/view.cgi?id=1200
Reported-by: Alex Vinnik
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Makefile.inc | 4 | ||||
-rw-r--r-- | lib/dotdot.c | 170 | ||||
-rw-r--r-- | lib/dotdot.h | 25 | ||||
-rw-r--r-- | lib/url.c | 36 |
4 files changed, 223 insertions, 12 deletions
diff --git a/lib/Makefile.inc b/lib/Makefile.inc index f76e1ec83..4228bf6b8 100644 --- a/lib/Makefile.inc +++ b/lib/Makefile.inc @@ -25,7 +25,7 @@ CSOURCES = file.c timeval.c base64.c hostip.c progress.c formdata.c \ http_proxy.c non-ascii.c asyn-ares.c asyn-thread.c curl_gssapi.c \ curl_ntlm.c curl_ntlm_wb.c curl_ntlm_core.c curl_ntlm_msgs.c \ curl_sasl.c curl_schannel.c curl_multibyte.c curl_darwinssl.c \ - hostcheck.c bundles.c conncache.c pipeline.c + hostcheck.c bundles.c conncache.c pipeline.c dotdot.c HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h \ progress.h formdata.h cookie.h http.h sendf.h ftp.h url.h dict.h \ @@ -44,4 +44,4 @@ HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h \ asyn.h curl_ntlm.h curl_gssapi.h curl_ntlm_wb.h curl_ntlm_core.h \ curl_ntlm_msgs.h curl_sasl.h curl_schannel.h curl_multibyte.h \ curl_darwinssl.h hostcheck.h bundles.h conncache.h curl_setup_once.h \ - multihandle.h setup-vms.h pipeline.h + multihandle.h setup-vms.h pipeline.h dotdot.h diff --git a/lib/dotdot.c b/lib/dotdot.c new file mode 100644 index 000000000..95b636780 --- /dev/null +++ b/lib/dotdot.c @@ -0,0 +1,170 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ + +#include "curl_setup.h" + +#include "dotdot.h" + +#include "curl_memory.h" +/* The last #include file should be: */ +#include "memdebug.h" + +/* + * "Remove Dot Segments" + * http://tools.ietf.org/html/rfc3986#section-5.2.4 + */ + +/* + * Curl_dedotdotify() + * + * This function gets a zero-terminated path with dot and dotdot sequences + * passed in and strips them off according to the rules in RFC 3986 section + * 5.2.5. + * + * The function handles a query part ('?' + stuff) appended but it expects + * that fragments ('#' + stuff) have already been cut off. + * + * RETURNS + * + * an allocated dedotdotified output string + */ +char *Curl_dedotdotify(char *input) +{ + size_t inlen = strlen(input); + char *clone; + size_t clen = inlen; /* the length of the cloned input */ + char *out = malloc(inlen+1); + char *outp; + char *orgclone; + char *queryp; + if(!out) + return NULL; /* out of memory */ + + /* get a cloned copy of the input */ + clone = strdup(input); + if(!clone) { + free(out); + return NULL; + } + orgclone = clone; + outp = out; + + /* + * To handle query-parts properly, we must find it and remove it during the + * dotdot-operation and then append it again at the end to the output + * string. + */ + queryp = strchr(clone, '?'); + if(queryp) + *queryp = 0; + + do { + + /* A. If the input buffer begins with a prefix of "../" or "./", then + remove that prefix from the input buffer; otherwise, */ + + if(!strncmp("./", clone, 2)) { + clone+=2; + clen-=2; + } + else if(!strncmp("../", clone, 3)) { + clone+=3; + clen-=3; + } + + /* B. if the input buffer begins with a prefix of "/./" or "/.", where + "." is a complete path segment, then replace that prefix with "/" in + the input buffer; otherwise, */ + else if(!strncmp("/./", clone, 3)) { + clone+=2; + clen-=2; + } + else if(!strcmp("/.", clone)) { + clone[1]='/'; + clone++; + clen-=1; + } + + /* C. if the input buffer begins with a prefix of "/../" or "/..", where + ".." is a complete path segment, then replace that prefix with "/" in + the input buffer and remove the last segment and its preceding "/" (if + any) from the output buffer; otherwise, */ + + else if(!strncmp("/../", clone, 4)) { + clone+=3; + clen-=3; + /* remove the last segment from the output buffer */ + while(outp > out) { + outp--; + if(*outp == '/') + break; + } + *outp = 0; /* zero-terminate where it stops */ + } + else if(!strcmp("/..", clone)) { + clone[2]='/'; + clone+=2; + clen-=2; + /* remove the last segment from the output buffer */ + while(outp > out) { + outp--; + if(*outp == '/') + break; + } + *outp = 0; /* zero-terminate where it stops */ + } + + /* D. if the input buffer consists only of "." or "..", then remove + that from the input buffer; otherwise, */ + + else if(!strcmp(".", clone) || !strcmp("..", clone)) { + *clone=0; + } + + else { + /* E. move the first path segment in the input buffer to the end of + the output buffer, including the initial "/" character (if any) and + any subsequent characters up to, but not including, the next "/" + character or the end of the input buffer. */ + + do { + *outp++ = *clone++; + clen--; + } while(*clone && (*clone != '/')); + *outp=0; + } + + } while(*clone); + + if(queryp) { + size_t qlen; + /* There was a query part, append that to the output. The 'clone' string + may now have been altered so we copy from the original input string + from the correct index. */ + size_t oindex = queryp - orgclone; + qlen = strlen(&input[oindex]); + memcpy(outp, &input[oindex], qlen+1); /* include the ending zero byte */ + } + + free(orgclone); + return out; +} diff --git a/lib/dotdot.h b/lib/dotdot.h new file mode 100644 index 000000000..c3487c137 --- /dev/null +++ b/lib/dotdot.h @@ -0,0 +1,25 @@ +#ifndef HEADER_CURL_DOTDOT_H +#define HEADER_CURL_DOTDOT_H +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ +char *Curl_dedotdotify(char *input); +#endif @@ -124,6 +124,7 @@ int curl_win32_idn_to_ascii(const char *in, char **out); #include "conncache.h" #include "multihandle.h" #include "pipeline.h" +#include "dotdot.h" #define _MPRINTF_REPLACE /* use our functions only */ #include <curl/mprintf.h> @@ -3674,7 +3675,7 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, char protobuf[16]; const char *protop; CURLcode result; - bool fix_slash = FALSE; + bool rebuild_url = FALSE; *prot_missing = FALSE; @@ -3825,14 +3826,14 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, memcpy(path+1, query, hostlen); path[0]='/'; /* prepend the missing slash */ - fix_slash = TRUE; + rebuild_url = TRUE; *query=0; /* now cut off the hostname at the ? */ } else if(!path[0]) { /* if there's no path set, use a single slash */ strcpy(path, "/"); - fix_slash = TRUE; + rebuild_url = TRUE; } /* If the URL is malformatted (missing a '/' after hostname before path) we @@ -3845,17 +3846,30 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, is bigger than the path. Use +1 to move the zero byte too. */ memmove(&path[1], path, strlen(path)+1); path[0] = '/'; - fix_slash = TRUE; + rebuild_url = TRUE; + } + else { + /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */ + char *newp = Curl_dedotdotify(path); + + if(strcmp(newp, path)) { + rebuild_url = TRUE; + free(data->state.pathbuffer); + data->state.pathbuffer = newp; + data->state.path = newp; + path = newp; + } + else + free(newp); } - /* - * "fix_slash" means that the URL was malformatted so we need to generate an - * updated version with the new slash inserted at the right place! We need - * the corrected URL when communicating over HTTP proxy and we don't know at - * this point if we're using a proxy or not. + * "rebuild_url" means that one or more URL components have been modified so + * we need to generate an updated full version. We need the corrected URL + * when communicating over HTTP proxy and we don't know at this point if + * we're using a proxy or not. */ - if(fix_slash) { + if(rebuild_url) { char *reurl; size_t plen = strlen(path); /* new path, should be 1 byte longer than @@ -3878,6 +3892,8 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, data->change.url_alloc = FALSE; } + infof(data, "Rebuilt URL to: %s\n", reurl); + data->change.url = reurl; data->change.url_alloc = TRUE; /* free this later */ } |