diff options
author | Daniel Stenberg <daniel@haxx.se> | 2015-11-02 12:48:03 +0100 |
---|---|---|
committer | Daniel Stenberg <daniel@haxx.se> | 2015-11-02 12:48:03 +0100 |
commit | 3f7b1bb89f92c13e69ee51b710ac54f775aab320 (patch) | |
tree | 88166c329cbc027a9e08dc46fff38d7fa207f133 /lib | |
parent | 1ea3a7d5e4a9d273351b68ad3799aec18bb0ccea (diff) |
http redirects: %-encode bytes outside of ascii range
Apparently there are sites out there that do redirects to URLs they
provide in plain UTF-8 or similar. Browsers and wget %-encode such
headers when doing a subsequent request. Now libcurl does too.
Added test 1138 to verify.
Closes #473
Diffstat (limited to 'lib')
-rw-r--r-- | lib/transfer.c | 41 |
1 files changed, 23 insertions, 18 deletions
diff --git a/lib/transfer.c b/lib/transfer.c index dda235cc7..91777d6f5 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -1396,16 +1396,18 @@ CURLcode Curl_posttransfer(struct SessionHandle *data) */ static size_t strlen_url(const char *url) { - const char *ptr; + const unsigned char *ptr; size_t newlen=0; bool left=TRUE; /* left side of the ? */ - for(ptr=url; *ptr; ptr++) { + for(ptr=(unsigned char *)url; *ptr; ptr++) { switch(*ptr) { case '?': left=FALSE; /* fall through */ default: + if(*ptr >= 0x80) + newlen += 2; newlen++; break; case ' ': @@ -1426,9 +1428,9 @@ static void strcpy_url(char *output, const char *url) { /* we must add this with whitespace-replacing */ bool left=TRUE; - const char *iptr; + const unsigned char *iptr; char *optr = output; - for(iptr = url; /* read from here */ + for(iptr = (unsigned char *)url; /* read from here */ *iptr; /* until zero byte */ iptr++) { switch(*iptr) { @@ -1436,7 +1438,12 @@ static void strcpy_url(char *output, const char *url) left=FALSE; /* fall through */ default: - *optr++=*iptr; + if(*iptr >= 0x80) { + snprintf(optr, 4, "%%%02x", *iptr); + optr += 3; + } + else + *optr++=*iptr; break; case ' ': if(left) { @@ -1684,23 +1691,21 @@ CURLcode Curl_follow(struct SessionHandle *data, newurl = absolute; } else { + /* The new URL MAY contain space or high byte values, that means a mighty + stupid redirect URL but we still make an effort to do "right". */ + char *newest; + size_t newlen = strlen_url(newurl); + /* This is an absolute URL, don't allow the custom port number */ disallowport = TRUE; - if(strchr(newurl, ' ')) { - /* This new URL contains at least one space, this is a mighty stupid - redirect but we still make an effort to do "right". */ - char *newest; - size_t newlen = strlen_url(newurl); - - newest = malloc(newlen+1); /* get memory for this */ - if(!newest) - return CURLE_OUT_OF_MEMORY; - strcpy_url(newest, newurl); /* create a space-free URL */ + newest = malloc(newlen+1); /* get memory for this */ + if(!newest) + return CURLE_OUT_OF_MEMORY; + strcpy_url(newest, newurl); /* create a space-free URL */ - free(newurl); /* that was no good */ - newurl = newest; /* use this instead now */ - } + free(newurl); /* that was no good */ + newurl = newest; /* use this instead now */ } |