diff options
| author | Daniel Stenberg <daniel@haxx.se> | 2015-11-02 12:48:03 +0100 | 
|---|---|---|
| committer | Daniel Stenberg <daniel@haxx.se> | 2015-11-02 12:48:03 +0100 | 
| commit | 3f7b1bb89f92c13e69ee51b710ac54f775aab320 (patch) | |
| tree | 88166c329cbc027a9e08dc46fff38d7fa207f133 | |
| parent | 1ea3a7d5e4a9d273351b68ad3799aec18bb0ccea (diff) | |
http redirects: %-encode bytes outside of ascii range
Apparently there are sites out there that redirect to URLs they
provide in plain UTF-8 or similar. Browsers and wget %-encode such
URLs when doing the subsequent request. Now libcurl does too.
Added test 1138 to verify.
Closes #473
| -rw-r--r-- | lib/transfer.c | 41 | ||||
| -rw-r--r-- | tests/data/Makefile.inc | 2 | ||||
| -rw-r--r-- | tests/data/test1138 | 74 | 
3 files changed, 98 insertions, 19 deletions
| diff --git a/lib/transfer.c b/lib/transfer.c index dda235cc7..91777d6f5 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -1396,16 +1396,18 @@ CURLcode Curl_posttransfer(struct SessionHandle *data)   */  static size_t strlen_url(const char *url)  { -  const char *ptr; +  const unsigned char *ptr;    size_t newlen=0;    bool left=TRUE; /* left side of the ? */ -  for(ptr=url; *ptr; ptr++) { +  for(ptr=(unsigned char *)url; *ptr; ptr++) {      switch(*ptr) {      case '?':        left=FALSE;        /* fall through */      default: +      if(*ptr >= 0x80) +        newlen += 2;        newlen++;        break;      case ' ': @@ -1426,9 +1428,9 @@ static void strcpy_url(char *output, const char *url)  {    /* we must add this with whitespace-replacing */    bool left=TRUE; -  const char *iptr; +  const unsigned char *iptr;    char *optr = output; -  for(iptr = url;    /* read from here */ +  for(iptr = (unsigned char *)url;    /* read from here */        *iptr;         /* until zero byte */        iptr++) {      switch(*iptr) { @@ -1436,7 +1438,12 @@ static void strcpy_url(char *output, const char *url)        left=FALSE;        /* fall through */      default: -      *optr++=*iptr; +      if(*iptr >= 0x80) { +        snprintf(optr, 4, "%%%02x", *iptr); +        optr += 3; +      } +      else +        *optr++=*iptr;        break;      case ' ':        if(left) { @@ -1684,23 +1691,21 @@ CURLcode Curl_follow(struct SessionHandle *data,      newurl = absolute;    }    else { +    /* The new URL MAY contain space or high byte values, that means a mighty +       stupid redirect URL but we still make an effort to do "right". */ +    char *newest; +    size_t newlen = strlen_url(newurl); +      /* This is an absolute URL, don't allow the custom port number */      disallowport = TRUE; -    if(strchr(newurl, ' ')) { -      /* This new URL contains at least one space, this is a mighty stupid -         redirect but we still make an effort to do "right". 
*/ -      char *newest; -      size_t newlen = strlen_url(newurl); - -      newest = malloc(newlen+1); /* get memory for this */ -      if(!newest) -        return CURLE_OUT_OF_MEMORY; -      strcpy_url(newest, newurl); /* create a space-free URL */ +    newest = malloc(newlen+1); /* get memory for this */ +    if(!newest) +      return CURLE_OUT_OF_MEMORY; +    strcpy_url(newest, newurl); /* create a space-free URL */ -      free(newurl); /* that was no good */ -      newurl = newest; /* use this instead now */ -    } +    free(newurl); /* that was no good */ +    newurl = newest; /* use this instead now */    } diff --git a/tests/data/Makefile.inc b/tests/data/Makefile.inc index 79214dbfa..3cf8c3e22 100644 --- a/tests/data/Makefile.inc +++ b/tests/data/Makefile.inc @@ -118,7 +118,7 @@ test1104 test1105 test1106 test1107 test1108 test1109 test1110 test1111 \  test1112 test1113 test1114 test1115 test1116 test1117 test1118 test1119 \  test1120 test1121 test1122 test1123 test1124 test1125 test1126 test1127 \  test1128 test1129 test1130 test1131 test1132 test1133 test1134 test1135 \ -test1136 test1137 \ +test1136 test1137 test1138 \  \  test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \  test1208 test1209 test1210 test1211 test1212 test1213 test1214 test1215 \ diff --git a/tests/data/test1138 b/tests/data/test1138 new file mode 100644 index 000000000..0c91d204d --- /dev/null +++ b/tests/data/test1138 @@ -0,0 +1,74 @@ +<testcase> +<info> +<keywords> +HTTP +HTTP GET +followlocation +</keywords> +</info> +# +# Server-side +<reply> +<data> +HTTP/1.1 302 OK swsclose
 +Location: ../moo.html/?name=آغاز-سم-زدایی-از-بازار-پول&testcase=/11380002    
 +Date: Thu, 09 Nov 2010 14:49:00 GMT
 +Connection: close
 +
 +</data> +<data2> +HTTP/1.1 200 OK swsclose
 +Location: this should be ignored
 +Date: Thu, 09 Nov 2010 14:49:00 GMT
 +Connection: close
 +
 +body +</data2> +<datacheck> +HTTP/1.1 302 OK swsclose
 +Location: ../moo.html/?name=آغاز-سم-زدایی-از-بازار-پول&testcase=/11380002    
 +Date: Thu, 09 Nov 2010 14:49:00 GMT
 +Connection: close
 +
 +HTTP/1.1 200 OK swsclose
 +Location: this should be ignored
 +Date: Thu, 09 Nov 2010 14:49:00 GMT
 +Connection: close
 +
 +body +</datacheck> +</reply> + +# +# Client-side +<client> +<server> +http +</server> + <name> +HTTP redirect with UTF-8 characters + </name> + <command> +http://%HOSTIP:%HTTPPORT/we/are/all/twits/1138 -L +</command> +</client> + +# +# Verify data after the test has been "shot" +<verify> +<strip> +^User-Agent:.* +</strip> +<protocol> +GET /we/are/all/twits/1138 HTTP/1.1
 +Host: %HOSTIP:%HTTPPORT
 +Accept: */*
 +
 +GET /we/are/all/moo.html/?name=%d8%a2%d8%ba%d8%a7%d8%b2-%d8%b3%d9%85-%d8%b2%d8%af%d8%a7%db%8c%db%8c-%d8%a7%d8%b2-%d8%a8%d8%a7%d8%b2%d8%a7%d8%b1-%d9%be%d9%88%d9%84&testcase=/11380002 HTTP/1.1
 +User-Agent: curl/7.10 (i686-pc-linux-gnu) libcurl/7.10 OpenSSL/0.9.6c ipv6 zlib/1.1.3
 +Host: %HOSTIP:%HTTPPORT
 +Accept: */*
 +
 +</protocol> +</verify> +</testcase> | 
