From 2698520aef593cbd746a64f79021a4c8d7c83d65 Mon Sep 17 00:00:00 2001 From: Ulion Date: Mon, 21 Jan 2013 23:20:09 +0100 Subject: formpost: support quotes, commas and semicolon in file names - document the double-quote and backslash need be escaped if quoting. - libcurl formdata escape double-quote in filename by backslash. - curl formparse can parse filename both contains '"' and ',' or ';'. - curl now can uploading file with ',' or ';' in filename. Bug: http://curl.haxx.se/bug/view.cgi?id=1171 --- docs/curl.1 | 11 +++ lib/formdata.c | 69 ++++++++++----- src/tool_formparse.c | 234 ++++++++++++++++++++++++++++++------------------- tests/data/Makefile.am | 2 +- tests/data/test1133 | 95 ++++++++++++++++++++ tests/data/test39 | 22 ++++- tests/getpart.pm | 2 +- 7 files changed, 319 insertions(+), 116 deletions(-) create mode 100644 tests/data/test1133 diff --git a/docs/curl.1 b/docs/curl.1 index a13291c0b..c2b6887d2 100644 --- a/docs/curl.1 +++ b/docs/curl.1 @@ -484,6 +484,17 @@ filename=, like this: \fBcurl\fP -F "file=@localfile;filename=nameinpost" url.com +If filename/path contains ',' or ';', it must be quoted by double-quotes like: + +\fBcurl\fP -F "file=@\\"localfile\\";filename=\\"nameinpost\\"" url.com + +or + +\fBcurl\fP -F 'file=@"localfile";filename="nameinpost"' url.com + +Note that if a filename/path is quoted by double-quotes, any double-quote +or backslash within the filename must be escaped by backslash. + See further examples and details in the MANUAL. This option can be used multiple times. diff --git a/lib/formdata.c b/lib/formdata.c index 480de98b2..2ba58dda1 100644 --- a/lib/formdata.c +++ b/lib/formdata.c @@ -1025,6 +1025,47 @@ static char *strippath(const char *fullfile) return base; /* returns an allocated string or NULL ! */ } +static CURLcode formdata_add_filename(const struct curl_httppost *file, + struct FormData **form, + curl_off_t *size) +{ + CURLcode result = CURLE_OK; + char *filename = file->showfilename; + char *filebasename = NULL; + char *filename_escaped = NULL; + + if(!filename) { + filebasename = strippath(file->contents); + if(!filebasename) + return CURLE_OUT_OF_MEMORY; + filename = filebasename; + } + + if(strchr(filename, '\\') || strchr(filename, '"')) { + char *p0, *p1; + + /* filename need be escaped */ + filename_escaped = malloc(strlen(filename)*2+1); + if(!filename_escaped) + return CURLE_OUT_OF_MEMORY; + p0 = filename_escaped; + p1 = filename; + while(*p1) { + if(*p1 == '\\' || *p1 == '"') + *p0++ = '\\'; + *p0++ = *p1++; + } + *p0 = '\0'; + filename = filename_escaped; + } + result = AddFormDataf(form, size, + "; filename=\"%s\"", + filename); + Curl_safefree(filename_escaped); + Curl_safefree(filebasename); + return result; +} + /* * Curl_getformdata() converts a linked list of "meta data" into a complete * (possibly huge) multipart formdata. The input list is in 'post', while the @@ -1139,22 +1180,13 @@ CURLcode Curl_getformdata(struct SessionHandle *data, if(post->more) { /* if multiple-file */ - char *filebasename = NULL; - if(!file->showfilename) { - filebasename = strippath(file->contents); - if(!filebasename) { - result = CURLE_OUT_OF_MEMORY; - break; - } - } - result = AddFormDataf(&form, &size, "\r\n--%s\r\nContent-Disposition: " - "attachment; filename=\"%s\"", - fileboundary, - (file->showfilename?file->showfilename: - filebasename)); - Curl_safefree(filebasename); + "attachment", + fileboundary); + if(result) + break; + result = formdata_add_filename(file, &form, &size); if(result) break; } @@ -1164,14 +1196,7 @@ CURLcode Curl_getformdata(struct SessionHandle *data, HTTPPOST_CALLBACK cases the ->showfilename struct member is always assigned at this point */ if(post->showfilename || (post->flags & HTTPPOST_FILENAME)) { - char *filebasename= - (!post->showfilename)?strippath(post->contents):NULL; - - result = AddFormDataf(&form, &size, - "; filename=\"%s\"", - (post->showfilename?post->showfilename: - filebasename)); - Curl_safefree(filebasename); + result = formdata_add_filename(post, &form, &size); } if(result) diff --git a/src/tool_formparse.c b/src/tool_formparse.c index 12b1a9d46..fe357f504 100644 --- a/src/tool_formparse.c +++ b/src/tool_formparse.c @@ -5,7 +5,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2012, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2013, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -34,13 +34,73 @@ #include "memdebug.h" /* keep this as LAST include */ + +/* + * helper function to get a word from form param + * after call get_parm_word, str either point to string end + * or point to any of end chars. + */ +static char *get_param_word(char **str, char **end_pos) +{ + char *ptr = *str; + char *word_begin = NULL; + char *ptr2; + char *escape = NULL; + const char *end_chars = ";,"; + + /* the first non-space char is here */ + word_begin = ptr; + if(*ptr == '"') { + ++ptr; + while(*ptr) { + if(*ptr == '\\') { + if(ptr[1] == '\\' || ptr[1] == '"') { + /* remember the first escape position */ + if(!escape) + escape = ptr; + /* skip escape of back-slash or double-quote */ + ptr += 2; + continue; + } + } + if(*ptr == '"') { + *end_pos = ptr; + if(escape) { + /* has escape, we restore the unescaped string here */ + ptr = ptr2 = escape; + do { + if(*ptr == '\\' && (ptr[1] == '\\' || ptr[1] == '"')) + ++ptr; + *ptr2++ = *ptr++; + } + while(ptr < *end_pos); + *end_pos = ptr2; + } + while(*ptr && NULL==strchr(end_chars, *ptr)) + ++ptr; + *str = ptr; + return word_begin+1; + } + ++ptr; + } + /* end quote is missing, treat it as non-quoted. */ + ptr = word_begin; + } + + while(*ptr && NULL==strchr(end_chars, *ptr)) + ++ptr; + *str = *end_pos = ptr; + return word_begin; +} + /*************************************************************************** * * formparse() * * Reads a 'name=value' parameter and builds the appropriate linked list. * - * Specify files to upload with 'name=@filename'. Supports specified + * Specify files to upload with 'name=@filename', or 'name=@"filename"' + * in case the filename contain ',' or ';'. Supports specified * given Content-Type of the files. Such as ';type='. * * If literal_value is set, any initial '@' or '<' in the value string @@ -51,6 +111,10 @@ * * 'name=@filename,filename2,filename3' * + * or use double-quotes quote the filename: + * + * 'name=@"filename","filename2","filename3"' + * * If you want content-types specified for each too, write them like: * * 'name=@filename;type=image/gif,filename2,filename3' @@ -64,7 +128,12 @@ * To upload a file, but to fake the file name that will be included in the * formpost, do like this: * - * 'name=@filename;filename=/dev/null' + * 'name=@filename;filename=/dev/null' or quote the faked filename like: + * 'name=@filename;filename="play, play, and play.txt"' + * + * If filename/path contains ',' or ';', it must be quoted by double-quotes, + * else curl will fail to figure out the correct filename. if the filename + * tobe quoted contains '"' or '\', '"' and '\' must be escaped by backslash. * * This function uses curl_formadd to fulfill it's job. Is heavily based on * the old curl_formparse code. @@ -86,7 +155,6 @@ int formparse(struct Configurable *config, char *contp; const char *type = NULL; char *sep; - char *sep2; if((1 == sscanf(input, "%255[^=]=", name)) && ((contp = strchr(input, '=')) != NULL)) { @@ -107,118 +175,104 @@ int formparse(struct Configurable *config, struct multi_files *multi_start = NULL; struct multi_files *multi_current = NULL; - contp++; + char *ptr = contp; + char *end = ptr + strlen(ptr); do { /* since this was a file, it may have a content-type specifier at the end too, or a filename. Or both. */ - char *ptr; char *filename = NULL; - - sep = strchr(contp, ';'); - sep2 = strchr(contp, ','); - - /* pick the closest */ - if(sep2 && (sep2 < sep)) { - sep = sep2; - - /* no type was specified! */ - } + char *word_end; + bool semicolon; type = NULL; - if(sep) { - bool semicolon = (';' == *sep) ? TRUE : FALSE; - - *sep = '\0'; /* terminate file name at separator */ - - ptr = sep+1; /* point to the text following the separator */ - - while(semicolon && ptr && (','!= *ptr)) { - - /* pass all white spaces */ - while(ISSPACE(*ptr)) - ptr++; - - if(checkprefix("type=", ptr)) { - /* set type pointer */ - type = &ptr[5]; - - /* verify that this is a fine type specifier */ - if(2 != sscanf(type, "%127[^/]/%127[^;,\n]", - type_major, type_minor)) { - warnf(config, "Illegally formatted content-type field!\n"); - Curl_safefree(contents); - FreeMultiInfo(&multi_start, &multi_current); - return 2; /* illegal content-type syntax! */ - } - - /* now point beyond the content-type specifier */ - sep = (char *)type + strlen(type_major)+strlen(type_minor)+1; - - /* there's a semicolon following - we check if it is a filename - specified and if not we simply assume that it is text that - the user wants included in the type and include that too up - to the next zero or semicolon. */ - if(*sep==';') { - if(!checkprefix(";filename=", sep)) { - sep2 = strchr(sep+1, ';'); - if(sep2) - sep = sep2; - else - sep = sep + strlen(sep); /* point to end of string */ - } - } - else - semicolon = FALSE; - - if(*sep) { - *sep = '\0'; /* zero terminate type string */ - - ptr = sep+1; - } - else - ptr = NULL; /* end */ + ++ptr; + contp = get_param_word(&ptr, &word_end); + semicolon = (';' == *ptr) ? TRUE : FALSE; + *word_end = '\0'; /* terminate the contp */ + + /* have other content, continue parse */ + while(semicolon) { + /* have type or filename field */ + ++ptr; + while(*ptr && (ISSPACE(*ptr))) + ++ptr; + + if(checkprefix("type=", ptr)) { + /* set type pointer */ + type = &ptr[5]; + + /* verify that this is a fine type specifier */ + if(2 != sscanf(type, "%127[^/]/%127[^;,\n]", + type_major, type_minor)) { + warnf(config, "Illegally formatted content-type field!\n"); + Curl_safefree(contents); + FreeMultiInfo(&multi_start, &multi_current); + return 2; /* illegal content-type syntax! */ } - else if(checkprefix("filename=", ptr)) { - filename = &ptr[9]; - ptr = strchr(filename, ';'); - if(!ptr) { - ptr = strchr(filename, ','); - } - if(ptr) { - *ptr = '\0'; /* zero terminate */ - ptr++; + + /* now point beyond the content-type specifier */ + sep = (char *)type + strlen(type_major)+strlen(type_minor)+1; + + /* there's a semicolon following - we check if it is a filename + specified and if not we simply assume that it is text that + the user wants included in the type and include that too up + to the next sep. */ + ptr = sep; + if(*sep==';') { + if(!checkprefix(";filename=", sep)) { + ptr = sep + 1; + (void)get_param_word(&ptr, &sep); + semicolon = (';' == *ptr) ? TRUE : FALSE; } } else - /* confusion, bail out of loop */ - break; - } + semicolon = FALSE; - sep = ptr; + if(*sep) + *sep = '\0'; /* zero terminate type string */ + } + else if(checkprefix("filename=", ptr)) { + ptr += 9; + filename = get_param_word(&ptr, &word_end); + semicolon = (';' == *ptr) ? TRUE : FALSE; + *word_end = '\0'; + } + else { + /* unknown prefix, skip to next block */ + char *unknown = NULL; + unknown = get_param_word(&ptr, &word_end); + semicolon = (';' == *ptr) ? TRUE : FALSE; + if(*unknown) { + *word_end = '\0'; + warnf(config, "skip unknown form field: %s\n", unknown); + } + } } + /* now ptr point to comma or string end */ + /* if type == NULL curl_formadd takes care of the problem */ - if(!AddMultiFiles(contp, type, filename, &multi_start, + if(*contp && !AddMultiFiles(contp, type, filename, &multi_start, &multi_current)) { warnf(config, "Error building form post!\n"); Curl_safefree(contents); FreeMultiInfo(&multi_start, &multi_current); return 3; } - contp = sep; /* move the contents pointer to after the separator */ - } while(sep && *sep); /* loop if there's another file name */ + /* *ptr could be '\0', so we just check with the string end */ + } while(ptr < end); /* loop if there's another file name */ /* now we add the multiple files section */ if(multi_start) { struct curl_forms *forms = NULL; - struct multi_files *ptr = multi_start; + struct multi_files *start = multi_start; unsigned int i, count = 0; - while(ptr) { - ptr = ptr->next; + while(start) { + start = start->next; ++count; } forms = malloc((count+1)*sizeof(struct curl_forms)); @@ -228,9 +282,9 @@ int formparse(struct Configurable *config, FreeMultiInfo(&multi_start, &multi_current); return 4; } - for(i = 0, ptr = multi_start; i < count; ++i, ptr = ptr->next) { - forms[i].option = ptr->form.option; - forms[i].value = ptr->form.value; + for(i = 0, start = multi_start; i < count; ++i, start = start->next) { + forms[i].option = start->form.option; + forms[i].value = start->form.value; } forms[count].option = CURLFORM_END; FreeMultiInfo(&multi_start, &multi_current); diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am index 89c96ce13..d2a9be1b6 100644 --- a/tests/data/Makefile.am +++ b/tests/data/Makefile.am @@ -75,7 +75,7 @@ test1094 test1095 test1096 test1097 test1098 test1099 test1100 test1101 \ test1102 test1103 test1104 test1105 test1106 test1107 test1108 test1109 \ test1110 test1111 test1112 test1113 test1114 test1115 test1116 test1117 \ test1118 test1119 test1120 test1121 test1122 test1123 test1124 test1125 \ -test1126 test1127 test1128 test1129 test1130 test1131 test1132 \ +test1126 test1127 test1128 test1129 test1130 test1131 test1132 test1133 \ test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \ test1208 test1209 test1210 test1211 \ test1220 test1221 test1222 test1223 \ diff --git a/tests/data/test1133 b/tests/data/test1133 new file mode 100644 index 000000000..733793114 --- /dev/null +++ b/tests/data/test1133 @@ -0,0 +1,95 @@ + + + +HTTP +HTTP FORMPOST + + +# Server-side + + +HTTP/1.1 200 OK +Date: Thu, 09 Nov 2010 14:49:00 GMT +Server: test-server/fake +Content-Length: 10 + +blablabla + + + +# Client-side + + +http + + +HTTP RFC1867-type formposting with filename contains ',', ';', '"' + + +http://%HOSTIP:%HTTPPORT/we/want/1133 -F "file=@\"log/test1133,a\\\"nd;.txt\";type=mo/foo;filename=\"faker,and;.txt\"" -F 'file2=@"log/test1133,a\"nd;.txt"' -F 'file3=@"log/test1133,a\"nd;.txt";type=m/f,"log/test1133,a\"nd;.txt"' + +# We create this file before the command is invoked! + +foo bar +This is a bar foo +bar +foo + + + +# Verify data after the test has been "shot" + + +^(User-Agent:|Content-Type: multipart/form-data;|Content-Type: multipart/mixed, boundary=|-------).* + + +POST /we/want/1133 HTTP/1.1 +User-Agent: curl/7.10.4 (i686-pc-linux-gnu) libcurl/7.10.4 OpenSSL/0.9.7a ipv6 zlib/1.1.3 +Host: %HOSTIP:%HTTPPORT +Accept: */* +Content-Length: 967 +Expect: 100-continue +Content-Type: multipart/form-data; boundary=----------------------------24e78000bd32 + +------------------------------24e78000bd32 +Content-Disposition: form-data; name="file"; filename="faker,and;.txt" +Content-Type: mo/foo + +foo bar +This is a bar foo +bar +foo + +------------------------------24e78000bd32 +Content-Disposition: form-data; name="file2"; filename="test1133,a\"nd;.txt" +Content-Type: text/plain + +foo bar +This is a bar foo +bar +foo + +------------------------------24e78000bd32 +Content-Disposition: form-data; name="file3" +Content-Type: multipart/mixed, boundary=----------------------------7f0e85a48b0b + +Content-Disposition: attachment; filename="test1133,a\"nd;.txt" +Content-Type: m/f + +foo bar +This is a bar foo +bar +foo + +Content-Disposition: attachment; filename="test1133,a\"nd;.txt" +Content-Type: text/plain + +foo bar +This is a bar foo +bar +foo + +------------------------------24e78000bd32-- + + + diff --git a/tests/data/test39 b/tests/data/test39 index a2e1441d1..1867b6081 100644 --- a/tests/data/test39 +++ b/tests/data/test39 @@ -26,7 +26,7 @@ http HTTP RFC1867-type formposting with filename= and type= -http://%HOSTIP:%HTTPPORT/we/want/39 -F name=daniel -F tool=curl --form-string "str1=@literal" --form-string "str2= # We create this file before the command is invoked! @@ -47,7 +47,7 @@ POST /we/want/39 HTTP/1.1 User-Agent: curl/7.10.4 (i686-pc-linux-gnu) libcurl/7.10.4 OpenSSL/0.9.7a ipv6 zlib/1.1.3 Host: %HOSTIP:%HTTPPORT Accept: */* -Content-Length: 810 +Content-Length: 1184 Expect: 100-continue Content-Type: multipart/form-data; boundary=----------------------------24e78000bd32 @@ -85,6 +85,24 @@ This is a bar foo bar foo +------------------------------24e78000bd32 +Content-Disposition: form-data; name="file3"; filename="f\\\\ak\\\\er,\\an\\d;.t\"xt" +Content-Type: mo/foo + +foo bar +This is a bar foo +bar +foo + +------------------------------24e78000bd32 +Content-Disposition: form-data; name="file4"; filename="A\\AA\"\"\\\"ZZZ" +Content-Type: text/plain + +foo bar +This is a bar foo +bar +foo + ------------------------------24e78000bd32-- diff --git a/tests/getpart.pm b/tests/getpart.pm index 58bb3e42c..92a237830 100644 --- a/tests/getpart.pm +++ b/tests/getpart.pm @@ -56,7 +56,7 @@ sub getpartattr { $inside++; my $attr=$1; - while($attr =~ s/ *([^=]*)= *(\"([^\"]*)\"|([^\"> ]*))//) { + while($attr =~ s/ *([^=]*)= *(\"([^\"]*)\"|([^\> ]*))//) { my ($var, $cont)=($1, $2); $cont =~ s/^\"(.*)\"$/$1/; $hash{$var}=$cont; -- cgit v1.2.3