diff options
| -rw-r--r-- | docs/libcurl/curl_url_set.3 | 10 | ||||
| -rw-r--r-- | include/curl/urlapi.h | 1 | ||||
| -rw-r--r-- | lib/urlapi.c | 48 | ||||
| -rw-r--r-- | tests/data/test1560 | 6 | ||||
| -rw-r--r-- | tests/libtest/lib1560.c | 26 | 
5 files changed, 79 insertions, 12 deletions
diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 index b2b273f82..95b76bd8c 100644 --- a/docs/libcurl/curl_url_set.3 +++ b/docs/libcurl/curl_url_set.3 @@ -96,6 +96,16 @@ The query part gets space-to-plus conversion before the URL conversion.  This URL encoding is charset unaware and will convert the input on a  byte-by-byte manner. +.IP CURLU_DEFAULT_SCHEME +If set, will make libcurl allow the URL to be set without a scheme and then +sets that to the default scheme: HTTPS. Overrides the \fICURLU_GUESS_SCHEME\fP +option if both are set. +.IP CURLU_GUESS_SCHEME +If set, will make libcurl allow the URL to be set without a scheme and it +instead "guesses" which scheme that was intended based on the host name.  If +the outermost sub-domain name matches DICT, FTP, IMAP, LDAP, POP3 or SMTP then +that scheme will be used, otherwise it picks HTTP. Conflicts with the +\fICURLU_DEFAULT_SCHEME\fP option which takes precedence if both are set.  .SH RETURN VALUE  Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went  fine. 
diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h index b16cfce56..319de35b7 100644 --- a/include/curl/urlapi.h +++ b/include/curl/urlapi.h @@ -75,6 +75,7 @@ typedef enum {  #define CURLU_URLDECODE (1<<6)          /* URL decode on get */  #define CURLU_URLENCODE (1<<7)          /* URL encode on set */  #define CURLU_APPENDQUERY (1<<8)        /* append a form style part */ +#define CURLU_GUESS_SCHEME (1<<9)       /* legacy curl-style guessing */  typedef struct Curl_URL CURLU; diff --git a/lib/urlapi.c b/lib/urlapi.c index f6d911667..ef565d98d 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -554,7 +554,7 @@ static CURLUcode junkscan(char *part)  static CURLUcode hostname_check(char *hostname, unsigned int flags)  { -  const char *l; /* accepted characters */ +  const char *l = NULL; /* accepted characters */    size_t len;    size_t hlen = strlen(hostname);    (void)flags; @@ -564,14 +564,21 @@ static CURLUcode hostname_check(char *hostname, unsigned int flags)      l = "0123456789abcdefABCDEF::.";      hlen -= 2;    } -  else /* % for URL escaped letters */ -    l = "0123456789abcdefghijklimnopqrstuvwxyz-_.ABCDEFGHIJKLIMNOPQRSTUVWXYZ%"; - -  len = strspn(hostname, l); -  if(hlen != len) -    /* hostname with bad content */ -    return CURLUE_MALFORMED_INPUT; +  if(l) { +    /* only valid letters are ok */ +    len = strspn(hostname, l); +    if(hlen != len) +      /* hostname with bad content */ +      return CURLUE_MALFORMED_INPUT; +  } +  else { +    /* letters from the second string is not ok */ +    len = strcspn(hostname, " "); +    if(hlen != len) +      /* hostname with bad content */ +      return CURLUE_MALFORMED_INPUT; +  }    return CURLUE_OK;  } @@ -587,7 +594,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)    CURLUcode result;    bool url_has_scheme = FALSE;    char schemebuf[MAX_SCHEME_LEN]; -  char *schemep; +  char *schemep = NULL;    size_t schemelen = 0;    size_t urllen;    const struct Curl_handler *h = NULL; 
@@ -723,9 +730,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)      else {        /* no scheme! */ -      if(!(flags & CURLU_DEFAULT_SCHEME)) +      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))          return CURLUE_MALFORMED_INPUT; -      schemep = (char *) DEFAULT_SCHEME; +      if(flags & CURLU_DEFAULT_SCHEME) +        schemep = (char *) DEFAULT_SCHEME;        /*         * The URL was badly formatted, let's try without scheme specified. @@ -744,6 +752,24 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)      memcpy(hostname, hostp, len);      hostname[len] = 0; +    if((flags & CURLU_GUESS_SCHEME) && !schemep) { +      /* legacy curl-style guess based on host name */ +      if(checkprefix("ftp.", hostname)) +        schemep = (char *)"ftp"; +      else if(checkprefix("dict.", hostname)) +        schemep = (char *)"dict"; +      else if(checkprefix("ldap.", hostname)) +        schemep = (char *)"ldap"; +      else if(checkprefix("imap.", hostname)) +        schemep = (char *)"imap"; +      else if(checkprefix("smtp.", hostname)) +        schemep = (char *)"smtp"; +      else if(checkprefix("pop3.", hostname)) +        schemep = (char *)"pop3"; +      else +        schemep = (char *)"http"; +    } +      len = strlen(p);      memcpy(path, p, len);      path[len] = 0; diff --git a/tests/data/test1560 b/tests/data/test1560 index 720df036f..4b6c97a53 100644 --- a/tests/data/test1560 +++ b/tests/data/test1560 @@ -16,6 +16,12 @@ none  file  https  http +pop3 +smtp +imap +ldap +dict +ftp  </features>   <name>  URL API diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index 669ea9ada..30fb582a2 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -246,8 +246,32 @@ static struct testcase get_parts_list[] ={  };  static struct urltestcase get_url_list[] = { +  {"smtp.example.com/path/html", +   "smtp://smtp.example.com/path/html", +   CURLU_GUESS_SCHEME, 0, CURLUE_OK}, +  
{"https.example.com/path/html", +   "http://https.example.com/path/html", +   CURLU_GUESS_SCHEME, 0, CURLUE_OK}, +  {"dict.example.com/path/html", +   "dict://dict.example.com/path/html", +   CURLU_GUESS_SCHEME, 0, CURLUE_OK}, +  {"pop3.example.com/path/html", +   "pop3://pop3.example.com/path/html", +   CURLU_GUESS_SCHEME, 0, CURLUE_OK}, +  {"ldap.example.com/path/html", +   "ldap://ldap.example.com/path/html", +   CURLU_GUESS_SCHEME, 0, CURLUE_OK}, +  {"imap.example.com/path/html", +   "imap://imap.example.com/path/html", +   CURLU_GUESS_SCHEME, 0, CURLUE_OK}, +  {"ftp.example.com/path/html", +   "ftp://ftp.example.com/path/html", +   CURLU_GUESS_SCHEME, 0, CURLUE_OK}, +  {"example.com/path/html", +   "http://example.com/path/html", +   CURLU_GUESS_SCHEME, 0, CURLUE_OK},    {"HTTP://test/", "http://test/", 0, 0, CURLUE_OK}, -  {"http://HO0_-st..~./", "", 0, 0, CURLUE_MALFORMED_INPUT}, +  {"http://HO0_-st..~./", "http://HO0_-st..~./", 0, 0, CURLUE_OK},    {"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},    {"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER},    {"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},  | 
