aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/libcurl/curl_url_set.310
-rw-r--r--include/curl/urlapi.h1
-rw-r--r--lib/urlapi.c48
-rw-r--r--tests/data/test15606
-rw-r--r--tests/libtest/lib1560.c26
5 files changed, 79 insertions, 12 deletions
diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3
index b2b273f82..95b76bd8c 100644
--- a/docs/libcurl/curl_url_set.3
+++ b/docs/libcurl/curl_url_set.3
@@ -96,6 +96,16 @@ The query part gets space-to-plus conversion before the URL conversion.
This URL encoding is charset unaware and will convert the input on a
byte-by-byte manner.
+.IP CURLU_DEFAULT_SCHEME
+If set, libcurl allows the URL to be set without a scheme and then sets the
+scheme to the default: HTTPS. Overrides the \fICURLU_GUESS_SCHEME\fP
+option if both are set.
+.IP CURLU_GUESS_SCHEME
+If set, will make libcurl allow the URL to be set without a scheme and it
+instead "guesses" which scheme that was intended based on the host name. If
+the outermost sub-domain name matches DICT, FTP, IMAP, LDAP, POP3 or SMTP then
+that scheme will be used, otherwise it picks HTTP. Conflicts with the
+\fICURLU_DEFAULT_SCHEME\fP option which takes precedence if both are set.
.SH RETURN VALUE
Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went
fine.
diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h
index b16cfce56..319de35b7 100644
--- a/include/curl/urlapi.h
+++ b/include/curl/urlapi.h
@@ -75,6 +75,7 @@ typedef enum {
#define CURLU_URLDECODE (1<<6) /* URL decode on get */
#define CURLU_URLENCODE (1<<7) /* URL encode on set */
#define CURLU_APPENDQUERY (1<<8) /* append a form style part */
+#define CURLU_GUESS_SCHEME (1<<9) /* legacy curl-style guessing */
typedef struct Curl_URL CURLU;
diff --git a/lib/urlapi.c b/lib/urlapi.c
index f6d911667..ef565d98d 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -554,7 +554,7 @@ static CURLUcode junkscan(char *part)
static CURLUcode hostname_check(char *hostname, unsigned int flags)
{
- const char *l; /* accepted characters */
+ const char *l = NULL; /* accepted characters */
size_t len;
size_t hlen = strlen(hostname);
(void)flags;
@@ -564,14 +564,21 @@ static CURLUcode hostname_check(char *hostname, unsigned int flags)
l = "0123456789abcdefABCDEF::.";
hlen -= 2;
}
- else /* % for URL escaped letters */
- l = "0123456789abcdefghijklimnopqrstuvwxyz-_.ABCDEFGHIJKLIMNOPQRSTUVWXYZ%";
-
- len = strspn(hostname, l);
- if(hlen != len)
- /* hostname with bad content */
- return CURLUE_MALFORMED_INPUT;
+ if(l) {
+ /* only valid letters are ok */
+ len = strspn(hostname, l);
+ if(hlen != len)
+ /* hostname with bad content */
+ return CURLUE_MALFORMED_INPUT;
+ }
+ else {
+ /* letters from the second string are not ok */
+ len = strcspn(hostname, " ");
+ if(hlen != len)
+ /* hostname with bad content */
+ return CURLUE_MALFORMED_INPUT;
+ }
return CURLUE_OK;
}
@@ -587,7 +594,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
CURLUcode result;
bool url_has_scheme = FALSE;
char schemebuf[MAX_SCHEME_LEN];
- char *schemep;
+ char *schemep = NULL;
size_t schemelen = 0;
size_t urllen;
const struct Curl_handler *h = NULL;
@@ -723,9 +730,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
else {
/* no scheme! */
- if(!(flags & CURLU_DEFAULT_SCHEME))
+ if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
return CURLUE_MALFORMED_INPUT;
- schemep = (char *) DEFAULT_SCHEME;
+ if(flags & CURLU_DEFAULT_SCHEME)
+ schemep = (char *) DEFAULT_SCHEME;
/*
* The URL was badly formatted, let's try without scheme specified.
@@ -744,6 +752,24 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
memcpy(hostname, hostp, len);
hostname[len] = 0;
+ if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+ /* legacy curl-style guess based on host name */
+ if(checkprefix("ftp.", hostname))
+ schemep = (char *)"ftp";
+ else if(checkprefix("dict.", hostname))
+ schemep = (char *)"dict";
+ else if(checkprefix("ldap.", hostname))
+ schemep = (char *)"ldap";
+ else if(checkprefix("imap.", hostname))
+ schemep = (char *)"imap";
+ else if(checkprefix("smtp.", hostname))
+ schemep = (char *)"smtp";
+ else if(checkprefix("pop3.", hostname))
+ schemep = (char *)"pop3";
+ else
+ schemep = (char *)"http";
+ }
+
len = strlen(p);
memcpy(path, p, len);
path[len] = 0;
diff --git a/tests/data/test1560 b/tests/data/test1560
index 720df036f..4b6c97a53 100644
--- a/tests/data/test1560
+++ b/tests/data/test1560
@@ -16,6 +16,12 @@ none
file
https
http
+pop3
+smtp
+imap
+ldap
+dict
+ftp
</features>
<name>
URL API
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c
index 669ea9ada..30fb582a2 100644
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -246,8 +246,32 @@ static struct testcase get_parts_list[] ={
};
static struct urltestcase get_url_list[] = {
+ {"smtp.example.com/path/html",
+ "smtp://smtp.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"https.example.com/path/html",
+ "http://https.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"dict.example.com/path/html",
+ "dict://dict.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"pop3.example.com/path/html",
+ "pop3://pop3.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"ldap.example.com/path/html",
+ "ldap://ldap.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"imap.example.com/path/html",
+ "imap://imap.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"ftp.example.com/path/html",
+ "ftp://ftp.example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
+ {"example.com/path/html",
+ "http://example.com/path/html",
+ CURLU_GUESS_SCHEME, 0, CURLUE_OK},
{"HTTP://test/", "http://test/", 0, 0, CURLUE_OK},
- {"http://HO0_-st..~./", "", 0, 0, CURLUE_MALFORMED_INPUT},
+ {"http://HO0_-st..~./", "http://HO0_-st..~./", 0, 0, CURLUE_OK},
{"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
{"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
{"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},