urlapi: strip off scope id from numerical IPv6 addresses

... to make the host name "usable". Store the scope id and put it back when extracting a URL out of it. Also makes curl_url_set() syntax-check the host name given with CURLUPART_HOST. Fixes #3817 Closes #3822
author: Daniel Stenberg <daniel@haxx.se> 2019-04-30 16:59:08 +0200
committer: Daniel Stenberg <daniel@haxx.se> 2019-05-03 12:17:22 +0200
commit: bdb2dbc1032e7ca33cfc161fd1d5bfbabdf65841 (patch)
tree: d9e4788e86e610c1a841e91fa6fcc1b3179c3d1c
parent: 028126281901fb651819821f5c05caaf40976209 (diff)
5 files changed, 192 insertions, 12 deletions
diff --git a/docs/TODO b/docs/TODO
index 912eefc1a..5454b5e02 100644
--- a/docs/TODO
+++ b/docs/TODO
@@ -35,6 +35,7 @@
  1.16 Try to URL encode given URL
  1.17 Add support for IRIs
  1.18 try next proxy if one doesn't work
+ 1.19 add CURLUPART_SCOPEID
  1.20 SRV and URI DNS records
  1.21 Have the URL API offer IDN decoding
  1.22 CURLINFO_PAUSE_STATE
@@ -372,6 +373,11 @@
 
  https://github.com/curl/curl/issues/896
 
+1.19 add CURLUPART_SCOPEID
+
+ Add support for CURLUPART_SCOPEID to curl_url_set() and curl_url_get(). It is
+ only really used when the host name is an IPv6 numerical address.
+
 1.20 SRV and URI DNS records
 
  Offer support for resolving SRV and URI DNS records for libcurl to know which
diff --git a/lib/urlapi.c b/lib/urlapi.c
index 57f82cac5..c42dc737a 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -56,6 +56,7 @@ struct Curl_URL {
   char *password;
   char *options; /* IMAP only? */
   char *host;
+  char *scopeid; /* for numerical IPv6 addresses */
   char *port;
   char *path;
   char *query;
@@ -74,6 +75,7 @@ static void free_urlhandle(struct Curl_URL *u)
   free(u->password);
   free(u->options);
   free(u->host);
+  free(u->scopeid);
   free(u->port);
   free(u->path);
   free(u->query);
@@ -504,7 +506,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname)
       portptr = &hostname[len];
     else if('%' == endbracket) {
       int zonelen = len;
-      if(1 == sscanf(hostname + zonelen, "25%*[^]]%c%n", &endbracket, &len)) {
+      if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
         if(']' != endbracket)
           return CURLUE_MALFORMED_INPUT;
         portptr = &hostname[--zonelen + len + 1];
@@ -587,25 +589,45 @@ static CURLUcode junkscan(char *part)
   return CURLUE_OK;
 }
 
-static CURLUcode hostname_check(char *hostname, unsigned int flags)
+static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
 {
   const char *l = NULL; /* accepted characters */
   size_t len;
   size_t hlen = strlen(hostname);
-  (void)flags;
 
   if(hostname[0] == '[') {
     hostname++;
-    l = "0123456789abcdefABCDEF::.%";
+    l = "0123456789abcdefABCDEF::.";
     hlen -= 2;
   }
 
   if(l) {
     /* only valid letters are ok */
     len = strspn(hostname, l);
-    if(hlen != len)
-      /* hostname with bad content */
-      return CURLUE_MALFORMED_INPUT;
+    if(hlen != len) {
+      /* this could now be '%[zone id]' */
+      char scopeid[16];
+      if(hostname[len] == '%') {
+        int i = 0;
+        char *h = &hostname[len + 1];
+        /* pass '25' if present and is a url encoded percent sign */
+        if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
+          h += 2;
+        while(*h && (*h != ']') && (i < 15))
+          scopeid[i++] = *h++;
+        if(!i || (']' != *h))
+          return CURLUE_MALFORMED_INPUT;
+        scopeid[i] = 0;
+        u->scopeid = strdup(scopeid);
+        if(!u->scopeid)
+          return CURLUE_OUT_OF_MEMORY;
+        hostname[len] = ']'; /* insert end bracket */
+        hostname[len + 1] = 0; /* terminate the hostname */
+      }
+      else
+        return CURLUE_MALFORMED_INPUT;
+      /* hostname is fine */
+    }
   }
   else {
     /* letters from the second string is not ok */
@@ -856,7 +878,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
     if(result)
       return result;
 
-    result = hostname_check(hostname, flags);
+    result = hostname_check(u, hostname);
     if(result)
       return result;
 
@@ -1021,6 +1043,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
     char *scheme;
     char *options = u->options;
     char *port = u->port;
+    char *allochost = NULL;
     if(u->scheme && strcasecompare("file", u->scheme)) {
       url = aprintf("file://%s%s%s",
                     u->path,
@@ -1059,6 +1082,18 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
       if(h && !(h->flags & PROTOPT_URLOPTIONS))
         options = NULL;
 
+      if((u->host[0] == '[') && u->scopeid) {
+        /* make it '[ host %25 scopeid ]' */
+        size_t hostlen = strlen(u->host);
+        size_t alen = hostlen + 3 + strlen(u->scopeid) + 1;
+        allochost = malloc(alen);
+        if(!allochost)
+          return CURLUE_OUT_OF_MEMORY;
+        memcpy(allochost, u->host, hostlen - 1);
+        msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
+                  "%%25%s]", u->scopeid);
+      }
+
       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                     scheme,
                     u->user ? u->user : "",
@@ -1067,7 +1102,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
                     options ? ";" : "",
                     options ? options : "",
                     (u->user || u->password || options) ? "@": "",
-                    u->host,
+                    allochost ? allochost : u->host,
                     port ? ":": "",
                     port ? port : "",
                     (u->path && (u->path[0] != '/')) ? "/": "",
@@ -1076,6 +1111,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
                     (u->query && u->query[0]) ? u->query : "",
                     u->fragment? "#": "",
                     u->fragment? u->fragment : "");
+      free(allochost);
     }
     if(!url)
       return CURLUE_OUT_OF_MEMORY;
@@ -1191,6 +1227,8 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
     break;
   case CURLUPART_HOST:
     storep = &u->host;
+    free(u->scopeid);
+    u->scopeid = NULL;
     break;
   case CURLUPART_PORT:
   {
@@ -1370,6 +1408,13 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
       }
     }
 
+    if(what == CURLUPART_HOST) {
+      if(hostname_check(u, (char *)newp)) {
+        free((char *)newp);
+        return CURLUE_MALFORMED_INPUT;
+      }
+    }
+
     free(*storep);
     *storep = (char *)newp;
   }
diff --git a/tests/data/test1560 b/tests/data/test1560
index 4b6c97a53..9f6a122a0 100644
--- a/tests/data/test1560
+++ b/tests/data/test1560
@@ -31,4 +31,14 @@ lib1560
 </tool>
 </client>
 
+<verify>
+<stdout>
+we got [fe80::20c:29ff:fe9c:409b]
+we got https://[::1]/hello.html
+we got https://example.com/hello.html
+we got https://[fe80::20c:29ff:fe9c:409b%25eth0]/hello.html
+we got [fe80::20c:29ff:fe9c:409b]
+success
+</stdout>
+</verify>
 </testcase>
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c
index 4dcd3e3df..0b9495767 100644
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -153,7 +153,13 @@ static struct testcase get_parts_list[] ={
    "http | [11] | [12] | [13] | [fd00:a41::50] | [15] | / | [16] | [17]",
    CURLU_DEFAULT_SCHEME, 0, CURLUE_OK},
   {"https://[::1%252]:1234",
-   "https | [11] | [12] | [13] | [::1%252] | 1234 | / | [16] | [17]",
+   "https | [11] | [12] | [13] | [::1] | 1234 | / | [16] | [17]",
+   CURLU_DEFAULT_SCHEME, 0, CURLUE_OK},
+
+  /* here's "bad" zone id */
+  {"https://[fe80::20c:29ff:fe9c:409b%eth0]:1234",
+   "https | [11] | [12] | [13] | [fe80::20c:29ff:fe9c:409b] | 1234 "
+   "| / | [16] | [17]",
    CURLU_DEFAULT_SCHEME, 0, CURLUE_OK},
   {"https://127.0.0.1:443",
    "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [16] | [17]",
@@ -273,6 +279,18 @@ static struct testcase get_parts_list[] ={
 };
 
 static struct urltestcase get_url_list[] = {
+  {"https://[fe80::20c:29ff:fe9c:409b%]:1234",
+   "",
+   0, 0, CURLUE_MALFORMED_INPUT},
+  {"https://[fe80::20c:29ff:fe9c:409b%25]:1234",
+   "https://[fe80::20c:29ff:fe9c:409b%2525]:1234/",
+   0, 0, CURLUE_OK},
+  {"https://[fe80::20c:29ff:fe9c:409b%eth0]:1234",
+   "https://[fe80::20c:29ff:fe9c:409b%25eth0]:1234/",
+   0, 0, CURLUE_OK},
+  {"https://[::%25fakeit]/moo",
+   "https://[::%25fakeit]/moo",
+   0, 0, CURLUE_OK},
   {"smtp.example.com/path/html",
    "smtp://smtp.example.com/path/html",
    CURLU_GUESS_SCHEME, 0, CURLUE_OK},
@@ -831,10 +849,111 @@ static int append(void)
   return error;
 }
 
+static int scopeid(void)
+{
+  CURLU *u;
+  int error = 0; /* counts failed API calls; returned to the caller */
+  CURLUcode rc;
+  char *url; /* receives each extracted part, released with curl_free() */
+  /* Set hosts with and without an IPv6 scope id and print what comes back. */
+  u = curl_url(); /* NOTE(review): result is not checked for NULL */
+  rc = curl_url_set(u, CURLUPART_URL, /* scope id given as "%25eth0" */
+                    "https://[fe80::20c:29ff:fe9c:409b%25eth0]/hello.html", 0);
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_set returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+
+  rc = curl_url_get(u, CURLUPART_HOST, &url, 0); /* expect no scope id */
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_get CURLUPART_HOST returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+  else {
+    printf("we got %s\n", url); /* compared against test1560 <stdout> */
+    curl_free(url);
+  }
+
+  rc = curl_url_set(u, CURLUPART_HOST, "[::1]", 0); /* no scope id */
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_set CURLUPART_HOST returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+
+  rc = curl_url_get(u, CURLUPART_URL, &url, 0); /* expect host "[::1]" */
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_get CURLUPART_URL returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+  else {
+    printf("we got %s\n", url);
+    curl_free(url);
+  }
+
+  rc = curl_url_set(u, CURLUPART_HOST, "example.com", 0); /* plain name */
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_set CURLUPART_HOST returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+
+  rc = curl_url_get(u, CURLUPART_URL, &url, 0); /* expect example.com */
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_get CURLUPART_URL returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+  else {
+    printf("we got %s\n", url);
+    curl_free(url);
+  }
+
+  rc = curl_url_set(u, CURLUPART_HOST, /* host with "%25eth0" scope id */
+                    "[fe80::20c:29ff:fe9c:409b%25eth0]", 0);
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_set CURLUPART_HOST returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+
+  rc = curl_url_get(u, CURLUPART_URL, &url, 0); /* expect "%25eth0" in URL */
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_get CURLUPART_URL returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+  else {
+    printf("we got %s\n", url);
+    curl_free(url);
+  }
+
+  rc = curl_url_get(u, CURLUPART_HOST, &url, 0); /* expect no scope id */
+  if(rc != CURLUE_OK) {
+    fprintf(stderr, "%s:%d curl_url_get CURLUPART_HOST returned %d\n",
+            __FILE__, __LINE__, (int)rc);
+    error++;
+  }
+  else {
+    printf("we got %s\n", url);
+    curl_free(url);
+  }
+
+  curl_url_cleanup(u);
+
+  return error; /* zero when every call above returned CURLUE_OK */
+}
+
 int test(char *URL)
 {
   (void)URL; /* not used */
 
+  if(scopeid())
+    return 6;
+
   if(append())
     return 5;
 
diff --git a/tests/unit/unit1653.c b/tests/unit/unit1653.c
index 2f7ccd5ca..c5d8f3b3a 100644
--- a/tests/unit/unit1653.c
+++ b/tests/unit/unit1653.c
@@ -168,7 +168,7 @@ UNITTEST_START
   u = curl_url();
   if(!u)
     goto fail;
-  ipv6port = strdup("[fe80::250:56ff:fea7:da15%!25eth3]:80");
+  ipv6port = strdup("[fe80::250:56ff:fea7:da15!25eth3]:80");
   if(!ipv6port)
     goto fail;
   ret = Curl_parse_port(u, ipv6port);
@@ -184,7 +184,7 @@ UNITTEST_START
   if(!ipv6port)
     goto fail;
   ret = Curl_parse_port(u, ipv6port);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port returned non-error");
+  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
   fail:
   free(ipv6port);
   curl_url_cleanup(u);
author	Daniel Stenberg <daniel@haxx.se>	2019-04-30 16:59:08 +0200
committer	Daniel Stenberg <daniel@haxx.se>	2019-05-03 12:17:22 +0200
commit	bdb2dbc1032e7ca33cfc161fd1d5bfbabdf65841 (patch)
tree	d9e4788e86e610c1a841e91fa6fcc1b3179c3d1c
parent	028126281901fb651819821f5c05caaf40976209 (diff)