HTTP: support multiple Content-Encodings

This is implemented as an output streaming stack of unencoders, the last calling the client write procedure. New test 230 checks this feature. Bug: https://github.com/curl/curl/pull/2002 Reported-By: Daniel Bankhead
author: Patrick Monnerat <patrick@monnerat.net> 2017-11-05 15:09:48 +0100
committer: Patrick Monnerat <patrick@monnerat.net> 2017-11-05 15:09:48 +0100
commit: dbcced8e32b50c068ac297106f0502ee200a1ebd (patch)
tree: 0fa1ce28f33ab968792e21264e9a9318807d9f9b /lib
parent: 462f3cac34c09b9c28b449258e71dc37171dc604 (diff)
7 files changed, 532 insertions, 272 deletions
diff --git a/lib/content_encoding.c b/lib/content_encoding.c
index 110226034..76a9e6866 100644
--- a/lib/content_encoding.c
+++ b/lib/content_encoding.c
@@ -22,16 +22,23 @@
 
 #include "curl_setup.h"
 
-#ifdef HAVE_LIBZ
-
 #include "urldata.h"
 #include <curl/curl.h>
+#include <stddef.h>
 #include "sendf.h"
+#include "http.h"
 #include "content_encoding.h"
 #include "strdup.h"
+#include "strcase.h"
 #include "curl_memory.h"
 #include "memdebug.h"
 
+#define CONTENT_ENCODING_DEFAULT  "identity"
+
+#ifndef CURL_DISABLE_HTTP
+
+#ifdef HAVE_LIBZ
+
 /* Comment this out if zlib is always going to be at least ver. 1.2.0.4
    (doing so will reduce code size slightly). */
 #define OLD_ZLIB_SUPPORT 1
@@ -49,6 +56,21 @@
 #define COMMENT      0x10 /* bit 4 set: file comment present */
 #define RESERVED     0xE0 /* bits 5..7: reserved */
 
+typedef enum {
+  ZLIB_UNINIT,          /* uninitialized */
+  ZLIB_INIT,            /* initialized */
+  ZLIB_GZIP_HEADER,     /* reading gzip header */
+  ZLIB_GZIP_INFLATING,  /* inflating gzip stream */
+  ZLIB_INIT_GZIP        /* initialized in transparent gzip mode */
+} zlibInitState;
+
+/* Writer parameters. */
+typedef struct {
+  zlibInitState zlib_init;    /* zlib init state */
+  z_stream z;                /* State structure for zlib. */
+}  zlib_params;
+
+
 static voidpf
 zalloc_cb(voidpf opaque, unsigned int items, unsigned int size)
 {
@@ -79,19 +101,27 @@ process_zlib_error(struct connectdata *conn, z_stream *z)
 }
 
 static CURLcode
-exit_zlib(z_stream *z, zlibInitState *zlib_init, CURLcode result)
+exit_zlib(struct connectdata *conn,
+          z_stream *z, zlibInitState *zlib_init, CURLcode result)
 {
-  inflateEnd(z);
-  *zlib_init = ZLIB_UNINIT;
+  if(*zlib_init == ZLIB_GZIP_HEADER)
+    Curl_safefree(z->next_in);
+
+  if(*zlib_init != ZLIB_UNINIT) {
+    if(inflateEnd(z) != Z_OK && result == CURLE_OK)
+      result = process_zlib_error(conn, z);
+    *zlib_init = ZLIB_UNINIT;
+  }
+
   return result;
 }
 
 static CURLcode
-inflate_stream(struct connectdata *conn,
-               struct SingleRequest *k)
+inflate_stream(struct connectdata *conn, contenc_writer *writer)
 {
+  zlib_params *zp = (zlib_params *) &writer->params;
   int allow_restart = 1;
-  z_stream *z = &k->z;          /* zlib state structure */
+  z_stream *z = &zp->z;         /* zlib state structure */
   uInt nread = z->avail_in;
   Bytef *orig_in = z->next_in;
   int status;                   /* zlib status */
@@ -102,35 +132,31 @@ inflate_stream(struct connectdata *conn,
      large to hold on the stack */
   decomp = malloc(DSIZ);
   if(decomp == NULL) {
-    return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY);
+    return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY);
   }
 
   /* because the buffer size is fixed, iteratively decompress and transfer to
      the client via client_write. */
   for(;;) {
     /* (re)set buffer for decompressed output for every iteration */
-    z->next_out = (Bytef *)decomp;
+    z->next_out = (Bytef *) decomp;
     z->avail_out = DSIZ;
 
     status = inflate(z, Z_SYNC_FLUSH);
     if(status == Z_OK || status == Z_STREAM_END) {
       allow_restart = 0;
-      if((DSIZ - z->avail_out) && (!k->ignorebody)) {
-        result = Curl_client_write(conn, CLIENTWRITE_BODY, decomp,
+      result = Curl_unencode_write(conn, writer->downstream, decomp,
                                    DSIZ - z->avail_out);
-        /* if !CURLE_OK, clean up, return */
-        if(result) {
-          free(decomp);
-          return exit_zlib(z, &k->zlib_init, result);
-        }
+      /* if !CURLE_OK, clean up, return */
+      if(result) {
+        free(decomp);
+        return exit_zlib(conn, z, &zp->zlib_init, result);
       }
 
       /* Done? clean up, return */
       if(status == Z_STREAM_END) {
         free(decomp);
-        if(inflateEnd(z) == Z_OK)
-          return exit_zlib(z, &k->zlib_init, result);
-        return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z));
+        return exit_zlib(conn, z, &zp->zlib_init, result);
       }
 
       /* Done with these bytes, exit */
@@ -148,7 +174,8 @@ inflate_stream(struct connectdata *conn,
       (void) inflateEnd(z);     /* don't care about the return code */
       if(inflateInit2(z, -MAX_WBITS) != Z_OK) {
         free(decomp);
-        return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z));
+        zp->zlib_init = ZLIB_UNINIT;  /* inflateEnd() already called. */
+        return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z));
       }
       z->next_in = orig_in;
       z->avail_in = nread;
@@ -157,36 +184,97 @@ inflate_stream(struct connectdata *conn,
     }
     else {                      /* Error; exit loop, handle below */
       free(decomp);
-      return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z));
+      return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z));
     }
   }
-  /* Will never get here */
+  /* UNREACHED */
 }
 
-CURLcode
-Curl_unencode_deflate_write(struct connectdata *conn,
-                            struct SingleRequest *k,
-                            ssize_t nread)
+
+/* Deflate handler. */
+static CURLcode deflate_init_writer(struct connectdata *conn,
+                                    contenc_writer *writer)
 {
-  z_stream *z = &k->z;          /* zlib state structure */
+  zlib_params *zp = (zlib_params *) &writer->params;
+  z_stream *z = &zp->z;     /* zlib state structure */
 
-  /* Initialize zlib? */
-  if(k->zlib_init == ZLIB_UNINIT) {
-    memset(z, 0, sizeof(z_stream));
-    z->zalloc = (alloc_func)zalloc_cb;
-    z->zfree = (free_func)zfree_cb;
+  if(!writer->downstream)
+    return CURLE_WRITE_ERROR;
 
-    if(inflateInit(z) != Z_OK)
-      return process_zlib_error(conn, z);
-    k->zlib_init = ZLIB_INIT;
-  }
+  /* Initialize zlib */
+  z->zalloc = (alloc_func) zalloc_cb;
+  z->zfree = (free_func) zfree_cb;
+
+  if(inflateInit(z) != Z_OK)
+    return process_zlib_error(conn, z);
+  zp->zlib_init = ZLIB_INIT;
+  return CURLE_OK;
+}
+
+static CURLcode deflate_unencode_write(struct connectdata *conn,
+                                       contenc_writer *writer,
+                                       const char *buf, size_t nbytes)
+{
+  zlib_params *zp = (zlib_params *) &writer->params;
+  z_stream *z = &zp->z;     /* zlib state structure */
 
   /* Set the compressed input when this function is called */
-  z->next_in = (Bytef *)k->str;
-  z->avail_in = (uInt)nread;
+  z->next_in = (Bytef *) buf;
+  z->avail_in = (uInt) nbytes;
 
   /* Now uncompress the data */
-  return inflate_stream(conn, k);
+  return inflate_stream(conn, writer);
+}
+
+static void deflate_close_writer(struct connectdata *conn,
+                                 contenc_writer *writer)
+{
+  zlib_params *zp = (zlib_params *) &writer->params;
+  z_stream *z = &zp->z;     /* zlib state structure */
+
+  exit_zlib(conn, z, &zp->zlib_init, CURLE_OK);
+}
+
+static const content_encoding deflate_encoding = {
+  "deflate",
+  NULL,
+  deflate_init_writer,
+  deflate_unencode_write,
+  deflate_close_writer,
+  sizeof(zlib_params)
+};
+
+
+/* Gzip handler. */
+static CURLcode gzip_init_writer(struct connectdata *conn,
+                                 contenc_writer *writer)
+{
+  zlib_params *zp = (zlib_params *) &writer->params;
+  z_stream *z = &zp->z;     /* zlib state structure */
+
+  if(!writer->downstream)
+    return CURLE_WRITE_ERROR;
+
+  /* Initialize zlib */
+  z->zalloc = (alloc_func) zalloc_cb;
+  z->zfree = (free_func) zfree_cb;
+
+  if(strcmp(zlibVersion(), "1.2.0.4") >= 0) {
+    /* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */
+    if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) {
+      return process_zlib_error(conn, z);
+    }
+    zp->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */
+  }
+  else {
+    /* we must parse the gzip header ourselves */
+    if(inflateInit2(z, -MAX_WBITS) != Z_OK) {
+      return process_zlib_error(conn, z);
+    }
+    zp->zlib_init = ZLIB_INIT;   /* Initial call state */
+  }
+
+  return CURLE_OK;
 }
 
 #ifdef OLD_ZLIB_SUPPORT
@@ -273,47 +361,25 @@ static enum {
 }
 #endif
 
-CURLcode
-Curl_unencode_gzip_write(struct connectdata *conn,
-                         struct SingleRequest *k,
-                         ssize_t nread)
+static CURLcode gzip_unencode_write(struct connectdata *conn,
+                                    contenc_writer *writer,
+                                    const char *buf, size_t nbytes)
 {
-  z_stream *z = &k->z;          /* zlib state structure */
-
-  /* Initialize zlib? */
-  if(k->zlib_init == ZLIB_UNINIT) {
-    memset(z, 0, sizeof(z_stream));
-    z->zalloc = (alloc_func)zalloc_cb;
-    z->zfree = (free_func)zfree_cb;
-
-    if(strcmp(zlibVersion(), "1.2.0.4") >= 0) {
-      /* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */
-      if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) {
-        return process_zlib_error(conn, z);
-      }
-      k->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */
-    }
-    else {
-      /* we must parse the gzip header ourselves */
-      if(inflateInit2(z, -MAX_WBITS) != Z_OK) {
-        return process_zlib_error(conn, z);
-      }
-      k->zlib_init = ZLIB_INIT;   /* Initial call state */
-    }
-  }
+  zlib_params *zp = (zlib_params *) &writer->params;
+  z_stream *z = &zp->z;     /* zlib state structure */
 
-  if(k->zlib_init == ZLIB_INIT_GZIP) {
+  if(zp->zlib_init == ZLIB_INIT_GZIP) {
     /* Let zlib handle the gzip decompression entirely */
-    z->next_in = (Bytef *)k->str;
-    z->avail_in = (uInt)nread;
+    z->next_in = (Bytef *) buf;
+    z->avail_in = (uInt) nbytes;
     /* Now uncompress the data */
-    return inflate_stream(conn, k);
+    return inflate_stream(conn, writer);
   }
 
 #ifndef OLD_ZLIB_SUPPORT
   /* Support for old zlib versions is compiled away and we are running with
      an old version, so return an error. */
-  return exit_zlib(z, &k->zlib_init, CURLE_WRITE_ERROR);
+  return exit_zlib(conn, z, &zp->zlib_init, CURLE_WRITE_ERROR);
 
 #else
   /* This next mess is to get around the potential case where there isn't
@@ -326,18 +392,18 @@ Curl_unencode_gzip_write(struct connectdata *conn,
    * can handle the gzip header themselves.
    */
 
-  switch(k->zlib_init) {
+  switch(zp->zlib_init) {
   /* Skip over gzip header? */
   case ZLIB_INIT:
   {
     /* Initial call state */
     ssize_t hlen;
 
-    switch(check_gzip_header((unsigned char *)k->str, nread, &hlen)) {
+    switch(check_gzip_header((unsigned char *) buf, nbytes, &hlen)) {
     case GZIP_OK:
-      z->next_in = (Bytef *)k->str + hlen;
-      z->avail_in = (uInt)(nread - hlen);
-      k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */
+      z->next_in = (Bytef *) buf + hlen;
+      z->avail_in = (uInt) (nbytes - hlen);
+      zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */
       break;
 
     case GZIP_UNDERFLOW:
@@ -348,19 +414,19 @@ Curl_unencode_gzip_write(struct connectdata *conn,
        * the first place, and it's even more unlikely for a transfer to fail
        * immediately afterwards, it should seldom be a problem.
        */
-      z->avail_in = (uInt)nread;
+      z->avail_in = (uInt) nbytes;
       z->next_in = malloc(z->avail_in);
       if(z->next_in == NULL) {
-        return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY);
+        return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY);
       }
-      memcpy(z->next_in, k->str, z->avail_in);
-      k->zlib_init = ZLIB_GZIP_HEADER;   /* Need more gzip header data state */
+      memcpy(z->next_in, buf, z->avail_in);
+      zp->zlib_init = ZLIB_GZIP_HEADER;  /* Need more gzip header data state */
       /* We don't have any data to inflate yet */
       return CURLE_OK;
 
     case GZIP_BAD:
     default:
-      return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z));
+      return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z));
     }
 
   }
@@ -370,22 +436,22 @@ Curl_unencode_gzip_write(struct connectdata *conn,
   {
     /* Need more gzip header data state */
     ssize_t hlen;
-    z->avail_in += (uInt)nread;
+    z->avail_in += (uInt) nbytes;
     z->next_in = Curl_saferealloc(z->next_in, z->avail_in);
     if(z->next_in == NULL) {
-      return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY);
+      return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY);
     }
     /* Append the new block of data to the previous one */
-    memcpy(z->next_in + z->avail_in - nread, k->str, nread);
+    memcpy(z->next_in + z->avail_in - nbytes, buf, nbytes);
 
     switch(check_gzip_header(z->next_in, z->avail_in, &hlen)) {
     case GZIP_OK:
       /* This is the zlib stream data */
       free(z->next_in);
       /* Don't point into the malloced block since we just freed it */
-      z->next_in = (Bytef *)k->str + hlen + nread - z->avail_in;
-      z->avail_in = (uInt)(z->avail_in - hlen);
-      k->zlib_init = ZLIB_GZIP_INFLATING;   /* Inflating stream state */
+      z->next_in = (Bytef *) buf + hlen + nbytes - z->avail_in;
+      z->avail_in = (uInt) (z->avail_in - hlen);
+      zp->zlib_init = ZLIB_GZIP_INFLATING;   /* Inflating stream state */
       break;
 
     case GZIP_UNDERFLOW:
@@ -394,8 +460,7 @@ Curl_unencode_gzip_write(struct connectdata *conn,
 
     case GZIP_BAD:
     default:
-      free(z->next_in);
-      return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z));
+      return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z));
     }
 
   }
@@ -404,8 +469,8 @@ Curl_unencode_gzip_write(struct connectdata *conn,
   case ZLIB_GZIP_INFLATING:
   default:
     /* Inflating stream state */
-    z->next_in = (Bytef *)k->str;
-    z->avail_in = (uInt)nread;
+    z->next_in = (Bytef *) buf;
+    z->avail_in = (uInt) nbytes;
     break;
   }
 
@@ -415,17 +480,332 @@ Curl_unencode_gzip_write(struct connectdata *conn,
   }
 
   /* We've parsed the header, now uncompress the data */
-  return inflate_stream(conn, k);
+  return inflate_stream(conn, writer);
 #endif
 }
 
+static void gzip_close_writer(struct connectdata *conn,
+                              contenc_writer *writer)
+{
+  zlib_params *zp = (zlib_params *) &writer->params;
+  z_stream *z = &zp->z;     /* zlib state structure */
+
+  exit_zlib(conn, z, &zp->zlib_init, CURLE_OK);
+}
+
+static const content_encoding gzip_encoding = {
+  "gzip",
+  "x-gzip",
+  gzip_init_writer,
+  gzip_unencode_write,
+  gzip_close_writer,
+  sizeof(zlib_params)
+};
+
+#endif /* HAVE_LIBZ */
+
+
+/* Identity handler. */
+static CURLcode identity_init_writer(struct connectdata *conn,
+                                     contenc_writer *writer)
+{
+  (void) conn;
+  return writer->downstream? CURLE_OK: CURLE_WRITE_ERROR;
+}
+
+static CURLcode identity_unencode_write(struct connectdata *conn,
+                                        contenc_writer *writer,
+                                        const char *buf, size_t nbytes)
+{
+  return Curl_unencode_write(conn, writer->downstream, buf, nbytes);
+}
+
+static void identity_close_writer(struct connectdata *conn,
+                                  contenc_writer *writer)
+{
+  (void) conn;
+  (void) writer;
+}
+
+static const content_encoding identity_encoding = {
+  "identity",
+  NULL,
+  identity_init_writer,
+  identity_unencode_write,
+  identity_close_writer,
+  0
+};
+
+
+/* supported content encodings table. */
+static const content_encoding * const encodings[] = {
+  &identity_encoding,
+#ifdef HAVE_LIBZ
+  &deflate_encoding,
+  &gzip_encoding,
+#endif
+  NULL
+};
+
+
+/* Return a list of comma-separated names of supported encodings. */
+char *Curl_all_content_encodings(void)
+{
+  size_t len = 0;
+  const content_encoding * const *cep;
+  const content_encoding *ce;
+  char *ace;
+  char *p;
+
+  for(cep = encodings; *cep; cep++) {
+    ce = *cep;
+    if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT))
+      len += strlen(ce->name) + 2;
+  }
+
+  if(!len)
+    return strdup(CONTENT_ENCODING_DEFAULT);
+
+  ace = malloc(len);
+  if(ace) {
+    p = ace;
+    for(cep = encodings; *cep; cep++) {
+      ce = *cep;
+      if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT)) {
+        strcpy(p, ce->name);
+        p += strlen(p);
+        *p++ = ',';
+        *p++ = ' ';
+      }
+    }
+    p[-2] = '\0';
+  }
+
+  return ace;
+}
+
+
+/* Real client writer: no downstream. */
+static CURLcode client_init_writer(struct connectdata *conn,
+                                   contenc_writer *writer)
+{
+  (void) conn;
+  return writer->downstream? CURLE_WRITE_ERROR: CURLE_OK;
+}
+
+static CURLcode client_unencode_write(struct connectdata *conn,
+                                      contenc_writer *writer,
+                                      const char *buf, size_t nbytes)
+{
+  struct Curl_easy *data = conn->data;
+  struct SingleRequest *k = &data->req;
+
+  (void) writer;
+
+  if(!nbytes || k->ignorebody)
+    return CURLE_OK;
+
+  return Curl_client_write(conn, CLIENTWRITE_BODY, (char *) buf, nbytes);
+}
+
+static void client_close_writer(struct connectdata *conn,
+                                contenc_writer *writer)
+{
+  (void) conn;
+  (void) writer;
+}
+
+static const content_encoding client_encoding = {
+  NULL,
+  NULL,
+  client_init_writer,
+  client_unencode_write,
+  client_close_writer,
+  0
+};
+
+
+/* Deferred error dummy writer. */
+static CURLcode error_init_writer(struct connectdata *conn,
+                                  contenc_writer *writer)
+{
+  (void) conn;
+  return writer->downstream? CURLE_OK: CURLE_WRITE_ERROR;
+}
+
+static CURLcode error_unencode_write(struct connectdata *conn,
+                                     contenc_writer *writer,
+                                     const char *buf, size_t nbytes)
+{
+  char *all = Curl_all_content_encodings();
+
+  (void) writer;
+  (void) buf;
+  (void) nbytes;
+
+  if(!all)
+    return CURLE_OUT_OF_MEMORY;
+  failf(conn->data, "Unrecognized content encoding type. "
+                    "libcurl understands %s content encodings.", all);
+  free(all);
+  return CURLE_BAD_CONTENT_ENCODING;
+}
+
+static void error_close_writer(struct connectdata *conn,
+                               contenc_writer *writer)
+{
+  (void) conn;
+  (void) writer;
+}
+
+static const content_encoding error_encoding = {
+  NULL,
+  NULL,
+  error_init_writer,
+  error_unencode_write,
+  error_close_writer,
+  0
+};
+
+/* Create an unencoding writer stage using the given handler. */
+static contenc_writer *new_unencoding_writer(struct connectdata *conn,
+                                             const content_encoding *handler,
+                                             contenc_writer *downstream)
+{
+  size_t sz = offsetof(contenc_writer, params) + handler->paramsize;
+  contenc_writer *writer = (contenc_writer *) malloc(sz);
+
+  if(writer) {
+    memset(writer, 0, sz);
+    writer->handler = handler;
+    writer->downstream = downstream;
+    if(handler->init_writer(conn, writer)) {
+      free(writer);
+      writer = NULL;
+    }
+  }
+
+  return writer;
+}
+
+/* Write data using an unencoding writer stack. */
+CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer,
+                             const char *buf, size_t nbytes)
+{
+  return writer->handler->unencode_write(conn, writer, buf, nbytes);
+}
+
+/* Close and clean-up the connection's writer stack. */
 void Curl_unencode_cleanup(struct connectdata *conn)
 {
   struct Curl_easy *data = conn->data;
   struct SingleRequest *k = &data->req;
-  z_stream *z = &k->z;
-  if(k->zlib_init != ZLIB_UNINIT)
-    (void) exit_zlib(z, &k->zlib_init, CURLE_OK);
+  contenc_writer *writer = k->writer_stack;
+
+  while(writer) {
+    k->writer_stack = writer->downstream;
+    writer->handler->close_writer(conn, writer);
+    free(writer);
+    writer = k->writer_stack;
+  }
 }
 
-#endif /* HAVE_LIBZ */
+/* Find the content encoding by name. */
+static const content_encoding *find_encoding(const char *name, size_t len)
+{
+  const content_encoding * const *cep;
+  const content_encoding *ce;
+
+  for(cep = encodings; *cep; cep++) {
+    ce = *cep;
+    if((strncasecompare(name, ce->name, len) && !ce->name[len]) ||
+       (ce->alias && strncasecompare(name, ce->alias, len) && !ce->alias[len]))
+      return ce;
+  }
+  return NULL;
+}
+
+/* Set-up the unencoding stack from the Content-Encoding header value.
+ * See RFC 7231 section 3.1.2.2. */
+CURLcode Curl_build_unencoding_stack(struct connectdata *conn,
+                                     const char *enclist, int maybechunked)
+{
+  struct Curl_easy *data = conn->data;
+  struct SingleRequest *k = &data->req;
+
+  do {
+    const char *name;
+    size_t namelen;
+
+    /* Parse a single encoding name. */
+    while(ISSPACE(*enclist) || *enclist == ',')
+      enclist++;
+
+    name = enclist;
+
+    for(namelen = 0; *enclist && *enclist != ','; enclist++)
+      if(!ISSPACE(*enclist))
+        namelen = enclist - name + 1;
+
+    /* Special case: chunked encoding is handled at the reader level. */
+    if(maybechunked && namelen == 7 && strncasecompare(name, "chunked", 7)) {
+      k->chunk = TRUE;             /* chunks coming our way. */
+      Curl_httpchunk_init(conn);   /* init our chunky engine. */
+    }
+    else if(namelen) {
+      const content_encoding *encoding = find_encoding(name, namelen);
+      contenc_writer *writer;
+
+      if(!k->writer_stack) {
+        k->writer_stack = new_unencoding_writer(conn, &client_encoding, NULL);
+
+        if(!k->writer_stack)
+          return CURLE_OUT_OF_MEMORY;
+      }
+
+      if(!encoding)
+        encoding = &error_encoding;  /* Defer error at stack use. */
+
+      /* Stack the unencoding stage. */
+      writer = new_unencoding_writer(conn, encoding, k->writer_stack);
+      if(!writer)
+        return CURLE_OUT_OF_MEMORY;
+      k->writer_stack = writer;
+    }
+  } while(*enclist);
+
+  return CURLE_OK;
+}
+
+#else
+/* Stubs for builds without HTTP. */
+CURLcode Curl_build_unencoding_stack(struct connectdata *conn,
+                                     const char *enclist, int maybechunked)
+{
+  (void) conn;
+  (void) enclist;
+  (void) maybechunked;
+  return CURLE_NOT_BUILT_IN;
+}
+
+CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer,
+                             const char *buf, size_t nbytes)
+{
+  (void) conn;
+  (void) writer;
+  (void) buf;
+  (void) nbytes;
+  return CURLE_NOT_BUILT_IN;
+}
+
+void Curl_unencode_cleanup(struct connectdata *conn)
+{
+  (void) conn;
+}
+
+char *Curl_all_content_encodings(void)
+{
+  return strdup(CONTENT_ENCODING_DEFAULT);  /* Satisfy caller. */
+}
+
+#endif /* CURL_DISABLE_HTTP */
diff --git a/lib/content_encoding.h b/lib/content_encoding.h
index 3fadd2899..4cd52be62 100644
--- a/lib/content_encoding.h
+++ b/lib/content_encoding.h
@@ -7,7 +7,7 @@
  *                            | (__| |_| |  _ <| |___
  *                             \___|\___/|_| \_\_____|
  *
- * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
  *
  * This software is licensed as described in the file COPYING, which
  * you should have received as part of this distribution. The terms
@@ -23,26 +23,33 @@
  ***************************************************************************/
 #include "curl_setup.h"
 
-/*
- * Comma-separated list all supported Content-Encodings ('identity' is implied)
- */
-#ifdef HAVE_LIBZ
-#define ALL_CONTENT_ENCODINGS "deflate, gzip"
-/* force a cleanup */
-void Curl_unencode_cleanup(struct connectdata *conn);
-#else
-#define ALL_CONTENT_ENCODINGS "identity"
-#define Curl_unencode_cleanup(x) Curl_nop_stmt
-#endif
+/* Decoding writer. */
+typedef struct contenc_writer_s contenc_writer;
+typedef struct content_encoding_s content_encoding;
+
+struct contenc_writer_s {
+  const content_encoding *handler;  /* Encoding handler. */
+  contenc_writer *downstream;  /* Downstream writer. */
+  void *params;  /* Encoding-specific storage (variable length). */
+};
 
-CURLcode Curl_unencode_deflate_write(struct connectdata *conn,
-                                     struct SingleRequest *req,
-                                     ssize_t nread);
+/* Content encoding writer. */
+struct content_encoding_s {
+  const char *name;        /* Encoding name. */
+  const char *alias;       /* Encoding name alias. */
+  CURLcode (*init_writer)(struct connectdata *conn, contenc_writer *writer);
+  CURLcode (*unencode_write)(struct connectdata *conn, contenc_writer *writer,
+                             const char *buf, size_t nbytes);
+  void (*close_writer)(struct connectdata *conn, contenc_writer *writer);
+  size_t paramsize;
+};
 
-CURLcode
-Curl_unencode_gzip_write(struct connectdata *conn,
-                         struct SingleRequest *k,
-                         ssize_t nread);
 
+CURLcode Curl_build_unencoding_stack(struct connectdata *conn,
+                                     const char *enclist, int maybechunked);
+CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer,
+                             const char *buf, size_t nbytes);
+void Curl_unencode_cleanup(struct connectdata *conn);
+char *Curl_all_content_encodings(void);
 
 #endif /* HEADER_CURL_CONTENT_ENCODING_H */
diff --git a/lib/http.c b/lib/http.c
index 0716f8eea..def51abc3 100644
--- a/lib/http.c
+++ b/lib/http.c
@@ -3103,7 +3103,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data,
            !(conn->handler->protocol & CURLPROTO_RTSP) &&
            data->set.httpreq != HTTPREQ_HEAD) {
           /* On HTTP 1.1, when connection is not to get closed, but no
-             Content-Length nor Content-Encoding chunked have been
+             Content-Length nor Transfer-Encoding chunked have been
              received, according to RFC2616 section 4.4 point 5, we
              assume that the server will close the connection to
              signal the end of the document. */
@@ -3613,51 +3613,9 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data,
        * of chunks, and a chunk-data set to zero signals the
        * end-of-chunks. */
 
-      char *start;
-
-      /* Find the first non-space letter */
-      start = k->p + 18;
-
-      for(;;) {
-        /* skip whitespaces and commas */
-        while(*start && (ISSPACE(*start) || (*start == ',')))
-          start++;
-
-        if(checkprefix("chunked", start)) {
-          k->chunk = TRUE; /* chunks coming our way */
-
-          /* init our chunky engine */
-          Curl_httpchunk_init(conn);
-
-          start += 7;
-        }
-
-        if(k->auto_decoding)
-          /* TODO: we only support the first mentioned compression for now */
-          break;
-
-        if(checkprefix("identity", start)) {
-          k->auto_decoding = IDENTITY;
-          start += 8;
-        }
-        else if(checkprefix("deflate", start)) {
-          k->auto_decoding = DEFLATE;
-          start += 7;
-        }
-        else if(checkprefix("gzip", start)) {
-          k->auto_decoding = GZIP;
-          start += 4;
-        }
-        else if(checkprefix("x-gzip", start)) {
-          k->auto_decoding = GZIP;
-          start += 6;
-        }
-        else
-          /* unknown! */
-          break;
-
-      }
-
+      result = Curl_build_unencoding_stack(conn, k->p + 18, TRUE);
+      if(result)
+        return result;
     }
     else if(checkprefix("Content-Encoding:", k->p) &&
             data->set.str[STRING_ENCODING]) {
@@ -3668,21 +3626,9 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data,
        * 2616). zlib cannot handle compress.  However, errors are
        * handled further down when the response body is processed
        */
-      char *start;
-
-      /* Find the first non-space letter */
-      start = k->p + 17;
-      while(*start && ISSPACE(*start))
-        start++;
-
-      /* Record the content-encoding for later use */
-      if(checkprefix("identity", start))
-        k->auto_decoding = IDENTITY;
-      else if(checkprefix("deflate", start))
-        k->auto_decoding = DEFLATE;
-      else if(checkprefix("gzip", start)
-              || checkprefix("x-gzip", start))
-        k->auto_decoding = GZIP;
+      result = Curl_build_unencoding_stack(conn, k->p + 17, FALSE);
+      if(result)
+        return result;
     }
     else if(checkprefix("Content-Range:", k->p)) {
       /* Content-Range: bytes [num]-
diff --git a/lib/http_chunks.c b/lib/http_chunks.c
index 92d773112..161642969 100644
--- a/lib/http_chunks.c
+++ b/lib/http_chunks.c
@@ -187,49 +187,17 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
       piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize);
 
       /* Write the data portion available */
-#ifdef HAVE_LIBZ
-      switch(conn->data->set.http_ce_skip?
-             IDENTITY : data->req.auto_decoding) {
-      case IDENTITY:
-#endif
-        if(!k->ignorebody) {
-          if(!data->set.http_te_skip)
-            result = Curl_client_write(conn, CLIENTWRITE_BODY, datap,
-                                       piece);
-          else
-            result = CURLE_OK;
-        }
-#ifdef HAVE_LIBZ
-        break;
-
-      case DEFLATE:
-        /* update data->req.keep.str to point to the chunk data. */
-        data->req.str = datap;
-        result = Curl_unencode_deflate_write(conn, &data->req,
-                                             (ssize_t)piece);
-        break;
-
-      case GZIP:
-        /* update data->req.keep.str to point to the chunk data. */
-        data->req.str = datap;
-        result = Curl_unencode_gzip_write(conn, &data->req,
-                                          (ssize_t)piece);
-        break;
-
-      default:
-        failf(conn->data,
-              "Unrecognized content encoding type. "
-              "libcurl understands `identity', `deflate' and `gzip' "
-              "content encodings.");
-        return CHUNKE_BAD_ENCODING;
+      if(conn->data->set.http_ce_skip || !k->writer_stack) {
+        if(!k->ignorebody)
+          result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, piece);
       }
-#endif
+      else
+        result = Curl_unencode_write(conn, k->writer_stack, datap, piece);
 
       if(result)
         return CHUNKE_WRITE_ERROR;
 
       *wrote += piece;
-
       ch->datasize -= piece; /* decrease amount left to expect */
       datap += piece;    /* move read pointer forward */
       length -= piece;   /* decrease space left in this round */
diff --git a/lib/transfer.c b/lib/transfer.c
index 937477670..8f15b1a15 100644
--- a/lib/transfer.c
+++ b/lib/transfer.c
@@ -779,48 +779,19 @@ static CURLcode readwrite_data(struct Curl_easy *data,
              in http_chunks.c.
              Make sure that ALL_CONTENT_ENCODINGS contains all the
              encodings handled here. */
-#ifdef HAVE_LIBZ
-          switch(conn->data->set.http_ce_skip ?
-                 IDENTITY : k->auto_decoding) {
-          case IDENTITY:
-#endif
-            /* This is the default when the server sends no
-               Content-Encoding header. See Curl_readwrite_init; the
-               memset() call initializes k->auto_decoding to zero. */
+          if(conn->data->set.http_ce_skip || !k->writer_stack) {
             if(!k->ignorebody) {
-
 #ifndef CURL_DISABLE_POP3
-              if(conn->handler->protocol&PROTO_FAMILY_POP3)
+              if(conn->handler->protocol & PROTO_FAMILY_POP3)
                 result = Curl_pop3_write(conn, k->str, nread);
               else
 #endif /* CURL_DISABLE_POP3 */
-
                 result = Curl_client_write(conn, CLIENTWRITE_BODY, k->str,
                                            nread);
             }
-#ifdef HAVE_LIBZ
-            break;
-
-          case DEFLATE:
-            /* Assume CLIENTWRITE_BODY; headers are not encoded. */
-            if(!k->ignorebody)
-              result = Curl_unencode_deflate_write(conn, k, nread);
-            break;
-
-          case GZIP:
-            /* Assume CLIENTWRITE_BODY; headers are not encoded. */
-            if(!k->ignorebody)
-              result = Curl_unencode_gzip_write(conn, k, nread);
-            break;
-
-          default:
-            failf(data, "Unrecognized content encoding type. "
-                  "libcurl understands `identity', `deflate' and `gzip' "
-                  "content encodings.");
-            result = CURLE_BAD_CONTENT_ENCODING;
-            break;
           }
-#endif
+          else
+            result = Curl_unencode_write(conn, k->writer_stack, k->str, nread);
         }
         k->badheader = HEADER_NORMAL; /* taken care of now */
 
diff --git a/lib/url.c b/lib/url.c
index 374ac6cfa..aeb0c9027 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -1011,9 +1011,17 @@ CURLcode Curl_setopt(struct Curl_easy *data, CURLoption option,
      *
      */
     argptr = va_arg(param, char *);
-    result = setstropt(&data->set.str[STRING_ENCODING],
-                       (argptr && !*argptr)?
-                       ALL_CONTENT_ENCODINGS: argptr);
+    if(argptr && !*argptr) {
+      argptr = Curl_all_content_encodings();
+      if(!argptr)
+        result = CURLE_OUT_OF_MEMORY;
+      else {
+        result = setstropt(&data->set.str[STRING_ENCODING], argptr);
+        free(argptr);
+      }
+    }
+    else
+      result = setstropt(&data->set.str[STRING_ENCODING], argptr);
     break;
 
   case CURLOPT_TRANSFER_ENCODING:
diff --git a/lib/urldata.h b/lib/urldata.h
index e5aae1430..cfdd8d028 100644
--- a/lib/urldata.h
+++ b/lib/urldata.h
@@ -464,16 +464,6 @@ struct hostname {
 #define KEEP_SENDBITS (KEEP_SEND | KEEP_SEND_HOLD | KEEP_SEND_PAUSE)
 
 
-#ifdef HAVE_LIBZ
-typedef enum {
-  ZLIB_UNINIT,          /* uninitialized */
-  ZLIB_INIT,            /* initialized */
-  ZLIB_GZIP_HEADER,     /* reading gzip header */
-  ZLIB_GZIP_INFLATING,  /* inflating gzip stream */
-  ZLIB_INIT_GZIP        /* initialized in transparent gzip mode */
-} zlibInitState;
-#endif
-
 #ifdef CURLRES_ASYNCH
 struct Curl_async {
   char *hostname;
@@ -561,18 +551,8 @@ struct SingleRequest {
   enum expect100 exp100;        /* expect 100 continue state */
   enum upgrade101 upgr101;      /* 101 upgrade state */
 
-  int auto_decoding;            /* What content encoding. sec 3.5, RFC2616. */
-
-#define IDENTITY 0              /* No encoding */
-#define DEFLATE 1               /* zlib deflate [RFC 1950 & 1951] */
-#define GZIP 2                  /* gzip algorithm [RFC 1952] */
-
-#ifdef HAVE_LIBZ
-  zlibInitState zlib_init;      /* possible zlib init state;
-                                   undefined if Content-Encoding header. */
-  z_stream z;                   /* State structure for zlib. */
-#endif
-
+  struct contenc_writer_s *writer_stack;  /* Content unencoding stack. */
+                                          /* See sec 3.5, RFC2616. */
   time_t timeofdoc;
   long bodywrites;
author	Patrick Monnerat <patrick@monnerat.net>	2017-11-05 15:09:48 +0100
committer	Patrick Monnerat <patrick@monnerat.net>	2017-11-05 15:09:48 +0100
commit	dbcced8e32b50c068ac297106f0502ee200a1ebd (patch)
tree	0fa1ce28f33ab968792e21264e9a9318807d9f9b /lib
parent	462f3cac34c09b9c28b449258e71dc37171dc604 (diff)