diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Makefile.am | 6 | ||||
-rw-r--r-- | lib/README.encoding | 53 | ||||
-rw-r--r-- | lib/content_encoding.c | 122 | ||||
-rw-r--r-- | lib/content_encoding.h | 34 | ||||
-rw-r--r-- | lib/http.c | 3 | ||||
-rw-r--r-- | lib/http_chunks.c | 29 | ||||
-rw-r--r-- | lib/http_chunks.h | 1 | ||||
-rw-r--r-- | lib/transfer.c | 70 | ||||
-rw-r--r-- | lib/url.c | 17 | ||||
-rw-r--r-- | lib/urldata.h | 22 |
10 files changed, 350 insertions, 7 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am index 12a582d92..cf0258f2c 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -7,7 +7,7 @@ AUTOMAKE_OPTIONS = foreign nostdinc EXTRA_DIST = getdate.y Makefile.b32 Makefile.b32.resp Makefile.m32 \ Makefile.vc6 Makefile.riscos libcurl.def dllinit.c curllib.dsp \ curllib.dsw config-vms.h config-win32.h config-riscos.h config-mac.h \ - config.h.in ca-bundle.crt + config.h.in ca-bundle.crt README.encoding lib_LTLIBRARIES = libcurl.la @@ -45,6 +45,8 @@ libcurl_la_LDFLAGS = -version-info 2:2:0 # set age to 0. # +# I could not get conditional compilation using LIBADD or LDADD and +# EXTRA_..._SOURCES to work. 08/28/02 jhrg libcurl_la_SOURCES = \ arpa_telnet.h file.c getpass.h netrc.h timeval.c \ base64.c file.h hostip.c progress.c timeval.h \ @@ -60,7 +62,7 @@ escape.h getpass.c netrc.c telnet.h \ getinfo.c getinfo.h transfer.c strequal.c strequal.h easy.c \ security.h security.c krb4.c krb4.h memdebug.c memdebug.h inet_ntoa_r.h \ http_chunks.c http_chunks.h strtok.c strtok.h connect.c connect.h \ -llist.c llist.h hash.c hash.h multi.c share.c share.h +llist.c llist.h hash.c hash.h multi.c content_encoding.c noinst_HEADERS = setup.h transfer.h diff --git a/lib/README.encoding b/lib/README.encoding new file mode 100644 index 000000000..ef5c8036f --- /dev/null +++ b/lib/README.encoding @@ -0,0 +1,53 @@ + + Content Encoding Support for libcurl + +* About content encodings: + +HTTP/1.1 [RFC 2616] specifies that a client may request that a server encode +its response. This is usually used to compress a response using one of a set +of commonly available compression techniques. These schemes are `deflate' +(the zlib algorithm), `gzip' and `compress' [sec 3.5, RFC 2616]. A client +requests that the server perform an encoding by including an Accept-Encoding +header in the request document. The value of the header should be one of the +recognized tokens `deflate', ... 
(there's a way to register new +schemes/tokens, see sec 3.5 of the spec). A server MAY honor the client's +encoding request. When a response is encoded, the server includes a +Content-Encoding header in the response. The value of the Content-Encoding +header indicates which scheme was used to encode the data. + +A client may tell a server that it can understand several different encoding +schemes. In this case the server may choose any one of those and use it to +encode the response (indicating which one using the Content-Encoding header). +It's also possible for a client to attach priorities to different schemes so +that the server knows which it prefers. See sec 14.3 of RFC 2616 for more +information on the Accept-Encoding header. + +* Current support for content encoding: + +I added support for the 'deflate' content encoding to both libcurl and curl. +Both regular and chunked transfers should work although I've tested only the +former. The library zlib is required for this feature. Places where I +modified the source code are commented and typically include my initials and +the date (e.g., 08/29/02 jhrg). + +* The libcurl interface: + +To cause libcurl to request a content encoding use: + + curl_easy_setopt(curl, CURLOPT_ENCODING, <string>) + +where <string> is the intended value of the Accept-Encoding header. + +Currently, libcurl only understands how to process responses that use the +`deflate' Content-Encoding, so the only value for CURLOPT_ENCODING that will +work (besides "identity," which does nothing) is "deflate." If a response is +encoded using either the `gzip' or `compress' methods, libcurl will return an +error indicating that the response could not be decoded. If <string> is null +or empty no Accept-Encoding header is generated. + +* The curl interface: + +Use the --compressed option with curl to cause it to ask servers to compress +responses using deflate. 
+ +James Gallagher <jgallagher@gso.uri.edu> diff --git a/lib/content_encoding.c b/lib/content_encoding.c new file mode 100644 index 000000000..51b59c584 --- /dev/null +++ b/lib/content_encoding.c @@ -0,0 +1,122 @@ +/***************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2002, Daniel Stenberg, <daniel@haxx.se>, et al. + * + * In order to be useful for every potential user, curl and libcurl are + * dual-licensed under the MPL and the MIT/X-derivate licenses. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the MPL or the MIT/X-derivate + * licenses. You may pick one of these licenses. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + * $Id$ + *****************************************************************************/ + +#include "setup.h" + +#ifdef HAVE_LIBZ + +#include "urldata.h" +#include <curl/curl.h> +#include <curl/types.h> +#include "sendf.h" + +#define DSIZ 4096 /* buffer size for decompressed data */ + + +static CURLcode +process_zlib_error(struct SessionHandle *data, z_stream *z) +{ + if (z->msg) + failf (data, "Error while processing content unencoding.\n%s", + z->msg); + else + failf (data, "Error while processing content unencoding.\n" + "Unknown failure within decompression software."); + + return CURLE_BAD_CONTENT_ENCODING; +} + +static CURLcode +exit_zlib(z_stream *z, bool *zlib_init, CURLcode result) +{ + inflateEnd(z); + *zlib_init = 0; + return result; +} + +CURLcode +Curl_unencode_deflate_write(struct SessionHandle *data, + struct Curl_transfer_keeper *k, + ssize_t nread) +{ + int status; /* zlib status */ + int result; /* Curl_client_write status */ + char decomp[DSIZ]; /* Put the decompressed data here. */ + z_stream *z = &k->z; /* zlib state structure */ + + /* Initialize zlib? */ + if (!k->zlib_init) { + z->zalloc = (alloc_func)Z_NULL; + z->zfree = (free_func)Z_NULL; + z->opaque = 0; /* of dubious use 08/27/02 jhrg */ + if (inflateInit(z) != Z_OK) + return process_zlib_error(data, z); + k->zlib_init = 1; + } + + /* Set the compressed input when this function is called */ + z->next_in = (Bytef *)k->str; + z->avail_in = nread; + + /* because the buffer size is fixed, iteratively decompress + and transfer to the client via client_write. 
*/ + for (;;) { + /* (re)set buffer for decompressed output for every iteration */ + z->next_out = (Bytef *)&decomp[0]; + z->avail_out = DSIZ; + + status = inflate(z, Z_SYNC_FLUSH); + if (status == Z_OK || status == Z_STREAM_END) { + result = Curl_client_write(data, CLIENTWRITE_BODY, decomp, + DSIZ - z->avail_out); + /* if !CURLE_OK, clean up, return */ + if (result) { + return exit_zlib(z, &k->zlib_init, result); + } + + /* Done?; clean up, return */ + if (status == Z_STREAM_END) { + if (inflateEnd(z) == Z_OK) + return exit_zlib(z, &k->zlib_init, result); + else + return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z)); + } + + /* Done with these bytes, exit */ + if (status == Z_OK && z->avail_in == 0 && z->avail_out > 0) + return result; + } + else { /* Error; exit loop, handle below */ + return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z)); + } + } +} +#endif /* HAVE_LIBZ */ + +/* + * local variables: + * eval: (load-file "../curl-mode.el") + * end: + * vim600: fdm=marker + * vim: et sw=2 ts=2 sts=2 tw=78 + */ diff --git a/lib/content_encoding.h b/lib/content_encoding.h new file mode 100644 index 000000000..91ba7d110 --- /dev/null +++ b/lib/content_encoding.h @@ -0,0 +1,34 @@ +/***************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2002, Daniel Stenberg, <daniel@haxx.se>, et al. + * + * In order to be useful for every potential user, curl and libcurl are + * dual-licensed under the MPL and the MIT/X-derivate licenses. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the MPL or the MIT/X-derivate + * licenses. You may pick one of these licenses. 
+ * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + * $Id$ + *****************************************************************************/ + +CURLcode Curl_unencode_deflate_write(struct SessionHandle *data, + struct Curl_transfer_keeper *k, + ssize_t nread); + +/* + * local variables: + * eval: (load-file "../curl-mode.el") + * end: + * vim600: fdm=marker + * vim: et sw=2 ts=2 sts=2 tw=78 + */ diff --git a/lib/http.c b/lib/http.c index 08a5536dc..8957968d8 100644 --- a/lib/http.c +++ b/lib/http.c @@ -707,6 +707,7 @@ CURLcode Curl_http(struct connectdata *conn) "%s" /* host */ "%s" /* pragma */ "%s" /* accept */ + "%s" /* accept-encoding */ "%s", /* referer */ data->set.customrequest?data->set.customrequest: @@ -727,6 +728,8 @@ CURLcode Curl_http(struct connectdata *conn) (conn->allocptr.host?conn->allocptr.host:""), /* Host: host */ http->p_pragma?http->p_pragma:"", http->p_accept?http->p_accept:"", + (data->set.encoding && *data->set.encoding && conn->allocptr.accept_encoding)? + conn->allocptr.accept_encoding:"", /* 08/28/02 jhrg */ (data->change.referer && conn->allocptr.ref)?conn->allocptr.ref:"" /* Referer: <data> <CRLF> */ ); diff --git a/lib/http_chunks.c b/lib/http_chunks.c index 939e86a91..784d231fe 100644 --- a/lib/http_chunks.c +++ b/lib/http_chunks.c @@ -33,6 +33,8 @@ #include "urldata.h" /* it includes http_chunks.h */ #include "sendf.h" /* for the client write stuff */ +#include "content_encoding.h" /* 08/29/02 jhrg */ + #define _MPRINTF_REPLACE /* use our functions only */ #include <curl/mprintf.h> @@ -172,7 +174,32 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, piece = (ch->datasize >= length)?length:ch->datasize; /* Write the data portion available */ - result = Curl_client_write(conn->data, CLIENTWRITE_BODY, datap, piece); + /* Added content-encoding here; untested but almost identical to the + tested code in transfer.c. 
08/29/02 jhrg */ +#ifdef HAVE_LIBZ + switch (conn->keep.content_encoding) { + case IDENTITY: +#endif + result = Curl_client_write(conn->data, CLIENTWRITE_BODY, datap, + piece); +#ifdef HAVE_LIBZ + break; + + case DEFLATE: + result = Curl_unencode_deflate_write(conn->data, &conn->keep, piece); + break; + + case GZIP: + case COMPRESS: + default: + failf (conn->data, + "Unrecognized content encoding type. " + "libcurl understands `identity' and `deflate' " + "content encodings."); + return CHUNKE_BAD_ENCODING; + } +#endif + if(result) return CHUNKE_WRITE_ERROR; *wrote += piece; diff --git a/lib/http_chunks.h b/lib/http_chunks.h index 48bdbd37b..482612d25 100644 --- a/lib/http_chunks.h +++ b/lib/http_chunks.h @@ -73,6 +73,7 @@ typedef enum { CHUNKE_BAD_CHUNK, CHUNKE_WRITE_ERROR, CHUNKE_STATE_ERROR, + CHUNKE_BAD_ENCODING, CHUNKE_LAST } CHUNKcode; diff --git a/lib/transfer.c b/lib/transfer.c index 20088b17d..2567ca91b 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -82,6 +82,8 @@ #include <curl/types.h> #include "netrc.h" +#include "content_encoding.h" /* content encoding support. 08/27/02 jhrg */ + #include "hostip.h" #include "transfer.h" #include "sendf.h" @@ -368,7 +370,7 @@ CURLcode Curl_readwrite(struct connectdata *conn, * we got: "417 Expectation Failed" this means: * we have made a HTTP call and our Expect Header * seems to cause a problem => abort the write operations - * (or prevent them from starting + * (or prevent them from starting). */ k->write_after_100_header = FALSE; k->keepon &= ~KEEP_WRITE; @@ -575,6 +577,34 @@ CURLcode Curl_readwrite(struct connectdata *conn, /* init our chunky engine */ Curl_httpchunk_init(conn); } + else if (strnequal("Content-Encoding:", k->p, 17) && + data->set.encoding) { + /* + * Process Content-Encoding. Look for the values: identity, gzip, + * deflate, compress, x-gzip and x-compress. x-gzip and + * x-compress are the same as gzip and compress. (Sec 3.5 RFC + * 2616). 
zlib cannot handle compress, and gzip is not currently + * implemented. However, errors are handled further down when the + * response body is processed 08/27/02 jhrg */ + char *start; + + /* Find the first non-space letter */ + for(start=k->p+18; + *start && isspace((int)*start); + start++); + + /* Record the content-encoding for later use. 08/27/02 jhrg */ + if (strnequal("identity", start, 8)) + k->content_encoding = IDENTITY; + else if (strnequal("deflate", start, 7)) + k->content_encoding = DEFLATE; + else if (strnequal("gzip", start, 4) + || strnequal("x-gzip", start, 6)) + k->content_encoding = GZIP; + else if (strnequal("compress", start, 8) + || strnequal("x-compress", start, 10)) + k->content_encoding = COMPRESS; + } else if (strnequal("Content-Range:", k->p, 14)) { if (sscanf (k->p+14, " bytes %d-", &k->offset) || sscanf (k->p+14, " bytes: %d-", &k->offset)) { @@ -737,6 +767,7 @@ CURLcode Curl_readwrite(struct connectdata *conn, * the name says read, this function both reads and writes away * the data. The returned 'nread' holds the number of actual * data it wrote to the client. */ + /* Handle chunking here? 08/27/02 jhrg */ CHUNKcode res = Curl_httpchunk_read(conn, k->str, nread, &nread); @@ -776,8 +807,39 @@ CURLcode Curl_readwrite(struct connectdata *conn, if(!conn->bits.chunk && nread) { /* If this is chunky transfer, it was already written */ - result = Curl_client_write(data, CLIENTWRITE_BODY, k->str, - nread); + + /* This switch handles various content encodings. If there's an + error here, be sure to check over the almost identical code in + http_chunks.c. 08/29/02 jhrg */ +#ifdef HAVE_LIBZ + switch (k->content_encoding) { + case IDENTITY: +#endif + /* This is the default when the server sends no + Content-Encoding header. See Curl_readwrite_init; the + memset() call initializes k->content_encoding to zero. 
+ 08/28/02 jhrg */ + result = Curl_client_write(data, CLIENTWRITE_BODY, k->str, + nread); +#ifdef HAVE_LIBZ + break; + + case DEFLATE: + /* Assume CLIENTWRITE_BODY; headers are not encoded. */ + result = Curl_unencode_deflate_write(data, k, nread); + break; + + case GZIP: /* FIXME 08/27/02 jhrg */ + case COMPRESS: + default: + failf (data, "Unrecognized content encoding type. " + "libcurl understands `identity' and `deflate' " + "content encodings."); + result = CURLE_BAD_CONTENT_ENCODING; + break; + } +#endif + if(result) return result; } @@ -954,6 +1016,8 @@ CURLcode Curl_readwrite_init(struct connectdata *conn) struct SessionHandle *data = conn->data; struct Curl_transfer_keeper *k = &conn->keep; + /* NB: the content encoding software depends on this initialization of + Curl_transfer_keeper. 08/28/02 jhrg */ memset(k, 0, sizeof(struct Curl_transfer_keeper)); k->start = Curl_tvnow(); /* start time */ @@ -303,7 +303,7 @@ CURLcode Curl_open(struct SessionHandle **curl) data->set.ssl.verifyhost = 2; #ifdef CURL_CA_BUNDLE /* This is our prefered CA cert bundle since install time */ - data->set.ssl.CAfile = CURL_CA_BUNDLE; + data->set.ssl.CAfile = (char *)CURL_CA_BUNDLE; #endif @@ -781,6 +781,13 @@ CURLcode Curl_setopt(struct SessionHandle *data, CURLoption option, ...) */ data->set.useragent = va_arg(param, char *); break; + case CURLOPT_ENCODING: + /* + * String to use as the value of Accept-Encoding header. 
08/28/02 jhrg + */ + data->set.encoding = va_arg(param, char *); + break; + case CURLOPT_USERPWD: /* * user:password to use in the operation @@ -1127,6 +1134,8 @@ CURLcode Curl_disconnect(struct connectdata *conn) free(conn->allocptr.uagent); if(conn->allocptr.userpwd) free(conn->allocptr.userpwd); + if(conn->allocptr.accept_encoding) + free(conn->allocptr.accept_encoding); if(conn->allocptr.rangeline) free(conn->allocptr.rangeline); if(conn->allocptr.ref) @@ -2715,6 +2724,12 @@ static CURLcode CreateConnection(struct SessionHandle *data, } } + if(data->set.encoding) { + if(conn->allocptr.accept_encoding) + free(conn->allocptr.accept_encoding); + conn->allocptr.accept_encoding = + aprintf("Accept-Encoding: %s\015\012", data->set.encoding); + } conn->bytecount = 0; conn->headerbytecount = 0; diff --git a/lib/urldata.h b/lib/urldata.h index 2f183711e..74a7c492e 100644 --- a/lib/urldata.h +++ b/lib/urldata.h @@ -82,6 +82,10 @@ #include "http_chunks.h" /* for the structs and enum stuff */ +#ifdef HAVE_ZLIB_H +#include <zlib.h> /* for content-encoding 08/28/02 jhrg */ +#endif + /* Download buffer size, keep it fairly big for speed reasons */ #define BUFSIZE CURL_MAX_WRITE_SIZE @@ -241,6 +245,20 @@ struct Curl_transfer_keeper { we received a 100-continue/timeout or directly */ + /* for content-encoding 08/28/02 jhrg */ + int content_encoding; /* What content encoding. sec 3.5, RFC2616. */ + +#define IDENTITY 0 /* No encoding */ +#define DEFLATE 1 /* zlib deflate [RFC 1950 & 1951] */ +#define GZIP 2 /* gzip algorithm [RFC 1952] */ +#define COMPRESS 3 /* Not handled, added for completeness */ + +#ifdef HAVE_LIBZ + bool zlib_init; /* True if zlib already initialized; + undefined if Content-Encoding header. */ + z_stream z; /* State structure for zlib. */ +#endif + /* for the low speed checks: */ time_t timeofdoc; long bodywrites; @@ -365,6 +383,7 @@ struct connectdata { struct dynamically_allocated_data { char *proxyuserpwd; /* free later if not NULL! 
*/ char *uagent; /* free later if not NULL! */ + char *accept_encoding; /* free later if not NULL! 08/28/02 jhrg */ char *userpwd; /* free later if not NULL! */ char *rangeline; /* free later if not NULL! */ char *ref; /* free later if not NULL! */ @@ -424,6 +443,8 @@ struct connectdata { supposed to be called, after ->curl_do() */ }; +/* The end of connectdata. 08/27/02 jhrg */ + /* * Struct to keep statistical and informational data. */ @@ -593,6 +614,7 @@ struct UserDefined { bool free_referer; /* set TRUE if 'referer' points to a string we allocated */ char *useragent; /* User-Agent string */ + char *encoding; /* Accept-Encoding string 08/28/02 jhrg */ char *postfields; /* if POST, set the fields' values here */ size_t postfieldsize; /* if POST, this might have a size to use instead of strlen(), and then the data *may* be binary (contain |