From f19d333ef6b067809cb2b0c153fbd3f5db4321a1 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Wed, 21 Feb 2007 21:59:40 +0000 Subject: - Ravi Pratap provided work on libcurl making pipelining more robust and fixing some bugs: o Don't mix GET and POST requests in a pipeline o Fix the order in which requests are dispatched from the pipeline o Fixed several curl bugs with pipelining when the server is returning chunked encoding: * Added states to chunked parsing for final CRLF * Rewind buffer after parsing chunk with data remaining * Moved chunked header initializing to a spot just before receiving headers --- CHANGES | 17 +++++++++++++ RELEASE-NOTES | 6 +++-- lib/http_chunks.c | 72 +++++++++++++++++++++++++++++++++++++++++++------------ lib/http_chunks.h | 6 ++++- lib/multi.c | 40 ++++++++++++++++++++----------- lib/transfer.c | 45 +++++++++++++++++++++++++++------- lib/transfer.h | 1 + lib/url.c | 42 +++++++++++++++++++++++++------- tests/data/test34 | 2 +- 9 files changed, 182 insertions(+), 49 deletions(-) diff --git a/CHANGES b/CHANGES index fb23d4b62..dc3dc171b 100644 --- a/CHANGES +++ b/CHANGES @@ -6,6 +6,23 @@ Changelog +Daniel (21 February 2007) +- Ravi Pratap provided work on libcurl making pipelining more robust and + fixing some bugs: + o Don't mix GET and POST requests in a pipeline + o Fix the order in which requests are dispatched from the pipeline + o Fixed several curl bugs with pipelining when the server is returning + chunked encoding: + * Added states to chunked parsing for final CRLF + * Rewind buffer after parsing chunk with data remaining + * Moved chunked header initializing to a spot just before receiving + headers + +Daniel (20 February 2007) +- Linus Nielsen Feltzing changed the CURLOPT_FTP_SSL_CCC option to handle + active and passive CCC shutdown and added the --ftp-ssl-ccc-mode command + line option. + Daniel (19 February 2007) - Ian Turner fixed the libcurl.m4 macro's support for --with-libcurl. diff --git a/RELEASE-NOTES b/RELEASE-NOTES index dd1e7955c..4da85fa17 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -2,7 +2,7 @@ Curl and libcurl 7.16.2 Public curl release number: 98 Releases counted from the very beginning: 125 - Available command line options: 116 + Available command line options: 117 Available curl_easy_setopt() options: 141 Number of public functions in libcurl: 54 Amount of public web site mirrors: 39 @@ -32,6 +32,7 @@ This release includes the following bugfixes: o libcurl.m4's --with-libcurl is improved o curl-config --libs and libcurl.pc no longer list unnecessary dependencies o fixed an issue with CCC not working on some servers + o several HTTP pipelining problems This release includes the following known bugs: @@ -50,6 +51,7 @@ advice from friends like these: Yang Tse, Manfred Schwarb, Michael Wallner, Jeff Pohlmeyer, Shmulik Regev, Rob Crittenden, Robert A. Monat, Dan Fandrich, Duncan Mac-Vicar Prett, - Michal Marek, Robson Braga Araujo, Ian Turner + Michal Marek, Robson Braga Araujo, Ian Turner, Linus Nielsen Feltzing, + Ravi Pratap Thanks! (and sorry if I forgot to mention someone) diff --git a/lib/http_chunks.c b/lib/http_chunks.c index 36bee789c..4b416c136 100644 --- a/lib/http_chunks.c +++ b/lib/http_chunks.c @@ -181,19 +181,24 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, if(0 == ch->datasize) { if (conn->bits.trailerHdrPresent!=TRUE) { /* No Trailer: header found - revert to original Curl processing */ - ch->state = CHUNK_STOP; - if (1 == length) { - /* This is the final byte, return right now */ - return CHUNKE_STOP; - } + ch->state = CHUNK_STOPCR; + + /* We need to increment the datap here since we bypass the + increment below with the immediate break */ + length--; + datap++; + + /* This is the final byte, continue to read the final CRLF */ + break; } else { ch->state = CHUNK_TRAILER; /* attempt to read trailers */ conn->trlPos=0; } } - else + else { ch->state = CHUNK_DATA; + } } else /* previously we got a fake CR, go back to CR waiting! */ @@ -270,8 +275,9 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, datap++; length--; } - else + else { return CHUNKE_BAD_CHUNK; + } break; case CHUNK_POSTLF: @@ -284,8 +290,10 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, datap++; length--; } - else + else { return CHUNKE_BAD_CHUNK; + } + break; case CHUNK_TRAILER: @@ -331,7 +339,17 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, conn->trailer[conn->trlPos]=0; if (conn->trlPos==2) { ch->state = CHUNK_STOP; - return CHUNKE_STOP; + datap++; + length--; + + /* + * Note that this case skips over the final STOP states since we've + * already read the final CRLF and need to return + */ + + ch->dataleft = length; + + return CHUNKE_STOP; /* return stop */ } else { #ifdef CURL_DOES_CONVERSIONS @@ -346,8 +364,8 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, } #endif /* CURL_DOES_CONVERSIONS */ if ( !data->set.http_te_skip ) - Curl_client_write(conn, CLIENTWRITE_HEADER, - conn->trailer, conn->trlPos); + Curl_client_write(conn, CLIENTWRITE_HEADER, + conn->trailer, conn->trlPos); } ch->state = CHUNK_TRAILER; conn->trlPos=0; @@ -358,11 +376,35 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, return CHUNKE_BAD_CHUNK; break; + case CHUNK_STOPCR: + /* Read the final CRLF that ends all chunk bodies */ + + if(*datap == 0x0d) { + ch->state = CHUNK_STOP; + datap++; + length--; + } + else { + return CHUNKE_BAD_CHUNK; + } + break; + case CHUNK_STOP: - /* If we arrive here, there is data left in the end of the buffer - even if there's no more chunks to read */ - ch->dataleft = length; - return CHUNKE_STOP; /* return stop */ + if (*datap == 0x0a) { + datap++; + length--; + + /* Record the length of any data left in the end of the buffer + even if there's no more chunks to read */ + + ch->dataleft = length; + return CHUNKE_STOP; /* return stop */ + } + else { + return CHUNKE_BAD_CHUNK; + } + break; + default: return CHUNKE_STATE_ERROR; } diff --git a/lib/http_chunks.h b/lib/http_chunks.h index 211818ab7..c478799c1 100644 --- a/lib/http_chunks.h +++ b/lib/http_chunks.h @@ -7,7 +7,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2005, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2007, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -56,6 +56,10 @@ typedef enum { CRLF combination marks the end of a chunk */ CHUNK_POSTLF, + /* Each Chunk body should end with a CRLF. Read a CR and nothing else, + then move to CHUNK_STOP */ + CHUNK_STOPCR, + /* This is mainly used to really mark that we're out of the game. NOTE: that there's a 'dataleft' field in the struct that will tell how many bytes that were not passed to the client in the end of the last diff --git a/lib/multi.c b/lib/multi.c index 49db7d064..2a7f50baa 100644 --- a/lib/multi.c +++ b/lib/multi.c @@ -354,6 +354,12 @@ CURLM *curl_multi_init(void) return NULL; } + /* Let's make the doubly-linked list a circular list. This makes + the linked list code simpler and allows inserting at the end + with less work (we didn't keep a tail pointer before). */ + multi->easy.next = &multi->easy; + multi->easy.prev = &multi->easy; + return (CURLM *) multi; } @@ -435,18 +441,21 @@ CURLMcode curl_multi_add_handle(CURLM *multi_handle, /* Make sure the type is setup correctly */ easy->easy_handle->state.connc->type = CONNCACHE_MULTI; - /* We add this new entry first in the list. We make our 'next' point to the - previous next and our 'prev' point back to the 'first' struct */ - easy->next = multi->easy.next; - easy->prev = &multi->easy; + /* This adds the new entry at the back of the list + to try and maintain a FIFO queue so the pipelined + requests are in order. */ + + /* We add this new entry last in the list. We make our 'next' point to the + 'first' struct and our 'prev' point to the previous 'prev' */ + easy->next = &multi->easy; + easy->prev = multi->easy.prev; - /* make 'easy' the first node in the chain */ - multi->easy.next = easy; + /* make 'easy' the last node in the chain */ + multi->easy.prev = easy; - /* if there was a next node, make sure its 'prev' pointer links back to + /* if there was a prev node, make sure its 'next' pointer links to the new node */ - if(easy->next) - easy->next->prev = easy; + easy->prev->next = easy; Curl_easy_addmulti(easy_handle, multi_handle); @@ -506,7 +515,7 @@ CURLMcode curl_multi_remove_handle(CURLM *multi_handle, /* scan through the list and remove the 'curl_handle' */ easy = multi->easy.next; - while(easy) { + while(easy != &multi->easy) { if(easy->easy_handle == (struct SessionHandle *)curl_handle) break; easy=easy->next; @@ -726,7 +735,7 @@ CURLMcode curl_multi_fdset(CURLM *multi_handle, return CURLM_BAD_HANDLE; easy=multi->easy.next; - while(easy) { + while(easy != &multi->easy) { bitmap = multi_getsock(easy, sockbunch, MAX_SOCKSPEREASYHANDLE); for(i=0; i< MAX_SOCKSPEREASYHANDLE; i++) { @@ -1092,6 +1101,9 @@ static CURLMcode multi_runsingle(struct Curl_multi *multi, easy->easy_conn->recv_pipe); multistate(easy, CURLM_STATE_WAITPERFORM); result = CURLM_CALL_MULTI_PERFORM; + + Curl_pre_readwrite(easy->easy_conn); + break; case CURLM_STATE_WAITPERFORM: @@ -1313,7 +1325,7 @@ CURLMcode curl_multi_perform(CURLM *multi_handle, int *running_handles) return CURLM_BAD_HANDLE; easy=multi->easy.next; - while(easy) { + while(easy != &multi->easy) { CURLMcode result; if (easy->easy_handle->state.cancelled && @@ -1409,7 +1421,7 @@ CURLMcode curl_multi_cleanup(CURLM *multi_handle) /* remove all easy handles */ easy = multi->easy.next; - while(easy) { + while(easy != &multi->easy) { nexteasy=easy->next; if(easy->easy_handle->dns.hostcachetype == HCACHE_MULTI) { /* clear out the usage of the shared DNS cache */ @@ -1588,7 +1600,7 @@ static CURLMcode multi_socket(struct Curl_multi *multi, /* walk through each easy handle and do the socket state change magic and callbacks */ easyp=multi->easy.next; - while(easyp) { + while(easyp != &multi->easy) { singlesocket(multi, easyp); easyp = easyp->next; } diff --git a/lib/transfer.c b/lib/transfer.c index 7fb08883a..324aba260 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -1214,11 +1214,11 @@ CURLcode Curl_readwrite(struct connectdata *conn, #ifndef CURL_DISABLE_HTTP if(conn->bits.chunk) { /* - * Bless me father for I have sinned. Here comes a chunked - * transfer flying and we need to decode this properly. While - * the name says read, this function both reads and writes away - * the data. The returned 'nread' holds the number of actual - * data it wrote to the client. */ + * Here comes a chunked transfer flying and we need to decode this + * properly. While the name says read, this function both reads + * and writes away the data. The returned 'nread' holds the number + * of actual data it wrote to the client. + */ CHUNKcode res = Curl_httpchunk_read(conn, k->str, nread, &nread); @@ -1232,12 +1232,22 @@ CURLcode Curl_readwrite(struct connectdata *conn, return CURLE_RECV_ERROR; } else if(CHUNKE_STOP == res) { + size_t dataleft; /* we're done reading chunks! */ k->keepon &= ~KEEP_READ; /* read no more */ /* There are now possibly N number of bytes at the end of the - str buffer that weren't written to the client, but we don't - care about them right now. */ + str buffer that weren't written to the client. + + We DO care about this data if we are pipelining. + Push it back to be read on the next pass. */ + + dataleft = data->reqdata.proto.http->chunk.dataleft; + if (dataleft != 0) { + infof(conn->data, "Leftovers after chunking. " + " Rewinding %d bytes\n",dataleft); + read_rewind(conn, dataleft); + } } /* If it returned OK, we just keep going */ } @@ -1691,6 +1701,23 @@ CURLcode Curl_readwrite_init(struct connectdata *conn) return CURLE_OK; } +/* + * Curl_readwrite may get called multiple times. This function is called + * immediately before the first Curl_readwrite. Note that this can't be moved + * to Curl_readwrite_init since that function can get called while another + * pipeline request is in the middle of receiving data. + * + * We init chunking and trailer bits to their default values here immediately + * before receiving any header data for the current request in the pipeline. + */ +void Curl_pre_readwrite(struct connectdata *conn) +{ + DEBUGF(infof(conn->data, "Pre readwrite setting chunky header " + "values to default\n")); + conn->bits.chunk=FALSE; + conn->bits.trailerHdrPresent=FALSE; +} + /* * Curl_single_getsock() gets called by the multi interface code when the app * has requested to get the sockets for the current connection. This function @@ -1756,10 +1783,12 @@ Transfer(struct connectdata *conn) struct Curl_transfer_keeper *k = &data->reqdata.keep; bool done=FALSE; - if(!(conn->protocol & PROT_FILE)) + if(!(conn->protocol & PROT_FILE)) { /* Only do this if we are not transferring FILE:, since the file: treatment is different*/ Curl_readwrite_init(conn); + Curl_pre_readwrite(conn); + } if((conn->sockfd == CURL_SOCKET_BAD) && (conn->writesockfd == CURL_SOCKET_BAD)) diff --git a/lib/transfer.h b/lib/transfer.h index 3c415cc72..d99ba070b 100644 --- a/lib/transfer.h +++ b/lib/transfer.h @@ -32,6 +32,7 @@ int Curl_single_getsock(struct connectdata *conn, curl_socket_t *socks, int numsocks); CURLcode Curl_readwrite_init(struct connectdata *conn); +void Curl_pre_readwrite(struct connectdata *conn); CURLcode Curl_readrewind(struct connectdata *conn); CURLcode Curl_fillreadbuffer(struct connectdata *conn, int bytes, int *nreadp); bool Curl_retry_request(struct connectdata *conn, char **url); diff --git a/lib/url.c b/lib/url.c index 0368fee3a..4ca629cb2 100644 --- a/lib/url.c +++ b/lib/url.c @@ -164,6 +164,8 @@ static void conn_free(struct connectdata *conn); static void signalPipeClose(struct curl_llist *pipe); +static struct SessionHandle* gethandleathead(struct curl_llist *pipe); + #define MAX_PIPELINE_LENGTH 5 /* @@ -1973,6 +1975,16 @@ bool Curl_isHandleAtHead(struct SessionHandle *handle, return FALSE; } +static struct SessionHandle* gethandleathead(struct curl_llist *pipe) +{ + struct curl_llist_element *curr = pipe->head; + if (curr) { + return (struct SessionHandle *) curr->ptr; + } + + return NULL; +} + static void signalPipeClose(struct curl_llist *pipe) { struct curl_llist_element *curr; @@ -2017,6 +2029,7 @@ ConnectionExists(struct SessionHandle *data, for(i=0; i< data->state.connc->num; i++) { bool match = FALSE; + int pipeLen = 0; /* * Note that if we use a HTTP proxy, we check connections to that * proxy and not to the actual remote server. @@ -2026,18 +2039,20 @@ ConnectionExists(struct SessionHandle *data, /* NULL pointer means not filled-in entry */ continue; + pipeLen = check->send_pipe->size + check->recv_pipe->size; + if (check->connectindex == -1) { check->connectindex = i; /* Set this appropriately since it might have been set to -1 when the easy was removed from the multi */ } - DEBUGF(infof(data, "Examining connection #%ld for reuse\n", - check->connectindex)); + DEBUGF(infof(data, "Examining connection #%ld for reuse \ + (pipeLen = %ld)\n", check->connectindex, pipeLen)); - if(check->inuse && !canPipeline) { + if(pipeLen > 0 && !canPipeline) { /* can only happen within multi handles, and means that another easy - handle is using this connection */ + handle is using this connection */ continue; } @@ -2052,13 +2067,26 @@ ConnectionExists(struct SessionHandle *data, } #endif - if (check->send_pipe->size + - check->recv_pipe->size >= MAX_PIPELINE_LENGTH) { + if (pipeLen >= MAX_PIPELINE_LENGTH) { infof(data, "Connection #%ld has its pipeline full, can't reuse\n", check->connectindex); continue; } + if (canPipeline) { + /* Make sure the pipe has only GET requests */ + struct SessionHandle* sh = gethandleathead(check->send_pipe); + struct SessionHandle* rh = gethandleathead(check->recv_pipe); + if (sh) { + if(!IsPipeliningPossible(sh)) + continue; + } + else if (rh) { + if(!IsPipeliningPossible(rh)) + continue; + } + } + if (check->bits.close) { /* Don't pick a connection that is going to be closed. */ infof(data, "Connection #%ld has been marked for close, can't reuse\n", @@ -3731,8 +3759,6 @@ else { /* re-use init */ conn->bits.reuse = TRUE; /* yes, we're re-using here */ - conn->bits.chunk = FALSE; /* always assume not chunked unless told - otherwise */ Curl_safefree(old_conn->user); Curl_safefree(old_conn->passwd); diff --git a/tests/data/test34 b/tests/data/test34 index 2c73137ba..0e857ca31 100644 --- a/tests/data/test34 +++ b/tests/data/test34 @@ -23,7 +23,7 @@ bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb cccccccccccccccccccccccccccccccc 0 -muuu + HTTP/1.1 200 funky chunky! -- cgit v1.2.3