aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ares/CHANGES 10
-rw-r--r-- ares/ares.h 2
-rw-r--r-- ares/ares_init.c 11
-rw-r--r-- ares/ares_private.h 8
-rw-r--r-- ares/ares_process.c 50
-rw-r--r-- ares/ares_send.c 8
6 files changed, 62 insertions, 27 deletions
diff --git a/ares/CHANGES b/ares/CHANGES
index dd9bac6c9..c590079b4 100644
--- a/ares/CHANGES
+++ b/ares/CHANGES
@@ -1,5 +1,15 @@
Changelog for the c-ares project
+* Nov 1 2008 (Daniel Stenberg)
+- Carlo Contavalli added support for the glibc "rotate" option, as documented
+ in man resolv.conf:
+
+ causes round robin selection of nameservers from among those listed. This
+ has the effect of spreading the query load among all listed servers, rather
+ than having all clients try the first listed server first every time.
+
+ You can enable it with ARES_OPT_ROTATE (and the rotate option field set to 1)
+
* Oct 21 2008 (Yang Tse)
Charles Hardin added handling of EINPROGRESS for UDP connects.
diff --git a/ares/ares.h b/ares/ares.h
index 3609f8676..1507f41e9 100644
--- a/ares/ares.h
+++ b/ares/ares.h
@@ -114,6 +114,7 @@ extern "C" {
#define ARES_OPT_SOCK_SNDBUF (1 << 11)
#define ARES_OPT_SOCK_RCVBUF (1 << 12)
#define ARES_OPT_TIMEOUTMS (1 << 13)
+#define ARES_OPT_ROTATE (1 << 14)
/* Nameinfo flag values */
#define ARES_NI_NOFQDN (1 << 0)
@@ -184,6 +185,7 @@ struct ares_options {
int timeout; /* in seconds or milliseconds, depending on options */
int tries;
int ndots;
+ int rotate;
unsigned short udp_port;
unsigned short tcp_port;
int socket_send_buffer_size;
diff --git a/ares/ares_init.c b/ares/ares_init.c
index 4d4ba1ea5..4a147fc42 100644
--- a/ares/ares_init.c
+++ b/ares/ares_init.c
@@ -144,6 +144,7 @@ int ares_init_options(ares_channel *channelptr, struct ares_options *options,
channel->timeout = -1;
channel->tries = -1;
channel->ndots = -1;
+ channel->rotate = -1;
channel->udp_port = -1;
channel->tcp_port = -1;
channel->socket_send_buffer_size = -1;
@@ -159,6 +160,7 @@ int ares_init_options(ares_channel *channelptr, struct ares_options *options,
channel->sock_state_cb = NULL;
channel->sock_state_cb_data = NULL;
+ channel->last_server = 0;
channel->last_timeout_processed = (time_t)now.tv_sec;
/* Initialize our lists of queries */
@@ -352,6 +354,8 @@ static int init_by_options(ares_channel channel,
channel->tries = options->tries;
if ((optmask & ARES_OPT_NDOTS) && channel->ndots == -1)
channel->ndots = options->ndots;
+ if ((optmask & ARES_OPT_ROTATE) && channel->rotate == -1)
+ channel->rotate = options->rotate;
if ((optmask & ARES_OPT_UDP_PORT) && channel->udp_port == -1)
channel->udp_port = options->udp_port;
if ((optmask & ARES_OPT_TCP_PORT) && channel->tcp_port == -1)
@@ -932,6 +936,8 @@ static int init_by_defaults(ares_channel channel)
channel->tries = DEFAULT_TRIES;
if (channel->ndots == -1)
channel->ndots = 1;
+ if (channel->rotate == -1)
+ channel->rotate = 0;
if (channel->udp_port == -1)
channel->udp_port = htons(NAMESERVER_PORT);
if (channel->tcp_port == -1)
@@ -1302,6 +1308,9 @@ static int set_options(ares_channel channel, const char *str)
val = try_option(p, q, "retry:");
if (val && channel->tries == -1)
channel->tries = atoi(val);
+ val = try_option(p, q, "rotate");
+ if (val && channel->rotate == -1)
+ channel->rotate = 1;
p = q;
while (ISSPACE(*p))
p++;
@@ -1374,7 +1383,7 @@ static char *try_config(char *s, const char *opt)
static const char *try_option(const char *p, const char *q, const char *opt)
{
size_t len = strlen(opt);
- return ((size_t)(q - p) > len && !strncmp(p, opt, len)) ? &p[len] : NULL;
+ return ((size_t)(q - p) >= len && !strncmp(p, opt, len)) ? &p[len] : NULL;
}
#ifndef WIN32
diff --git a/ares/ares_private.h b/ares/ares_private.h
index 0b4edf94e..976fa9f10 100644
--- a/ares/ares_private.h
+++ b/ares/ares_private.h
@@ -195,8 +195,8 @@ struct query {
void *arg;
/* Query status */
- int try;
- int server;
+ int try; /* Number of times we tried this query already. */
+ int server; /* Server this query has last been sent to. */
struct query_server_info *server_info; /* per-server state */
int using_tcp;
int error_status;
@@ -242,6 +242,7 @@ struct ares_channeldata {
int timeout; /* in milliseconds */
int tries;
int ndots;
+ int rotate; /* if true, new queries start at successive servers (round robin) */
int udp_port;
int tcp_port;
int socket_send_buffer_size;
@@ -268,6 +269,9 @@ struct ares_channeldata {
just to draw the line somewhere. */
time_t last_timeout_processed;
+ /* Index of the server the next new query will try first. */
+ int last_server;
+
/* Circular, doubly-linked list of queries, bucketed various ways.... */
/* All active queries in a single list: */
struct list_node all_queries;
diff --git a/ares/ares_process.c b/ares/ares_process.c
index 675af489d..b7f375e19 100644
--- a/ares/ares_process.c
+++ b/ares/ares_process.c
@@ -670,30 +670,33 @@ static void skip_server(ares_channel channel, struct query *query,
static void next_server(ares_channel channel, struct query *query,
struct timeval *now)
{
- /* Advance to the next server or try. */
- query->server++;
- for (; query->try < channel->tries; query->try++)
+ /* We need to try each server channel->tries times. We have channel->nservers
+ * servers to try. In total, we need to do channel->nservers * channel->tries
+ * attempts. Use query->try to remember how many times we already attempted
+ * this query. Use modular arithmetic to find the next server to try. */
+ while (++(query->try) < (channel->nservers * channel->tries))
{
- for (; query->server < channel->nservers; query->server++)
+ struct server_state *server;
+
+ /* Move on to the next server. */
+ query->server = (query->server + 1) % channel->nservers;
+ server = &channel->servers[query->server];
+
+ /* We don't want to use this server if (1) we decided this
+ * connection is broken, and thus about to be closed, (2)
+ * we've decided to skip this server because of earlier
+ * errors we encountered, or (3) we already sent this query
+ * over this exact connection.
+ */
+ if (!server->is_broken &&
+ !query->server_info[query->server].skip_server &&
+ !(query->using_tcp &&
+ (query->server_info[query->server].tcp_connection_generation ==
+ server->tcp_connection_generation)))
{
- struct server_state *server = &channel->servers[query->server];
- /* We don't want to use this server if (1) we decided this
- * connection is broken, and thus about to be closed, (2)
- * we've decided to skip this server because of earlier
- * errors we encountered, or (3) we already sent this query
- * over this exact connection.
- */
- if (!server->is_broken &&
- !query->server_info[query->server].skip_server &&
- !(query->using_tcp &&
- (query->server_info[query->server].tcp_connection_generation ==
- server->tcp_connection_generation)))
- {
- ares__send_query(channel, query, now);
- return;
- }
+ ares__send_query(channel, query, now);
+ return;
}
- query->server = 0;
/* You might think that with TCP we only need one try. However,
* even when using TCP, servers can time-out our connection just
@@ -702,6 +705,8 @@ static void next_server(ares_channel channel, struct query *query,
* tickle a bug that drops our request.
*/
}
+
+ /* If we get here, every attempt to perform the query has failed. */
end_query(channel, query, query->error_status, NULL, 0);
}
@@ -775,8 +780,7 @@ void ares__send_query(ares_channel channel, struct query *query,
}
query->timeout = *now;
ares__timeadd(&query->timeout,
- (query->try == 0) ? channel->timeout
- : channel->timeout << query->try / channel->nservers);
+ channel->timeout << (query->try / channel->nservers));
/* Keep track of queries bucketed by timeout, so we can process
* timeout events quickly.
*/
diff --git a/ares/ares_send.c b/ares/ares_send.c
index a5811d36d..be5478df2 100644
--- a/ares/ares_send.c
+++ b/ares/ares_send.c
@@ -95,7 +95,13 @@ void ares_send(ares_channel channel, const unsigned char *qbuf, int qlen,
/* Initialize query status. */
query->try = 0;
- query->server = 0;
+
+ /* Choose the server to send the query to. If rotation is enabled, keep track
+ * of the next server we want to use. */
+ query->server = channel->last_server;
+ if (channel->rotate == 1)
+ channel->last_server = (channel->last_server + 1) % channel->nservers;
+
for (i = 0; i < channel->nservers; i++)
{
query->server_info[i].skip_server = 0;