From 6c849321d7545bcba1eb014d53b55db561015e52 Mon Sep 17 00:00:00 2001 From: Yang Tse Date: Wed, 5 Oct 2011 20:16:16 +0200 Subject: curl tool: reviewed code moved to tool_*.[ch] files --- packages/Symbian/group/curl.mmp | 3 +- src/Makefile.inc | 6 +- src/Makefile.vc6 | 12 +- src/tool_operate.c | 2 +- src/tool_urlglob.c | 597 ++++++++++++++++++++++++++++++++++++++++ src/tool_urlglob.h | 72 +++++ src/urlglob.c | 570 -------------------------------------- src/urlglob.h | 69 ----- src/vc6curlsrc.dsp | 8 +- 9 files changed, 686 insertions(+), 653 deletions(-) create mode 100644 src/tool_urlglob.c create mode 100644 src/tool_urlglob.h delete mode 100644 src/urlglob.c delete mode 100644 src/urlglob.h diff --git a/packages/Symbian/group/curl.mmp b/packages/Symbian/group/curl.mmp index 0f548c6cf..22bb5c05e 100644 --- a/packages/Symbian/group/curl.mmp +++ b/packages/Symbian/group/curl.mmp @@ -8,7 +8,7 @@ UID 0x00000000 0xF0206442 SOURCEPATH ../../../src SOURCE \ - hugehelp.c urlglob.c writeout.c writeenv.c \ + hugehelp.c writeout.c writeenv.c \ getpass.c homedir.c curlutil.c xattr.c \ tool_binmode.c \ tool_bname.c \ @@ -39,6 +39,7 @@ SOURCE \ tool_parsecfg.c \ tool_setopt.c \ tool_sleep.c \ + tool_urlglob.c \ tool_vms.c SOURCEPATH ../../../lib diff --git a/src/Makefile.inc b/src/Makefile.inc index af6f6b47c..d2ee0e679 100644 --- a/src/Makefile.inc +++ b/src/Makefile.inc @@ -14,7 +14,7 @@ CURLX_ONES = $(top_srcdir)/lib/strtoofft.c \ $(top_srcdir)/lib/rawstr.c \ $(top_srcdir)/lib/nonblock.c -CURL_CFILES = hugehelp.c urlglob.c writeout.c writeenv.c \ +CURL_CFILES = hugehelp.c writeout.c writeenv.c \ getpass.c homedir.c curlutil.c xattr.c \ tool_binmode.c \ tool_bname.c \ @@ -45,10 +45,11 @@ CURL_CFILES = hugehelp.c urlglob.c writeout.c writeenv.c \ tool_parsecfg.c \ tool_setopt.c \ tool_sleep.c \ + tool_urlglob.c \ tool_vms.c CURL_HFILES = hugehelp.h setup.h config-win32.h config-mac.h \ - config-riscos.h urlglob.h version.h xattr.h \ + config-riscos.h version.h xattr.h \ 
writeout.h writeenv.h getpass.h homedir.h curlutil.h \ tool_binmode.h \ tool_bname.h \ @@ -80,6 +81,7 @@ CURL_HFILES = hugehelp.h setup.h config-win32.h config-mac.h \ tool_sdecls.h \ tool_setopt.h \ tool_sleep.h \ + tool_urlglob.h \ tool_vms.h curl_SOURCES = $(CURL_CFILES) $(CURLX_ONES) $(CURL_HFILES) diff --git a/src/Makefile.vc6 b/src/Makefile.vc6 index 5cec77a9c..fe03afe52 100644 --- a/src/Makefile.vc6 +++ b/src/Makefile.vc6 @@ -169,8 +169,8 @@ RELEASE_OBJS= \ tool_parsecfgr.obj \ tool_setoptr.obj \ tool_sleepr.obj \ + tool_urlglobr.obj \ tool_vmsr.obj \ - urlglobr.obj \ writeoutr.obj \ xattrr.obj \ curlr.res @@ -212,8 +212,8 @@ DEBUG_OBJS= \ tool_parsecfgd.obj \ tool_setoptd.obj \ tool_sleepd.obj \ + tool_urlglobd.obj \ tool_vmsd.obj \ - urlglobd.obj \ writeoutd.obj \ xattrd.obj \ curld.res @@ -342,8 +342,6 @@ hugehelpr.obj: hugehelp.c $(CCR) $(CFLAGS) /Zm200 /Fo"$@" hugehelp.c writeoutr.obj: writeout.c $(CCR) $(CFLAGS) /Fo"$@" writeout.c -urlglobr.obj: urlglob.c - $(CCR) $(CFLAGS) /Fo"$@" urlglob.c getpassr.obj: getpass.c $(CCR) $(CFLAGS) /Fo"$@" getpass.c homedirr.obj: homedir.c @@ -414,6 +412,8 @@ tool_setoptr.obj: tool_setopt.c $(CCR) $(CFLAGS) /Fo"$@" tool_setopt.c tool_sleepr.obj: tool_sleep.c $(CCR) $(CFLAGS) /Fo"$@" tool_sleep.c +tool_urlglobr.obj: tool_urlglob.c + $(CCR) $(CFLAGS) /Fo"$@" tool_urlglob.c tool_vmsr.obj: tool_vms.c $(CCR) $(CFLAGS) /Fo"$@" tool_vms.c xattrr.obj: xattr.c @@ -426,8 +426,6 @@ hugehelpd.obj: hugehelp.c $(CCD) $(CFLAGS) /Zm200 /Fo"$@" hugehelp.c writeoutd.obj: writeout.c $(CCD) $(CFLAGS) /Fo"$@" writeout.c -urlglobd.obj: urlglob.c - $(CCD) $(CFLAGS) /Fo"$@" urlglob.c getpassd.obj: getpass.c $(CCD) $(CFLAGS) /Fo"$@" getpass.c homedird.obj: homedir.c @@ -498,6 +496,8 @@ tool_setoptd.obj: tool_setopt.c $(CCD) $(CFLAGS) /Fo"$@" tool_setopt.c tool_sleepd.obj: tool_sleep.c $(CCD) $(CFLAGS) /Fo"$@" tool_sleep.c +tool_urlglobd.obj: tool_urlglob.c + $(CCD) $(CFLAGS) /Fo"$@" tool_urlglob.c tool_vmsd.obj: tool_vms.c $(CCD) $(CFLAGS) 
/Fo"$@" tool_vms.c xattrd.obj: xattr.c diff --git a/src/tool_operate.c b/src/tool_operate.c index d4671eb39..b7da80a71 100644 --- a/src/tool_operate.c +++ b/src/tool_operate.c @@ -49,7 +49,6 @@ #include "curlutil.h" #include "homedir.h" -#include "urlglob.h" #include "writeout.h" #include "xattr.h" @@ -79,6 +78,7 @@ #include "tool_parsecfg.h" #include "tool_setopt.h" #include "tool_sleep.h" +#include "tool_urlglob.h" #include "memdebug.h" /* keep this as LAST include */ diff --git a/src/tool_urlglob.c b/src/tool_urlglob.c new file mode 100644 index 000000000..108fc3987 --- /dev/null +++ b/src/tool_urlglob.c @@ -0,0 +1,597 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ +#include "setup.h" + +#include <curl/curl.h> + +#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */ +#include <curl/mprintf.h> + +#include "tool_urlglob.h" +#include "tool_vms.h" + +#include "memdebug.h" /* keep this as LAST include */ + +typedef enum { + GLOB_OK, + GLOB_NO_MEM, + GLOB_ERROR +} GlobCode; + +/* + * glob_word() + * + * Input a full globbed string, set the fourth argument to the amount of + * strings we get out of this. Return GlobCode. 
+ */ +static GlobCode glob_word(URLGlob *, /* object anchor */ + char *, /* globbed string */ + size_t, /* position */ + int *); /* returned number of strings */ + +static GlobCode glob_set(URLGlob *glob, char *pattern, + size_t pos, int *amount) +{ + /* processes a set expression with the point behind the opening '{' + ','-separated elements are collected until the next closing '}' + */ + URLPattern *pat; + GlobCode res; + bool done = FALSE; + char* buf = glob->glob_buffer; + + pat = &glob->pattern[glob->size / 2]; + /* patterns 0,1,2,... correspond to size=1,3,5,... */ + pat->type = UPTSet; + pat->content.Set.size = 0; + pat->content.Set.ptr_s = 0; + pat->content.Set.elements = NULL; + + ++glob->size; + + while(!done) { + switch (*pattern) { + case '\0': /* URL ended while set was still open */ + snprintf(glob->errormsg, sizeof(glob->errormsg), + "unmatched brace at pos %zu\n", pos); + return GLOB_ERROR; + + case '{': + case '[': /* no nested expressions at this time */ + snprintf(glob->errormsg, sizeof(glob->errormsg), + "nested braces not supported at pos %zu\n", pos); + return GLOB_ERROR; + + case ',': + case '}': /* set element completed */ + *buf = '\0'; + if(pat->content.Set.elements) { + char **new_arr = realloc(pat->content.Set.elements, + (pat->content.Set.size + 1) * sizeof(char*)); + if(!new_arr) { + short elem; + for(elem = 0; elem < pat->content.Set.size; elem++) + Curl_safefree(pat->content.Set.elements[elem]); + Curl_safefree(pat->content.Set.elements); + pat->content.Set.ptr_s = 0; + pat->content.Set.size = 0; + } + pat->content.Set.elements = new_arr; + } + else + pat->content.Set.elements = malloc(sizeof(char*)); + if(!pat->content.Set.elements) { + snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n"); + return GLOB_NO_MEM; + } + pat->content.Set.elements[pat->content.Set.size] = + strdup(glob->glob_buffer); + if(!pat->content.Set.elements[pat->content.Set.size]) { + short elem; + for(elem = 0; elem < pat->content.Set.size; 
elem++) + Curl_safefree(pat->content.Set.elements[elem]); + Curl_safefree(pat->content.Set.elements); + pat->content.Set.ptr_s = 0; + pat->content.Set.size = 0; + snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n"); + return GLOB_NO_MEM; + } + ++pat->content.Set.size; + + if(*pattern == '}') { + /* entire set pattern completed */ + int wordamount; + + /* always check for a literal (may be "") between patterns */ + res = glob_word(glob, ++pattern, ++pos, &wordamount); + if(res) { + short elem; + for(elem = 0; elem < pat->content.Set.size; elem++) + Curl_safefree(pat->content.Set.elements[elem]); + Curl_safefree(pat->content.Set.elements); + pat->content.Set.ptr_s = 0; + pat->content.Set.size = 0; + return res; + } + + *amount = pat->content.Set.size * wordamount; + + done = TRUE; + continue; + } + + buf = glob->glob_buffer; + ++pattern; + ++pos; + break; + + case ']': /* illegal closing bracket */ + snprintf(glob->errormsg, sizeof(glob->errormsg), + "illegal pattern at pos %zu\n", pos); + return GLOB_ERROR; + + case '\\': /* escaped character, skip '\' */ + if(pattern[1]) { + ++pattern; + ++pos; + } + /* intentional fallthrough */ + default: + *buf++ = *pattern++; /* copy character to set element */ + ++pos; + } + } + return GLOB_OK; +} + +static GlobCode glob_range(URLGlob *glob, char *pattern, + size_t pos, int *amount) +{ + /* processes a range expression with the point behind the opening '[' + - char range: e.g. "a-z]", "B-Q]" + - num range: e.g. "0-9]", "17-2000]" + - num range with leading zeros: e.g. "001-999]" + expression is checked for well-formedness and collected until the next ']' + */ + URLPattern *pat; + char *c; + char sep; + char sep2; + int step; + int rc; + GlobCode res; + int wordamount = 1; + + pat = &glob->pattern[glob->size / 2]; + /* patterns 0,1,2,... correspond to size=1,3,5,... 
*/ + ++glob->size; + + if(ISALPHA(*pattern)) { + /* character range detected */ + char min_c; + char max_c; + + pat->type = UPTCharRange; + + rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2); + + if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) { + /* the pattern is not well-formed */ + snprintf(glob->errormsg, sizeof(glob->errormsg), + "error: bad range specification after pos %zu\n", pos); + return GLOB_ERROR; + } + + /* check the (first) separating character */ + if((sep != ']') && (sep != ':')) { + snprintf(glob->errormsg, sizeof(glob->errormsg), + "error: unsupported character (%c) after range at pos %zu\n", + sep, pos); + return GLOB_ERROR; + } + + /* if there was a ":[num]" thing, use that as step or else use 1 */ + pat->content.CharRange.step = + ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1; + + pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; + pat->content.CharRange.max_c = max_c; + } + else if(ISDIGIT(*pattern)) { + /* numeric range detected */ + int min_n; + int max_n; + + pat->type = UPTNumRange; + pat->content.NumRange.padlength = 0; + + rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2); + + if((rc < 2) || (min_n > max_n)) { + /* the pattern is not well-formed */ + snprintf(glob->errormsg, sizeof(glob->errormsg), + "error: bad range specification after pos %zu\n", pos); + return GLOB_ERROR; + } + pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; + pat->content.NumRange.max_n = max_n; + + /* if there was a ":[num]" thing, use that as step or else use 1 */ + pat->content.NumRange.step = + ((sep == ':') && (rc == 5) && (sep2 == ']')) ? 
step : 1; + + if(*pattern == '0') { + /* leading zero specified */ + c = pattern; + while(ISDIGIT(*c)) { + c++; + ++pat->content.NumRange.padlength; /* padding length is set for all + instances of this pattern */ + } + } + } + else { + snprintf(glob->errormsg, sizeof(glob->errormsg), + "illegal character in range specification at pos %zu\n", pos); + return GLOB_ERROR; + } + + c = (char*)strchr(pattern, ']'); /* continue after next ']' */ + if(c) + c++; + else { + snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'"); + return GLOB_ERROR; /* missing ']' */ + } + + /* always check for a literal (may be "") between patterns */ + + res = glob_word(glob, c, pos + (c - pattern), &wordamount); + if(res == GLOB_ERROR) { + wordamount = 1; + res = GLOB_OK; + } + + if(!res) { + if(pat->type == UPTCharRange) + *amount = wordamount * (pat->content.CharRange.max_c - + pat->content.CharRange.min_c + 1); + else + *amount = wordamount * (pat->content.NumRange.max_n - + pat->content.NumRange.min_n + 1); + } + + return res; /* GLOB_OK or GLOB_NO_MEM */ +} + +static GlobCode glob_word(URLGlob *glob, char *pattern, + size_t pos, int *amount) +{ + /* processes a literal string component of a URL + special characters '{' and '[' branch to set/range processing functions + */ + char* buf = glob->glob_buffer; + size_t litindex; + GlobCode res = GLOB_OK; + + *amount = 1; /* default is one single string */ + + while(*pattern != '\0' && *pattern != '{' && *pattern != '[') { + if(*pattern == '}' || *pattern == ']') { + snprintf(glob->errormsg, sizeof(glob->errormsg), + "unmatched close brace/bracket at pos %zu\n", pos); + return GLOB_ERROR; + } + + /* only allow \ to escape known "special letters" */ + if(*pattern == '\\' && + (*(pattern+1) == '{' || *(pattern+1) == '[' || + *(pattern+1) == '}' || *(pattern+1) == ']') ) { + + /* escape character, skip '\' */ + ++pattern; + ++pos; + } + *buf++ = *pattern++; /* copy character to literal */ + ++pos; + } + *buf = '\0'; + litindex = 
glob->size / 2; + /* literals 0,1,2,... correspond to size=0,2,4,... */ + glob->literal[litindex] = strdup(glob->glob_buffer); + if(!glob->literal[litindex]) { + snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n"); + return GLOB_NO_MEM; + } + ++glob->size; + + switch (*pattern) { + case '\0': + /* singular URL processed */ + break; + + case '{': + /* process set pattern */ + res = glob_set(glob, ++pattern, ++pos, amount); + break; + + case '[': + /* process range pattern */ + res = glob_range(glob, ++pattern, ++pos, amount); + break; + } + + if(res) + Curl_safefree(glob->literal[litindex]); + + return res; +} + +int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error) +{ + /* + * We can deal with any-size, just make a buffer with the same length + * as the specified URL! + */ + URLGlob *glob_expand; + int amount; + char *glob_buffer; + GlobCode res; + + *glob = NULL; + + glob_buffer = malloc(strlen(url) + 1); + if(!glob_buffer) + return CURLE_OUT_OF_MEMORY; + + glob_expand = calloc(1, sizeof(URLGlob)); + if(!glob_expand) { + Curl_safefree(glob_buffer); + return CURLE_OUT_OF_MEMORY; + } + glob_expand->size = 0; + glob_expand->urllen = strlen(url); + glob_expand->glob_buffer = glob_buffer; + glob_expand->beenhere = 0; + + res = glob_word(glob_expand, url, 1, &amount); + if(!res) + *urlnum = amount; + else { + if(error && glob_expand->errormsg[0]) { + /* send error description to the error-stream */ + fprintf(error, "curl: (%d) [globbing] %s", + (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT, + glob_expand->errormsg); + } + /* it failed, we cleanup */ + Curl_safefree(glob_buffer); + Curl_safefree(glob_expand); + *urlnum = 1; + return (res == GLOB_NO_MEM) ? 
CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT; + } + + *glob = glob_expand; + return CURLE_OK; +} + +void glob_cleanup(URLGlob* glob) +{ + size_t i; + int elem; + + for(i = glob->size - 1; i < glob->size; --i) { + if(!(i & 1)) { /* even indexes contain literals */ + Curl_safefree(glob->literal[i/2]); + } + else { /* odd indexes contain sets or ranges */ + if((glob->pattern[i/2].type == UPTSet) && + (glob->pattern[i/2].content.Set.elements)) { + for(elem = glob->pattern[i/2].content.Set.size - 1; + elem >= 0; + --elem) { + Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]); + } + Curl_safefree(glob->pattern[i/2].content.Set.elements); + } + } + } + Curl_safefree(glob->glob_buffer); + Curl_safefree(glob); +} + +char *glob_next_url(URLGlob *glob) +{ + URLPattern *pat; + char *lit; + size_t i; + size_t j; + size_t len; + size_t buflen = glob->urllen + 1; + char *buf = glob->glob_buffer; + + if(!glob->beenhere) + glob->beenhere = 1; + else { + bool carry = TRUE; + + /* implement a counter over the index ranges of all patterns, + starting with the rightmost pattern */ + for(i = glob->size / 2 - 1; carry && (i < glob->size); --i) { + carry = FALSE; + pat = &glob->pattern[i]; + switch (pat->type) { + case UPTSet: + if((pat->content.Set.elements) && + (++pat->content.Set.ptr_s == pat->content.Set.size)) { + pat->content.Set.ptr_s = 0; + carry = TRUE; + } + break; + case UPTCharRange: + pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step + + (int)((unsigned char)pat->content.CharRange.ptr_c)); + if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { + pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; + carry = TRUE; + } + break; + case UPTNumRange: + pat->content.NumRange.ptr_n += pat->content.NumRange.step; + if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { + pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; + carry = TRUE; + } + break; + default: + printf("internal error: invalid pattern type (%d)\n", 
(int)pat->type); + exit (CURLE_FAILED_INIT); + } + } + if(carry) /* first pattern ptr has run into overflow, done! */ + return NULL; + } + + for(j = 0; j < glob->size; ++j) { + if(!(j&1)) { /* every other term (j even) is a literal */ + lit = glob->literal[j/2]; + len = snprintf(buf, buflen, "%s", lit); + buf += len; + buflen -= len; + } + else { /* the rest (i odd) are patterns */ + pat = &glob->pattern[j/2]; + switch(pat->type) { + case UPTSet: + if(pat->content.Set.elements) { + len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); + snprintf(buf, buflen, "%s", + pat->content.Set.elements[pat->content.Set.ptr_s]); + buf += len; + buflen -= len; + } + break; + case UPTCharRange: + *buf++ = pat->content.CharRange.ptr_c; + break; + case UPTNumRange: + len = snprintf(buf, buflen, "%0*d", + pat->content.NumRange.padlength, + pat->content.NumRange.ptr_n); + buf += len; + buflen -= len; + break; + default: + printf("internal error: invalid pattern type (%d)\n", (int)pat->type); + exit (CURLE_FAILED_INIT); + } + } + } + *buf = '\0'; + return strdup(glob->glob_buffer); +} + +char *glob_match_url(char *filename, URLGlob *glob) +{ + char *target; + size_t allocsize; + char numbuf[18]; + char *appendthis = NULL; + size_t appendlen = 0; + size_t stringlen = 0; + + /* We cannot use the glob_buffer for storage here since the filename may + * be longer than the URL we use. We allocate a good start size, then + * we need to realloc in case of need. 
+ */ + allocsize = strlen(filename) + 1; /* make it at least one byte to store the + trailing zero */ + target = malloc(allocsize); + if(!target) + return NULL; /* major failure */ + + while(*filename) { + if(*filename == '#' && ISDIGIT(filename[1])) { + unsigned long i; + char *ptr = filename; + unsigned long num = strtoul(&filename[1], &filename, 10); + i = num - 1UL; + + if(num && (i <= glob->size / 2)) { + URLPattern pat = glob->pattern[i]; + switch (pat.type) { + case UPTSet: + if(pat.content.Set.elements) { + appendthis = pat.content.Set.elements[pat.content.Set.ptr_s]; + appendlen = + strlen(pat.content.Set.elements[pat.content.Set.ptr_s]); + } + break; + case UPTCharRange: + numbuf[0] = pat.content.CharRange.ptr_c; + numbuf[1] = 0; + appendthis = numbuf; + appendlen = 1; + break; + case UPTNumRange: + snprintf(numbuf, sizeof(numbuf), "%0*d", + pat.content.NumRange.padlength, + pat.content.NumRange.ptr_n); + appendthis = numbuf; + appendlen = strlen(numbuf); + break; + default: + printf("internal error: invalid pattern type (%d)\n", + (int)pat.type); + Curl_safefree(target); + return NULL; + } + } + else { + /* #[num] out of range, use the #[num] in the output */ + filename = ptr; + appendthis = filename++; + appendlen = 1; + } + } + else { + appendthis = filename++; + appendlen = 1; + } + if(appendlen + stringlen >= allocsize) { + char *newstr; + /* we append a single byte to allow for the trailing byte to be appended + at the end of this function outside the while() loop */ + allocsize = (appendlen + stringlen) * 2; + newstr = realloc(target, allocsize + 1); + if(!newstr) { + Curl_safefree(target); + return NULL; + } + target = newstr; + } + memcpy(&target[stringlen], appendthis, appendlen); + stringlen += appendlen; + } + target[stringlen]= '\0'; + return target; +} diff --git a/src/tool_urlglob.h b/src/tool_urlglob.h new file mode 100644 index 000000000..cf54a9b16 --- /dev/null +++ b/src/tool_urlglob.h @@ -0,0 +1,72 @@ +#ifndef HEADER_CURL_TOOL_URLGLOB_H 
+#define HEADER_CURL_TOOL_URLGLOB_H +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ +#include "setup.h" + +typedef enum { + UPTSet = 1, + UPTCharRange, + UPTNumRange +} URLPatternType; + +typedef struct { + URLPatternType type; + union { + struct { + char **elements; + short size; + short ptr_s; + } Set; + struct { + char min_c; + char max_c; + char ptr_c; + int step; + } CharRange; + struct { + int min_n; + int max_n; + short padlength; + int ptr_n; + int step; + } NumRange ; + } content; +} URLPattern; + +typedef struct { + char *literal[10]; + URLPattern pattern[9]; + size_t size; + size_t urllen; + char *glob_buffer; + char beenhere; + char errormsg[80]; /* error message buffer */ +} URLGlob; + +int glob_url(URLGlob**, char*, int *, FILE *); +char* glob_next_url(URLGlob*); +char* glob_match_url(char*, URLGlob *); +void glob_cleanup(URLGlob* glob); + +#endif /* HEADER_CURL_TOOL_URLGLOB_H */ + diff --git a/src/urlglob.c b/src/urlglob.c deleted file mode 100644 index aa870d833..000000000 --- a/src/urlglob.c +++ /dev/null @@ -1,570 +0,0 @@ -/*************************************************************************** - * _ _ ____ _ - * 
Project ___| | | | _ \| | - * / __| | | | |_) | | - * | (__| |_| | _ <| |___ - * \___|\___/|_| \_\_____| - * - * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al. - * - * This software is licensed as described in the file COPYING, which - * you should have received as part of this distribution. The terms - * are also available at http://curl.haxx.se/docs/copyright.html. - * - * You may opt to use, copy, modify, merge, publish, distribute and/or sell - * copies of the Software, and permit persons to whom the Software is - * furnished to do so, under the terms of the COPYING file. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - ***************************************************************************/ -#include "setup.h" - -#include <curl/curl.h> - -#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */ -#include <curl/mprintf.h> - -#include "urlglob.h" -#include "tool_vms.h" - -#include "memdebug.h" /* keep this as LAST include */ - -typedef enum { - GLOB_OK, - GLOB_ERROR -} GlobCode; - -/* - * glob_word() - * - * Input a full globbed string, set the forth argument to the amount of - * strings we get out of this. Return GlobCode. - */ -static GlobCode glob_word(URLGlob *, /* object anchor */ - char *, /* globbed string */ - size_t, /* position */ - int *); /* returned number of strings */ - -static GlobCode glob_set(URLGlob *glob, char *pattern, - size_t pos, int *amount) -{ - /* processes a set expression with the point behind the opening '{' - ','-separated elements are collected until the next closing '}' - */ - bool done = FALSE; - char* buf = glob->glob_buffer; - URLPattern *pat; - - pat = (URLPattern*)&glob->pattern[glob->size / 2]; - /* patterns 0,1,2,... correspond to size=1,3,5,... 
*/ - pat->type = UPTSet; - pat->content.Set.size = 0; - pat->content.Set.ptr_s = 0; - pat->content.Set.elements = NULL; - - ++glob->size; - - while(!done) { - switch (*pattern) { - case '\0': /* URL ended while set was still open */ - snprintf(glob->errormsg, sizeof(glob->errormsg), - "unmatched brace at pos %zu\n", pos); - return GLOB_ERROR; - - case '{': - case '[': /* no nested expressions at this time */ - snprintf(glob->errormsg, sizeof(glob->errormsg), - "nested braces not supported at pos %zu\n", pos); - return GLOB_ERROR; - - case ',': - case '}': /* set element completed */ - *buf = '\0'; - if(pat->content.Set.elements) { - char **new_arr = realloc(pat->content.Set.elements, - (pat->content.Set.size + 1) * sizeof(char*)); - if(!new_arr) { - short elem; - for(elem = 0; elem < pat->content.Set.size; elem++) - Curl_safefree(pat->content.Set.elements[elem]); - Curl_safefree(pat->content.Set.elements); - pat->content.Set.ptr_s = 0; - pat->content.Set.size = 0; - } - pat->content.Set.elements = new_arr; - } - else - pat->content.Set.elements = malloc(sizeof(char*)); - if(!pat->content.Set.elements) { - snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory"); - return GLOB_ERROR; - } - pat->content.Set.elements[pat->content.Set.size] = - strdup(glob->glob_buffer); - if(!pat->content.Set.elements[pat->content.Set.size]) { - short elem; - for(elem = 0; elem < pat->content.Set.size; elem++) - Curl_safefree(pat->content.Set.elements[elem]); - Curl_safefree(pat->content.Set.elements); - pat->content.Set.ptr_s = 0; - pat->content.Set.size = 0; - snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory"); - return GLOB_ERROR; - } - ++pat->content.Set.size; - - if(*pattern == '}') { - /* entire set pattern completed */ - int wordamount; - - /* always check for a literal (may be "") between patterns */ - if(GLOB_ERROR == glob_word(glob, ++pattern, ++pos, &wordamount)) - return GLOB_ERROR; - *amount = pat->content.Set.size * wordamount; - - done = TRUE; - 
continue; - } - - buf = glob->glob_buffer; - ++pattern; - ++pos; - break; - - case ']': /* illegal closing bracket */ - snprintf(glob->errormsg, sizeof(glob->errormsg), - "illegal pattern at pos %zu\n", pos); - return GLOB_ERROR; - - case '\\': /* escaped character, skip '\' */ - if(pattern[1]) { - ++pattern; - ++pos; - } - /* intentional fallthrough */ - default: - *buf++ = *pattern++; /* copy character to set element */ - ++pos; - } - } - return GLOB_OK; -} - -static GlobCode glob_range(URLGlob *glob, char *pattern, - size_t pos, int *amount) -{ - /* processes a range expression with the point behind the opening '[' - - char range: e.g. "a-z]", "B-Q]" - - num range: e.g. "0-9]", "17-2000]" - - num range with leading zeros: e.g. "001-999]" - expression is checked for well-formedness and collected until the next ']' - */ - URLPattern *pat; - char *c; - int wordamount=1; - char sep; - char sep2; - int step; - int rc; - - pat = (URLPattern*)&glob->pattern[glob->size / 2]; - /* patterns 0,1,2,... correspond to size=1,3,5,... 
*/ - ++glob->size; - - if(ISALPHA(*pattern)) { /* character range detected */ - char min_c; - char max_c; - - pat->type = UPTCharRange; - rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2); - if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) { - /* the pattern is not well-formed */ - snprintf(glob->errormsg, sizeof(glob->errormsg), - "error: bad range specification after pos %zu\n", pos); - return GLOB_ERROR; - } - - /* check the (first) separating character */ - if((sep != ']') && (sep != ':')) { - snprintf(glob->errormsg, sizeof(glob->errormsg), - "error: unsupported character (%c) after range at pos %zu\n", - sep, pos); - return GLOB_ERROR; - } - - /* if there was a ":[num]" thing, use that as step or else use 1 */ - pat->content.CharRange.step = - ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1; - - pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; - pat->content.CharRange.max_c = max_c; - } - else if(ISDIGIT(*pattern)) { /* numeric range detected */ - int min_n; - int max_n; - - pat->type = UPTNumRange; - pat->content.NumRange.padlength = 0; - - rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2); - - if((rc < 2) || (min_n > max_n)) { - /* the pattern is not well-formed */ - snprintf(glob->errormsg, sizeof(glob->errormsg), - "error: bad range specification after pos %zu\n", pos); - return GLOB_ERROR; - } - pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; - pat->content.NumRange.max_n = max_n; - - /* if there was a ":[num]" thing, use that as step or else use 1 */ - pat->content.NumRange.step = - ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1; - - if(*pattern == '0') { /* leading zero specified */ - c = pattern; - while(ISDIGIT(*c)) { - c++; - ++pat->content.NumRange.padlength; /* padding length is set for all - instances of this pattern */ - } - } - - } - else { - snprintf(glob->errormsg, sizeof(glob->errormsg), - "illegal character in range specification 
at pos %zu\n", pos); - return GLOB_ERROR; - } - - c = (char*)strchr(pattern, ']'); /* continue after next ']' */ - if(c) - c++; - else { - snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'"); - return GLOB_ERROR; /* missing ']' */ - } - - /* always check for a literal (may be "") between patterns */ - - if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount)) - wordamount = 1; - - if(pat->type == UPTCharRange) - *amount = (pat->content.CharRange.max_c - - pat->content.CharRange.min_c + 1) * - wordamount; - else - *amount = (pat->content.NumRange.max_n - - pat->content.NumRange.min_n + 1) * wordamount; - - return GLOB_OK; -} - -static GlobCode glob_word(URLGlob *glob, char *pattern, - size_t pos, int *amount) -{ - /* processes a literal string component of a URL - special characters '{' and '[' branch to set/range processing functions - */ - char* buf = glob->glob_buffer; - size_t litindex; - GlobCode res = GLOB_OK; - - *amount = 1; /* default is one single string */ - - while(*pattern != '\0' && *pattern != '{' && *pattern != '[') { - if(*pattern == '}' || *pattern == ']') { - snprintf(glob->errormsg, sizeof(glob->errormsg), - "unmatched close brace/bracket at pos %zu\n", pos); - return GLOB_ERROR; - } - - /* only allow \ to escape known "special letters" */ - if(*pattern == '\\' && - (*(pattern+1) == '{' || *(pattern+1) == '[' || - *(pattern+1) == '}' || *(pattern+1) == ']') ) { - - /* escape character, skip '\' */ - ++pattern; - ++pos; - } - *buf++ = *pattern++; /* copy character to literal */ - ++pos; - } - *buf = '\0'; - litindex = glob->size / 2; - /* literals 0,1,2,... correspond to size=0,2,4,... 
*/ - glob->literal[litindex] = strdup(glob->glob_buffer); - if(!glob->literal[litindex]) - return GLOB_ERROR; - ++glob->size; - - switch (*pattern) { - case '\0': - break; /* singular URL processed */ - - case '{': - /* process set pattern */ - res = glob_set(glob, ++pattern, ++pos, amount); - break; - - case '[': - /* process range pattern */ - res= glob_range(glob, ++pattern, ++pos, amount); - break; - } - - if(GLOB_OK != res) - /* free that strdup'ed string again */ - Curl_safefree(glob->literal[litindex]); - - return res; /* something got wrong */ -} - -int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error) -{ - /* - * We can deal with any-size, just make a buffer with the same length - * as the specified URL! - */ - URLGlob *glob_expand; - int amount; - char *glob_buffer = malloc(strlen(url)+1); - - *glob = NULL; - if(NULL == glob_buffer) - return CURLE_OUT_OF_MEMORY; - - glob_expand = calloc(1, sizeof(URLGlob)); - if(NULL == glob_expand) { - Curl_safefree(glob_buffer); - return CURLE_OUT_OF_MEMORY; - } - glob_expand->size = 0; - glob_expand->urllen = strlen(url); - glob_expand->glob_buffer = glob_buffer; - glob_expand->beenhere=0; - if(GLOB_OK == glob_word(glob_expand, url, 1, &amount)) - *urlnum = amount; - else { - if(error && glob_expand->errormsg[0]) { - /* send error description to the error-stream */ - fprintf(error, "curl: (%d) [globbing] %s", - CURLE_URL_MALFORMAT, glob_expand->errormsg); - } - /* it failed, we cleanup */ - Curl_safefree(glob_buffer); - Curl_safefree(glob_expand); - *urlnum = 1; - return CURLE_URL_MALFORMAT; - } - - *glob = glob_expand; - return CURLE_OK; -} - -void glob_cleanup(URLGlob* glob) -{ - size_t i; - int elem; - - for(i = glob->size - 1; i < glob->size; --i) { - if(!(i & 1)) { /* even indexes contain literals */ - Curl_safefree(glob->literal[i/2]); - } - else { /* odd indexes contain sets or ranges */ - if((glob->pattern[i/2].type == UPTSet) && - (glob->pattern[i/2].content.Set.elements)) { - for(elem = 
glob->pattern[i/2].content.Set.size - 1; - elem >= 0; - --elem) { - if(glob->pattern[i/2].content.Set.elements[elem]) - Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]); - } - Curl_safefree(glob->pattern[i/2].content.Set.elements); - } - } - } - Curl_safefree(glob->glob_buffer); - Curl_safefree(glob); -} - -char *glob_next_url(URLGlob *glob) -{ - char *buf = glob->glob_buffer; - URLPattern *pat; - char *lit; - size_t i; - size_t j; - size_t buflen = glob->urllen+1; - size_t len; - - if(!glob->beenhere) - glob->beenhere = 1; - else { - bool carry = TRUE; - - /* implement a counter over the index ranges of all patterns, - starting with the rightmost pattern */ - for(i = glob->size / 2 - 1; carry && i < glob->size; --i) { - carry = FALSE; - pat = &glob->pattern[i]; - switch (pat->type) { - case UPTSet: - if((pat->content.Set.elements) && - (++pat->content.Set.ptr_s == pat->content.Set.size)) { - pat->content.Set.ptr_s = 0; - carry = TRUE; - } - break; - case UPTCharRange: - pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step + - (int)((unsigned char)pat->content.CharRange.ptr_c)); - if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { - pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; - carry = TRUE; - } - break; - case UPTNumRange: - pat->content.NumRange.ptr_n += pat->content.NumRange.step; - if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { - pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; - carry = TRUE; - } - break; - default: - printf("internal error: invalid pattern type (%d)\n", (int)pat->type); - exit (CURLE_FAILED_INIT); - } - } - if(carry) /* first pattern ptr has run into overflow, done! 
*/ - return NULL; - } - - for(j = 0; j < glob->size; ++j) { - if(!(j&1)) { /* every other term (j even) is a literal */ - lit = glob->literal[j/2]; - len = snprintf(buf, buflen, "%s", lit); - buf += len; - buflen -= len; - } - else { /* the rest (i odd) are patterns */ - pat = &glob->pattern[j/2]; - switch(pat->type) { - case UPTSet: - if(pat->content.Set.elements) { - len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); - snprintf(buf, buflen, "%s", - pat->content.Set.elements[pat->content.Set.ptr_s]); - buf += len; - buflen -= len; - } - break; - case UPTCharRange: - *buf++ = pat->content.CharRange.ptr_c; - break; - case UPTNumRange: - len = snprintf(buf, buflen, "%0*d", - pat->content.NumRange.padlength, - pat->content.NumRange.ptr_n); - buf += len; - buflen -= len; - break; - default: - printf("internal error: invalid pattern type (%d)\n", (int)pat->type); - exit (CURLE_FAILED_INIT); - } - } - } - *buf = '\0'; - return strdup(glob->glob_buffer); -} - -char *glob_match_url(char *filename, URLGlob *glob) -{ - char *target; - size_t allocsize; - size_t stringlen=0; - char numbuf[18]; - char *appendthis = NULL; - size_t appendlen = 0; - - /* We cannot use the glob_buffer for storage here since the filename may - * be longer than the URL we use. We allocate a good start size, then - * we need to realloc in case of need. 
- */ - allocsize=strlen(filename)+1; /* make it at least one byte to store the - trailing zero */ - target = malloc(allocsize); - if(NULL == target) - return NULL; /* major failure */ - - while(*filename) { - if(*filename == '#' && ISDIGIT(filename[1])) { - unsigned long i; - char *ptr = filename; - unsigned long num = strtoul(&filename[1], &filename, 10); - i = num-1; - - if(num && (i <= glob->size / 2)) { - URLPattern pat = glob->pattern[i]; - switch (pat.type) { - case UPTSet: - if(pat.content.Set.elements) { - appendthis = pat.content.Set.elements[pat.content.Set.ptr_s]; - appendlen = - strlen(pat.content.Set.elements[pat.content.Set.ptr_s]); - } - break; - case UPTCharRange: - numbuf[0]=pat.content.CharRange.ptr_c; - numbuf[1]=0; - appendthis=numbuf; - appendlen=1; - break; - case UPTNumRange: - snprintf(numbuf, sizeof(numbuf), "%0*d", - pat.content.NumRange.padlength, - pat.content.NumRange.ptr_n); - appendthis = numbuf; - appendlen = strlen(numbuf); - break; - default: - printf("internal error: invalid pattern type (%d)\n", - (int)pat.type); - Curl_safefree(target); - return NULL; - } - } - else { - /* #[num] out of range, use the #[num] in the output */ - filename = ptr; - appendthis=filename++; - appendlen=1; - } - } - else { - appendthis=filename++; - appendlen=1; - } - if(appendlen + stringlen >= allocsize) { - char *newstr; - /* we append a single byte to allow for the trailing byte to be appended - at the end of this function outside the while() loop */ - allocsize = (appendlen + stringlen)*2; - newstr=realloc(target, allocsize + 1); - if(NULL ==newstr) { - Curl_safefree(target); - return NULL; - } - target=newstr; - } - memcpy(&target[stringlen], appendthis, appendlen); - stringlen += appendlen; - } - target[stringlen]= '\0'; - return target; -} diff --git a/src/urlglob.h b/src/urlglob.h deleted file mode 100644 index 44681ea90..000000000 --- a/src/urlglob.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef HEADER_CURL_URLGLOB_H -#define HEADER_CURL_URLGLOB_H 
-/*************************************************************************** - * _ _ ____ _ - * Project ___| | | | _ \| | - * / __| | | | |_) | | - * | (__| |_| | _ <| |___ - * \___|\___/|_| \_\_____| - * - * Copyright (C) 1998 - 2011, Daniel Stenberg, , et al. - * - * This software is licensed as described in the file COPYING, which - * you should have received as part of this distribution. The terms - * are also available at http://curl.haxx.se/docs/copyright.html. - * - * You may opt to use, copy, modify, merge, publish, distribute and/or sell - * copies of the Software, and permit persons to whom the Software is - * furnished to do so, under the terms of the COPYING file. - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY - * KIND, either express or implied. - * - ***************************************************************************/ -#include "setup.h" - -typedef enum { - UPTSet=1, - UPTCharRange, - UPTNumRange -} URLPatternType; - -typedef struct { - URLPatternType type; - union { - struct { - char **elements; - short size; - short ptr_s; - } Set; - struct { - char min_c, max_c; - char ptr_c; - int step; - } CharRange; - struct { - int min_n, max_n; - short padlength; - int ptr_n; - int step; - } NumRange ; - } content; -} URLPattern; - -typedef struct { - char* literal[10]; - URLPattern pattern[9]; - size_t size; - size_t urllen; - char *glob_buffer; - char beenhere; - char errormsg[80]; /* error message buffer */ -} URLGlob; - -int glob_url(URLGlob**, char*, int *, FILE *); -char* glob_next_url(URLGlob*); -char* glob_match_url(char*, URLGlob *); -void glob_cleanup(URLGlob* glob); - -#endif /* HEADER_CURL_URLGLOB_H */ diff --git a/src/vc6curlsrc.dsp b/src/vc6curlsrc.dsp index f550d64fb..d2f6a2fa0 100644 --- a/src/vc6curlsrc.dsp +++ b/src/vc6curlsrc.dsp @@ -283,11 +283,11 @@ SOURCE=.\tool_sleep.c # End Source File # Begin Source File -SOURCE=.\tool_vms.c +SOURCE=.\tool_urlglob.c # End Source File # Begin Source File 
-SOURCE=.\urlglob.c +SOURCE=.\tool_vms.c # End Source File # Begin Source File @@ -463,11 +463,11 @@ SOURCE=.\tool_sleep.h # End Source File # Begin Source File -SOURCE=.\tool_vms.h +SOURCE=.\tool_urlglob.h # End Source File # Begin Source File -SOURCE=.\urlglob.h +SOURCE=.\tool_vms.h # End Source File # Begin Source File -- cgit v1.2.3