about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Max Dymond <cmeister2@gmail.com> 2017-09-11 20:51:58 +0100
committer Daniel Stenberg <daniel@haxx.se> 2017-09-18 23:23:13 +0200
commit c73ebb85374164515eb9df6d619a5822b6568599 (patch)
tree 12de79815f33cece38fd77ce9ed6443cdf247afa
parent bec50cc285995b18d57e5e5caf17e33100795f09 (diff)
ossfuzz: changes before merging the generated corpora
Before merging in the oss-fuzz corpora from Google, there are some changes to the fuzzer.

- Add a read corpus script, to display corpus files nicely.
- Change the behaviour of the fuzzer so that TLV parse failures all now go down the same execution paths, which should reduce the size of the corpora.
- Make unknown TLVs a failure to parse, which should decrease the size of the corpora as well.

Closes #1881
-rw-r--r-- tests/fuzz/corpus.py 96
-rw-r--r-- tests/fuzz/curl_fuzzer.cc 14
-rw-r--r-- tests/fuzz/curl_fuzzer.h 2
-rwxr-xr-x tests/fuzz/generate_corpus.py 48
-rwxr-xr-x tests/fuzz/read_corpus.py 69
5 files changed, 179 insertions, 50 deletions
diff --git a/tests/fuzz/corpus.py b/tests/fuzz/corpus.py
new file mode 100644
index 000000000..5474c99af
--- /dev/null
+++ b/tests/fuzz/corpus.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# Common corpus functions
+import logging
+import struct
+log = logging.getLogger(__name__)
+
+
+class BaseType(object):
+ TYPE_URL = 1
+ TYPE_RSP1 = 2
+ TYPE_USERNAME = 3
+ TYPE_PASSWORD = 4
+ TYPE_POSTFIELDS = 5
+ TYPE_HEADER = 6
+ TYPE_COOKIE = 7
+ TYPE_UPLOAD1 = 8
+ TYPE_RANGE = 9
+ TYPE_CUSTOMREQUEST = 10
+ TYPE_MAIL_RECIPIENT = 11
+ TYPE_MAIL_FROM = 12
+
+
+class TLVEncoder(BaseType):
+ def __init__(self, output):
+ self.output = output
+
+ def write_string(self, tlv_type, wstring):
+ data = wstring.encode("utf-8")
+ self.write_tlv(tlv_type, len(data), data)
+
+ def write_bytes(self, tlv_type, bytedata):
+ self.write_tlv(tlv_type, len(bytedata), bytedata)
+
+ def maybe_write_string(self, tlv_type, wstring):
+ if wstring is not None:
+ self.write_string(tlv_type, wstring)
+
+ def write_tlv(self, tlv_type, tlv_length, tlv_data=None):
+ log.debug("Writing TLV %d, length %d, data %r",
+ tlv_type,
+ tlv_length,
+ tlv_data)
+
+ data = struct.pack("!H", tlv_type)
+ self.output.write(data)
+
+ data = struct.pack("!L", tlv_length)
+ self.output.write(data)
+
+ if tlv_data:
+ self.output.write(tlv_data)
+
+
+class TLVDecoder(BaseType):
+ def __init__(self, inputdata):
+ self.inputdata = inputdata
+ self.pos = 0
+ self.tlv = None
+
+ def __iter__(self):
+ self.pos = 0
+ self.tlv = None
+ return self
+
+ def __next__(self):
+ if self.tlv:
+ self.pos += self.tlv.total_length()
+
+ if (self.pos + TLVHeader.TLV_DECODE_FMT_LEN) > len(self.inputdata):
+ raise StopIteration
+
+ # Get the next TLV
+ self.tlv = TLVHeader(self.inputdata[self.pos:])
+ return self.tlv
+
+ next = __next__
+
+
+class TLVHeader(BaseType):
+ TLV_DECODE_FMT = "!HL"
+ TLV_DECODE_FMT_LEN = struct.calcsize(TLV_DECODE_FMT)
+
+ def __init__(self, data):
+ # Parse the data to populate the TLV fields
+ (self.type, self.length) = struct.unpack(self.TLV_DECODE_FMT, data[0:self.TLV_DECODE_FMT_LEN])
+
+ # Get the remaining data and store it.
+ self.data = data[self.TLV_DECODE_FMT_LEN:self.TLV_DECODE_FMT_LEN + self.length]
+
+ def __repr__(self):
+ return ("{self.__class__.__name__}(type={self.type!r}, length={self.length!r}, data={self.data!r})"
+ .format(self=self))
+
+ def total_length(self):
+ return self.TLV_DECODE_FMT_LEN + self.length
\ No newline at end of file
diff --git a/tests/fuzz/curl_fuzzer.cc b/tests/fuzz/curl_fuzzer.cc
index fadb3231b..dd0298f36 100644
--- a/tests/fuzz/curl_fuzzer.cc
+++ b/tests/fuzz/curl_fuzzer.cc
@@ -53,8 +53,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
for(tlv_rc = fuzz_get_first_tlv(&fuzz, &tlv);
tlv_rc == 0;
tlv_rc = fuzz_get_next_tlv(&fuzz, &tlv)) {
+
/* Have the TLV in hand. Parse the TLV. */
- fuzz_parse_tlv(&fuzz, &tlv);
+ rc = fuzz_parse_tlv(&fuzz, &tlv);
+
+ if(rc != 0) {
+ /* Failed to parse the TLV. Can't continue. */
+ goto EXIT_LABEL;
+ }
}
if(tlv_rc != TLV_RC_NO_MORE_TLVS) {
@@ -408,8 +414,10 @@ int fuzz_parse_tlv(FUZZ_DATA *fuzz, TLV *tlv)
FSINGLETONTLV(TLV_TYPE_MAIL_FROM, mail_from, CURLOPT_MAIL_FROM);
default:
- /* The fuzzer generates lots of unknown TLVs, so don't do anything if
- the TLV isn't known. */
+ /* The fuzzer generates lots of unknown TLVs - we don't want these in the
+ corpus so we reject any unknown TLVs. */
+ rc = 255;
+ goto EXIT_LABEL;
break;
}
diff --git a/tests/fuzz/curl_fuzzer.h b/tests/fuzz/curl_fuzzer.h
index e7af89bb4..219ac3ee3 100644
--- a/tests/fuzz/curl_fuzzer.h
+++ b/tests/fuzz/curl_fuzzer.h
@@ -173,7 +173,7 @@ char *fuzz_tlv_to_string(TLV *tlv);
{ \
if (!(COND)) \
{ \
- rc = 1; \
+ rc = 255; \
goto EXIT_LABEL; \
} \
}
diff --git a/tests/fuzz/generate_corpus.py b/tests/fuzz/generate_corpus.py
index 04c799926..cffdd37bf 100755
--- a/tests/fuzz/generate_corpus.py
+++ b/tests/fuzz/generate_corpus.py
@@ -4,7 +4,7 @@
import argparse
import logging
-import struct
+import corpus
import sys
sys.path.append("..")
import curl_test_data
@@ -15,7 +15,7 @@ def generate_corpus(options):
td = curl_test_data.TestData("../data")
with open(options.output, "wb") as f:
- enc = TLVEncoder(f)
+ enc = corpus.TLVEncoder(f)
# Write the URL to the file.
enc.write_string(enc.TYPE_URL, options.url)
@@ -61,50 +61,6 @@ def generate_corpus(options):
return ScriptRC.SUCCESS
-class TLVEncoder(object):
- TYPE_URL = 1
- TYPE_RSP1 = 2
- TYPE_USERNAME = 3
- TYPE_PASSWORD = 4
- TYPE_POSTFIELDS = 5
- TYPE_HEADER = 6
- TYPE_COOKIE = 7
- TYPE_UPLOAD1 = 8
- TYPE_RANGE = 9
- TYPE_CUSTOMREQUEST = 10
- TYPE_MAIL_RECIPIENT = 11
- TYPE_MAIL_FROM = 12
-
- def __init__(self, output):
- self.output = output
-
- def write_string(self, tlv_type, wstring):
- data = wstring.encode("utf-8")
- self.write_tlv(tlv_type, len(data), data)
-
- def write_bytes(self, tlv_type, bytedata):
- self.write_tlv(tlv_type, len(bytedata), bytedata)
-
- def maybe_write_string(self, tlv_type, wstring):
- if wstring is not None:
- self.write_string(tlv_type, wstring)
-
- def write_tlv(self, tlv_type, tlv_length, tlv_data=None):
- log.debug("Writing TLV %d, length %d, data %r",
- tlv_type,
- tlv_length,
- tlv_data)
-
- data = struct.pack("!H", tlv_type)
- self.output.write(data)
-
- data = struct.pack("!L", tlv_length)
- self.output.write(data)
-
- if tlv_data:
- self.output.write(tlv_data)
-
-
def get_options():
parser = argparse.ArgumentParser()
parser.add_argument("--output", required=True)
diff --git a/tests/fuzz/read_corpus.py b/tests/fuzz/read_corpus.py
new file mode 100755
index 000000000..bb8fcedcd
--- /dev/null
+++ b/tests/fuzz/read_corpus.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+#
+# Simple script which reads corpus files.
+
+import argparse
+import logging
+import sys
+import corpus
+log = logging.getLogger(__name__)
+
+
+def read_corpus(options):
+ with open(options.input, "rb") as f:
+ dec = corpus.TLVDecoder(f.read())
+ for tlv in dec:
+ print(tlv)
+
+ return ScriptRC.SUCCESS
+
+
+def get_options():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--input", required=True)
+ return parser.parse_args()
+
+
+def setup_logging():
+ """
+ Set up logging from the command line options
+ """
+ root_logger = logging.getLogger()
+ formatter = logging.Formatter("%(asctime)s %(levelname)-5.5s %(message)s")
+ stdout_handler = logging.StreamHandler(sys.stdout)
+ stdout_handler.setFormatter(formatter)
+ stdout_handler.setLevel(logging.DEBUG)
+ root_logger.addHandler(stdout_handler)
+ root_logger.setLevel(logging.DEBUG)
+
+
+class ScriptRC(object):
+ """Enum for script return codes"""
+ SUCCESS = 0
+ FAILURE = 1
+ EXCEPTION = 2
+
+
+class ScriptException(Exception):
+ pass
+
+
+def main():
+ # Get the options from the user.
+ options = get_options()
+
+ setup_logging()
+
+ # Run main script.
+ try:
+ rc = read_corpus(options)
+ except Exception as e:
+ log.exception(e)
+ rc = ScriptRC.EXCEPTION
+
+ log.info("Returning %d", rc)
+ return rc
+
+
+if __name__ == '__main__':
+ sys.exit(main())