# blob: 489721c467f1319221cefc4edb9d011d5ea1c047 [file] [edit]
# Domato - main generator script
# --------------------------------------
#
# Written and maintained by Ivan Fratric <ifratric@google.com>
#
# Copyright 2017 Google Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import re
import random
import argparse
from pathlib import Path
from grammar import Grammar
from svg_tags import _SVG_TYPES
from html_tags import _HTML_TYPES
from mathml_tags import _MATHML_TYPES
# Number of JS lines requested for the first <jsfuzzer> placeholder that
# generate_new_sample() fills in a template.
_N_MAIN_LINES = 1000
# Number of JS lines requested for every later <jsfuzzer> placeholder in the
# same template.
_N_EVENTHANDLER_LINES = 500
# Number of extra elements generate_new_sample() asks generate_html_elements()
# to create (via document.createElement) for each sample.
_N_ADDITIONAL_HTMLVARS = 5
def generate_html_elements(ctx, n):
    """Registers n randomly chosen HTML elements created from JavaScript.

    For each element a random tag is picked from _HTML_TYPES, a fresh
    'htmlvarNNNNN' variable name is allocated, and a JavaScript line that
    creates the element with document.createElement is appended to the
    context.

    Args:
      ctx: Sample context dictionary. Its 'htmlvarctr' counter is advanced,
          and its 'htmlvars' list and 'htmlvargen' string are extended
          in place.
      n: Number of elements to generate.
    """
    for _ in range(n):
        tag = random.choice(list(_HTML_TYPES))
        tagtype = _HTML_TYPES[tag]

        counter = ctx['htmlvarctr'] + 1
        ctx['htmlvarctr'] = counter
        varname = 'htmlvar%05d' % counter

        ctx['htmlvars'].append({'name': varname, 'type': tagtype})
        # The leading comment is emitted as-is into the generated script so
        # the variable's name and type travel with its declaration.
        ctx['htmlvargen'] += (
            '/* newvar{%s:%s} */ var %s = document.createElement("%s"); //%s\n'
            % (varname, tagtype, varname, tag, tagtype)
        )
def add_html_ids(matchobj, ctx):
    """Adds an id attribute to a matched opening tag and records a variable.

    Intended as a re.sub() replacement callback. The tag name is taken from
    the matched text with its first and last characters removed. If the name
    is a known HTML, SVG or MathML tag (checked in that order), a new variable
    name is allocated from the matching counter, a JavaScript line that
    looks the element up with document.getElementById is appended to the
    context, and the matched text is returned with an id attribute appended.

    Args:
      matchobj: Regular expression match object covering the opening tag.
      ctx: Sample context dictionary. One of its 'htmlvarctr', 'svgvarctr'
          or 'mathmlvarctr' counters is advanced, and its 'htmlvars' list
          and 'htmlvargen' string are extended in place.

    Returns:
      The matched text followed by 'id="<varname>" ' for known tags, or the
      matched text unchanged for unknown tags.
    """
    matched = matchobj.group(0)
    tagname = matched[1:-1]

    # (type table, counter key in ctx, variable name prefix), in lookup order.
    families = (
        (_HTML_TYPES, 'htmlvarctr', 'htmlvar'),
        (_SVG_TYPES, 'svgvarctr', 'svgvar'),
        (_MATHML_TYPES, 'mathmlvarctr', 'mathmlvar'),
    )
    for types, counter_key, prefix in families:
        if tagname not in types:
            continue

        ctx[counter_key] += 1
        varname = '%s%05d' % (prefix, ctx[counter_key])
        elemtype = types[tagname]

        # All families share the 'htmlvars' list; only counters and name
        # prefixes are kept separate.
        ctx['htmlvars'].append({'name': varname, 'type': elemtype})
        ctx['htmlvargen'] += (
            '/* newvar{%s:%s} */ var %s = document.getElementById("%s"); //%s\n'
            % (varname, elemtype, varname, varname, elemtype)
        )
        return matched + 'id="' + varname + '" '

    return matched
def generate_function_body(jsgrammar, htmlctx, num_lines):
    """Builds the JavaScript text that replaces one <jsfuzzer> placeholder.

    The result consists of a fixed prologue that initialises 'fuzzervars',
    the element variable declarations collected in the context, the code
    produced by the JS grammar, and a fixed epilogue that resets
    'fuzzervars' and calls freememory().

    Args:
      jsgrammar: Object whose _generate_code(num_lines, variables) method
          returns the generated JavaScript as a string.
      htmlctx: Sample context dictionary; 'htmlvargen' and 'htmlvars' are
          read from it.
      num_lines: Number of lines to request from the grammar.

    Returns:
      A string containing the JavaScript function body.
    """
    prologue = (
        'var fuzzervars = {};\n\n'
        "SetVariable(fuzzervars, window, 'Window');\n"
        "SetVariable(fuzzervars, document, 'Document');\n"
        "SetVariable(fuzzervars, document.body.firstChild, 'Element');\n\n"
        '//beginjs\n'
    )
    epilogue = (
        '\n//endjs\n'
        'var fuzzervars = {};\nfreememory()\n'
    )
    pieces = [
        prologue,
        htmlctx['htmlvargen'],
        jsgrammar._generate_code(num_lines, htmlctx['htmlvars']),
        epilogue,
    ]
    return ''.join(pieces)
def check_grammar(grammar):
    """Reports grammar symbols that are referenced but have no creators.

    Every non-text part of every rule is inspected; for each part whose tag
    name is missing from the grammar's creators, a message is printed.

    Args:
      grammar: The grammar to check.
    """
    for rule in grammar._all_rules:
        for part in rule['parts']:
            # Text parts carry no tag name, so only the others are checked.
            if part['type'] != 'text' and part['tagname'] not in grammar._creators:
                print('No creators for type ' + part['tagname'])
def generate_new_sample(template, htmlgrammar, cssgrammar, jsgrammar):
    """Generates one sample by filling in the placeholders of a template.

    CSS is generated from the 'rules' symbol and HTML from the 'bodyelements'
    symbol. Opening tags in the generated HTML are given id attributes so
    they can be referenced from JavaScript, and a few additional elements are
    registered for creation from script. Every '<cssfuzzer>' and
    '<htmlfuzzer>' placeholder is replaced with the generated CSS and HTML.
    Each '<jsfuzzer>' placeholder is replaced with its own generated function
    body: the first one uses _N_MAIN_LINES lines, all later ones use
    _N_EVENTHANDLER_LINES lines.

    Args:
      template: A template string.
      htmlgrammar: Grammar for generating HTML code.
      cssgrammar: Grammar for generating CSS code.
      jsgrammar: Grammar for generating JS code.

    Returns:
      A string containing sample data.
    """
    css = cssgrammar.generate_symbol('rules')
    html = htmlgrammar.generate_symbol('bodyelements')

    htmlctx = dict(
        htmlvars=[],
        htmlvarctr=0,
        svgvarctr=0,
        mathmlvarctr=0,
        htmlvargen='',
    )

    def tag_with_id(match):
        return add_html_ids(match, htmlctx)

    # Only opening tags followed by a space are matched.
    html = re.sub(r'<[a-zA-Z0-9_-]+ ', tag_with_id, html)
    generate_html_elements(htmlctx, _N_ADDITIONAL_HTMLVARS)

    sample = template.replace('<cssfuzzer>', css).replace('<htmlfuzzer>', html)

    # Placeholders are filled one at a time so that each gets different code;
    # the line budget drops after the first one.
    numlines = _N_MAIN_LINES
    while '<jsfuzzer>' in sample:
        body = generate_function_body(jsgrammar, htmlctx, numlines)
        sample = sample.replace('<jsfuzzer>', body, 1)
        numlines = _N_EVENTHANDLER_LINES

    return sample
def generate_samples(template, outfiles):
    """Generates a set of samples and writes them to the output files.

    The HTML, CSS and JS grammars are loaded from the 'rules' directory next
    to this script. If any of them fails to parse, a message is printed and
    nothing is generated.

    Args:
      template: A template string used for every sample.
      outfiles: A list of output filenames.
    """
    grammar_dir = os.path.join(os.path.dirname(__file__), 'rules')

    # Load in the order html, css, js and stop at the first grammar that
    # reports parse errors. check_grammar() can be called on a loaded grammar
    # here when debugging rule files.
    grammars = {}
    for name in ('html', 'css', 'js'):
        grammar = Grammar()
        errors = grammar.parse_from_file(os.path.join(grammar_dir, name + '.txt'))
        if errors > 0:
            print('There were errors parsing ' + name + ' grammar')
            return
        grammars[name] = grammar

    htmlgrammar = grammars['html']
    cssgrammar = grammars['css']
    jsgrammar = grammars['js']

    # JS and HTML grammar need access to CSS grammar.
    # Add it as import
    htmlgrammar.add_import('cssgrammar', cssgrammar)
    jsgrammar.add_import('cssgrammar', cssgrammar)

    for outfile in outfiles:
        result = generate_new_sample(template, htmlgrammar, cssgrammar, jsgrammar)
        if result is None:
            continue

        print('Writing a sample to ' + outfile)
        try:
            with open(outfile, 'w') as f:
                f.write(result)
        except IOError:
            # Best effort: report the failure and move on to the next file.
            print('Error writing to output')
def get_argument_parser():
    """Creates the command line argument parser for the generator.

    Returns:
      An argparse.ArgumentParser accepting -f/--file, -o/--output_dir,
      -n/--no_of_files and -t/--template. The template defaults to
      'template.html' in the directory containing this script.
    """
    default_template = Path(__file__).parent / 'template.html'

    # (flags, keyword arguments) for each option, in help-output order.
    options = (
        (("-f", "--file"),
         dict(help="File name which is to be generated in the same directory")),
        (('-o', '--output_dir'),
         dict(type=str,
              help='The output directory to put the generated files in')),
        (('-n', '--no_of_files'),
         dict(type=int,
              help='number of files to be generated')),
        (('-t', '--template'),
         dict(type=Path, default=default_template,
              help='template file you want to use')),
    )

    parser = argparse.ArgumentParser(description="DOMATO (A DOM FUZZER)")
    for flags, kwargs in options:
        parser.add_argument(*flags, **kwargs)
    return parser
def main():
    """Parses the command line and generates the requested samples.

    The template file is read first. Then, depending on the arguments:
      * with -f/--file, a single sample is written to that file;
      * with -o/--output_dir and -n/--no_of_files, that many samples named
        'fuzz-NNNNN.html' are written to the output directory, which is
        created (including missing parent directories) if it does not exist;
      * with -o/--output_dir but no -n, a hint to use -n is printed;
      * otherwise the usage help is printed.
    """
    parser = get_argument_parser()
    args = parser.parse_args()

    with args.template.open("r") as f:
        template = f.read()

    if args.file:
        generate_samples(template, [args.file])
    elif args.output_dir:
        if not args.no_of_files:
            print("Please use switch -n to specify the number of files")
        else:
            print('Running on ClusterFuzz')
            out_dir = args.output_dir
            nsamples = args.no_of_files
            print('Output directory: ' + out_dir)
            print('Number of samples: ' + str(nsamples))

            if not os.path.exists(out_dir):
                # makedirs rather than mkdir so that a nested output path
                # whose parents are missing does not abort the run.
                os.makedirs(out_dir)

            outfiles = [
                os.path.join(out_dir, 'fuzz-' + str(i).zfill(5) + '.html')
                for i in range(nsamples)
            ]
            generate_samples(template, outfiles)
    else:
        parser.print_help()
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()