regression/testspec.py - 3p/nicta/cogent - Git at Google

 #!/usr/bin/python
 #
 # Copyright 2014, NICTA
 #
 # This software may be distributed and modified according to the terms of
 # the BSD 2-Clause license. Note that NO WARRANTY is provided.
 # See "LICENSE_BSD2.txt" for details.
 #
 # @TAG(NICTA_BSD)
 #

 import os
 import copy
 import heapq
 import glob
 import subprocess
 import itertools
 import argparse
 import sys

 from lxml import etree

 # Directory containing this script (symlinks resolved).
 REGRESSION_DIR = os.path.dirname(os.path.realpath(__file__))
 # DTD that validate_xml() checks regression XML files against; it is expected
 # to sit next to this script.
 REGRESSION_DTD = os.path.join(REGRESSION_DIR, "regression.dtd")

 class TestSpecParseException(Exception):
     """Raised when a regression test specification cannot be accepted.

     In this file it signals unknown tags, duplicate test names, unknown
     dependency names and dependency cycles found while in strict mode.
     """

 class TestEnv():
     """Settings inherited by tests while a specification is being walked.

     pwd     -- base directory that "cwd" is joined onto
     cwd     -- working directory for tests, relative to "pwd"
     timeout -- timeout handed to tests (0 until a tag overrides it)
     depends -- names of tests that tests in this scope depend on
     """

     def __init__(self, pwd):
         # Neutral defaults; parse_attributes() refines them from XML tags.
         self.depends = set()
         self.timeout = 0
         self.cwd = "."
         self.pwd = pwd

 class Test():
     """A single regression test: a command string plus scheduling metadata.

     name    -- identifier used in dependency lists
     command -- command text of the test
     timeout -- timeout for the test (0 by default)
     cwd     -- directory the command is associated with
     depends -- set of test names this test depends on; a fresh empty set is
                created when omitted
     """

     def __init__(self, name, command, timeout=0, cwd="", depends=None):
         self.name = name
         self.command = command
         self.timeout = timeout
         self.cwd = cwd

         # Identity check: "== None" would dispatch to the argument's __eq__.
         # Each test gets its own set so later mutation cannot leak between
         # instances.
         if depends is None:
             depends = set()
         self.depends = depends

 def parse_attributes(tag, env, strict=True):
     """Apply the "timeout", "cwd" and "depends" attributes of a tag to env.

     tag    -- object exposing get(name) for attribute lookup
     env    -- environment object, updated in place
     strict -- when true, a "timeout" that is not an integer re-raises the
               conversion error; when false the bad value is ignored and the
               current timeout is kept

     Missing or empty attributes leave the matching setting untouched.
     "depends" is split on whitespace and the names are added to env.depends.
     """
     timeout = tag.get("timeout")
     if timeout:
         try:
             env.timeout = int(timeout)
         except (TypeError, ValueError):
             # Only conversion failures are tolerated; a bare "except" would
             # also swallow KeyboardInterrupt and SystemExit.
             if strict:
                 raise

     cwd = tag.get("cwd")
     if cwd:
         env.cwd = cwd

     depends = tag.get("depends")
     if depends:
         env.depends |= set(depends.split())

 def parse_test(doc, env, strict=True):
     """Parse a <test> tag."""
     env = copy.deepcopy(env)
     parse_attributes(doc, env, strict=strict)
     return Test(doc.get("name"), doc.text.strip(),
             timeout=env.timeout,
             cwd=os.path.normpath(os.path.join(env.pwd, env.cwd)),
             depends=env.depends)

 def parse_sequence(doc, env, strict=True):
     # Create a copy of env so that the scope is restored.
     env = copy.deepcopy(env)

     # Parse attributes.
     parse_attributes(doc, env)

     # Parse children.
     tests = []
     for child in doc:
         if child.tag == "set":
             # Parse set, recording dependencies of the tests inside the set.
             new_tests = parse_set(child, env, strict=strict)
             for x in new_tests:
                 env.depends.add(x.name)
             tests += new_tests
         elif child.tag == "sequence":
             # Parse sequence, recording dependencies of the tests inside the set.
             new_tests = parse_sequence(child, env, strict=strict)
             for x in new_tests:
                 env.depends.add(x.name)
             tests += new_tests
         elif child.tag == "test":
             tests.append(parse_test(child, env, strict=strict))
             env.depends.add(tests[-1].name)
         elif strict:
             raise TestSpecParseException("Unknown tag '%s'" % child.tag)

     return tests

 def parse_set(doc, env, strict=True):
     """Parse a <set> tag and return the tests found beneath it.

     Unlike a sequence, children of a set gain no dependencies on one
     another; they only inherit what the enclosing scope provides. Unknown
     child tags raise TestSpecParseException when "strict" is true and are
     skipped otherwise.
     """
     # Private copy: attributes on this tag must not affect the caller's env.
     scoped = copy.deepcopy(env)
     parse_attributes(doc, scoped, strict=strict)

     collected = []
     for node in doc:
         kind = node.tag
         if kind == "set":
             collected.extend(parse_set(node, scoped, strict=strict))
         elif kind == "sequence":
             collected.extend(parse_sequence(node, scoped, strict=strict))
         elif kind == "test":
             collected.append(parse_test(node, scoped, strict=strict))
         elif strict:
             raise TestSpecParseException("Unknown tag '%s'" % kind)

     return collected

 def find_cycle(keys, depends_on):
     """Return the shortest cyclic dependency path found, or None.

     A depth-first search is started afresh from every key. Each search
     yields the path from its start node up to and including the first node
     that is visited twice; the shortest such path over all start nodes wins
     (the earliest key wins ties). Restarting per key makes this quadratic.

     keys       -- iterable of nodes to start from
     depends_on -- function mapping a node to an iterable of its dependencies
     """
     def walk(node, finished, on_path):
         if node in finished:
             return None
         if node in on_path:
             # Reached a node that is still being explored: cycle closed.
             return [node]
         on_path.add(node)
         for dep in depends_on(node):
             tail = walk(dep, finished, on_path)
             if tail is not None:
                 return [node] + tail
         on_path.discard(node)
         finished.add(node)
         return None

     best = None
     for start in keys:
         found = walk(start, set(), set())
         if found is None:
             continue
         if best is None or len(found) < len(best):
             best = found
     return best

 def toposort(keys, prio, depends_on):
     """Topologically sort "keys", honouring "prio" where dependencies allow.

     keys       -- iterable of nodes
     prio       -- function giving a node's priority; lower values come first
     depends_on -- function giving the collection of nodes a node depends on

     Returns the nodes as a list in which every node follows all of its
     dependencies. Raises ValueError naming a cycle if not every node could
     be scheduled.
     """
     ordered_keys = sorted(keys, key=prio)

     # "dependents" maps a node to the nodes waiting on it; "pending" counts
     # how many dependencies of each node have not been emitted yet.
     dependents = {}
     pending = {}
     for key in ordered_keys:
         parents = depends_on(key)
         pending[key] = len(parents)
         for parent in parents:
             dependents.setdefault(parent, set()).add(key)

     # Nodes with nothing pending form a priority queue. A sorted list
     # already satisfies the heap invariant.
     ready = sorted([(prio(k), k) for k in ordered_keys if pending[k] == 0])
     result = []
     while ready:
         _, current = heapq.heappop(ready)
         result.append(current)
         for child in dependents.get(current, []):
             pending[child] -= 1
             if pending[child] == 0:
                 heapq.heappush(ready, (prio(child), child))

     # Anything never emitted is stuck behind a cycle.
     if len(ordered_keys) != len(result):
         cycle = find_cycle(ordered_keys, depends_on)
         raise ValueError("Circular dependency involving: %s" %
                 (" -> ".join(cycle)))

     return result

 def validate_xml(filename):
     """Check "filename" against the regression DTD.

     Returns None when the document validates. Raises Exception carrying the
     DTD error log otherwise. Errors from opening the DTD or parsing the XML
     propagate unchanged.
     """
     # Load the DTD that sits next to this script.
     with open(REGRESSION_DTD) as dtd_source:
         schema = etree.DTD(dtd_source)

     # Comments are dropped before validation.
     xml_parser = etree.XMLParser(remove_comments=True)
     tree = etree.parse(filename, parser=xml_parser)
     if schema.validate(tree):
         return

     header = "%s does not validate against DTD:\n\n" % filename
     raise Exception(header + str(schema.error_log))

 def parse_testsuite_xml(filename, strict=True):
     """Parse one regression XML file and return the list of its tests.

     In strict mode the file is first validated against the DTD. Otherwise
     validation is skipped and the XML parser is asked to recover from
     malformed input. The root element is treated as a <set>, with relative
     working directories resolved against the directory part of "filename".
     """
     if strict:
         validate_xml(filename)

     lenient = not strict
     xml_parser = etree.XMLParser(remove_comments=True, recover=lenient)
     root = etree.parse(filename, parser=xml_parser).getroot()

     # Fresh environment rooted at the XML file's directory.
     base_env = TestEnv(os.path.dirname(filename))
     return parse_set(root, base_env, strict=strict)

 def process_tests(tests, strict=False):
     """Given a list of tests (possibly from multiple XML files), check for
     errors and return a list of tests in dependency-satisfying order.

     tests  -- list of Test objects; names and "depends" sets may be modified
               in place when not strict
     strict -- when true, duplicate names, unknown dependencies and
               dependency cycles raise TestSpecParseException. When false,
               duplicates are renamed with a numeric suffix, unknown
               dependencies are dropped, and on a cycle the tests are
               returned sorted by name with dependencies ignored.
     """
     # Check for duplicate names.
     seen_names = set()
     for t in tests:
         if t.name in seen_names:
             if strict:
                 raise TestSpecParseException("Duplicate test name detected: %s" % t.name)
             # Pick the first unused "<name>_<n>", starting at n = 2.
             for x in itertools.count(2):
                 proposed_name = "%s_%d" % (t.name, x)
                 if proposed_name not in seen_names:
                     t.name = proposed_name
                     break
         seen_names.add(t.name)

     # Check dependencies.
     valid_names = set(t.name for t in tests)
     for test in tests:
         # Iterate over a sorted copy: the set itself is modified below.
         for dependency_name in sorted(test.depends):
             if dependency_name not in valid_names:
                 if strict:
                     raise TestSpecParseException(
                             "Depedency '%s' invalid." % dependency_name)
                 test.depends.remove(dependency_name)

     # Toposort, using the input position as priority so the original order
     # is kept wherever dependencies allow.
     names = [t.name for t in tests]
     test_ordering = dict((name, n) for (n, name) in enumerate(names))
     test_depends = dict((t.name, t.depends) for t in tests)
     try:
         ordering = toposort(names,
                 lambda x: test_ordering[x],
                 lambda x: test_depends[x])
     except ValueError as e:
         # "except ValueError, e" and "e.message" only work on Python 2.
         if strict:
             raise TestSpecParseException(
                     "Cycle in dependencies: %s" % e)
         # There is a cycle, but we want to continue anyway.
         # Just ignore all deps and hope for the best. A list is used so the
         # name order cannot be lost to dict iteration order.
         ordering = sorted(names)
     rank = dict((name, n) for (n, name) in enumerate(ordering))
     tests = sorted(tests, key=lambda k: rank[k.name])

     return tests

 def legacy_testspec(root):
     """Find tests inside makefiles.

     Looks for files named "IsaMakefile" one and two directory levels below
     "root", runs "<root>/isabelle/bin/isabelle make -f <file>
     report-regression" in each file's directory, and turns every
     whitespace-separated word of the output into a Test named
     "<directory name>/<word>" with a four hour timeout. Makefiles for which
     the command exits with an error contribute no tests.
     """
     # Find candidate "IsaMakefile"s
     candidates = sorted(
         glob.glob(os.path.join(root, "*", "IsaMakefile"))
         + glob.glob(os.path.join(root, "*", "*", "IsaMakefile")))

     # Get isabelle binary.
     isabelle_bin = os.path.abspath(os.path.join(root, "isabelle", "bin", "isabelle"))

     # Run "isabelle make report-regression" on each.
     def report_regression(filename):
         filename = os.path.abspath(filename)
         base_name = os.path.split(os.path.dirname(filename))[1]
         try:
             # os.devnull instead of a hard-coded "/dev/null";
             # universal_newlines makes check_output return text, so the
             # target names can be concatenated with str on Python 3 too.
             with open(os.devnull, "w") as devnull:
                 results = subprocess.check_output(
                     [isabelle_bin, "make", "-f", filename, "report-regression"],
                     cwd=os.path.dirname(filename),
                     stderr=devnull,
                     universal_newlines=True)
         except subprocess.CalledProcessError:
             return []
         return [(base_name + "/" + x, x) for x in results.strip().split()]

     # Search for tests.
     tests = []
     for candidate in candidates:
         makefile = os.path.abspath(candidate)
         for (name, target) in report_regression(makefile):
             new_test = Test(name, "isabelle make " + target, timeout=4*3600,
                         cwd=os.path.dirname(makefile))
             tests.append(new_test)
     return tests

 def parse_test_files(xml_files, strict=False):
     """Parse several regression XML files and return their tests, ordered.

     xml_files -- iterable of file names; files resolving to the same
                  (device, inode) pair are parsed only once
     strict    -- forwarded to the per-file parser and to process_tests().
                  When true, any error while parsing a file is re-raised;
                  when false, the failing file is reported on stderr and
                  skipped.
     """
     tests = []
     seen_files = set()
     for x in xml_files:
         # Some files may be symlinked; don't process them multiple times.
         try:
             st = os.stat(x)
         except OSError:
             # Cannot stat: let the parser below report the real problem.
             pass
         else:
             file_id = (st.st_dev, st.st_ino)
             if file_id in seen_files:
                 continue
             seen_files.add(file_id)

         try:
             # Forward "strict" so relaxed mode really uses the lenient parser.
             tests += parse_testsuite_xml(x, strict=strict)
         except Exception:
             # "except Exception" lets KeyboardInterrupt/SystemExit through.
             sys.stderr.write("Exception while parsing file: %s.\n" % x)
             if strict:
                 raise
     return process_tests(tests, strict=strict)

 def main():
     """Command-line entry point: load tests and print one line per test.

     Tests come either from the XML files named on the command line or, with
     --legacy, from IsaMakefiles below the current directory. The two modes
     are mutually exclusive and one of them is required.
     """
     parser = argparse.ArgumentParser(description="Regression Framework Testspec Parser")
     parser.add_argument("file", metavar="FILE", type=str, nargs="*",
             help="a regression XML file to parse")
     parser.add_argument("-r", "--relax", action="store_false", dest="strict",
             help="be less strict when parsing XML files")
     parser.add_argument("-l", "--legacy", action="store_true",
             help="use legacy 'IsaMakefile' specs")
     args = parser.parse_args()

     # Exactly one source of tests must be selected; parser.error() exits.
     have_files = len(args.file) > 0
     if not args.legacy and not have_files:
         parser.error("Please provide at least one XML file.")
     if args.legacy and have_files:
         parser.error("Can not use both legacy mode and XML files.")

     if args.legacy:
         tests = legacy_testspec(os.getcwd())
     else:
         tests = parse_test_files(args.file, strict=args.strict)

     # Print results
     for test in tests:
         parents = ",".join(test.depends)
         print("\"%s\" [timeout=%d, parents=%s, cwd=%s]" % (
             test.command, test.timeout, parents, test.cwd))


 # Run the command-line interface only when executed as a script, so that
 # importing this module has no side effects.
 if __name__ == "__main__":
     main()
	#!/usr/bin/python
	#
	# Copyright 2014, NICTA
	#
	# This software may be distributed and modified according to the terms of
	# the BSD 2-Clause license. Note that NO WARRANTY is provided.
	# See "LICENSE_BSD2.txt" for details.
	#
	# @TAG(NICTA_BSD)
	#

	import os
	import copy
	import heapq
	import glob
	import subprocess
	import itertools
	import argparse
	import sys

	from lxml import etree

	# Directory containing this script (symlinks resolved).
	REGRESSION_DIR = os.path.dirname(os.path.realpath(__file__))
	# DTD that validate_xml() checks regression XML files against; it is expected
	# to sit next to this script.
	REGRESSION_DTD = os.path.join(REGRESSION_DIR, "regression.dtd")

	class TestSpecParseException(Exception):
	    """Raised when a regression test specification cannot be accepted.

	    In this file it signals unknown tags, duplicate test names, unknown
	    dependency names and dependency cycles found while in strict mode.
	    """

	class TestEnv():
	    """Settings inherited by tests while a specification is being walked.

	    pwd     -- base directory that "cwd" is joined onto
	    cwd     -- working directory for tests, relative to "pwd"
	    timeout -- timeout handed to tests (0 until a tag overrides it)
	    depends -- names of tests that tests in this scope depend on
	    """

	    def __init__(self, pwd):
	        # Neutral defaults; parse_attributes() refines them from XML tags.
	        self.pwd = pwd
	        self.cwd = "."
	        self.timeout = 0
	        self.depends = set()

	class Test():
	    """A single regression test: a command string plus scheduling metadata.

	    name    -- identifier used in dependency lists
	    command -- command text of the test
	    timeout -- timeout for the test (0 by default)
	    cwd     -- directory the command is associated with
	    depends -- set of test names this test depends on; a fresh empty set is
	               created when omitted
	    """

	    def __init__(self, name, command, timeout=0, cwd="", depends=None):
	        self.name = name
	        self.command = command
	        self.timeout = timeout
	        self.cwd = cwd

	        # Identity check: "== None" would dispatch to the argument's __eq__.
	        if depends is None:
	            depends = set()
	        self.depends = depends

	def parse_attributes(tag, env, strict=True):
	    """Apply the "timeout", "cwd" and "depends" attributes of a tag to env.

	    tag    -- object exposing get(name) for attribute lookup
	    env    -- environment object, updated in place
	    strict -- when true, a "timeout" that is not an integer re-raises the
	              conversion error; when false the bad value is ignored and the
	              current timeout is kept

	    Missing or empty attributes leave the matching setting untouched.
	    "depends" is split on whitespace and the names are added to env.depends.
	    """
	    timeout = tag.get("timeout")
	    if timeout:
	        try:
	            env.timeout = int(timeout)
	        except (TypeError, ValueError):
	            # Only conversion failures are tolerated; a bare "except" would
	            # also swallow KeyboardInterrupt and SystemExit.
	            if strict:
	                raise

	    cwd = tag.get("cwd")
	    if cwd:
	        env.cwd = cwd

	    depends = tag.get("depends")
	    if depends:
	        # In-place set union (the stray backslash before "|=" was markup
	        # residue, not Python).
	        env.depends |= set(depends.split())

	def parse_test(doc, env, strict=True):
	"""Parse a <test> tag."""
	env = copy.deepcopy(env)
	parse_attributes(doc, env, strict=strict)
	return Test(doc.get("name"), doc.text.strip(),
	timeout=env.timeout,
	cwd=os.path.normpath(os.path.join(env.pwd, env.cwd)),
	depends=env.depends)

	def parse_sequence(doc, env, strict=True):
	# Create a copy of env so that the scope is restored.
	env = copy.deepcopy(env)

	# Parse attributes.
	parse_attributes(doc, env)

	# Parse children.
	tests = []
	for child in doc:
	if child.tag == "set":
	# Parse set, recording dependencies of the tests inside the set.
	new_tests = parse_set(child, env, strict=strict)
	for x in new_tests:
	env.depends.add(x.name)
	tests += new_tests
	elif child.tag == "sequence":
	# Parse sequence, recording dependencies of the tests inside the set.
	new_tests = parse_sequence(child, env, strict=strict)
	for x in new_tests:
	env.depends.add(x.name)
	tests += new_tests
	elif child.tag == "test":
	tests.append(parse_test(child, env, strict=strict))
	env.depends.add(tests[-1].name)
	elif strict:
	raise TestSpecParseException("Unknown tag '%s'" % child.tag)

	return tests

	def parse_set(doc, env, strict=True):
	    """Parse a <set> tag and return the tests found beneath it.

	    Children of a set gain no dependencies on one another; they only
	    inherit what the enclosing scope provides. Unknown child tags raise
	    TestSpecParseException when "strict" is true and are skipped otherwise.
	    """
	    # Create a copy of env so that the scope is restored.
	    env = copy.deepcopy(env)

	    # Parse attributes.
	    parse_attributes(doc, env, strict=strict)

	    # Parse children.
	    tests = []
	    for child in doc:
	        if child.tag == "set":
	            tests += parse_set(child, env, strict=strict)
	        elif child.tag == "sequence":
	            tests += parse_sequence(child, env, strict=strict)
	        elif child.tag == "test":
	            tests.append(parse_test(child, env, strict=strict))
	        elif strict:
	            raise TestSpecParseException("Unknown tag '%s'" % child.tag)

	    return tests

	def find_cycle(keys, depends_on):
	    """Return the shortest cyclic dependency path found, or None.

	    A depth-first search is started afresh from every key. Each search
	    yields the path from its start node up to and including the first node
	    that is visited twice; the shortest such path over all start nodes wins
	    (the earliest key wins ties). Restarting per key makes this quadratic.

	    keys       -- iterable of nodes to start from
	    depends_on -- function mapping a node to an iterable of its dependencies
	    """
	    def walk(node, finished, on_path):
	        if node in finished:
	            return None
	        if node in on_path:
	            # Reached a node that is still being explored: cycle closed.
	            return [node]
	        on_path.add(node)
	        for dep in depends_on(node):
	            tail = walk(dep, finished, on_path)
	            if tail is not None:
	                return [node] + tail
	        on_path.discard(node)
	        finished.add(node)
	        return None

	    best = None
	    for start in keys:
	        found = walk(start, set(), set())
	        if found is not None and (best is None or len(found) < len(best)):
	            best = found
	    return best

	def toposort(keys, prio, depends_on):
	    """Topologically sort "keys", honouring "prio" where dependencies allow.

	    keys       -- iterable of nodes
	    prio       -- function giving a node's priority; lower values come first
	    depends_on -- function giving the collection of nodes a node depends on

	    Returns the nodes as a list in which every node follows all of its
	    dependencies. Raises ValueError naming a cycle if not every node could
	    be scheduled.
	    """
	    # A "parent" must be processed before its "child". Record each node's
	    # children and how many of its parents are still outstanding.
	    keys = sorted(keys, key=prio)
	    children = {}
	    num_parents = {}
	    for key in keys:
	        parents = depends_on(key)
	        num_parents[key] = len(parents)
	        for parent in parents:
	            children.setdefault(parent, set()).add(key)

	    # Heap of nodes without outstanding parents (a sorted list already
	    # satisfies the heap invariant); keep popping the best one.
	    final_order = []
	    parentless = sorted([(prio(k), k) for k in keys if num_parents[k] == 0])
	    while parentless:
	        _, k = heapq.heappop(parentless)
	        final_order.append(k)
	        for s in children.get(k, []):
	            num_parents[s] -= 1
	            if num_parents[s] == 0:
	                heapq.heappush(parentless, (prio(s), s))

	    # Ensure we saw everybody. If we didn't, there is a cycle.
	    if len(keys) != len(final_order):
	        shortest_cycle = find_cycle(keys, depends_on)
	        raise ValueError("Circular dependency involving: %s" %
	                (" -> ".join(shortest_cycle)))

	    return final_order

	def validate_xml(filename):
	    """Ensure the XML matches the regression DTD.

	    Returns None when the document validates. Raises Exception carrying the
	    DTD error log otherwise. Errors from opening the DTD or parsing the XML
	    propagate unchanged.
	    """
	    # Read in the DTD
	    with open(REGRESSION_DTD) as dtd_file:
	        dtd = etree.DTD(dtd_file)

	    # Parse the file, and validate against the DTD.
	    parser = etree.XMLParser(remove_comments=True)
	    doc = etree.parse(filename, parser=parser)
	    if not dtd.validate(doc):
	        raise Exception(
	                "%s does not validate against DTD:\n\n" % filename
	                + str(dtd.error_log))

	def parse_testsuite_xml(filename, strict=True):
	    """Parse one regression XML file and return the list of its tests.

	    In strict mode the file is first validated against the DTD. Otherwise
	    validation is skipped and the XML parser is asked to recover from
	    malformed input. The root element is treated as a <set>, with relative
	    working directories resolved against the directory part of "filename".
	    """
	    # Validate the XML if requested.
	    if strict:
	        validate_xml(filename)

	    # Parse the file. If "strict" is false, keep trying to parse over
	    # broken XML.
	    parser = etree.XMLParser(remove_comments=True, recover=(not strict))
	    doc = etree.parse(filename, parser=parser).getroot()

	    # Setup an empty environment
	    env = TestEnv(os.path.dirname(filename))

	    # Parse this tag as a set of tests.
	    return parse_set(doc, env, strict=strict)

	def process_tests(tests, strict=False):
	    """Given a list of tests (possibly from multiple XML files), check for
	    errors and return a list of tests in dependency-satisfying order.

	    tests  -- list of Test objects; names and "depends" sets may be modified
	              in place when not strict
	    strict -- when true, duplicate names, unknown dependencies and
	              dependency cycles raise TestSpecParseException. When false,
	              duplicates are renamed with a numeric suffix, unknown
	              dependencies are dropped, and on a cycle the tests are
	              returned sorted by name with dependencies ignored.
	    """
	    # Check for duplicate names.
	    seen_names = set()
	    for t in tests:
	        if t.name in seen_names:
	            if strict:
	                raise TestSpecParseException("Duplicate test name detected: %s" % t.name)
	            # Pick the first unused "<name>_<n>", starting at n = 2.
	            for x in itertools.count(2):
	                proposed_name = "%s_%d" % (t.name, x)
	                if proposed_name not in seen_names:
	                    t.name = proposed_name
	                    break
	        seen_names.add(t.name)

	    # Check dependencies.
	    valid_names = set(t.name for t in tests)
	    for test in tests:
	        # Iterate over a sorted copy: the set itself is modified below.
	        for dependency_name in sorted(test.depends):
	            if dependency_name not in valid_names:
	                if strict:
	                    raise TestSpecParseException(
	                            "Depedency '%s' invalid." % dependency_name)
	                test.depends.remove(dependency_name)

	    # Toposort, using the input position as priority so the original order
	    # is kept wherever dependencies allow.
	    names = [t.name for t in tests]
	    test_ordering = dict((name, n) for (n, name) in enumerate(names))
	    test_depends = dict((t.name, t.depends) for t in tests)
	    try:
	        ordering = toposort(names,
	                lambda x: test_ordering[x],
	                lambda x: test_depends[x])
	    except ValueError as e:
	        # "except ValueError, e" and "e.message" only work on Python 2.
	        if strict:
	            raise TestSpecParseException(
	                    "Cycle in dependencies: %s" % e)
	        # There is a cycle, but we want to continue anyway.
	        # Just ignore all deps and hope for the best. A list is used so the
	        # name order cannot be lost to dict iteration order.
	        ordering = sorted(names)
	    rank = dict((name, n) for (n, name) in enumerate(ordering))
	    tests = sorted(tests, key=lambda k: rank[k.name])

	    return tests

	def legacy_testspec(root):
	    """Find tests inside makefiles.

	    Looks for files named "IsaMakefile" one and two directory levels below
	    "root", runs "<root>/isabelle/bin/isabelle make -f <file>
	    report-regression" in each file's directory, and turns every
	    whitespace-separated word of the output into a Test named
	    "<directory name>/<word>" with a four hour timeout. Makefiles for which
	    the command exits with an error contribute no tests.
	    """
	    # Find candidate "IsaMakefile"s. The wildcard components must be "*";
	    # empty strings would match nothing below the root.
	    candidates = sorted(
	        glob.glob(os.path.join(root, "*", "IsaMakefile"))
	        + glob.glob(os.path.join(root, "*", "*", "IsaMakefile")))

	    # Get isabelle binary.
	    isabelle_bin = os.path.abspath(os.path.join(root, "isabelle", "bin", "isabelle"))

	    # Run "isabelle make report-regression" on each.
	    def report_regression(filename):
	        filename = os.path.abspath(filename)
	        base_name = os.path.split(os.path.dirname(filename))[1]
	        try:
	            # os.devnull instead of a hard-coded "/dev/null";
	            # universal_newlines makes check_output return text, so the
	            # target names can be concatenated with str on Python 3 too.
	            with open(os.devnull, "w") as devnull:
	                results = subprocess.check_output(
	                    [isabelle_bin, "make", "-f", filename, "report-regression"],
	                    cwd=os.path.dirname(filename),
	                    stderr=devnull,
	                    universal_newlines=True)
	        except subprocess.CalledProcessError:
	            return []
	        return [(base_name + "/" + x, x) for x in results.strip().split()]

	    # Search for tests.
	    tests = []
	    for candidate in candidates:
	        makefile = os.path.abspath(candidate)
	        for (name, target) in report_regression(makefile):
	            new_test = Test(name, "isabelle make " + target, timeout=4*3600,
	                        cwd=os.path.dirname(makefile))
	            tests.append(new_test)
	    return tests

	def parse_test_files(xml_files, strict=False):
	    """Parse several regression XML files and return their tests, ordered.

	    xml_files -- iterable of file names; files resolving to the same
	                 (device, inode) pair are parsed only once
	    strict    -- forwarded to the per-file parser and to process_tests().
	                 When true, any error while parsing a file is re-raised;
	                 when false, the failing file is reported on stderr and
	                 skipped.
	    """
	    tests = []
	    seen_files = set()
	    for x in xml_files:
	        # Some files may be symlinked; don't process them multiple times.
	        try:
	            st = os.stat(x)
	        except OSError:
	            # Cannot stat: let the parser below report the real problem.
	            pass
	        else:
	            file_id = (st.st_dev, st.st_ino)
	            if file_id in seen_files:
	                continue
	            seen_files.add(file_id)

	        try:
	            # Forward "strict" so relaxed mode really uses the lenient parser.
	            tests += parse_testsuite_xml(x, strict=strict)
	        except Exception:
	            # "except Exception" lets KeyboardInterrupt/SystemExit through.
	            sys.stderr.write("Exception while parsing file: %s.\n" % x)
	            if strict:
	                raise
	    return process_tests(tests, strict=strict)

	def main():
	    """Command-line entry point: load tests and print one line per test.

	    Tests come either from the XML files named on the command line or, with
	    --legacy, from IsaMakefiles below the current directory. The two modes
	    are mutually exclusive and one of them is required.
	    """
	    # Parse arguments
	    parser = argparse.ArgumentParser(description="Regression Framework Testspec Parser")
	    parser.add_argument("file", metavar="FILE", type=str, nargs="*",
	            help="a regression XML file to parse")
	    parser.add_argument("-r", "--relax", action="store_false", dest="strict",
	            help="be less strict when parsing XML files")
	    parser.add_argument("-l", "--legacy", action="store_true",
	            help="use legacy 'IsaMakefile' specs")
	    args = parser.parse_args()

	    # Ensure we are either in legacy mode or we have at least one file.
	    # parser.error() prints the message and exits.
	    if not args.legacy and len(args.file) == 0:
	        parser.error("Please provide at least one XML file.")
	    if args.legacy and len(args.file) > 0:
	        parser.error("Can not use both legacy mode and XML files.")

	    if args.legacy:
	        # Fetch legacy tests.
	        tests = legacy_testspec(os.getcwd())
	    else:
	        # Fetch XML tests.
	        tests = parse_test_files(args.file, strict=args.strict)

	    # Print results
	    for test in tests:
	        print("\"%s\" [timeout=%d, parents=%s, cwd=%s]" % (
	            test.command, test.timeout, ",".join(test.depends), test.cwd))


	# Run the command-line interface only when executed as a script, so that
	# importing this module has no side effects.
	if __name__ == "__main__":
	    main()