# resulttool - regression analysis
#
# Copyright (c) 2019, Intel Corporation.
# Copyright (c) 2019, Linux Foundation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import resulttool.resultutils as resultutils

from oeqa.utils.git import GitRepo
import oeqa.utils.gitarchive as gitarchive
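
# Map a result's TEST_TYPE to the configuration key whose metadata must match
# between base and target before two runs are considered comparable.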
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}
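
# Known oe-selftest configurations, used by guess_oeselftest_metadata() below to
# reconstruct missing OESELFTEST_METADATA. As noted there, these guesses are
# tightly coupled to the autobuilder's config.json; the key names here are
# assumed to mirror its target names.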
OESELFTEST_METADATA_GUESS_TABLE = {
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "reproducible": {
        "run_all_tests": False,
        "run_tests": ["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine"],
        "exclude_tags": None
    },
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-system"],
        "exclude_tags": None
    },
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-user"],
        "exclude_tags": None
    },
    "selftest": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    "bringup": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}

STATUS_STRINGS = {
    "None": "No matching test result"
}

REGRESSIONS_DISPLAY_LIMIT = 50

MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------"
ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"

def test_has_at_least_one_matching_tag(test, tag_list):
    return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])

def all_tests_have_at_least_one_matching_tag(results, tag_list):
    return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())

def any_test_have_any_matching_tag(results, tag_list):
    return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())

def have_skipped_test(result, test_prefix):
    return all(result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))

def have_all_tests_skipped(result, test_prefixes_list):
    return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)
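
# Data-shape note (hypothetical example, not from a real run): the helpers above
# expect `results` to be keyed by test name, with per-test dicts that may carry
# an "oetags" list, e.g.
#   {"meta.SomeTest.test_example": {"status": "PASSED", "oetags": ["machine"]}}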

def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result lacks OESELFTEST_METADATA, try to guess it
    based on the results' content. Check the results for specific values
    (absence/presence of oetags, number and names of executed tests, ...) and,
    if they match one of the known configurations from the autobuilder, apply
    the guessed OESELFTEST_METADATA to allow proper test filtering.

    This guessing process is tightly coupled to config.json in the autobuilder.
    It should trigger less and less, as new tests have OESELFTEST_METADATA
    properly appended at test reporting time.
    """
    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    elif all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']
    return None
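
# For illustration (hypothetical run): a results dict whose only entry is
# "buildoptions.SourceMirroring.test_yocto_source_mirror" is guessed to be a
# 'trigger-build-posttrigger' run and receives that table entry as metadata.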

def metadata_matches(base_configuration, target_configuration):
    """
    For the passed base and target, check the test type. If the test type
    matches one of the properties described in METADATA_MATCH_TABLE, compare
    the corresponding metadata if it is present in base. Return True if the
    metadata matches, or if base lacks some data (either TEST_TYPE or the
    corresponding metadata).
    """
    test_type = base_configuration.get('TEST_TYPE')
    if test_type not in METADATA_MATCH_TABLE:
        return True

    metadata_key = METADATA_MATCH_TABLE.get(test_type)
    if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
        return False

    return True

def machine_matches(base_configuration, target_configuration):
    return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')

def can_be_compared(logger, base, target):
    """
    Some test runs are not relevant to compare, for example oeselftest runs
    with different test sets or parameters. Return True if the tests can be
    compared.
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Older test results lack proper OESELFTEST_METADATA: if absent, try to
    # guess it based on the test results.
    if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
        guess = guess_oeselftest_metadata(base['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
            base_configuration['OESELFTEST_METADATA'] = guess
    if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
        guess = guess_oeselftest_metadata(target['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
            target_configuration['OESELFTEST_METADATA'] = guess

    # Test runs with LTP results in them should only be compared with other
    # runs that also contain LTP tests.
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)

def get_status_str(raw_status):
    raw_status_lower = raw_status.lower() if raw_status else "None"
    return STATUS_STRINGS.get(raw_status_lower, raw_status)
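
# e.g. get_status_str(None) returns "No matching test result", while
# get_status_str("FAILED") returns "FAILED" unchanged.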

def get_additional_info_line(new_pass_count, new_tests):
    result = []
    if new_tests:
        result.append(f'+{new_tests} test(s) present')
    if new_pass_count:
        result.append(f'+{new_pass_count} test(s) now passing')
    if not result:
        return ""

    return ' -> ' + ', '.join(result) + '\n'
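
# e.g. get_additional_info_line(2, 1) returns
# " -> +1 test(s) present, +2 test(s) now passing\n"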

def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    regressions = {}
    resultstring = ""
    new_tests = 0
    new_pass_count = 0

    display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT

    if base_result and target_result:
        for k in base_result:
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                logger.error('Failed to retrieve base test case status: %s' % k)

        # Also count new tests that were not present in the base results: these
        # could be newly added tests, but could also highlight test renames or
        # fixed faulty ptests.
        for k in target_result:
            if k not in base_result:
                new_tests += 1
        if result:
            new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
            # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
            if new_pass_count < len(result):
                resultstring = "Regression: %s\n            %s\n" % (base_name, target_name)
                for k in sorted(result):
                    if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                        # Differentiate each ptest kind when listing regressions
                        key_parts = k.split('.')
                        key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
                        # Append the new regression to the corresponding test family
                        regressions[key] = regressions.setdefault(key, []) + ['        %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))]
                resultstring += f"    Total: {sum(len(regressions[r]) for r in regressions)} new regression(s):\n"
                for k in regressions:
                    resultstring += f"    {len(regressions[k])} regression(s) for {k}\n"
                    count_to_print = min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k])
                    resultstring += ''.join(regressions[k][:count_to_print])
                    if count_to_print < len(regressions[k]):
                        resultstring += '        [...]\n'
                if new_pass_count > 0:
                    resultstring += f'    Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
                if new_tests > 0:
                    resultstring += f'    Additionally, {new_tests} new test(s) is/are present\n'
            else:
                resultstring = "%s\n%s\n" % (base_name, target_name)
                result = None
    else:
        resultstring = "%s\n%s\n" % (base_name, target_name)

    if not result:
        additional_info = get_additional_info_line(new_pass_count, new_tests)
        if additional_info:
            resultstring += additional_info

    return result, resultstring
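
# Return shape, for illustration (hypothetical test name and statuses): a test
# passing in base but failing in target yields
#   result == {"ptestresult.glibc.tst-foo": {"base": "PASSED", "target": "FAILED"}}
# along with a human-readable resultstring describing the regression.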

def get_results(logger, source):
    return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)

def regression(args, logger):
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)
    regression_common(args, logger, base_results, target_results)

# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allow us to match them without the random components.
def fixup_ptest_names(results, logger):
    for r in results:
        for i in results[r]:
            tests = list(results[r][i]['result'].keys())
            for test in tests:
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                elif test.startswith("ptestresult.curl.") and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.dbus.") and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    new = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    new = ".".join(test.split(".")[:2])
                if new:
                    results[r][i]['result'][new] = results[r][i]['result'][test]
                    del results[r][i]['result'][test]
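
# For illustration (hypothetical name): fixup_ptest_names() renames a test such
# as "ptestresult.lttng-tools.tools_-_a1b2c3" to "ptestresult.lttng-tools.tools"
# so base and target entries line up despite the random suffix.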

def regression_common(args, logger, base_results, target_results):
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations.
            # Start by removing any pairs which match.
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if not res:
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # We should only see regressions now; we may not be able to match
            # multiple pairs directly.
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(regressions)))
    print("\n" + MISSING_TESTS_BANNER + "\n")
    print("\n".join(sorted(notfound)))
    print("\n" + ADDITIONAL_DATA_BANNER + "\n")
    print("\n".join(sorted(matches)))
    return 0

def regression_git(args, logger):
    base_results = {}
    target_results = {}

    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
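    # Tag format example (hypothetical values): a stored results tag built from
    # tag_name might look like "master/1234-gabcdef0/0".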
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison.
            # In the future we could check that the commit is a common ancestor
            # and walk further back if it is not, but this is good enough for now.
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                         "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0

def register_commands(subparsers):
    """Register subcommands from this plugin"""
    parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
                                         description='regression analysis comparing the base set of results to the target results',
                                         group='analysis')
    parser_build.set_defaults(func=regression)
    parser_build.add_argument('base_result',
                              help='base result file/directory/URL for the comparison')
    parser_build.add_argument('target_result',
                              help='target result file/directory/URL to compare with')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) filter the base results to this result ID')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) filter the target results to this result ID')

    parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
                                         description='regression analysis comparing base result set to target '
                                                     'result set',
                                         group='analysis')
    parser_build.set_defaults(func=regression_git)
    parser_build.add_argument('repo',
                              help='the git repository containing the data')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) default select regression based on configurations unless base result '
                                   'id was provided')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) default select regression based on configurations unless target result '
                                   'id was provided')
    parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    parser_build.add_argument('--branch2', help="Branch to find comparison revisions in")
    parser_build.add_argument('--commit', help="Revision to search for")
    parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    parser_build.add_argument('--commit2', help="Revision to compare with")
    parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
    parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")
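
# Example invocations (paths and branch names are placeholders):
#   resulttool regression base-results/ target-results/
#   resulttool regression-git --branch master --branch2 master-next /path/to/testresults-repo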