patchreview.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. #! /usr/bin/env python3
  2. #
  3. # Copyright OpenEmbedded Contributors
  4. #
  5. # SPDX-License-Identifier: GPL-2.0-only
  6. #
  7. import argparse
  8. import collections
  9. import json
  10. import os
  11. import os.path
  12. import pathlib
  13. import re
  14. import subprocess
  15. import sys
  16. sys.path.append(os.path.join(sys.path[0], '../../meta/lib'))
  17. import oe.qa
  18. # TODO
  19. # - option to just list all broken files
  20. # - test suite
  21. # - validate signed-off-by
  22. status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied", "inactive-upstream")
  23. class Result:
  24. # Whether the patch has an Upstream-Status or not
  25. missing_upstream_status = False
  26. # If the Upstream-Status tag is malformed in some way (string for bad bit)
  27. malformed_upstream_status = None
  28. # If the Upstream-Status value is unknown (boolean)
  29. unknown_upstream_status = False
  30. # The upstream status value (Pending, etc)
  31. upstream_status = None
  32. # Whether the patch has a Signed-off-by or not
  33. missing_sob = False
  34. # Whether the Signed-off-by tag is malformed in some way
  35. malformed_sob = False
  36. # The Signed-off-by tag value
  37. sob = None
  38. # Whether a patch looks like a CVE but doesn't have a CVE tag
  39. missing_cve = False
  40. def blame_patch(patch):
  41. """
  42. From a patch filename, return a list of "commit summary (author name <author
  43. email>)" strings representing the history.
  44. """
  45. return subprocess.check_output(("git", "log",
  46. "--follow", "--find-renames", "--diff-filter=A",
  47. "--format=%s (%aN <%aE>)",
  48. "--", patch), cwd=os.path.dirname(patch)).decode("utf-8").splitlines()
  49. def patchreview(patches):
  50. # General pattern: start of line, optional whitespace, tag with optional
  51. # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
  52. # insensitive.
  53. sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
  54. status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
  55. cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
  56. cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)
  57. results = {}
  58. for patch in patches:
  59. result = Result()
  60. results[patch] = result
  61. content = open(patch, encoding='ascii', errors='ignore').read()
  62. # Find the Signed-off-by tag
  63. match = sob_re.search(content)
  64. if match:
  65. value = match.group(1)
  66. if value != "Signed-off-by:":
  67. result.malformed_sob = value
  68. result.sob = match.group(2)
  69. else:
  70. result.missing_sob = True
  71. # Find the Upstream-Status tag
  72. match = status_re.search(content)
  73. if match:
  74. value = oe.qa.check_upstream_status(patch)
  75. if value:
  76. result.malformed_upstream_status = value
  77. value = match.group(2).lower()
  78. # TODO: check case
  79. if value not in status_values:
  80. result.unknown_upstream_status = True
  81. result.upstream_status = value
  82. else:
  83. result.missing_upstream_status = True
  84. # Check that patches which looks like CVEs have CVE tags
  85. if cve_re.search(patch) or cve_re.search(content):
  86. if not cve_tag_re.search(content):
  87. result.missing_cve = True
  88. # TODO: extract CVE list
  89. return results
  90. def analyse(results, want_blame=False, verbose=True):
  91. """
  92. want_blame: display blame data for each malformed patch
  93. verbose: display per-file results instead of just summary
  94. """
  95. # want_blame requires verbose, so disable blame if we're not verbose
  96. if want_blame and not verbose:
  97. want_blame = False
  98. total_patches = 0
  99. missing_sob = 0
  100. malformed_sob = 0
  101. missing_status = 0
  102. malformed_status = 0
  103. missing_cve = 0
  104. pending_patches = 0
  105. for patch in sorted(results):
  106. r = results[patch]
  107. total_patches += 1
  108. need_blame = False
  109. # Build statistics
  110. if r.missing_sob:
  111. missing_sob += 1
  112. if r.malformed_sob:
  113. malformed_sob += 1
  114. if r.missing_upstream_status:
  115. missing_status += 1
  116. if r.malformed_upstream_status or r.unknown_upstream_status:
  117. malformed_status += 1
  118. # Count patches with no status as pending
  119. pending_patches +=1
  120. if r.missing_cve:
  121. missing_cve += 1
  122. if r.upstream_status == "pending":
  123. pending_patches += 1
  124. # Output warnings
  125. if r.missing_sob:
  126. need_blame = True
  127. if verbose:
  128. print("Missing Signed-off-by tag (%s)" % patch)
  129. if r.malformed_sob:
  130. need_blame = True
  131. if verbose:
  132. print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
  133. if r.missing_cve:
  134. need_blame = True
  135. if verbose:
  136. print("Missing CVE tag (%s)" % patch)
  137. if r.missing_upstream_status:
  138. need_blame = True
  139. if verbose:
  140. print("Missing Upstream-Status tag (%s)" % patch)
  141. if r.malformed_upstream_status:
  142. need_blame = True
  143. if verbose:
  144. print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
  145. if r.unknown_upstream_status:
  146. need_blame = True
  147. if verbose:
  148. print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))
  149. if want_blame and need_blame:
  150. print("\n".join(blame_patch(patch)) + "\n")
  151. def percent(num):
  152. try:
  153. return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
  154. except ZeroDivisionError:
  155. return "N/A"
  156. if verbose:
  157. print()
  158. print("""Total patches found: %d
  159. Patches missing Signed-off-by: %s
  160. Patches with malformed Signed-off-by: %s
  161. Patches missing CVE: %s
  162. Patches missing Upstream-Status: %s
  163. Patches with malformed Upstream-Status: %s
  164. Patches in Pending state: %s""" % (total_patches,
  165. percent(missing_sob),
  166. percent(malformed_sob),
  167. percent(missing_cve),
  168. percent(missing_status),
  169. percent(malformed_status),
  170. percent(pending_patches)))
  171. def histogram(results):
  172. from toolz import recipes, dicttoolz
  173. import math
  174. counts = recipes.countby(lambda r: r.upstream_status, results.values())
  175. bars = dicttoolz.valmap(lambda v: "#" * int(math.ceil(float(v) / len(results) * 100)), counts)
  176. for k in bars:
  177. print("%-20s %s (%d)" % (k.capitalize() if k else "No status", bars[k], counts[k]))
  178. def find_layers(candidate):
  179. # candidate can either be the path to a layer directly (eg meta-intel), or a
  180. # repository that contains other layers (meta-arm). We can determine what by
  181. # looking for a conf/layer.conf file. If that file exists then it's a layer,
  182. # otherwise its a repository of layers and we can assume they're called
  183. # meta-*.
  184. if (candidate / "conf" / "layer.conf").exists():
  185. return [candidate.absolute()]
  186. else:
  187. return [d.absolute() for d in candidate.iterdir() if d.is_dir() and (d.name == "meta" or d.name.startswith("meta-"))]
  188. # TODO these don't actually handle dynamic-layers/
  189. def gather_patches(layers):
  190. patches = []
  191. for directory in layers:
  192. filenames = subprocess.check_output(("git", "-C", directory, "ls-files", "recipes-*/**/*.patch", "recipes-*/**/*.diff"), universal_newlines=True).split()
  193. patches += [os.path.join(directory, f) for f in filenames]
  194. return patches
  195. def count_recipes(layers):
  196. count = 0
  197. for directory in layers:
  198. output = subprocess.check_output(["git", "-C", directory, "ls-files", "recipes-*/**/*.bb"], universal_newlines=True)
  199. count += len(output.splitlines())
  200. return count
  201. if __name__ == "__main__":
  202. args = argparse.ArgumentParser(description="Patch Review Tool")
  203. args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
  204. args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
  205. args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
  206. args.add_argument("-j", "--json", help="update JSON")
  207. args.add_argument("directory", type=pathlib.Path, metavar="DIRECTORY", help="directory to scan (layer, or repository of layers)")
  208. args = args.parse_args()
  209. layers = find_layers(args.directory)
  210. print(f"Found layers {' '.join((d.name for d in layers))}")
  211. patches = gather_patches(layers)
  212. results = patchreview(patches)
  213. analyse(results, want_blame=args.blame, verbose=args.verbose)
  214. if args.json:
  215. if os.path.isfile(args.json):
  216. data = json.load(open(args.json))
  217. else:
  218. data = []
  219. row = collections.Counter()
  220. row["total"] = len(results)
  221. row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"], universal_newlines=True).strip()
  222. row["commit"] = subprocess.check_output(["git", "-C", args.directory, "rev-parse", "HEAD"], universal_newlines=True).strip()
  223. row['commit_count'] = subprocess.check_output(["git", "-C", args.directory, "rev-list", "--count", "HEAD"], universal_newlines=True).strip()
  224. row['recipe_count'] = count_recipes(layers)
  225. for r in results.values():
  226. if r.upstream_status in status_values:
  227. row[r.upstream_status] += 1
  228. if r.malformed_upstream_status or r.missing_upstream_status:
  229. row['malformed-upstream-status'] += 1
  230. if r.malformed_sob or r.missing_sob:
  231. row['malformed-sob'] += 1
  232. data.append(row)
  233. json.dump(data, open(args.json, "w"), sort_keys=True, indent="\t")
  234. if args.histogram:
  235. print()
  236. histogram(results)