patchreview 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. #! /usr/bin/env python3
  2. #
  3. # SPDX-License-Identifier: GPL-2.0-only
  4. #
  5. # TODO
  6. # - option to just list all broken files
  7. # - test suite
  8. # - validate signed-off-by
  9. import argparse
  10. import collections
  11. import json
  12. import os
  13. import re
  14. import subprocess
  15. status_values = (
  16. "accepted",
  17. "pending",
  18. "inappropriate",
  19. "backport",
  20. "submitted",
  21. "denied",
  22. )
  23. class PatchResult:
  24. # Whether the patch has an Upstream-Status or not
  25. missing_upstream_status = False
  26. # If the Upstream-Status tag is malformed in some way (string for bad bit)
  27. malformed_upstream_status = None
  28. # If the Upstream-Status value is unknown (boolean)
  29. unknown_upstream_status = False
  30. # The upstream status value (Pending, etc)
  31. upstream_status = None
  32. # Whether the patch has a Signed-off-by or not
  33. missing_sob = False
  34. # Whether the Signed-off-by tag is malformed in some way
  35. malformed_sob = False
  36. # The Signed-off-by tag value
  37. sob = None
  38. # Whether a patch looks like a CVE but doesn't have a CVE tag
  39. missing_cve = False
  40. class Summary:
  41. total = 0
  42. cve_missing = 0
  43. sob_missing = 0
  44. sob_malformed = 0
  45. status_missing = 0
  46. status_malformed = 0
  47. status_pending = 0
  48. def blame_patch(patch):
  49. """
  50. From a patch filename, return a list of "commit summary (author name <author
  51. email>)" strings representing the history.
  52. """
  53. return subprocess.check_output(("git", "log",
  54. "--follow", "--find-renames", "--diff-filter=A",
  55. "--format=%s (%aN <%aE>)",
  56. "--", patch)).decode("utf-8").splitlines()
  57. def patchreview(patches):
  58. # General pattern: start of line, optional whitespace, tag with optional
  59. # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
  60. # insensitive.
  61. sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
  62. status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*(\w*)", re.IGNORECASE | re.MULTILINE)
  63. cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
  64. cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)
  65. results = {}
  66. for patch in patches:
  67. result = PatchResult()
  68. results[patch] = result
  69. content = open(patch, encoding="ascii", errors="ignore").read()
  70. # Find the Signed-off-by tag
  71. match = sob_re.search(content)
  72. if match:
  73. value = match.group(1)
  74. if value != "Signed-off-by:":
  75. result.malformed_sob = value
  76. result.sob = match.group(2)
  77. else:
  78. result.missing_sob = True
  79. # Find the Upstream-Status tag
  80. match = status_re.search(content)
  81. if match:
  82. value = match.group(1)
  83. if value != "Upstream-Status:":
  84. result.malformed_upstream_status = value
  85. value = match.group(2).lower()
  86. # TODO: check case
  87. if value not in status_values:
  88. result.unknown_upstream_status = True
  89. result.upstream_status = value
  90. else:
  91. result.missing_upstream_status = True
  92. # Check that patches which looks like CVEs have CVE tags
  93. if cve_re.search(patch) or cve_re.search(content):
  94. if not cve_tag_re.search(content):
  95. result.missing_cve = True
  96. # TODO: extract CVE list
  97. return results
  98. def analyse(results, want_blame=False, verbose=True):
  99. """
  100. want_blame: display blame data for each malformed patch
  101. verbose: display per-file results instead of just summary
  102. """
  103. # want_blame requires verbose, so disable blame if we're not verbose
  104. if want_blame and not verbose:
  105. want_blame = False
  106. summary = Summary()
  107. for patch in sorted(results):
  108. r = results[patch]
  109. summary.total += 1
  110. need_blame = False
  111. # Build statistics
  112. if r.missing_sob:
  113. summary.sob_missing += 1
  114. if r.malformed_sob:
  115. summary.sob_malformed += 1
  116. if r.missing_upstream_status:
  117. summary.status_missing += 1
  118. if r.malformed_upstream_status or r.unknown_upstream_status:
  119. summary.status_malformed += 1
  120. # Count patches with no status as pending
  121. summary.status_pending += 1
  122. if r.missing_cve:
  123. summary.cve_missing += 1
  124. if r.upstream_status == "pending":
  125. summary.status_pending += 1
  126. # Output warnings
  127. if r.missing_sob:
  128. need_blame = True
  129. if verbose:
  130. print("Missing Signed-off-by tag (%s)" % patch)
  131. if r.malformed_sob:
  132. need_blame = True
  133. if verbose:
  134. print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
  135. if r.missing_cve:
  136. need_blame = True
  137. if verbose:
  138. print("Missing CVE tag (%s)" % patch)
  139. if r.missing_upstream_status:
  140. need_blame = True
  141. if verbose:
  142. print("Missing Upstream-Status tag (%s)" % patch)
  143. if r.malformed_upstream_status:
  144. need_blame = True
  145. if verbose:
  146. print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
  147. if r.unknown_upstream_status:
  148. need_blame = True
  149. if verbose:
  150. print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))
  151. if want_blame and need_blame:
  152. print("\n".join(blame_patch(patch)) + "\n")
  153. return summary
  154. def display_summary(summary, verbose):
  155. def percent(num):
  156. try:
  157. return "%d (%d%%)" % (num, round(num * 100.0 / summary.total))
  158. except ZeroDivisionError:
  159. return "N/A"
  160. if verbose:
  161. print()
  162. print("""Total patches found: %d
  163. Patches missing Signed-off-by: %s
  164. Patches with malformed Signed-off-by: %s
  165. Patches missing CVE: %s
  166. Patches missing Upstream-Status: %s
  167. Patches with malformed Upstream-Status: %s
  168. Patches in Pending state: %s""" % (summary.total,
  169. percent(summary.sob_missing),
  170. percent(summary.sob_malformed),
  171. percent(summary.cve_missing),
  172. percent(summary.status_missing),
  173. percent(summary.status_malformed),
  174. percent(summary.status_pending)))
  175. def generate_metrics(summary, output):
  176. # https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md
  177. # Summary attribute name, MetricPoint help
  178. mapping = (
  179. ("total", "Total patches"),
  180. ("cve_missing", "Patches missing CVE tag"),
  181. ("sob_malformed", "Patches with malformed Signed-off-by"),
  182. ("sob_missing", "Patches with missing Signed-off-by"),
  183. ("status_malformed", "Patches with malformed Upstream-Status"),
  184. ("status_missing", "Patches with missing Upstream-Status"),
  185. ("status_pending", "Patches with Pending Upstream-Status")
  186. )
  187. for attr, help in mapping:
  188. metric = f"patch_check_{attr}"
  189. value = getattr(summary, attr)
  190. output.write(f"""
  191. # TYPE {metric} gauge
  192. # HELP {help}
  193. {metric} {value}
  194. """)
  195. output.write("\n# EOF\n")
  196. def histogram(results):
  197. import math
  198. from toolz import dicttoolz, recipes
  199. counts = recipes.countby(lambda r: r.upstream_status, results.values())
  200. bars = dicttoolz.valmap(lambda v: "#" * int(math.ceil(float(v) / len(results) * 100)), counts)
  201. for k in bars:
  202. print("%-20s %s (%d)" % (k.capitalize() if k else "No status", bars[k], counts[k]))
  203. def gather_patches(directories):
  204. patches = []
  205. for directory in directories:
  206. filenames = subprocess.check_output(("git", "-C", directory, "ls-files", "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8").split()
  207. patches += [os.path.join(directory, f) for f in filenames]
  208. return patches
  209. if __name__ == "__main__":
  210. args = argparse.ArgumentParser(description="Patch Review Tool")
  211. args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
  212. args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
  213. args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
  214. args.add_argument("-j", "--json", help="update JSON")
  215. args.add_argument("-m", "--metrics", type=argparse.FileType('w'), help="write OpenMetrics")
  216. args.add_argument("dirs", metavar="DIRECTORY", nargs="+", help="directory to scan")
  217. args = args.parse_args()
  218. patches = gather_patches(args.dirs)
  219. results = patchreview(patches)
  220. summary = analyse(results, want_blame=args.blame, verbose=args.verbose)
  221. display_summary(summary, verbose=args.verbose)
  222. if args.json:
  223. if os.path.isfile(args.json):
  224. data = json.load(open(args.json))
  225. else:
  226. data = []
  227. row = collections.Counter()
  228. row["total"] = len(results)
  229. row["date"] = subprocess.check_output(["git", "-C", args.dirs[0], "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
  230. for r in results.values():
  231. if r.upstream_status in status_values:
  232. row[r.upstream_status] += 1
  233. if r.malformed_upstream_status or r.missing_upstream_status:
  234. row["malformed-upstream-status"] += 1
  235. if r.malformed_sob or r.missing_sob:
  236. row["malformed-sob"] += 1
  237. data.append(row)
  238. json.dump(data, open(args.json, "w"))
  239. if args.metrics:
  240. generate_metrics(summary, args.metrics)
  241. if args.histogram:
  242. print()
  243. histogram(results)