patchreview.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. #! /usr/bin/env python3
  2. # TODO
  3. # - option to just list all broken files
  4. # - test suite
  5. # - validate signed-off-by
  6. status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied")
  7. class Result:
  8. # Whether the patch has an Upstream-Status or not
  9. missing_upstream_status = False
  10. # If the Upstream-Status tag is malformed in some way (string for bad bit)
  11. malformed_upstream_status = None
  12. # If the Upstream-Status value is unknown (boolean)
  13. unknown_upstream_status = False
  14. # The upstream status value (Pending, etc)
  15. upstream_status = None
  16. # Whether the patch has a Signed-off-by or not
  17. missing_sob = False
  18. # Whether the Signed-off-by tag is malformed in some way
  19. malformed_sob = False
  20. # The Signed-off-by tag value
  21. sob = None
  22. # Whether a patch looks like a CVE but doesn't have a CVE tag
  23. missing_cve = False
  24. def blame_patch(patch):
  25. """
  26. From a patch filename, return a list of "commit summary (author name <author
  27. email>)" strings representing the history.
  28. """
  29. import subprocess
  30. return subprocess.check_output(("git", "log",
  31. "--follow", "--find-renames", "--diff-filter=A",
  32. "--format=%s (%aN <%aE>)",
  33. "--", patch)).decode("utf-8").splitlines()
  34. def patchreview(path, patches):
  35. import re, os.path
  36. # General pattern: start of line, optional whitespace, tag with optional
  37. # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
  38. # insensitive.
  39. sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
  40. status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*(\w*)", re.IGNORECASE | re.MULTILINE)
  41. cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
  42. cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)
  43. results = {}
  44. for patch in patches:
  45. fullpath = os.path.join(path, patch)
  46. result = Result()
  47. results[fullpath] = result
  48. content = open(fullpath, encoding='ascii', errors='ignore').read()
  49. # Find the Signed-off-by tag
  50. match = sob_re.search(content)
  51. if match:
  52. value = match.group(1)
  53. if value != "Signed-off-by:":
  54. result.malformed_sob = value
  55. result.sob = match.group(2)
  56. else:
  57. result.missing_sob = True
  58. # Find the Upstream-Status tag
  59. match = status_re.search(content)
  60. if match:
  61. value = match.group(1)
  62. if value != "Upstream-Status:":
  63. result.malformed_upstream_status = value
  64. value = match.group(2).lower()
  65. # TODO: check case
  66. if value not in status_values:
  67. result.unknown_upstream_status = True
  68. result.upstream_status = value
  69. else:
  70. result.missing_upstream_status = True
  71. # Check that patches which looks like CVEs have CVE tags
  72. if cve_re.search(patch) or cve_re.search(content):
  73. if not cve_tag_re.search(content):
  74. result.missing_cve = True
  75. # TODO: extract CVE list
  76. return results
  77. def analyse(results, want_blame=False, verbose=True):
  78. """
  79. want_blame: display blame data for each malformed patch
  80. verbose: display per-file results instead of just summary
  81. """
  82. # want_blame requires verbose, so disable blame if we're not verbose
  83. if want_blame and not verbose:
  84. want_blame = False
  85. total_patches = 0
  86. missing_sob = 0
  87. malformed_sob = 0
  88. missing_status = 0
  89. malformed_status = 0
  90. missing_cve = 0
  91. pending_patches = 0
  92. for patch in sorted(results):
  93. r = results[patch]
  94. total_patches += 1
  95. need_blame = False
  96. # Build statistics
  97. if r.missing_sob:
  98. missing_sob += 1
  99. if r.malformed_sob:
  100. malformed_sob += 1
  101. if r.missing_upstream_status:
  102. missing_status += 1
  103. if r.malformed_upstream_status or r.unknown_upstream_status:
  104. malformed_status += 1
  105. # Count patches with no status as pending
  106. pending_patches +=1
  107. if r.missing_cve:
  108. missing_cve += 1
  109. if r.upstream_status == "pending":
  110. pending_patches += 1
  111. # Output warnings
  112. if r.missing_sob:
  113. need_blame = True
  114. if verbose:
  115. print("Missing Signed-off-by tag (%s)" % patch)
  116. if r.malformed_sob:
  117. need_blame = True
  118. if verbose:
  119. print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
  120. if r.missing_cve:
  121. need_blame = True
  122. if verbose:
  123. print("Missing CVE tag (%s)" % patch)
  124. if r.missing_upstream_status:
  125. need_blame = True
  126. if verbose:
  127. print("Missing Upstream-Status tag (%s)" % patch)
  128. if r.malformed_upstream_status:
  129. need_blame = True
  130. if verbose:
  131. print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
  132. if r.unknown_upstream_status:
  133. need_blame = True
  134. if verbose:
  135. print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))
  136. if want_blame and need_blame:
  137. print("\n".join(blame_patch(patch)) + "\n")
  138. def percent(num):
  139. try:
  140. return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
  141. except ZeroDivisionError:
  142. return "N/A"
  143. if verbose:
  144. print()
  145. print("""Total patches found: %d
  146. Patches missing Signed-off-by: %s
  147. Patches with malformed Signed-off-by: %s
  148. Patches missing CVE: %s
  149. Patches missing Upstream-Status: %s
  150. Patches with malformed Upstream-Status: %s
  151. Patches in Pending state: %s""" % (total_patches,
  152. percent(missing_sob),
  153. percent(malformed_sob),
  154. percent(missing_cve),
  155. percent(missing_status),
  156. percent(malformed_status),
  157. percent(pending_patches)))
  158. def histogram(results):
  159. from toolz import recipes, dicttoolz
  160. import math
  161. counts = recipes.countby(lambda r: r.upstream_status, results.values())
  162. bars = dicttoolz.valmap(lambda v: "#" * int(math.ceil(float(v) / len(results) * 100)), counts)
  163. for k in bars:
  164. print("%-20s %s (%d)" % (k.capitalize() if k else "No status", bars[k], counts[k]))
  165. if __name__ == "__main__":
  166. import argparse, subprocess, os
  167. args = argparse.ArgumentParser(description="Patch Review Tool")
  168. args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
  169. args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
  170. args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
  171. args.add_argument("-j", "--json", help="update JSON")
  172. args.add_argument("directory", help="directory to scan")
  173. args = args.parse_args()
  174. patches = subprocess.check_output(("git", "-C", args.directory, "ls-files", "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8").split()
  175. results = patchreview(args.directory, patches)
  176. analyse(results, want_blame=args.blame, verbose=args.verbose)
  177. if args.json:
  178. import json, os.path, collections
  179. if os.path.isfile(args.json):
  180. data = json.load(open(args.json))
  181. else:
  182. data = []
  183. row = collections.Counter()
  184. row["total"] = len(results)
  185. row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
  186. for r in results.values():
  187. if r.upstream_status in status_values:
  188. row[r.upstream_status] += 1
  189. if r.malformed_upstream_status or r.missing_upstream_status:
  190. row['malformed-upstream-status'] += 1
  191. if r.malformed_sob or r.missing_sob:
  192. row['malformed-sob'] += 1
  193. data.append(row)
  194. json.dump(data, open(args.json, "w"))
  195. if args.histogram:
  196. print()
  197. histogram(results)