gitarchive.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. #
  2. # Helper functions for committing data to git and pushing upstream
  3. #
  4. # Copyright (c) 2017, Intel Corporation.
  5. # Copyright (c) 2019, Linux Foundation
  6. #
  7. # SPDX-License-Identifier: GPL-2.0-only
  8. #
  9. import os
  10. import re
  11. import sys
  12. from operator import attrgetter
  13. from collections import namedtuple
  14. from oeqa.utils.git import GitRepo, GitError
  15. class ArchiveError(Exception):
  16. """Internal error handling of this script"""
  17. def format_str(string, fields):
  18. """Format string using the given fields (dict)"""
  19. try:
  20. return string.format(**fields)
  21. except KeyError as err:
  22. raise ArchiveError("Unable to expand string '{}': unknown field {} "
  23. "(valid fields are: {})".format(
  24. string, err, ', '.join(sorted(fields.keys()))))
  25. def init_git_repo(path, no_create, bare, log):
  26. """Initialize local Git repository"""
  27. path = os.path.abspath(path)
  28. if os.path.isfile(path):
  29. raise ArchiveError("Invalid Git repo at {}: path exists but is not a "
  30. "directory".format(path))
  31. if not os.path.isdir(path) or not os.listdir(path):
  32. if no_create:
  33. raise ArchiveError("No git repo at {}, refusing to create "
  34. "one".format(path))
  35. if not os.path.isdir(path):
  36. try:
  37. os.mkdir(path)
  38. except (FileNotFoundError, PermissionError) as err:
  39. raise ArchiveError("Failed to mkdir {}: {}".format(path, err))
  40. if not os.listdir(path):
  41. log.info("Initializing a new Git repo at %s", path)
  42. repo = GitRepo.init(path, bare)
  43. try:
  44. repo = GitRepo(path, is_topdir=True)
  45. except GitError:
  46. raise ArchiveError("Non-empty directory that is not a Git repository "
  47. "at {}\nPlease specify an existing Git repository, "
  48. "an empty directory or a non-existing directory "
  49. "path.".format(path))
  50. return repo
  51. def git_commit_data(repo, data_dir, branch, message, exclude, notes, log):
  52. """Commit data into a Git repository"""
  53. log.info("Committing data into to branch %s", branch)
  54. tmp_index = os.path.join(repo.git_dir, 'index.oe-git-archive')
  55. try:
  56. # Create new tree object from the data
  57. env_update = {'GIT_INDEX_FILE': tmp_index,
  58. 'GIT_WORK_TREE': os.path.abspath(data_dir)}
  59. repo.run_cmd('add .', env_update)
  60. # Remove files that are excluded
  61. if exclude:
  62. repo.run_cmd(['rm', '--cached'] + [f for f in exclude], env_update)
  63. tree = repo.run_cmd('write-tree', env_update)
  64. # Create new commit object from the tree
  65. parent = repo.rev_parse(branch)
  66. if not parent:
  67. parent = repo.rev_parse("origin/" + branch)
  68. git_cmd = ['commit-tree', tree, '-m', message]
  69. if parent:
  70. git_cmd += ['-p', parent]
  71. commit = repo.run_cmd(git_cmd, env_update)
  72. # Create git notes
  73. for ref, filename in notes:
  74. ref = ref.format(branch_name=branch)
  75. repo.run_cmd(['notes', '--ref', ref, 'add',
  76. '-F', os.path.abspath(filename), commit])
  77. # Update branch head
  78. git_cmd = ['update-ref', 'refs/heads/' + branch, commit]
  79. repo.run_cmd(git_cmd)
  80. # Update current HEAD, if we're on branch 'branch'
  81. if not repo.bare and repo.get_current_branch() == branch:
  82. log.info("Updating %s HEAD to latest commit", repo.top_dir)
  83. repo.run_cmd('reset --hard')
  84. return commit
  85. finally:
  86. if os.path.exists(tmp_index):
  87. os.unlink(tmp_index)
  88. def get_tags(repo, log, pattern=None, url=None):
  89. """ Fetch remote tags from current repository
  90. A pattern can be provided to filter returned tags list
  91. An URL can be provided if local repository has no valid remote configured
  92. """
  93. base_cmd = ['ls-remote', '--refs', '--tags', '-q']
  94. cmd = base_cmd.copy()
  95. # First try to fetch tags from repository configured remote
  96. cmd.append('origin')
  97. if pattern:
  98. cmd.append(pattern)
  99. try:
  100. tags_refs = repo.run_cmd(cmd)
  101. tags = ["".join(d.split()[1].split('/', 2)[2:]) for d in tags_refs.splitlines()]
  102. except GitError as e:
  103. # If it fails, retry with repository url if one is provided
  104. if url:
  105. log.info("No remote repository configured, use provided url")
  106. cmd = base_cmd.copy()
  107. cmd.append(url)
  108. if pattern:
  109. cmd.append(pattern)
  110. tags_refs = repo.run_cmd(cmd)
  111. tags = ["".join(d.split()[1].split('/', 2)[2:]) for d in tags_refs.splitlines()]
  112. else:
  113. log.warning("Read local tags only, some remote tags may be missed")
  114. cmd = ["tag"]
  115. if pattern:
  116. cmd += ["-l", pattern]
  117. tags = repo.run_cmd(cmd).splitlines()
  118. return tags
  119. def expand_tag_strings(repo, name_pattern, msg_subj_pattern, msg_body_pattern,
  120. url, log, keywords):
  121. """Generate tag name and message, with support for running id number"""
  122. keyws = keywords.copy()
  123. # Tag number is handled specially: if not defined, we autoincrement it
  124. if 'tag_number' not in keyws:
  125. # Fill in all other fields than 'tag_number'
  126. keyws['tag_number'] = '{tag_number}'
  127. tag_re = format_str(name_pattern, keyws)
  128. # Replace parentheses for proper regex matching
  129. tag_re = tag_re.replace('(', '\(').replace(')', '\)') + '$'
  130. # Inject regex group pattern for 'tag_number'
  131. tag_re = tag_re.format(tag_number='(?P<tag_number>[0-9]{1,5})')
  132. keyws['tag_number'] = 0
  133. for existing_tag in get_tags(repo, log, url=url):
  134. match = re.match(tag_re, existing_tag)
  135. if match and int(match.group('tag_number')) >= keyws['tag_number']:
  136. keyws['tag_number'] = int(match.group('tag_number')) + 1
  137. tag_name = format_str(name_pattern, keyws)
  138. msg_subj= format_str(msg_subj_pattern.strip(), keyws)
  139. msg_body = format_str(msg_body_pattern, keyws)
  140. return tag_name, msg_subj + '\n\n' + msg_body
  141. def gitarchive(data_dir, git_dir, no_create, bare, commit_msg_subject, commit_msg_body, branch_name, no_tag, tagname, tag_msg_subject, tag_msg_body, exclude, notes, push, keywords, log):
  142. if not os.path.isdir(data_dir):
  143. raise ArchiveError("Not a directory: {}".format(data_dir))
  144. data_repo = init_git_repo(git_dir, no_create, bare, log)
  145. # Expand strings early in order to avoid getting into inconsistent
  146. # state (e.g. no tag even if data was committed)
  147. commit_msg = format_str(commit_msg_subject.strip(), keywords)
  148. commit_msg += '\n\n' + format_str(commit_msg_body, keywords)
  149. branch_name = format_str(branch_name, keywords)
  150. tag_name = None
  151. if not no_tag and tagname:
  152. tag_name, tag_msg = expand_tag_strings(data_repo, tagname,
  153. tag_msg_subject,
  154. tag_msg_body,
  155. push, log, keywords)
  156. # Commit data
  157. commit = git_commit_data(data_repo, data_dir, branch_name,
  158. commit_msg, exclude, notes, log)
  159. # Create tag
  160. if tag_name:
  161. log.info("Creating tag %s", tag_name)
  162. data_repo.run_cmd(['tag', '-a', '-m', tag_msg, tag_name, commit])
  163. # Push data to remote
  164. if push:
  165. cmd = ['push', '--tags']
  166. # If no remote is given we push with the default settings from
  167. # gitconfig
  168. if push is not True:
  169. notes_refs = ['refs/notes/' + ref.format(branch_name=branch_name)
  170. for ref, _ in notes]
  171. cmd.extend([push, branch_name] + notes_refs)
  172. log.info("Pushing data to remote")
  173. data_repo.run_cmd(cmd)
  174. # Container class for tester revisions
  175. TestedRev = namedtuple('TestedRev', 'commit commit_number tags')
  176. def get_test_runs(log, repo, tag_name, **kwargs):
  177. """Get a sorted list of test runs, matching given pattern"""
  178. # First, get field names from the tag name pattern
  179. field_names = [m.group(1) for m in re.finditer(r'{(\w+)}', tag_name)]
  180. undef_fields = [f for f in field_names if f not in kwargs.keys()]
  181. # Fields for formatting tag name pattern
  182. str_fields = dict([(f, '*') for f in field_names])
  183. str_fields.update(kwargs)
  184. # Get a list of all matching tags
  185. tag_pattern = tag_name.format(**str_fields)
  186. tags = get_tags(repo, log, pattern=tag_pattern)
  187. log.debug("Found %d tags matching pattern '%s'", len(tags), tag_pattern)
  188. # Parse undefined fields from tag names
  189. str_fields = dict([(f, r'(?P<{}>[\w\-.()]+)'.format(f)) for f in field_names])
  190. str_fields['branch'] = r'(?P<branch>[\w\-.()/]+)'
  191. str_fields['commit'] = '(?P<commit>[0-9a-f]{7,40})'
  192. str_fields['commit_number'] = '(?P<commit_number>[0-9]{1,7})'
  193. str_fields['tag_number'] = '(?P<tag_number>[0-9]{1,5})'
  194. # escape parenthesis in fields in order to not messa up the regexp
  195. fixed_fields = dict([(k, v.replace('(', r'\(').replace(')', r'\)')) for k, v in kwargs.items()])
  196. str_fields.update(fixed_fields)
  197. tag_re = re.compile(tag_name.format(**str_fields))
  198. # Parse fields from tags
  199. revs = []
  200. for tag in tags:
  201. m = tag_re.match(tag)
  202. groups = m.groupdict()
  203. revs.append([groups[f] for f in undef_fields] + [tag])
  204. # Return field names and a sorted list of revs
  205. return undef_fields, sorted(revs)
  206. def get_test_revs(log, repo, tag_name, **kwargs):
  207. """Get list of all tested revisions"""
  208. fields, runs = get_test_runs(log, repo, tag_name, **kwargs)
  209. revs = {}
  210. commit_i = fields.index('commit')
  211. commit_num_i = fields.index('commit_number')
  212. for run in runs:
  213. commit = run[commit_i]
  214. commit_num = run[commit_num_i]
  215. tag = run[-1]
  216. if not commit in revs:
  217. revs[commit] = TestedRev(commit, commit_num, [tag])
  218. else:
  219. assert commit_num == revs[commit].commit_number, "Commit numbers do not match"
  220. revs[commit].tags.append(tag)
  221. # Return in sorted table
  222. revs = sorted(revs.values(), key=attrgetter('commit_number'))
  223. log.debug("Found %d tested revisions:\n %s", len(revs),
  224. "\n ".join(['{} ({})'.format(rev.commit_number, rev.commit) for rev in revs]))
  225. return revs
  226. def rev_find(revs, attr, val):
  227. """Search from a list of TestedRev"""
  228. for i, rev in enumerate(revs):
  229. if getattr(rev, attr) == val:
  230. return i
  231. raise ValueError("Unable to find '{}' value '{}'".format(attr, val))