codeparser.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. from bb.pysh import pyshyacc, pyshlex
  2. from itertools import chain
  3. from bb import msg, utils
  4. import ast
  5. import codegen
  6. PARSERCACHE_VERSION = 2
  7. try:
  8. import cPickle as pickle
  9. except ImportError:
  10. import pickle
  11. bb.msg.note(1, bb.msg.domain.Cache, "Importing cPickle failed. Falling back to a very slow implementation.")
  12. def check_indent(codestr):
  13. """If the code is indented, add a top level piece of code to 'remove' the indentation"""
  14. i = 0
  15. while codestr[i] in ["\n", " ", " "]:
  16. i = i + 1
  17. if i == 0:
  18. return codestr
  19. if codestr[i-1] is " " or codestr[i-1] is " ":
  20. return "if 1:\n" + codestr
  21. return codestr
# In-memory parse caches, keyed by hash(str(<parsed code>)).
#   pythonparsecache[h] -> {"refs": set, "execs": set}  (filled by PythonParser.parse_python)
#   shellparsecache[h]  -> {"execs": set}               (filled by ShellParser.parse_shell)
# parser_cache_init() replaces both from the on-disk cache file and
# parser_cache_save() writes both back out.
pythonparsecache = {}
shellparsecache = {}
  24. def parser_cachefile(d):
  25. cachedir = bb.data.getVar("PERSISTENT_DIR", d, True) or bb.data.getVar("CACHE", d, True)
  26. if cachedir in [None, '']:
  27. return None
  28. bb.utils.mkdirhier(cachedir)
  29. cachefile = os.path.join(cachedir, "bb_codeparser.dat")
  30. bb.msg.debug(1, bb.msg.domain.Cache, "Using cache in '%s' for codeparser cache" % cachefile)
  31. return cachefile
  32. def parser_cache_init(d):
  33. cachefile = parser_cachefile(d)
  34. if not cachefile:
  35. return
  36. try:
  37. p = pickle.Unpickler(file(cachefile, "rb"))
  38. data, version = p.load()
  39. except:
  40. return
  41. if version != PARSERCACHE_VERSION:
  42. return
  43. bb.codeparser.pythonparsecache = data[0]
  44. bb.codeparser.shellparsecache = data[1]
  45. def parser_cache_save(d):
  46. cachefile = parser_cachefile(d)
  47. if not cachefile:
  48. return
  49. p = pickle.Pickler(file(cachefile, "wb"), -1)
  50. p.dump([[bb.codeparser.pythonparsecache, bb.codeparser.shellparsecache], PARSERCACHE_VERSION])
  51. class PythonParser():
  52. class ValueVisitor():
  53. """Visitor to traverse a python abstract syntax tree and obtain
  54. the variables referenced via bitbake metadata APIs, and the external
  55. functions called.
  56. """
  57. getvars = ("d.getVar", "bb.data.getVar", "data.getVar")
  58. expands = ("d.expand", "bb.data.expand", "data.expand")
  59. execs = ("bb.build.exec_func", "bb.build.exec_task")
  60. @classmethod
  61. def _compare_name(cls, strparts, node):
  62. """Given a sequence of strings representing a python name,
  63. where the last component is the actual Name and the prior
  64. elements are Attribute nodes, determine if the supplied node
  65. matches.
  66. """
  67. if not strparts:
  68. return True
  69. current, rest = strparts[0], strparts[1:]
  70. if isinstance(node, ast.Attribute):
  71. if current == node.attr:
  72. return cls._compare_name(rest, node.value)
  73. elif isinstance(node, ast.Name):
  74. if current == node.id:
  75. return True
  76. return False
  77. @classmethod
  78. def compare_name(cls, value, node):
  79. """Convenience function for the _compare_node method, which
  80. can accept a string (which is split by '.' for you), or an
  81. iterable of strings, in which case it checks to see if any of
  82. them match, similar to isinstance.
  83. """
  84. if isinstance(value, basestring):
  85. return cls._compare_name(tuple(reversed(value.split("."))),
  86. node)
  87. else:
  88. return any(cls.compare_name(item, node) for item in value)
  89. def __init__(self, value):
  90. self.var_references = set()
  91. self.var_execs = set()
  92. self.direct_func_calls = set()
  93. self.var_expands = set()
  94. self.value = value
  95. @classmethod
  96. def warn(cls, func, arg):
  97. """Warn about calls of bitbake APIs which pass a non-literal
  98. argument for the variable name, as we're not able to track such
  99. a reference.
  100. """
  101. try:
  102. funcstr = codegen.to_source(func)
  103. argstr = codegen.to_source(arg)
  104. except TypeError:
  105. msg.debug(2, None, "Failed to convert function and argument to source form")
  106. else:
  107. msg.debug(1, None, "Warning: in call to '%s', argument '%s' is not a literal" %
  108. (funcstr, argstr))
  109. def visit_Call(self, node):
  110. if self.compare_name(self.getvars, node.func):
  111. if isinstance(node.args[0], ast.Str):
  112. self.var_references.add(node.args[0].s)
  113. else:
  114. self.warn(node.func, node.args[0])
  115. elif self.compare_name(self.expands, node.func):
  116. if isinstance(node.args[0], ast.Str):
  117. self.warn(node.func, node.args[0])
  118. self.var_expands.update(node.args[0].s)
  119. elif isinstance(node.args[0], ast.Call) and \
  120. self.compare_name(self.getvars, node.args[0].func):
  121. pass
  122. else:
  123. self.warn(node.func, node.args[0])
  124. elif self.compare_name(self.execs, node.func):
  125. if isinstance(node.args[0], ast.Str):
  126. self.var_execs.add(node.args[0].s)
  127. else:
  128. self.warn(node.func, node.args[0])
  129. elif isinstance(node.func, ast.Name):
  130. self.direct_func_calls.add(node.func.id)
  131. elif isinstance(node.func, ast.Attribute):
  132. # We must have a qualified name. Therefore we need
  133. # to walk the chain of 'Attribute' nodes to determine
  134. # the qualification.
  135. attr_node = node.func.value
  136. identifier = node.func.attr
  137. while isinstance(attr_node, ast.Attribute):
  138. identifier = attr_node.attr + "." + identifier
  139. attr_node = attr_node.value
  140. if isinstance(attr_node, ast.Name):
  141. identifier = attr_node.id + "." + identifier
  142. self.direct_func_calls.add(identifier)
  143. def __init__(self):
  144. #self.funcdefs = set()
  145. self.execs = set()
  146. #self.external_cmds = set()
  147. self.references = set()
  148. def parse_python(self, node):
  149. h = hash(str(node))
  150. if h in pythonparsecache:
  151. self.references = pythonparsecache[h]["refs"]
  152. self.execs = pythonparsecache[h]["execs"]
  153. return
  154. code = compile(check_indent(str(node)), "<string>", "exec",
  155. ast.PyCF_ONLY_AST)
  156. visitor = self.ValueVisitor(code)
  157. for n in ast.walk(code):
  158. if n.__class__.__name__ == "Call":
  159. visitor.visit_Call(n)
  160. self.references.update(visitor.var_references)
  161. self.references.update(visitor.var_execs)
  162. self.execs = visitor.direct_func_calls
  163. pythonparsecache[h] = {}
  164. pythonparsecache[h]["refs"] = self.references
  165. pythonparsecache[h]["execs"] = self.execs
  166. class ShellParser():
  167. def __init__(self):
  168. self.funcdefs = set()
  169. self.allexecs = set()
  170. self.execs = set()
  171. def parse_shell(self, value):
  172. """Parse the supplied shell code in a string, returning the external
  173. commands it executes.
  174. """
  175. h = hash(str(value))
  176. if h in shellparsecache:
  177. self.execs = shellparsecache[h]["execs"]
  178. return self.execs
  179. try:
  180. tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
  181. except pyshlex.NeedMore:
  182. raise ShellSyntaxError("Unexpected EOF")
  183. for token in tokens:
  184. self.process_tokens(token)
  185. self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
  186. shellparsecache[h] = {}
  187. shellparsecache[h]["execs"] = self.execs
  188. return self.execs
  189. def process_tokens(self, tokens):
  190. """Process a supplied portion of the syntax tree as returned by
  191. pyshyacc.parse.
  192. """
  193. def function_definition(value):
  194. self.funcdefs.add(value.name)
  195. return [value.body], None
  196. def case_clause(value):
  197. # Element 0 of each item in the case is the list of patterns, and
  198. # Element 1 of each item in the case is the list of commands to be
  199. # executed when that pattern matches.
  200. words = chain(*[item[0] for item in value.items])
  201. cmds = chain(*[item[1] for item in value.items])
  202. return cmds, words
  203. def if_clause(value):
  204. main = chain(value.cond, value.if_cmds)
  205. rest = value.else_cmds
  206. if isinstance(rest, tuple) and rest[0] == "elif":
  207. return chain(main, if_clause(rest[1]))
  208. else:
  209. return chain(main, rest)
  210. def simple_command(value):
  211. return None, chain(value.words, (assign[1] for assign in value.assigns))
  212. token_handlers = {
  213. "and_or": lambda x: ((x.left, x.right), None),
  214. "async": lambda x: ([x], None),
  215. "brace_group": lambda x: (x.cmds, None),
  216. "for_clause": lambda x: (x.cmds, x.items),
  217. "function_definition": function_definition,
  218. "if_clause": lambda x: (if_clause(x), None),
  219. "pipeline": lambda x: (x.commands, None),
  220. "redirect_list": lambda x: ([x.cmd], None),
  221. "subshell": lambda x: (x.cmds, None),
  222. "while_clause": lambda x: (chain(x.condition, x.cmds), None),
  223. "until_clause": lambda x: (chain(x.condition, x.cmds), None),
  224. "simple_command": simple_command,
  225. "case_clause": case_clause,
  226. }
  227. for token in tokens:
  228. name, value = token
  229. try:
  230. more_tokens, words = token_handlers[name](value)
  231. except KeyError:
  232. raise NotImplementedError("Unsupported token type " + name)
  233. if more_tokens:
  234. self.process_tokens(more_tokens)
  235. if words:
  236. self.process_words(words)
  237. def process_words(self, words):
  238. """Process a set of 'words' in pyshyacc parlance, which includes
  239. extraction of executed commands from $() blocks, as well as grabbing
  240. the command name argument.
  241. """
  242. words = list(words)
  243. for word in list(words):
  244. wtree = pyshlex.make_wordtree(word[1])
  245. for part in wtree:
  246. if not isinstance(part, list):
  247. continue
  248. if part[0] in ('`', '$('):
  249. command = pyshlex.wordtree_as_string(part[1:-1])
  250. self.parse_shell(command)
  251. if word[0] in ("cmd_name", "cmd_word"):
  252. if word in words:
  253. words.remove(word)
  254. usetoken = False
  255. for word in words:
  256. if word[0] in ("cmd_name", "cmd_word") or \
  257. (usetoken and word[0] == "TOKEN"):
  258. if "=" in word[1]:
  259. usetoken = True
  260. continue
  261. cmd = word[1]
  262. if cmd.startswith("$"):
  263. msg.debug(1, None, "Warning: execution of non-literal command '%s'" % cmd)
  264. elif cmd == "eval":
  265. command = " ".join(word for _, word in words[1:])
  266. self.parse_shell(command)
  267. else:
  268. self.allexecs.add(cmd)
  269. break