create_manifest3.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
# This script is used as a bitbake task to create a new python manifest
# $ bitbake python -c create_manifest
#
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, which is why we split upstream python
# into several packages.
#
# In a very simplistic way what this does is:
# Launch python and see specifically what is required for it to run at a minimum
#
# Go through the python-manifest file and launch a separate task for every single
# one of the files on each package; this task will check what was required for that
# specific module to run. These modules will be called dependencies.
# The output of such a task will be a list of the modules or dependencies that were
# found for that file.
#
# That output will be parsed by this script: we will look for each dependency in the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency in any other package we will add it to the current package
# as part of FILES.
#
#
# This way we will create a new manifest from the data structure that was built during
# this process; in this new manifest each package will contain specifically only
# what it needs to run.
#
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
#
# This script differs from its python2 version mostly in how shared libraries are handled.
# The manifest file for python3 has an extra field which contains the cached files for
# each package.
# The method to handle cached files does not work when a module includes a folder which
# itself contains the pycache folder; fortunately this is almost never the case.
#
# Author: Alejandro Enedino Hernandez Samaniego <alejandro at enedino dot org>
  40. import sys
  41. import subprocess
  42. import json
  43. import os
  44. import collections
  45. if '-d' in sys.argv:
  46. debugFlag = '-d'
  47. else:
  48. debugFlag = ''
  49. # Get python version from ${PYTHON_MAJMIN}
  50. pyversion = str(sys.argv[1])
  51. # Hack to get native python search path (for folders), not fond of it but it works for now
  52. pivot = 'recipe-sysroot-native'
  53. for p in sys.path:
  54. if pivot in p:
  55. nativelibfolder = p[:p.find(pivot)+len(pivot)]
  56. # Empty dict to hold the whole manifest
  57. new_manifest = collections.OrderedDict()
  58. # Check for repeated files, folders and wildcards
  59. allfiles = []
  60. repeated = []
  61. wildcards = []
  62. hasfolders = []
  63. allfolders = []
  64. def isFolder(value):
  65. value = value.replace('${PYTHON_MAJMIN}',pyversion)
  66. if os.path.isdir(value.replace('${libdir}',nativelibfolder+'/usr/lib')) or os.path.isdir(value.replace('${libdir}',nativelibfolder+'/usr/lib64')) or os.path.isdir(value.replace('${libdir}',nativelibfolder+'/usr/lib32')):
  67. return True
  68. else:
  69. return False
  70. def isCached(item):
  71. if '__pycache__' in item:
  72. return True
  73. else:
  74. return False
  75. def prepend_comments(comments, json_manifest):
  76. with open(json_manifest, 'r+') as manifest:
  77. json_contents = manifest.read()
  78. manifest.seek(0, 0)
  79. manifest.write(comments + json_contents)
  80. def print_indent(msg, offset):
  81. for l in msg.splitlines():
  82. msg = ' ' * offset + l
  83. print(msg)
  84. # Read existing JSON manifest
  85. with open('python3-manifest.json') as manifest:
  86. # The JSON format doesn't allow comments so we hack the call to keep the comments using a marker
  87. manifest_str = manifest.read()
  88. json_start = manifest_str.find('# EOC') + 6 # EOC + \n
  89. manifest.seek(0)
  90. comments = manifest.read(json_start)
  91. manifest_str = manifest.read()
  92. old_manifest = json.loads(manifest_str, object_pairs_hook=collections.OrderedDict)
  93. #
  94. # First pass to get core-package functionality, because we base everything on the fact that core is actually working
  95. # Not exactly the same so it should not be a function
  96. #
  97. print_indent('Getting dependencies for package: core', 0)
  98. # This special call gets the core dependencies and
  99. # appends to the old manifest so it doesnt hurt what it
  100. # currently holds.
  101. # This way when other packages check for dependencies
  102. # on the new core package, they will still find them
  103. # even when checking the old_manifest
  104. output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package', '%s' % debugFlag]).decode('utf8')
  105. for coredep in output.split():
  106. coredep = coredep.replace(pyversion,'${PYTHON_MAJMIN}')
  107. if isCached(coredep):
  108. if coredep not in old_manifest['core']['cached']:
  109. old_manifest['core']['cached'].append(coredep)
  110. else:
  111. if coredep not in old_manifest['core']['files']:
  112. old_manifest['core']['files'].append(coredep)
  113. # The second step is to loop through the existing files contained in the core package
  114. # according to the old manifest, identify if they are modules, or some other type
  115. # of file that we cant import (directories, binaries, configs) in which case we
  116. # can only assume they were added correctly (manually) so we ignore those and
  117. # pass them to the manifest directly.
  118. for filedep in old_manifest['core']['files']:
  119. if isFolder(filedep):
  120. if isCached(filedep):
  121. if filedep not in old_manifest['core']['cached']:
  122. old_manifest['core']['cached'].append(filedep)
  123. else:
  124. if filedep not in old_manifest['core']['files']:
  125. old_manifest['core']['files'].append(filedep)
  126. continue
  127. if '${bindir}' in filedep:
  128. if filedep not in old_manifest['core']['files']:
  129. old_manifest['core']['files'].append(filedep)
  130. continue
  131. if filedep == '':
  132. continue
  133. if '${includedir}' in filedep:
  134. if filedep not in old_manifest['core']['files']:
  135. old_manifest['core']['files'].append(filedep)
  136. continue
  137. # Get actual module name , shouldnt be affected by libdir/bindir, etc.
  138. pymodule = os.path.splitext(os.path.basename(os.path.normpath(filedep)))[0]
  139. # We now know that were dealing with a python module, so we can import it
  140. # and check what its dependencies are.
  141. # We launch a separate task for each module for deterministic behavior.
  142. # Each module will only import what is necessary for it to work in specific.
  143. # The output of each task will contain each module's dependencies
  144. print_indent('Getting dependencies for module: %s' % pymodule, 2)
  145. output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule, '%s' % debugFlag]).decode('utf8')
  146. print_indent('The following dependencies were found for module %s:\n' % pymodule, 4)
  147. print_indent(output, 6)
  148. for pymodule_dep in output.split():
  149. pymodule_dep = pymodule_dep.replace(pyversion,'${PYTHON_MAJMIN}')
  150. if isCached(pymodule_dep):
  151. if pymodule_dep not in old_manifest['core']['cached']:
  152. old_manifest['core']['cached'].append(pymodule_dep)
  153. else:
  154. if pymodule_dep not in old_manifest['core']['files']:
  155. old_manifest['core']['files'].append(pymodule_dep)
  156. # At this point we are done with the core package.
  157. # The old_manifest dictionary is updated only for the core package because
  158. # all others will use this a base.
  159. print('\n\nChecking for directories...\n')
  160. # To improve the script speed, we check which packages contain directories
  161. # since we will be looping through (only) those later.
  162. for pypkg in old_manifest:
  163. for filedep in old_manifest[pypkg]['files']:
  164. if isFolder(filedep):
  165. print_indent('%s is a directory' % filedep, 2)
  166. if pypkg not in hasfolders:
  167. hasfolders.append(pypkg)
  168. if filedep not in allfolders:
  169. allfolders.append(filedep)
  170. # This is the main loop that will handle each package.
  171. # It works in a similar fashion than the step before, but
  172. # we will now be updating a new dictionary that will eventually
  173. # become the new manifest.
  174. #
  175. # The following loops though all packages in the manifest,
  176. # through all files on each of them, and checks whether or not
  177. # they are modules and can be imported.
  178. # If they can be imported, then it checks for dependencies for
  179. # each of them by launching a separate task.
  180. # The output of that task is then parsed and the manifest is updated
  181. # accordingly, wether it should add the module on FILES for the current package
  182. # or if that module already belongs to another package then the current one
  183. # will RDEPEND on it
  184. for pypkg in old_manifest:
  185. # Use an empty dict as data structure to hold data for each package and fill it up
  186. new_manifest[pypkg] = collections.OrderedDict()
  187. new_manifest[pypkg]['summary'] = old_manifest[pypkg]['summary']
  188. new_manifest[pypkg]['rdepends'] = []
  189. new_manifest[pypkg]['files'] = []
  190. new_manifest[pypkg]['cached'] = old_manifest[pypkg]['cached']
  191. # All packages should depend on core
  192. if pypkg != 'core':
  193. new_manifest[pypkg]['rdepends'].append('core')
  194. new_manifest[pypkg]['cached'] = []
  195. print('\n')
  196. print('--------------------------')
  197. print('Handling package %s' % pypkg)
  198. print('--------------------------')
  199. # Handle special cases, we assume that when they were manually added
  200. # to the manifest we knew what we were doing.
  201. special_packages = ['misc', 'modules', 'dev', 'tests']
  202. if pypkg in special_packages or 'staticdev' in pypkg:
  203. print_indent('Passing %s package directly' % pypkg, 2)
  204. new_manifest[pypkg] = old_manifest[pypkg]
  205. continue
  206. for filedep in old_manifest[pypkg]['files']:
  207. # We already handled core on the first pass, we can ignore it now
  208. if pypkg == 'core':
  209. if filedep not in new_manifest[pypkg]['files']:
  210. new_manifest[pypkg]['files'].append(filedep)
  211. continue
  212. # Handle/ignore what we cant import
  213. if isFolder(filedep):
  214. new_manifest[pypkg]['files'].append(filedep)
  215. # Asyncio (and others) are both the package and the folder name, we should not skip those...
  216. path,mod = os.path.split(filedep)
  217. if mod != pypkg:
  218. continue
  219. if '${bindir}' in filedep:
  220. if filedep not in new_manifest[pypkg]['files']:
  221. new_manifest[pypkg]['files'].append(filedep)
  222. continue
  223. if filedep == '':
  224. continue
  225. if '${includedir}' in filedep:
  226. if filedep not in new_manifest[pypkg]['files']:
  227. new_manifest[pypkg]['files'].append(filedep)
  228. continue
  229. # Get actual module name , shouldnt be affected by libdir/bindir, etc.
  230. # We need to check if the imported module comes from another (e.g. sqlite3.dump)
  231. path, pymodule = os.path.split(filedep)
  232. path = os.path.basename(path)
  233. pymodule = os.path.splitext(os.path.basename(pymodule))[0]
  234. # If this condition is met, it means we need to import it from another module
  235. # or its the folder itself (e.g. unittest)
  236. if path == pypkg:
  237. if pymodule:
  238. pymodule = path + '.' + pymodule
  239. else:
  240. pymodule = path
  241. # We now know that were dealing with a python module, so we can import it
  242. # and check what its dependencies are.
  243. # We launch a separate task for each module for deterministic behavior.
  244. # Each module will only import what is necessary for it to work in specific.
  245. # The output of each task will contain each module's dependencies
  246. print_indent('\nGetting dependencies for module: %s' % pymodule, 2)
  247. output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule, '%s' % debugFlag]).decode('utf8')
  248. print_indent('The following dependencies were found for module %s:\n' % pymodule, 4)
  249. print_indent(output, 6)
  250. reportFILES = []
  251. reportRDEPS = []
  252. for pymodule_dep in output.split():
  253. # Warning: This first part is ugly
  254. # One of the dependencies that was found, could be inside of one of the folders included by another package
  255. # We need to check if this happens so we can add the package containing the folder as an rdependency
  256. # e.g. Folder encodings contained in codecs
  257. # This would be solved if no packages included any folders
  258. # This can be done in two ways:
  259. # 1 - We assume that if we take out the filename from the path we would get
  260. # the folder string, then we would check if folder string is in the list of folders
  261. # This would not work if a package contains a folder which contains another folder
  262. # e.g. path/folder1/folder2/filename folder_string= path/folder1/folder2
  263. # folder_string would not match any value contained in the list of folders
  264. #
  265. # 2 - We do it the other way around, checking if the folder is contained in the path
  266. # e.g. path/folder1/folder2/filename folder_string= path/folder1/folder2
  267. # is folder_string inside path/folder1/folder2/filename?,
  268. # Yes, it works, but we waste a couple of milliseconds.
  269. pymodule_dep = pymodule_dep.replace(pyversion,'${PYTHON_MAJMIN}')
  270. inFolders = False
  271. for folder in allfolders:
  272. # The module could have a directory named after it, e.g. xml, if we take out the filename from the path
  273. # we'll end up with ${libdir}, and we want ${libdir}/xml
  274. if isFolder(pymodule_dep):
  275. check_path = pymodule_dep
  276. else:
  277. check_path = os.path.dirname(pymodule_dep)
  278. if folder in check_path :
  279. inFolders = True # Did we find a folder?
  280. folderFound = False # Second flag to break inner for
  281. # Loop only through packages which contain folders
  282. for pypkg_with_folder in hasfolders:
  283. if (folderFound == False):
  284. # print('Checking folder %s on package %s' % (pymodule_dep,pypkg_with_folder))
  285. for folder_dep in old_manifest[pypkg_with_folder]['files'] or folder_dep in old_manifest[pypkg_with_folder]['cached']:
  286. if folder_dep == folder:
  287. print ('%s directory found in %s' % (folder, pypkg_with_folder))
  288. folderFound = True
  289. if pypkg_with_folder not in new_manifest[pypkg]['rdepends'] and pypkg_with_folder != pypkg:
  290. new_manifest[pypkg]['rdepends'].append(pypkg_with_folder)
  291. else:
  292. break
  293. # A folder was found so we're done with this item, we can go on
  294. if inFolders:
  295. continue
  296. # No directories beyond this point
  297. # We might already have this module on the dictionary since it could depend on a (previously checked) module
  298. if pymodule_dep not in new_manifest[pypkg]['files'] and pymodule_dep not in new_manifest[pypkg]['cached']:
  299. # Handle core as a special package, we already did it so we pass it to NEW data structure directly
  300. if pypkg == 'core':
  301. print('Adding %s to %s FILES' % (pymodule_dep, pypkg))
  302. if pymodule_dep.endswith('*'):
  303. wildcards.append(pymodule_dep)
  304. if isCached(pymodule_dep):
  305. new_manifest[pypkg]['cached'].append(pymodule_dep)
  306. else:
  307. new_manifest[pypkg]['files'].append(pymodule_dep)
  308. # Check for repeated files
  309. if pymodule_dep not in allfiles:
  310. allfiles.append(pymodule_dep)
  311. else:
  312. if pymodule_dep not in repeated:
  313. repeated.append(pymodule_dep)
  314. else:
  315. # Last step: Figure out if we this belongs to FILES or RDEPENDS
  316. # We check if this module is already contained on another package, so we add that one
  317. # as an RDEPENDS, or if its not, it means it should be contained on the current
  318. # package, and we should add it to FILES
  319. for possible_rdep in old_manifest:
  320. # Debug
  321. # print('Checking %s ' % pymodule_dep + ' in %s' % possible_rdep)
  322. if pymodule_dep in old_manifest[possible_rdep]['files'] or pymodule_dep in old_manifest[possible_rdep]['cached']:
  323. # Since were nesting, we need to check its not the same pypkg
  324. if(possible_rdep != pypkg):
  325. if possible_rdep not in new_manifest[pypkg]['rdepends']:
  326. # Add it to the new manifest data struct as RDEPENDS since it contains something this module needs
  327. reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (possible_rdep, pypkg, pymodule_dep))
  328. new_manifest[pypkg]['rdepends'].append(possible_rdep)
  329. break
  330. else:
  331. # Since this module wasnt found on another package, it is not an RDEP,
  332. # so we add it to FILES for this package.
  333. # A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldnt be on sqlite3 files)
  334. if os.path.basename(pymodule_dep) != pypkg:
  335. reportFILES.append(('Adding %s to %s FILES\n' % (pymodule_dep, pypkg)))
  336. if isCached(pymodule_dep):
  337. new_manifest[pypkg]['cached'].append(pymodule_dep)
  338. else:
  339. new_manifest[pypkg]['files'].append(pymodule_dep)
  340. if pymodule_dep.endswith('*'):
  341. wildcards.append(pymodule_dep)
  342. if pymodule_dep not in allfiles:
  343. allfiles.append(pymodule_dep)
  344. else:
  345. if pymodule_dep not in repeated:
  346. repeated.append(pymodule_dep)
  347. print('\n')
  348. print('#################################')
  349. print('Summary for module %s' % pymodule)
  350. print('FILES found for module %s:' % pymodule)
  351. print(''.join(reportFILES))
  352. print('RDEPENDS found for module %s:' % pymodule)
  353. print(''.join(reportRDEPS))
  354. print('#################################')
  355. print('The following FILES contain wildcards, please check if they are necessary')
  356. print(wildcards)
  357. print('The following FILES contain folders, please check if they are necessary')
  358. print(hasfolders)
  359. # Sort it just so it looks nicer
  360. for pypkg in new_manifest:
  361. new_manifest[pypkg]['files'].sort()
  362. new_manifest[pypkg]['cached'].sort()
  363. new_manifest[pypkg]['rdepends'].sort()
  364. # Create the manifest from the data structure that was built
  365. with open('python3-manifest.json.new','w') as outfile:
  366. json.dump(new_manifest,outfile, indent=4)
  367. outfile.write('\n')
  368. prepend_comments(comments,'python3-manifest.json.new')
  369. if (repeated):
  370. error_msg = '\n\nERROR:\n'
  371. error_msg += 'The following files were found in more than one package),\n'
  372. error_msg += 'this is likely to happen when new files are introduced after an upgrade,\n'
  373. error_msg += 'please check which package should get it,\n modify the manifest accordingly and re-run the create_manifest task:\n'
  374. error_msg += '\n'.join(repeated)
  375. error_msg += '\n'
  376. sys.exit(error_msg)