# buildstats.bbclass
  1. #
  2. # Copyright OpenEmbedded Contributors
  3. #
# Top-level directory for build statistics; run_buildstats writes the data
# of each build into a ${BUILDNAME} subdirectory below it.
BUILDSTATS_BASE = "${TMPDIR}/buildstats/"
  5. ################################################################################
  6. # Build statistics gathering.
  7. #
  8. # The CPU and Time gathering/tracking functions and bbevent inspiration
  9. # were written by Christopher Larson.
  10. #
  11. ################################################################################
  12. def get_buildprocess_cputime(pid):
  13. with open("/proc/%d/stat" % pid, "r") as f:
  14. fields = f.readline().rstrip().split()
  15. # 13: utime, 14: stime, 15: cutime, 16: cstime
  16. return sum(int(field) for field in fields[13:16])
  17. def get_process_cputime(pid):
  18. import resource
  19. with open("/proc/%d/stat" % pid, "r") as f:
  20. fields = f.readline().rstrip().split()
  21. stats = {
  22. 'utime' : fields[13],
  23. 'stime' : fields[14],
  24. 'cutime' : fields[15],
  25. 'cstime' : fields[16],
  26. }
  27. iostats = {}
  28. if os.path.isfile("/proc/%d/io" % pid):
  29. with open("/proc/%d/io" % pid, "r") as f:
  30. while True:
  31. i = f.readline().strip()
  32. if not i:
  33. break
  34. if not ":" in i:
  35. # one more extra line is appended (empty or containing "0")
  36. # most probably due to race condition in kernel while
  37. # updating IO stats
  38. break
  39. i = i.split(": ")
  40. iostats[i[0]] = i[1]
  41. resources = resource.getrusage(resource.RUSAGE_SELF)
  42. childres = resource.getrusage(resource.RUSAGE_CHILDREN)
  43. return stats, iostats, resources, childres
  44. def get_cputime():
  45. with open("/proc/stat", "r") as f:
  46. fields = f.readline().rstrip().split()[1:]
  47. return sum(int(field) for field in fields)
  48. def set_timedata(var, d, server_time):
  49. d.setVar(var, server_time)
  50. def get_timedata(var, d, end_time):
  51. oldtime = d.getVar(var, False)
  52. if oldtime is None:
  53. return
  54. return end_time - oldtime
  55. def set_buildtimedata(var, d):
  56. import time
  57. time = time.time()
  58. cputime = get_cputime()
  59. proctime = get_buildprocess_cputime(os.getpid())
  60. d.setVar(var, (time, cputime, proctime))
  61. def get_buildtimedata(var, d):
  62. import time
  63. timedata = d.getVar(var, False)
  64. if timedata is None:
  65. return
  66. oldtime, oldcpu, oldproc = timedata
  67. procdiff = get_buildprocess_cputime(os.getpid()) - oldproc
  68. cpudiff = get_cputime() - oldcpu
  69. end_time = time.time()
  70. timediff = end_time - oldtime
  71. if cpudiff > 0:
  72. cpuperc = float(procdiff) * 100 / cpudiff
  73. else:
  74. cpuperc = None
  75. return timediff, cpuperc
  76. def write_task_data(status, logfile, e, d):
  77. with open(os.path.join(logfile), "a") as f:
  78. elapsedtime = get_timedata("__timedata_task", d, e.time)
  79. if elapsedtime:
  80. f.write(d.expand("${PF}: %s\n" % e.task))
  81. f.write(d.expand("Elapsed time: %0.2f seconds\n" % elapsedtime))
  82. cpu, iostats, resources, childres = get_process_cputime(os.getpid())
  83. if cpu:
  84. f.write("utime: %s\n" % cpu['utime'])
  85. f.write("stime: %s\n" % cpu['stime'])
  86. f.write("cutime: %s\n" % cpu['cutime'])
  87. f.write("cstime: %s\n" % cpu['cstime'])
  88. for i in iostats:
  89. f.write("IO %s: %s\n" % (i, iostats[i]))
  90. rusages = ["ru_utime", "ru_stime", "ru_maxrss", "ru_minflt", "ru_majflt", "ru_inblock", "ru_oublock", "ru_nvcsw", "ru_nivcsw"]
  91. for i in rusages:
  92. f.write("rusage %s: %s\n" % (i, getattr(resources, i)))
  93. for i in rusages:
  94. f.write("Child rusage %s: %s\n" % (i, getattr(childres, i)))
  95. if status == "passed":
  96. f.write("Status: PASSED \n")
  97. else:
  98. f.write("Status: FAILED \n")
  99. f.write("Ended: %0.2f \n" % e.time)
  100. def write_host_data(logfile, e, d, type):
  101. import subprocess, os, datetime
  102. # minimum time allowed for each command to run, in seconds
  103. time_threshold = 0.5
  104. limit = 10
  105. # the total number of commands
  106. num_cmds = 0
  107. msg = ""
  108. if type == "interval":
  109. # interval at which data will be logged
  110. interval = d.getVar("BB_HEARTBEAT_EVENT", False)
  111. if interval is None:
  112. bb.warn("buildstats: Collecting host data at intervals failed. Set BB_HEARTBEAT_EVENT=\"<interval>\" in conf/local.conf for the interval at which host data will be logged.")
  113. d.setVar("BB_LOG_HOST_STAT_ON_INTERVAL", "0")
  114. return
  115. interval = int(interval)
  116. cmds = d.getVar('BB_LOG_HOST_STAT_CMDS_INTERVAL')
  117. msg = "Host Stats: Collecting data at %d second intervals.\n" % interval
  118. if cmds is None:
  119. d.setVar("BB_LOG_HOST_STAT_ON_INTERVAL", "0")
  120. bb.warn("buildstats: Collecting host data at intervals failed. Set BB_LOG_HOST_STAT_CMDS_INTERVAL=\"command1 ; command2 ; ... \" in conf/local.conf.")
  121. return
  122. if type == "failure":
  123. cmds = d.getVar('BB_LOG_HOST_STAT_CMDS_FAILURE')
  124. msg = "Host Stats: Collecting data on failure.\n"
  125. msg += "Failed at task: " + e.task + "\n"
  126. if cmds is None:
  127. d.setVar("BB_LOG_HOST_STAT_ON_FAILURE", "0")
  128. bb.warn("buildstats: Collecting host data on failure failed. Set BB_LOG_HOST_STAT_CMDS_FAILURE=\"command1 ; command2 ; ... \" in conf/local.conf.")
  129. return
  130. c_san = []
  131. for cmd in cmds.split(";"):
  132. if len(cmd) == 0:
  133. continue
  134. num_cmds += 1
  135. c_san.append(cmd)
  136. if num_cmds == 0:
  137. if type == "interval":
  138. d.setVar("BB_LOG_HOST_STAT_ON_INTERVAL", "0")
  139. if type == "failure":
  140. d.setVar("BB_LOG_HOST_STAT_ON_FAILURE", "0")
  141. return
  142. # return if the interval is not enough to run all commands within the specified BB_HEARTBEAT_EVENT interval
  143. if type == "interval":
  144. limit = interval / num_cmds
  145. if limit <= time_threshold:
  146. d.setVar("BB_LOG_HOST_STAT_ON_INTERVAL", "0")
  147. bb.warn("buildstats: Collecting host data failed. BB_HEARTBEAT_EVENT interval not enough to run the specified commands. Increase value of BB_HEARTBEAT_EVENT in conf/local.conf.")
  148. return
  149. # set the environment variables
  150. path = d.getVar("PATH")
  151. opath = d.getVar("BB_ORIGENV", False).getVar("PATH")
  152. ospath = os.environ['PATH']
  153. os.environ['PATH'] = path + ":" + opath + ":" + ospath
  154. with open(logfile, "a") as f:
  155. f.write("Event Time: %f\nDate: %s\n" % (e.time, datetime.datetime.now()))
  156. f.write("%s" % msg)
  157. for c in c_san:
  158. try:
  159. output = subprocess.check_output(c.split(), stderr=subprocess.STDOUT, timeout=limit).decode('utf-8')
  160. except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError) as err:
  161. output = "Error running command: %s\n%s\n" % (c, err)
  162. f.write("%s\n%s\n" % (c, output))
  163. # reset the environment
  164. os.environ['PATH'] = ospath
  165. python run_buildstats () {
  166. import bb.build
  167. import bb.event
  168. import time, subprocess, platform
  169. bn = d.getVar('BUILDNAME')
  170. ########################################################################
  171. # bitbake fires HeartbeatEvent even before a build has been
  172. # triggered, causing BUILDNAME to be None
  173. ########################################################################
  174. if bn is not None:
  175. bsdir = os.path.join(d.getVar('BUILDSTATS_BASE'), bn)
  176. taskdir = os.path.join(bsdir, d.getVar('PF'))
  177. if isinstance(e, bb.event.HeartbeatEvent) and bb.utils.to_boolean(d.getVar("BB_LOG_HOST_STAT_ON_INTERVAL")):
  178. bb.utils.mkdirhier(bsdir)
  179. write_host_data(os.path.join(bsdir, "host_stats_interval"), e, d, "interval")
  180. if isinstance(e, bb.event.BuildStarted):
  181. ########################################################################
  182. # If the kernel was not configured to provide I/O statistics, issue
  183. # a one time warning.
  184. ########################################################################
  185. if not os.path.isfile("/proc/%d/io" % os.getpid()):
  186. bb.warn("The Linux kernel on your build host was not configured to provide process I/O statistics. (CONFIG_TASK_IO_ACCOUNTING is not set)")
  187. ########################################################################
  188. # at first pass make the buildstats hierarchy and then
  189. # set the buildname
  190. ########################################################################
  191. bb.utils.mkdirhier(bsdir)
  192. set_buildtimedata("__timedata_build", d)
  193. build_time = os.path.join(bsdir, "build_stats")
  194. # write start of build into build_time
  195. with open(build_time, "a") as f:
  196. host_info = platform.uname()
  197. f.write("Host Info: ")
  198. for x in host_info:
  199. if x:
  200. f.write(x + " ")
  201. f.write("\n")
  202. f.write("Build Started: %0.2f \n" % d.getVar('__timedata_build', False)[0])
  203. elif isinstance(e, bb.event.BuildCompleted):
  204. build_time = os.path.join(bsdir, "build_stats")
  205. with open(build_time, "a") as f:
  206. ########################################################################
  207. # Write build statistics for the build
  208. ########################################################################
  209. timedata = get_buildtimedata("__timedata_build", d)
  210. if timedata:
  211. time, cpu = timedata
  212. # write end of build and cpu used into build_time
  213. f.write("Elapsed time: %0.2f seconds \n" % (time))
  214. if cpu:
  215. f.write("CPU usage: %0.1f%% \n" % cpu)
  216. if isinstance(e, bb.build.TaskStarted):
  217. set_timedata("__timedata_task", d, e.time)
  218. bb.utils.mkdirhier(taskdir)
  219. # write into the task event file the name and start time
  220. with open(os.path.join(taskdir, e.task), "a") as f:
  221. f.write("Event: %s \n" % bb.event.getName(e))
  222. f.write("Started: %0.2f \n" % e.time)
  223. elif isinstance(e, bb.build.TaskSucceeded):
  224. write_task_data("passed", os.path.join(taskdir, e.task), e, d)
  225. if e.task == "do_rootfs":
  226. bs = os.path.join(bsdir, "build_stats")
  227. with open(bs, "a") as f:
  228. rootfs = d.getVar('IMAGE_ROOTFS')
  229. if os.path.isdir(rootfs):
  230. try:
  231. rootfs_size = subprocess.check_output(["du", "-sh", rootfs],
  232. stderr=subprocess.STDOUT).decode('utf-8')
  233. f.write("Uncompressed Rootfs size: %s" % rootfs_size)
  234. except subprocess.CalledProcessError as err:
  235. bb.warn("Failed to get rootfs size: %s" % err.output.decode('utf-8'))
  236. elif isinstance(e, bb.build.TaskFailed):
  237. # Can have a failure before TaskStarted so need to mkdir here too
  238. bb.utils.mkdirhier(taskdir)
  239. write_task_data("failed", os.path.join(taskdir, e.task), e, d)
  240. ########################################################################
  241. # Lets make things easier and tell people where the build failed in
  242. # build_status. We do this here because BuildCompleted triggers no
  243. # matter what the status of the build actually is
  244. ########################################################################
  245. build_status = os.path.join(bsdir, "build_stats")
  246. with open(build_status, "a") as f:
  247. f.write(d.expand("Failed at: ${PF} at task: %s \n" % e.task))
  248. if bb.utils.to_boolean(d.getVar("BB_LOG_HOST_STAT_ON_FAILURE")):
  249. write_host_data(os.path.join(bsdir, "host_stats_%s_failure" % e.task), e, d, "failure")
  250. }
  251. addhandler run_buildstats
  252. run_buildstats[eventmask] = "bb.event.BuildStarted bb.event.BuildCompleted bb.event.HeartbeatEvent bb.build.TaskStarted bb.build.TaskSucceeded bb.build.TaskFailed"
  253. python runqueue_stats () {
  254. import buildstats
  255. from bb import event, runqueue
  256. # We should not record any samples before the first task has started,
  257. # because that's the first activity shown in the process chart.
  258. # Besides, at that point we are sure that the build variables
  259. # are available that we need to find the output directory.
  260. # The persistent SystemStats is stored in the datastore and
  261. # closed when the build is done.
  262. system_stats = d.getVar('_buildstats_system_stats', False)
  263. if not system_stats and isinstance(e, (bb.runqueue.sceneQueueTaskStarted, bb.runqueue.runQueueTaskStarted)):
  264. system_stats = buildstats.SystemStats(d)
  265. d.setVar('_buildstats_system_stats', system_stats)
  266. if system_stats:
  267. # Ensure that we sample at important events.
  268. done = isinstance(e, bb.event.BuildCompleted)
  269. if system_stats.sample(e, force=done):
  270. d.setVar('_buildstats_system_stats', system_stats)
  271. if done:
  272. system_stats.close()
  273. d.delVar('_buildstats_system_stats')
  274. }
  275. addhandler runqueue_stats
  276. runqueue_stats[eventmask] = "bb.runqueue.sceneQueueTaskStarted bb.runqueue.runQueueTaskStarted bb.event.HeartbeatEvent bb.event.BuildCompleted bb.event.MonitorDiskEvent"