#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2015 Enea Software AB
# Author: Thomas Lundström <thomas.lundstrom@enea.com>
#
# The script measures interrupt latency together with different types of
# system load. This is done using the programs cyclictest and stress-ng.
#
# The main output is:
#
# Best case (min) latency
#   This has very limited value, but is presented since it can be
#   obtained easily.
#
# Average latency
#   This value is of interest for throughput-oriented systems, but of
#   limited value for a real-time system. It is also presented because
#   it is easy to obtain.
#
# Worst case (max) latency
#   This is the interesting number for a real-time system. The number
#   presented is (of course) the largest number observed. The challenge
#   is to know how the observed worst case relates to the actual worst
#   case.
#
# To get an indication of the confidence, the following method is used:
#
# 1) Instead of one long run, the measurement is made as a set of shorter
#    runs. The number of runs must be a power of 2 for reasons that will
#    shortly be obvious.
#
# 2) First, a list of the max values is created.
#
# 3) The smallest value in that list is recorded.
#
# 4) Then a new list is created by taking the max value of each pair of
#    values in the original list. In this list the smallest value is
#    recorded.
#
# 5) Step 4 is repeated until there is only one value in the list. See
#    the example below:
#
#    Samples:
#    |  44 |     |     |     |     |
#    |  77 |  77 |     |     |     |
#    | 118 |     |     |     |     |
#    | 119 | 119 | 119 |     |     |
#    | 138 |     |     |     |     |
#    |  57 | 138 |     |     |     |
#    | 175 |     |     |     |     |
#    | 130 | 175 | 175 | 175 |     |
#    |  54 |     |     |     |     |
#    | 150 | 150 |     |     |     |
#    |  47 |     |     |     |     |
#    |  59 |  59 | 150 |     |     |
#    | 199 |     |     |     |     |
#    | 115 | 199 |     |     |     |
#    | 177 |     |     |     |     |
#    | 129 | 177 | 199 | 199 | 199 |
#
#    Smallest value:
#    |  44 |  59 | 119 | 175 | 199 |
#
# 6) The generated list of smallest values is analyzed. In this case, it
#    can be observed that the values increase significantly through the
#    entire list, which leads to the conclusion that the number of
#    samples is too small.
#    If the list had instead been (167, 191, 196, 199, 199), there would
#    have been very little or no increase at the end of the list, and we
#    might then suspect that the number of samples is large enough.
#    There is, however, no guarantee of that.
#
# Steps 1-2 are done in run_cyclictest_suite.
# Steps 3-5 are done in gen_minmax_list.
# Step 6 needs to be done manually, since there is (yet) no well-defined
# FAIL criterion and a theoretically solid PASS criterion may never be
# available.
import multiprocessing
import os
import re
import signal
import subprocess
import time
import traceback

# See comment on the function set_hung_tmo
has_hung_task_detection = True

#-------------------------------------------------------------------------------
class TestFail(Exception):
    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        return "Test failure: (" + self.msg + ")"

#-------------------------------------------------------------------------------
def tc_name(sub_name):
    return "rt_bmark.intlat." + sub_name

#-------------------------------------------------------------------------------
# log() does the same job as print except that a '#' is added at the beginning
# of each line. This causes TEFEL to ignore it.
def log(*msg):
    tmp = "".join(map(str, msg))  # 'map(str, ...)' allows numbers
    if not tmp:
        # Called without arguments: emit a bare '#' so that the intended
        # blank separator lines actually show up in the log
        print("#")
        return
    for line in tmp.splitlines():
        print("#", line)

#-------------------------------------------------------------------------------
# Like log(), but with a timestamp added
def log_ts(*msg):
    ts = time.localtime()
    stamp = "%2d:%02d:%02d: " % (ts.tm_hour, ts.tm_min, ts.tm_sec)
    log(stamp, *msg)
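# Example (illustrative only): log("Max: ", 199, " us") prints
# "# Max: 199 us", and log_ts("Start") prints something like
# "# 12:34:56: Start", with the timestamp taken from the local time of
# the call.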
#-------------------------------------------------------------------------------
def log_test_header(seq_no, nr_of_tests, name):
    log("=" * 78)
    log()
    log(" Test case (%d/%d): %s" % (seq_no, nr_of_tests, tc_name(name)))
    log()
    log("." * 78)
    log()

#-------------------------------------------------------------------------------
def start_stress(*args):
    stress_cmd = ["stress-ng"]
    added_stress_types = []
    req_stress_types = set(args)
    cpu_cnt = str(multiprocessing.cpu_count())

    # The function cond_add_stress appends the options to the stress
    # command if the stress type is in the set of requested stress types
    def cond_add_stress(stress_type, options):
        if stress_type in req_stress_types:
            req_stress_types.remove(stress_type)
            added_stress_types.append(stress_type)
            stress_cmd.extend(options)

    #----------
    cond_add_stress("io",  ["-i", cpu_cnt])
    cond_add_stress("cpu", ["-c", cpu_cnt])
    cond_add_stress("hdd", ["-d", cpu_cnt, "--hdd-bytes", "20M"])
    cond_add_stress("vm",  ["-m", cpu_cnt, "--vm-bytes", "10M"])

    unknown = ", ".join(req_stress_types)
    if unknown != "":
        raise TestFail("Unknown stress type(s): %s" % unknown)

    if not added_stress_types:
        log("No stress requested")
        return None

    added = "+".join(added_stress_types)
    stress_cmd_str = " ".join(stress_cmd)

    log("Starting stress(", added, ")")
    log("  Command: '", stress_cmd_str, "'")
    log()

    # start_new_session causes stress to be executed in a separate
    # session, => it gets a new process group (incl. children). It
    # can then be terminated using os.killpg in end_stress without
    # terminating this script.
    p = subprocess.Popen(stress_cmd, start_new_session=True)

    return p
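# Example (illustrative, assuming a 4-core target): start_stress("cpu", "vm")
# builds and starts the command
#
#   stress-ng -c 4 -m 4 --vm-bytes 10M
#
# where the instance counts follow multiprocessing.cpu_count().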
#-------------------------------------------------------------------------------
def end_stress(p):
    if p is None:
        # The value None indicates that no stress scenario was started
        return

    if p.poll() is not None:
        raise TestFail("stress prematurely terminated.")

    os.killpg(os.getpgid(p.pid), signal.SIGTERM)
    log("Terminated stress")

#-------------------------------------------------------------------------------
def us2hms_str(us):
    s = (us+500000) // 1000000  # Round microseconds to s
    m = s//60
    s -= 60*m
    h = m//60
    m -= 60*h

    return "%d:%02d:%02d" % (h, m, s)
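# Example (illustrative only): us2hms_str(3723000000) returns "1:02:03",
# i.e. 3723000000 us is rounded to 3723 s and split into h:mm:ss.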
#-------------------------------------------------------------------------------
# Sometimes the hung task supervision is triggered during execution of
# cyclictest (cyclictest starves stress). To avoid that, the supervision
# is temporarily disabled.
def set_hung_tmo(new_tmo):
    global has_hung_task_detection

    tmo_file = "/proc/sys/kernel/hung_task_timeout_secs"

    if not has_hung_task_detection:
        return

    if not os.access(tmo_file, os.W_OK):
        log("Hung task detection not supported")
        log("  (File ", tmo_file, " not found)")
        has_hung_task_detection = False
        return

    orig_tmo = int(subprocess.check_output(["cat", tmo_file]).strip())
    if new_tmo != orig_tmo:
        cmd = "echo " + str(new_tmo) + " > " + tmo_file
        subprocess.check_output(cmd, shell=True)
        log("Changed timeout for detection of hung tasks: ",
            orig_tmo, " -> ", new_tmo)

    return orig_tmo
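# Typical usage, as done in run_cyclictest_suite below:
#
#   orig_tmo = set_hung_tmo(0)   # 0 => disable hung task detection
#   ...                          # run the measurement
#   set_hung_tmo(orig_tmo)       # restore the original timeout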
#-------------------------------------------------------------------------------
def gen_minmax_list(max_list):
    res = [min(max_list)]

    while True:
        tmp = max_list
        max_list = []
        while tmp:
            max_list.append(max(tmp.pop(0), tmp.pop(0)))
        res.append(min(max_list))
        if len(max_list) < 2:
            return res
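# Worked example (the sample values from the header comment); note that
# gen_minmax_list() consumes its argument via pop(0), so the caller's
# list is empty afterwards:
#
#   >>> gen_minmax_list([ 44,  77, 118, 119, 138,  57, 175, 130,
#   ...                   54, 150,  47,  59, 199, 115, 177, 129])
#   [44, 59, 119, 175, 199]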
#-------------------------------------------------------------------------------
# Parameters for cyclictest:
#
# On the -S option (from cyclictest.c):
#   -S implies options -a -t -n and same priority of all threads
#   -a: One thread per core
#   -n: use clock_nanosleep instead of posix interval timers
#   -t: (without argument) Set number of threads to the number
#       of cpus

interval_core_0 = 100    # Timer interval on core 0 [us]
interval_delta  = 20     # Interval increment for each core [us]
loop_count      = 30000  # Number of loops (on core 0).
cmd = ("cyclictest",
       "-S",                       # Standard SMP testing, see the comment above
       "-p", "99",                 # RT priority 99
       "-q",                       # Quiet mode, i.e. print only a summary
       "-i", str(interval_core_0),
       "-d", str(interval_delta),
       "-l", str(loop_count)
       )

rex = re.compile(r"C:\s*(\d+).*Min:\s*(\d+).*Avg:\s*(\d+).*Max:\s*(\d+)")
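# The regex above picks out, for each measurement thread, the cycle count
# (C), Min, Avg and Max values [us] from cyclictest's per-thread summary
# line, which looks roughly like this (exact spacing and fields vary
# between cyclictest versions):
#
#   T: 0 ( 1234) P:99 I:100 C:  30000 Min:      2 Act:    4 Avg:    3 Max:     18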
def run_cyclictest_once():
    res = subprocess.check_output(cmd)

    # minlist and maxlist are lists with the extremes for each core.
    # avg_cnt is the sum of cycles for all cores.
    # avg_sum is the sum of (cycle count*average) for each core.
    # Since cyclictest runs a different number of cycles on different
    # cores, avg_sum/avg_cnt gives a more accurate value of the overall
    # average than just taking the average of each core's averages.
    minlist = []
    maxlist = []
    avg_sum = 0
    avg_cnt = 0
    for line in res.splitlines():
        m = rex.search(line.decode('utf-8'))
        if m is not None:
            minlist.append(int(m.group(2)))
            maxlist.append(int(m.group(4)))
            n = int(m.group(1))
            avg_sum += n * int(m.group(3))
            avg_cnt += n

    return min(minlist), [avg_sum, avg_cnt], max(maxlist)
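# run_cyclictest_once() thus returns (min, [avg_sum, avg_cnt], max), where
# min and max are the extremes over all cores [us] and avg_sum/avg_cnt
# gives the cycle-weighted overall average.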
#-------------------------------------------------------------------------------
# A precondition for the tracking of min-max values is that
# the suite size is a power of 2.
N = 5
suite_size = 2**N

est_exec_time_once = interval_core_0 * loop_count
est_exec_time_suite = suite_size * est_exec_time_once
def run_cyclictest_suite():
    log("Starting cyclictest")
    log("  Command          : ", " ".join(cmd))
    log("  Number of cycles : ", loop_count*suite_size,
        " (", suite_size, " sets of ", loop_count, " cycles)")
    log("  Exec. time (est) : ", us2hms_str(est_exec_time_suite))
    log()

    orig_tmo = set_hung_tmo(0)  # 0 => Disable

    # float('inf') emulates infinity, in the sense that it is guaranteed
    # to compare larger than any actual measurement.
    ack_min = float('inf')
    ack_avg = [0, 0]

    log()
    log_ts("Start of execution")
    t = time.time()
    max_list = []
    for i in range(0, suite_size):
        tmp_min, tmp_avg, tmp_max = run_cyclictest_once()

        msg = "%2d/%2d:" % (i+1, suite_size)
        msg += " min: %4d" % tmp_min
        msg += " avg: %5.1f" % (float(tmp_avg[0])/tmp_avg[1])
        msg += " max: %4d" % tmp_max
        log_ts(msg)

        # Track minimum value
        if tmp_min < ack_min:
            ack_min = tmp_min

        # Track smallest max value
        max_list.append(tmp_max)

        ack_avg[0] += tmp_avg[0]
        ack_avg[1] += tmp_avg[1]

    t = time.time()-t
    log_ts("Cyclictest completed. Actual execution time: ",
           us2hms_str(t*1000000))
    log()
    set_hung_tmo(orig_tmo)

    return ack_min, float(ack_avg[0])/ack_avg[1], gen_minmax_list(max_list)
#-------------------------------------------------------------------------------
class cyclictest_runner:
    def run_test(self, seq_no, nr_of_tests, name, stressparams):
        try:
            log_test_header(seq_no, nr_of_tests, name)
            p = start_stress(*stressparams)

            bm_min, bm_avg, bm_max_list = run_cyclictest_suite()

            end_stress(p)

            bm_max = bm_max_list[-1]

            log()
            log("Min: %d us" % bm_min)
            log("Avg: %.1f us" % bm_avg)
            log("Max: %d us" % bm_max)
            log()
            log("Max list: ", bm_max_list)
            log()
            log("PASS")

            print()
            print(tc_name(name), "[Min/us,Avg/us,Max/us]:")
            print("%d,%.1f,%d" % (bm_min, bm_avg, bm_max))
            print("PASS:", tc_name(name))
            print()

        except Exception:
            log()
            log("Exception!")
            log()
            log("Traceback:", traceback.format_exc())
            log()
            log("WD: ", os.getcwd())
            log()
            log("FAIL")
            print()
            print("FAIL:", tc_name(name))
            print()
#-------------------------------------------------------------------------------

runner = cyclictest_runner()

tests = (("no_stress", []),
         ("cpu", ["cpu"]),
         ("hdd", ["hdd"]),
         ("io", ["io"]),
         ("vm", ["vm"]),
         ("full", ["io", "cpu", "hdd", "vm"]),
         )

nr_of_tests = len(tests)
for seq_no, params in enumerate(tests, start=1):
    runner.run_test(seq_no, nr_of_tests, *params)
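# Running the script (assumptions, not enforced above): cyclictest and
# stress-ng must be available in PATH, and the script normally needs root
# privileges to set RT priority 99 and to write
# /proc/sys/kernel/hung_task_timeout_secs. The output is the '#'-prefixed
# log plus one result line and a PASS/FAIL line per test case.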