.. SPDX-License-Identifier: CC-BY-SA-2.0-UK

.. highlight:: shell

***************************************************************
Basic Usage (with examples) for each of the Yocto Tracing Tools
***************************************************************

|

This chapter presents basic usage examples for each of the tracing
tools.

.. _profile-manual-perf:

perf
====

The 'perf' tool is the profiling and tracing tool that comes bundled
with the Linux kernel.

Don't let the fact that it's part of the kernel fool you into thinking
that it's only for tracing and profiling the kernel - you can indeed use
it to trace and profile just the kernel, but you can also use it to
profile specific applications separately (with or without kernel
context), and you can also use it to trace and profile the kernel and
all applications on the system simultaneously to gain a system-wide view
of what's going on.

In many ways, perf aims to be a superset of all the tracing and
profiling tools available in Linux today, including all the other tools
covered in this HOWTO. The past couple of years have seen perf subsume a
lot of the functionality of those other tools and, at the same time,
those other tools have removed large portions of their previous
functionality and replaced it with calls to the equivalent functionality
now implemented by the perf subsystem. Extrapolation suggests that at
some point those other tools will simply become completely redundant and
go away; until then, we'll cover those other tools in these pages and in
many cases show how the same things can be accomplished in perf and the
other tools when it seems useful to do so.

The coverage below details some of the most common ways you'll likely
want to apply the tool; full documentation can be found either within
the tool itself or in the man pages at
`perf(1) <http://linux.die.net/man/1/perf>`__.

.. _perf-setup:

Perf Setup
----------

For this section, we'll assume you've already performed the basic setup
outlined in the ":ref:`profile-manual/profile-manual-intro:General Setup`" section.

In particular, you'll get the most mileage out of perf if you profile an
image built with the following in your ``local.conf`` file: ::

   INHIBIT_PACKAGE_STRIP = "1"

perf runs on the target system for the most part. You can archive
profile data and copy it to the host for analysis, but for the rest of
this document we assume you've ssh'ed to the target and will be running
the perf commands there.
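
If you do want to move the analysis to the host, the 'perf archive'
subcommand (listed in the command summary below) can bundle up the
build-id-stamped object files that 'perf report' needs to resolve
symbols. A rough sketch of that workflow, reusing the hypothetical host
and target names from the examples in this section: ::

   # On the target, after a 'perf record' run, bundle the objects
   # referenced by perf.data into perf.data.tar.bz2:
   root@crownbay:~# perf archive

   # On the host, fetch the data, unpack the archive where perf
   # expects to find the symbols, then report as usual:
   [trz@empanada ~]$ scp root@192.168.1.31:perf.data* .
   [trz@empanada ~]$ mkdir -p ~/.debug
   [trz@empanada ~]$ tar xvf perf.data.tar.bz2 -C ~/.debug
   [trz@empanada ~]$ perf report -i perf.data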

.. _perf-basic-usage:

Basic Perf Usage
----------------

The perf tool is pretty much self-documenting. To remind yourself of the
available commands, simply type 'perf', which will show you basic usage
along with the available perf subcommands: ::

   root@crownbay:~# perf

   usage: perf [--version] [--help] COMMAND [ARGS]

   The most commonly used perf commands are:
     annotate        Read perf.data (created by perf record) and display annotated code
     archive         Create archive with object files with build-ids found in perf.data file
     bench           General framework for benchmark suites
     buildid-cache   Manage build-id cache.
     buildid-list    List the buildids in a perf.data file
     diff            Read two perf.data files and display the differential profile
     evlist          List the event names in a perf.data file
     inject          Filter to augment the events stream with additional information
     kmem            Tool to trace/measure kernel memory(slab) properties
     kvm             Tool to trace/measure kvm guest os
     list            List all symbolic event types
     lock            Analyze lock events
     probe           Define new dynamic tracepoints
     record          Run a command and record its profile into perf.data
     report          Read perf.data (created by perf record) and display the profile
     sched           Tool to trace/measure scheduler properties (latencies)
     script          Read perf.data (created by perf record) and display trace output
     stat            Run a command and gather performance counter statistics
     test            Runs sanity tests.
     timechart       Tool to visualize total system behavior during a workload
     top             System profiling tool.

   See 'perf help COMMAND' for more information on a specific command.

Using perf to do Basic Profiling
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

As a simple test case, we'll profile the 'wget' of a fairly large file,
which is a minimally interesting case because it has both file and
network I/O aspects, and at least in the case of standard Yocto images,
it's implemented as part of busybox, so the methods we use to analyze it
can be used in a very similar way to the whole host of supported busybox
applets in Yocto. ::

   root@crownbay:~# rm linux-2.6.19.2.tar.bz2; \
   wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2

The quickest and easiest way to get some basic overall data about what's
going on for a particular workload is to profile it using 'perf stat'.
'perf stat' basically profiles using a few default counters and displays
the summed counts at the end of the run: ::

   root@crownbay:~# perf stat wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2
   Connecting to downloads.yoctoproject.org (140.211.169.59:80)
   linux-2.6.19.2.tar.b 100% |***************************************************| 41727k 0:00:00 ETA

   Performance counter stats for 'wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2':

         4597.223902 task-clock                #    0.077 CPUs utilized
               23568 context-switches          #    0.005 M/sec
                  68 CPU-migrations            #    0.015 K/sec
                 241 page-faults               #    0.052 K/sec
          3045817293 cycles                    #    0.663 GHz
     <not supported> stalled-cycles-frontend
     <not supported> stalled-cycles-backend
           858909167 instructions              #    0.28  insns per cycle
           165441165 branches                  #   35.987 M/sec
            19550329 branch-misses             #   11.82% of all branches

        59.836627620 seconds time elapsed

Many times such a simple-minded test doesn't yield much of interest, but
sometimes it does (see the real-world Yocto bug example on slow
loop-mounted write speed).

Also, note that 'perf stat' isn't restricted to a fixed set of counters
- basically any event listed in the output of 'perf list' can be tallied
by 'perf stat'. For example, suppose we wanted to see a summary of all
the events related to kernel memory allocation/freeing along with cache
hits and misses: ::

   root@crownbay:~# perf stat -e kmem:* -e cache-references -e cache-misses wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2
   Connecting to downloads.yoctoproject.org (140.211.169.59:80)
   linux-2.6.19.2.tar.b 100% |***************************************************| 41727k 0:00:00 ETA

   Performance counter stats for 'wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2':

        5566 kmem:kmalloc
      125517 kmem:kmem_cache_alloc
           0 kmem:kmalloc_node
           0 kmem:kmem_cache_alloc_node
       34401 kmem:kfree
       69920 kmem:kmem_cache_free
         133 kmem:mm_page_free
          41 kmem:mm_page_free_batched
       11502 kmem:mm_page_alloc
       11375 kmem:mm_page_alloc_zone_locked
           0 kmem:mm_page_pcpu_drain
           0 kmem:mm_page_alloc_extfrag
    66848602 cache-references
     2917740 cache-misses              #    4.365 % of all cache refs

   44.831023415 seconds time elapsed

So 'perf stat' gives us a nice easy way to get a quick overview of what
might be happening for a set of events, but normally we'd need a little
more detail in order to understand what's going on in a way that we can
act on in a useful way.

To dive down into a next level of detail, we can use 'perf record'/'perf
report', which will collect profiling data and present it to us using an
interactive text-based UI (or simply as text if we specify --stdio to
'perf report').
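
As a quick aside, the text-only mode is handy when you're working over a
serial console or want to redirect the report to a file; run it in the
directory containing a perf.data file: ::

   root@crownbay:~# perf report --stdio

The exact columns vary between perf versions, but you'll typically see
the overhead percentage, command, shared object, and symbol for each
entry.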

As our first attempt at profiling this workload, we'll simply run 'perf
record', handing it the workload we want to profile (everything after
'perf record' and any perf options we hand it - here none - will be
executed in a new shell). perf collects samples until the process exits
and records them in a file named 'perf.data' in the current working
directory. ::

   root@crownbay:~# perf record wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2

   Connecting to downloads.yoctoproject.org (140.211.169.59:80)
   linux-2.6.19.2.tar.b 100% |************************************************| 41727k 0:00:00 ETA
   [ perf record: Woken up 1 times to write data ]
   [ perf record: Captured and wrote 0.176 MB perf.data (~7700 samples) ]

To see the results in a 'text-based UI' (tui), simply run 'perf report',
which will read the perf.data file in the current working directory and
display the results in an interactive UI: ::

   root@crownbay:~# perf report

.. image:: figures/perf-wget-flat-stripped.png
   :align: center

The above screenshot displays a 'flat' profile, one entry for each
'bucket' corresponding to the functions that were profiled during the
profiling run, ordered from the most popular to the least (perf has
options to sort in various orders and keys as well as display entries
only above a certain threshold and so on - see the perf documentation
for details). Note that this includes both userspace functions (entries
containing a [.]) and kernel functions accounted to the process (entries
containing a [k]). perf has command-line modifiers that can be used to
restrict the profiling to kernel or userspace, among others.
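
As an illustration of those modifiers, appending ':u' or ':k' to a
hardware event restricts sampling to user space or kernel space
respectively (the ':u' form also appears in the system-wide examples
later in this section): ::

   root@crownbay:~# perf record -e cycles:u wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2
   root@crownbay:~# perf record -e cycles:k wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2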
  170. Notice also that the above report shows an entry for 'busybox', which is
  171. the executable that implements 'wget' in Yocto, but that instead of a
  172. useful function name in that entry, it displays a not-so-friendly hex
  173. value instead. The steps below will show how to fix that problem.
  174. Before we do that, however, let's try running a different profile, one
  175. which shows something a little more interesting. The only difference
  176. between the new profile and the previous one is that we'll add the -g
  177. option, which will record not just the address of a sampled function,
  178. but the entire callchain to the sampled function as well: ::
  179. root@crownbay:~# perf record -g wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2
  180. Connecting to downloads.yoctoproject.org (140.211.169.59:80)
  181. linux-2.6.19.2.tar.b 100% |************************************************| 41727k 0:00:00 ETA
  182. [ perf record: Woken up 3 times to write data ]
  183. [ perf record: Captured and wrote 0.652 MB perf.data (~28476 samples) ]
  184. root@crownbay:~# perf report
  185. .. image:: figures/perf-wget-g-copy-to-user-expanded-stripped.png
  186. :align: center
  187. Using the callgraph view, we can actually see not only which functions
  188. took the most time, but we can also see a summary of how those functions
  189. were called and learn something about how the program interacts with the
  190. kernel in the process.
  191. Notice that each entry in the above screenshot now contains a '+' on the
  192. left-hand side. This means that we can expand the entry and drill down
  193. into the callchains that feed into that entry. Pressing 'enter' on any
  194. one of them will expand the callchain (you can also press 'E' to expand
  195. them all at the same time or 'C' to collapse them all).
  196. In the screenshot above, we've toggled the ``__copy_to_user_ll()`` entry
  197. and several subnodes all the way down. This lets us see which callchains
  198. contributed to the profiled ``__copy_to_user_ll()`` function which
  199. contributed 1.77% to the total profile.

As a bit of background explanation for these callchains, think about
what happens at a high level when you run wget to get a file out on the
network. Basically what happens is that the data comes into the kernel
via the network connection (socket) and is passed to the userspace
program 'wget' (which is actually a part of busybox, but that's not
important for now), which takes the buffers the kernel passes to it and
writes them to a disk file to save them.

The part of this process that we're looking at in the above call stacks
is the part where the kernel passes the data it's read from the socket
down to wget, i.e. a copy-to-user.

Notice also that there's a case where the hex value is displayed in the
callstack, here in the expanded ``sys_clock_gettime()`` function. Later
we'll see it resolve to a userspace function call in busybox.

.. image:: figures/perf-wget-g-copy-from-user-expanded-stripped.png
   :align: center

The above screenshot shows the other half of the journey for the data -
from the wget program's userspace buffers to disk. To get the buffers to
disk, the wget program issues a ``write(2)``, which does a ``copy-from-user`` to
the kernel, which then takes care, via some circuitous path (probably
also present somewhere in the profile data), to get it safely to disk.

Now that we've seen the basic layout of the profile data and the basics
of how to extract useful information out of it, let's get back to the
task at hand and see if we can get some basic idea about where the time
is spent in the program we're profiling, wget. Remember that wget is
actually implemented as an applet in busybox, so while the process name
is 'wget', the executable we're actually interested in is busybox. So
let's expand the first entry containing busybox:

.. image:: figures/perf-wget-busybox-expanded-stripped.png
   :align: center

Again, before we expanded we saw that the function was labeled with a
hex value instead of a symbol as with most of the kernel entries.
Expanding the busybox entry doesn't make it any better.

The problem is that perf can't find the symbol information for the
busybox binary, which is actually stripped out by the Yocto build
system.

One way around that is to put the following in your ``local.conf`` file
when you build the image: ::

   INHIBIT_PACKAGE_STRIP = "1"

However, we already have an image with the binaries stripped, so
what can we do to get perf to resolve the symbols? Basically we need to
install the debuginfo for the busybox package.

To generate the debug info for the packages in the image, we can add
``dbg-pkgs`` to :term:`EXTRA_IMAGE_FEATURES` in ``local.conf``. For example: ::

   EXTRA_IMAGE_FEATURES = "debug-tweaks tools-profile dbg-pkgs"

Additionally, in order to generate the type of debuginfo that perf
understands, we also need to set
:term:`PACKAGE_DEBUG_SPLIT_STYLE`
in the ``local.conf`` file: ::

   PACKAGE_DEBUG_SPLIT_STYLE = 'debug-file-directory'

Once we've done that, we can install the debuginfo for busybox. The
debug packages once built can be found in ``build/tmp/deploy/rpm/*``
on the host system. Find the busybox-dbg-...rpm file and copy it to the
target. For example: ::

   [trz@empanada core2]$ scp /home/trz/yocto/crownbay-tracing-dbg/build/tmp/deploy/rpm/core2_32/busybox-dbg-1.20.2-r2.core2_32.rpm root@192.168.1.31:
   busybox-dbg-1.20.2-r2.core2_32.rpm                 100% 1826KB   1.8MB/s   00:01

Now install the debug rpm on the target: ::

   root@crownbay:~# rpm -i busybox-dbg-1.20.2-r2.core2_32.rpm

Now that the debuginfo is installed, we see that the busybox entries
display their functions symbolically:

.. image:: figures/perf-wget-busybox-debuginfo.png
   :align: center

If we expand one of the entries and press 'enter' on a leaf node, we're
presented with a menu of actions we can take to get more information
related to that entry:

.. image:: figures/perf-wget-busybox-dso-zoom-menu.png
   :align: center

One of these actions allows us to show a view that displays a
busybox-centric view of the profiled functions (in this case we've also
expanded all the nodes using the 'E' key):

.. image:: figures/perf-wget-busybox-dso-zoom.png
   :align: center

Finally, we can see that now that the busybox debuginfo is installed,
the previously unresolved symbol in the ``sys_clock_gettime()`` entry
mentioned earlier is resolved, and shows that the sys_clock_gettime
system call that was the source of 6.75% of the copy-to-user overhead
was initiated by the ``handle_input()`` busybox function:

.. image:: figures/perf-wget-g-copy-to-user-expanded-debuginfo.png
   :align: center

At the lowest level of detail, we can dive down to the assembly level
and see which instructions caused the most overhead in a function.
Pressing 'enter' on the 'udhcpc_main' function, we're again presented
with a menu:

.. image:: figures/perf-wget-busybox-annotate-menu.png
   :align: center

Selecting 'Annotate udhcpc_main', we get a detailed listing of
percentages by instruction for the udhcpc_main function. From the
display, we can see that over 50% of the time spent in this function is
taken up by a couple of tests and the move of a constant (1) to a
register:

.. image:: figures/perf-wget-busybox-annotate-udhcpc.png
   :align: center

As a segue into tracing, let's try another profile using a different
counter, something other than the default 'cycles'.

The tracing and profiling infrastructure in Linux has become unified in
a way that allows us to use the same tool with a completely different
set of counters, not just the standard hardware counters that
traditional tools have had to restrict themselves to (of course the
traditional tools can also make use of the expanded possibilities now
available to them, and in some cases have, as mentioned previously).

We can get a list of the available events that can be used to profile a
workload via 'perf list': ::

   root@crownbay:~# perf list

   List of pre-defined events (to be used in -e):
    cpu-cycles OR cycles                               [Hardware event]
    stalled-cycles-frontend OR idle-cycles-frontend    [Hardware event]
    stalled-cycles-backend OR idle-cycles-backend      [Hardware event]
    instructions                                       [Hardware event]
    cache-references                                   [Hardware event]
    cache-misses                                       [Hardware event]
    branch-instructions OR branches                    [Hardware event]
    branch-misses                                      [Hardware event]
    bus-cycles                                         [Hardware event]
    ref-cycles                                         [Hardware event]

    cpu-clock                                          [Software event]
    task-clock                                         [Software event]
    page-faults OR faults                              [Software event]
    minor-faults                                       [Software event]
    major-faults                                       [Software event]
    context-switches OR cs                             [Software event]
    cpu-migrations OR migrations                       [Software event]
    alignment-faults                                   [Software event]
    emulation-faults                                   [Software event]

    L1-dcache-loads                                    [Hardware cache event]
    L1-dcache-load-misses                              [Hardware cache event]
    L1-dcache-prefetch-misses                          [Hardware cache event]
    L1-icache-loads                                    [Hardware cache event]
    L1-icache-load-misses                              [Hardware cache event]
    .
    .
    .
    rNNN                                               [Raw hardware event descriptor]
    cpu/t1=v1[,t2=v2,t3 ...]/modifier                  [Raw hardware event descriptor]
     (see 'perf list --help' on how to encode it)

    mem:<addr>[:access]                                [Hardware breakpoint]

    sunrpc:rpc_call_status                             [Tracepoint event]
    sunrpc:rpc_bind_status                             [Tracepoint event]
    sunrpc:rpc_connect_status                          [Tracepoint event]
    sunrpc:rpc_task_begin                              [Tracepoint event]
    skb:kfree_skb                                      [Tracepoint event]
    skb:consume_skb                                    [Tracepoint event]
    skb:skb_copy_datagram_iovec                        [Tracepoint event]
    net:net_dev_xmit                                   [Tracepoint event]
    net:net_dev_queue                                  [Tracepoint event]
    net:netif_receive_skb                              [Tracepoint event]
    net:netif_rx                                       [Tracepoint event]
    napi:napi_poll                                     [Tracepoint event]
    sock:sock_rcvqueue_full                            [Tracepoint event]
    sock:sock_exceed_buf_limit                         [Tracepoint event]
    udp:udp_fail_queue_rcv_skb                         [Tracepoint event]
    hda:hda_send_cmd                                   [Tracepoint event]
    hda:hda_get_response                               [Tracepoint event]
    hda:hda_bus_reset                                  [Tracepoint event]
    scsi:scsi_dispatch_cmd_start                       [Tracepoint event]
    scsi:scsi_dispatch_cmd_error                       [Tracepoint event]
    scsi:scsi_eh_wakeup                                [Tracepoint event]
    drm:drm_vblank_event                               [Tracepoint event]
    drm:drm_vblank_event_queued                        [Tracepoint event]
    drm:drm_vblank_event_delivered                     [Tracepoint event]
    random:mix_pool_bytes                              [Tracepoint event]
    random:mix_pool_bytes_nolock                       [Tracepoint event]
    random:credit_entropy_bits                         [Tracepoint event]
    gpio:gpio_direction                                [Tracepoint event]
    gpio:gpio_value                                    [Tracepoint event]
    block:block_rq_abort                               [Tracepoint event]
    block:block_rq_requeue                             [Tracepoint event]
    block:block_rq_issue                               [Tracepoint event]
    block:block_bio_bounce                             [Tracepoint event]
    block:block_bio_complete                           [Tracepoint event]
    block:block_bio_backmerge                          [Tracepoint event]
    .
    .
    writeback:writeback_wake_thread                    [Tracepoint event]
    writeback:writeback_wake_forker_thread             [Tracepoint event]
    writeback:writeback_bdi_register                   [Tracepoint event]
    .
    .
    writeback:writeback_single_inode_requeue           [Tracepoint event]
    writeback:writeback_single_inode                   [Tracepoint event]
    kmem:kmalloc                                       [Tracepoint event]
    kmem:kmem_cache_alloc                              [Tracepoint event]
    kmem:mm_page_alloc                                 [Tracepoint event]
    kmem:mm_page_alloc_zone_locked                     [Tracepoint event]
    kmem:mm_page_pcpu_drain                            [Tracepoint event]
    kmem:mm_page_alloc_extfrag                         [Tracepoint event]
    vmscan:mm_vmscan_kswapd_sleep                      [Tracepoint event]
    vmscan:mm_vmscan_kswapd_wake                       [Tracepoint event]
    vmscan:mm_vmscan_wakeup_kswapd                     [Tracepoint event]
    vmscan:mm_vmscan_direct_reclaim_begin              [Tracepoint event]
    .
    .
    module:module_get                                  [Tracepoint event]
    module:module_put                                  [Tracepoint event]
    module:module_request                              [Tracepoint event]
    sched:sched_kthread_stop                           [Tracepoint event]
    sched:sched_wakeup                                 [Tracepoint event]
    sched:sched_wakeup_new                             [Tracepoint event]
    sched:sched_process_fork                           [Tracepoint event]
    sched:sched_process_exec                           [Tracepoint event]
    sched:sched_stat_runtime                           [Tracepoint event]
    rcu:rcu_utilization                                [Tracepoint event]
    workqueue:workqueue_queue_work                     [Tracepoint event]
    workqueue:workqueue_execute_end                    [Tracepoint event]
    signal:signal_generate                             [Tracepoint event]
    signal:signal_deliver                              [Tracepoint event]
    timer:timer_init                                   [Tracepoint event]
    timer:timer_start                                  [Tracepoint event]
    timer:hrtimer_cancel                               [Tracepoint event]
    timer:itimer_state                                 [Tracepoint event]
    timer:itimer_expire                                [Tracepoint event]
    irq:irq_handler_entry                              [Tracepoint event]
    irq:irq_handler_exit                               [Tracepoint event]
    irq:softirq_entry                                  [Tracepoint event]
    irq:softirq_exit                                   [Tracepoint event]
    irq:softirq_raise                                  [Tracepoint event]
    printk:console                                     [Tracepoint event]
    task:task_newtask                                  [Tracepoint event]
    task:task_rename                                   [Tracepoint event]
    syscalls:sys_enter_socketcall                      [Tracepoint event]
    syscalls:sys_exit_socketcall                       [Tracepoint event]
    .
    .
    .
    syscalls:sys_enter_unshare                         [Tracepoint event]
    syscalls:sys_exit_unshare                          [Tracepoint event]
    raw_syscalls:sys_enter                             [Tracepoint event]
    raw_syscalls:sys_exit                              [Tracepoint event]

.. admonition:: Tying it Together

   These are exactly the same set of events defined by the trace event
   subsystem and exposed by ftrace/tracecmd/kernelshark as files in
   /sys/kernel/debug/tracing/events, by SystemTap as
   kernel.trace("tracepoint_name") and (partially) accessed by LTTng.
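
You can verify this correspondence directly on the target; each
tracepoint that 'perf list' shows appears as a directory under the
ftrace events tree, with a 'format' file describing its fields (this
assumes debugfs is mounted at the usual location): ::

   root@crownbay:~# ls /sys/kernel/debug/tracing/events/skb/
   root@crownbay:~# cat /sys/kernel/debug/tracing/events/skb/kfree_skb/format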

Only a subset of these would be of interest to us when looking at this
workload, so let's choose the most likely subsystems (identified by the
string before the colon in the Tracepoint events) and do a 'perf stat'
run using only those wildcarded subsystems: ::

   root@crownbay:~# perf stat -e skb:* -e net:* -e napi:* -e sched:* -e workqueue:* -e irq:* -e syscalls:* wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2

   Performance counter stats for 'wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2':

           23323 skb:kfree_skb
               0 skb:consume_skb
           49897 skb:skb_copy_datagram_iovec
            6217 net:net_dev_xmit
            6217 net:net_dev_queue
            7962 net:netif_receive_skb
               2 net:netif_rx
            8340 napi:napi_poll
               0 sched:sched_kthread_stop
               0 sched:sched_kthread_stop_ret
            3749 sched:sched_wakeup
               0 sched:sched_wakeup_new
               0 sched:sched_switch
              29 sched:sched_migrate_task
               0 sched:sched_process_free
               1 sched:sched_process_exit
               0 sched:sched_wait_task
               0 sched:sched_process_wait
               0 sched:sched_process_fork
               1 sched:sched_process_exec
               0 sched:sched_stat_wait
   2106519415641 sched:sched_stat_sleep
               0 sched:sched_stat_iowait
       147453613 sched:sched_stat_blocked
     12903026955 sched:sched_stat_runtime
               0 sched:sched_pi_setprio
            3574 workqueue:workqueue_queue_work
            3574 workqueue:workqueue_activate_work
               0 workqueue:workqueue_execute_start
               0 workqueue:workqueue_execute_end
           16631 irq:irq_handler_entry
           16631 irq:irq_handler_exit
           28521 irq:softirq_entry
           28521 irq:softirq_exit
           28728 irq:softirq_raise
               1 syscalls:sys_enter_sendmmsg
               1 syscalls:sys_exit_sendmmsg
               0 syscalls:sys_enter_recvmmsg
               0 syscalls:sys_exit_recvmmsg
              14 syscalls:sys_enter_socketcall
              14 syscalls:sys_exit_socketcall
               .
               .
               .
           16965 syscalls:sys_enter_read
           16965 syscalls:sys_exit_read
           12854 syscalls:sys_enter_write
           12854 syscalls:sys_exit_write
               .
               .
               .

    58.029710972 seconds time elapsed

Let's pick one of these tracepoints and tell perf to do a profile using
it as the sampling event: ::

   root@crownbay:~# perf record -g -e sched:sched_wakeup wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2

.. image:: figures/sched-wakeup-profile.png
   :align: center

The screenshot above shows the results of running a profile using the
sched:sched_wakeup tracepoint, which shows the relative costs of various
paths to sched_wakeup (note that sched_wakeup is the name of the
tracepoint - it's actually defined just inside ttwu_do_wakeup(), which
accounts for the function name actually displayed in the profile):

.. code-block:: c

   /*
    * Mark the task runnable and perform wakeup-preemption.
    */
   static void
   ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
   {
        trace_sched_wakeup(p, true);
        .
        .
        .
   }

A couple of the more interesting callchains are expanded and displayed
above, basically some network receive paths that presumably end up
waking up wget (busybox) when network data is ready.

Note that because tracepoints are normally used for tracing, the default
sampling period for tracepoints is 1, i.e. for tracepoints perf will
sample on every event occurrence (this can be changed using the -c
option). This is in contrast to hardware counters such as, for example,
the default 'cycles' hardware counter used for normal profiling, where
sampling periods are much higher (in the thousands) because profiling
should have as low an overhead as possible and sampling on every cycle
would be prohibitively expensive.
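
If sampling on every occurrence of a busy tracepoint ever proves too
heavy, the -c option just mentioned raises the sample period so that
perf records only every Nth occurrence; the period value below is purely
illustrative: ::

   root@crownbay:~# perf record -c 100 -e sched:sched_wakeup wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2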

Using perf to do Basic Tracing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Profiling is a great tool for solving many problems or for getting a
high-level view of what's going on with a workload or across the system.
It is however by definition an approximation, as suggested by the most
prominent word associated with it, 'sampling'. On the one hand, it
allows a representative picture of what's going on in the system to be
cheaply taken, but on the other hand, that cheapness limits its utility
when that data suggests a need to 'dive down' more deeply to discover
what's really going on. In such cases, the only way to see what's really
going on is to be able to look at (or summarize more intelligently) the
individual steps that go into the higher-level behavior exposed by the
coarse-grained profiling data.

As a concrete example, we can trace all the events we think might be
applicable to our workload: ::

   root@crownbay:~# perf record -g -e skb:* -e net:* -e napi:* -e sched:sched_switch -e sched:sched_wakeup -e irq:* \
    -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write \
    wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2

We can look at the raw trace output using 'perf script' with no
arguments: ::

   root@crownbay:~# perf script

         perf  1262 [000] 11624.857082: sys_exit_read: 0x0
         perf  1262 [000] 11624.857193: sched_wakeup: comm=migration/0 pid=6 prio=0 success=1 target_cpu=000
         wget  1262 [001] 11624.858021: softirq_raise: vec=1 [action=TIMER]
         wget  1262 [001] 11624.858074: softirq_entry: vec=1 [action=TIMER]
         wget  1262 [001] 11624.858081: softirq_exit: vec=1 [action=TIMER]
         wget  1262 [001] 11624.858166: sys_enter_read: fd: 0x0003, buf: 0xbf82c940, count: 0x0200
         wget  1262 [001] 11624.858177: sys_exit_read: 0x200
         wget  1262 [001] 11624.858878: kfree_skb: skbaddr=0xeb248d80 protocol=0 location=0xc15a5308
         wget  1262 [001] 11624.858945: kfree_skb: skbaddr=0xeb248000 protocol=0 location=0xc15a5308
         wget  1262 [001] 11624.859020: softirq_raise: vec=1 [action=TIMER]
         wget  1262 [001] 11624.859076: softirq_entry: vec=1 [action=TIMER]
         wget  1262 [001] 11624.859083: softirq_exit: vec=1 [action=TIMER]
         wget  1262 [001] 11624.859167: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400
         wget  1262 [001] 11624.859192: sys_exit_read: 0x1d7
         wget  1262 [001] 11624.859228: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400
         wget  1262 [001] 11624.859233: sys_exit_read: 0x0
         wget  1262 [001] 11624.859573: sys_enter_read: fd: 0x0003, buf: 0xbf82c580, count: 0x0200
         wget  1262 [001] 11624.859584: sys_exit_read: 0x200
         wget  1262 [001] 11624.859864: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400
         wget  1262 [001] 11624.859888: sys_exit_read: 0x400
         wget  1262 [001] 11624.859935: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400
         wget  1262 [001] 11624.859944: sys_exit_read: 0x400

This gives us a detailed timestamped sequence of events that occurred
within the workload with respect to those events.
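
If the full default output is more detail than you need, 'perf script'
can also restrict its output to selected fields; for example, something
like the following (assuming your version of perf supports the
-f/--fields option) prints just the command, timestamp, event name, and
event arguments: ::

   root@crownbay:~# perf script -f comm,time,event,trace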

In many ways, profiling can be viewed as a subset of tracing -
theoretically, if you have a set of trace events that's sufficient to
capture all the important aspects of a workload, you can derive any of
the results or views that a profiling run can.

Another aspect of traditional profiling is that while powerful in many
ways, it's limited by the granularity of the underlying data. Profiling
tools offer various ways of sorting and presenting the sample data,
which make it much more useful and amenable to user experimentation, but
in the end it can't be used in an open-ended way to extract data that
just isn't present as a consequence of the fact that conceptually, most
of it has been thrown away.

Full-blown detailed tracing data does however offer the opportunity to
manipulate and present the information collected during a tracing run in
an infinite variety of ways.

Another way to look at it is that there are only so many ways that the
'primitive' counters can be used on their own to generate interesting
output; to get anything more complicated than simple counts requires
some amount of additional logic, which is typically very specific to the
problem at hand. For example, if we wanted to make use of a 'counter'
that maps to the value of the time difference between when a process was
scheduled to run on a processor and the time it actually ran, we
wouldn't expect such a counter to exist on its own, but we could derive
one called, say, 'wakeup_latency' and use it to extract a useful view of
that metric from trace data. Likewise, we really can't figure out from
standard profiling tools how much data every process on the system reads
and writes, along with how many of those reads and writes fail
completely. If we have sufficient trace data, however, we could with the
right tools easily extract and present that information, but we'd need
something other than pre-canned profiling tools to do that.

Luckily, there is a general-purpose way to handle such needs, called
'programming languages'. Making programming languages easily available
to apply to such problems, given the specific format of data, is called
a 'programming language binding' for that data and language. Perf
supports two programming language bindings, one for Python and one for
Perl.

.. admonition:: Tying it Together

   Language bindings for manipulating and aggregating trace data are of
   course not a new idea. One of the first projects to do this was IBM's
   DProbes dpcc compiler, an ANSI C compiler which targeted a low-level
   assembly language running on an in-kernel interpreter on the target
   system. This is exactly analogous to what Sun's DTrace did, except
   that DTrace invented its own language for the purpose. SystemTap,
   heavily inspired by DTrace, also created its own one-off language,
   but rather than running the product on an in-kernel interpreter,
   created an elaborate compiler-based machinery to translate its
   language into kernel modules written in C.

Now that we have the trace data in perf.data, we can use 'perf script
-g' to generate a skeleton script with handlers for the read/write
entry/exit events we recorded: ::

   root@crownbay:~# perf script -g python
   generated Python script: perf-script.py

The skeleton script simply creates a python function for each event
type in the perf.data file. The body of each function just prints the
event name along with its parameters. For example:

.. code-block:: python

   def net__netif_rx(event_name, context, common_cpu,
                     common_secs, common_nsecs, common_pid, common_comm,
                     skbaddr, len, name):
       print_header(event_name, common_cpu, common_secs, common_nsecs,
                    common_pid, common_comm)
       print "skbaddr=%u, len=%u, name=%s\n" % (skbaddr, len, name),

We can run that script directly to print all of the events contained in
the perf.data file: ::

   root@crownbay:~# perf script -s perf-script.py

   in trace_begin
   syscalls__sys_exit_read     0 11624.857082795     1262 perf      nr=3, ret=0
   sched__sched_wakeup         0 11624.857193498     1262 perf      comm=migration/0, pid=6, prio=0, success=1, target_cpu=0
   irq__softirq_raise          1 11624.858021635     1262 wget      vec=TIMER
   irq__softirq_entry          1 11624.858074075     1262 wget      vec=TIMER
   irq__softirq_exit           1 11624.858081389     1262 wget      vec=TIMER
   syscalls__sys_enter_read    1 11624.858166434     1262 wget      nr=3, fd=3, buf=3213019456, count=512
   syscalls__sys_exit_read     1 11624.858177924     1262 wget      nr=3, ret=512
   skb__kfree_skb              1 11624.858878188     1262 wget      skbaddr=3945041280, location=3243922184, protocol=0
   skb__kfree_skb              1 11624.858945608     1262 wget      skbaddr=3945037824, location=3243922184, protocol=0
   irq__softirq_raise          1 11624.859020942     1262 wget      vec=TIMER
   irq__softirq_entry          1 11624.859076935     1262 wget      vec=TIMER
   irq__softirq_exit           1 11624.859083469     1262 wget      vec=TIMER
   syscalls__sys_enter_read    1 11624.859167565     1262 wget      nr=3, fd=3, buf=3077701632, count=1024
   syscalls__sys_exit_read     1 11624.859192533     1262 wget      nr=3, ret=471
   syscalls__sys_enter_read    1 11624.859228072     1262 wget      nr=3, fd=3, buf=3077701632, count=1024
   syscalls__sys_exit_read     1 11624.859233707     1262 wget      nr=3, ret=0
   syscalls__sys_enter_read    1 11624.859573008     1262 wget      nr=3, fd=3, buf=3213018496, count=512
   syscalls__sys_exit_read     1 11624.859584818     1262 wget      nr=3, ret=512
   syscalls__sys_enter_read    1 11624.859864562     1262 wget      nr=3, fd=3, buf=3077701632, count=1024
   syscalls__sys_exit_read     1 11624.859888770     1262 wget      nr=3, ret=1024
   syscalls__sys_enter_read    1 11624.859935140     1262 wget      nr=3, fd=3, buf=3077701632, count=1024
   syscalls__sys_exit_read     1 11624.859944032     1262 wget      nr=3, ret=1024

That in itself isn't very useful; after all, we can accomplish pretty
much the same thing by simply running 'perf script' without arguments
in the same directory as the perf.data file.

We can however replace the print statements in the generated function
bodies with whatever we want, and thereby make it infinitely more
useful.

As a simple example, let's just replace the print statements in the
function bodies with a simple function that does nothing but increment a
per-event count. When the program is run against a perf.data file, each
time a particular event is encountered, a tally is incremented for that
event. For example:

.. code-block:: python

   def net__netif_rx(event_name, context, common_cpu,
                     common_secs, common_nsecs, common_pid, common_comm,
                     skbaddr, len, name):
       inc_counts(event_name)

Each event handler function in the generated code is modified to do
this. For convenience, we define a common function called inc_counts()
that each handler calls; inc_counts() simply tallies a count for each
event using the 'counts' hash, a specialized hash that supports
Perl-like autovivification, a capability that's extremely useful for
the kinds of multi-level aggregation commonly used in processing traces
(see perf's documentation on the Python language binding for details):

.. code-block:: python

   counts = autodict()

   def inc_counts(event_name):
       try:
           counts[event_name] += 1
       except TypeError:
           counts[event_name] = 1
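
To see why autovivification matters here, consider a hypothetical
two-level variant of the same tally that counts events per process as
well as per event. With a plain dict, the intermediate counts[comm]
level would have to be initialized explicitly before the nested count
could be bumped; with autodict, it springs into existence on first
reference:

.. code-block:: python

   counts = autodict()

   def inc_counts(comm, event_name):
       # counts[comm] is created automatically the first time a
       # given command name is seen, Perl-style; only the final
       # increment needs the TypeError fallback
       try:
           counts[comm][event_name] += 1
       except TypeError:
           counts[comm][event_name] = 1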

Finally, at the end of the trace processing run, we want to print the
result of all the per-event tallies. For that, we use the special
'trace_end()' function:

.. code-block:: python

   def trace_end():
       for event_name, count in counts.iteritems():
           print "%-40s %10s\n" % (event_name, count)

The end result is a summary of all the events recorded in the trace: ::

   skb__skb_copy_datagram_iovec                    13148
   irq__softirq_entry                               4796
   irq__irq_handler_exit                            3805
   irq__softirq_exit                                4795
   syscalls__sys_enter_write                        8990
   net__net_dev_xmit                                 652
   skb__kfree_skb                                   4047
   sched__sched_wakeup                              1155
   irq__irq_handler_entry                           3804
   irq__softirq_raise                               4799
   net__net_dev_queue                                652
   syscalls__sys_enter_read                        17599
   net__netif_receive_skb                           1743
   syscalls__sys_exit_read                         17598
   net__netif_rx                                       2
   napi__napi_poll                                  1877
   syscalls__sys_exit_write                         8990

Note that this is pretty much exactly the same information we get from
'perf stat', which goes a little way to support the idea mentioned
previously that given the right kind of trace data, higher-level
profiling-type summaries can be derived from it.
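
As a sketch of going one step further than simple counts, here's roughly
how the hypothetical 'wakeup_latency' metric discussed earlier could be
derived with the same binding, by pairing each sched_wakeup with the
sched_switch that later runs the woken task. The handler signatures
below follow the skeletons perf generates for these events, though the
exact field lists can differ between kernel versions:

.. code-block:: python

   wakeup_time = {}     # pid -> timestamp of the most recent wakeup
   latencies = []

   def sched__sched_wakeup(event_name, context, common_cpu,
           common_secs, common_nsecs, common_pid, common_comm,
           comm, pid, prio, success, target_cpu):
       # remember when this task was made runnable
       wakeup_time[pid] = common_secs + common_nsecs / 1000000000.0

   def sched__sched_switch(event_name, context, common_cpu,
           common_secs, common_nsecs, common_pid, common_comm,
           prev_comm, prev_pid, prev_prio, prev_state,
           next_comm, next_pid, next_prio):
       # if the task switching in was woken earlier, the difference
       # between now and its wakeup time is its wakeup latency
       if next_pid in wakeup_time:
           now = common_secs + common_nsecs / 1000000000.0
           latencies.append(now - wakeup_time.pop(next_pid))

   def trace_end():
       if latencies:
           print "average wakeup latency: %f secs\n" % \
               (sum(latencies) / len(latencies))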

Documentation on using the `'perf script' python binding
<http://linux.die.net/man/1/perf-script-python>`__ is available in the
perf-script-python(1) man page.

System-Wide Tracing and Profiling
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The examples so far have focused on tracing a particular program or
workload - in other words, every profiling run has specified the program
to profile in the command-line e.g. 'perf record wget ...'.

It's also possible, and more interesting in many cases, to run a
system-wide profile or trace while running the workload in a separate
shell.

To do system-wide profiling or tracing, you typically use the -a flag to
'perf record'.

To demonstrate this, open up one window and start the profile using the
-a flag (press Ctrl-C to stop tracing): ::

   root@crownbay:~# perf record -g -a
   ^C[ perf record: Woken up 6 times to write data ]
   [ perf record: Captured and wrote 1.400 MB perf.data (~61172 samples) ]

In another window, run the wget test: ::

   root@crownbay:~# wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2
   Connecting to downloads.yoctoproject.org (140.211.169.59:80)
   linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA

Here we see entries not only for our wget load, but for other processes
running on the system as well:

.. image:: figures/perf-systemwide.png
   :align: center

In the snapshot above, we can see callchains that originate in libc, and
a callchain from Xorg that demonstrates that we're using a proprietary X
driver in userspace (notice the presence of 'PVR' and some other
unresolvable symbols in the expanded Xorg callchain).

Note also that we have both kernel and userspace entries in the above
snapshot. We can also tell perf to focus on userspace by providing a
modifier, in this case 'u', to the 'cycles' hardware counter when we
record a profile: ::

   root@crownbay:~# perf record -g -a -e cycles:u
   ^C[ perf record: Woken up 2 times to write data ]
   [ perf record: Captured and wrote 0.376 MB perf.data (~16443 samples) ]

.. image:: figures/perf-report-cycles-u.png
   :align: center

Notice in the screenshot above that we see only userspace entries ([.]).

Finally, we can press 'enter' on a leaf node and select the 'Zoom into
DSO' menu item to show only entries associated with a specific DSO. In
the screenshot below, we've zoomed into the 'libc' DSO, which shows all
the entries associated with the libc-xxx.so DSO.

.. image:: figures/perf-systemwide-libc.png
   :align: center

We can also use the system-wide -a switch to do system-wide tracing.
Here we'll trace a couple of scheduler events: ::

   root@crownbay:~# perf record -a -e sched:sched_switch -e sched:sched_wakeup
   ^C[ perf record: Woken up 38 times to write data ]
   [ perf record: Captured and wrote 9.780 MB perf.data (~427299 samples) ]

We can look at the raw output using 'perf script' with no arguments: ::

   root@crownbay:~# perf script

          perf  1383 [001]  6171.460045: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
          perf  1383 [001]  6171.460066: sched_switch: prev_comm=perf prev_pid=1383 prev_prio=120 prev_state=R+ ==> next_comm=kworker/1:1 next_pid=21 next_prio=120
   kworker/1:1    21 [001]  6171.460093: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=perf next_pid=1383 next_prio=120
       swapper     0 [000]  6171.468063: sched_wakeup: comm=kworker/0:3 pid=1209 prio=120 success=1 target_cpu=000
       swapper     0 [000]  6171.468107: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120
   kworker/0:3  1209 [000]  6171.468143: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120
          perf  1383 [001]  6171.470039: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
          perf  1383 [001]  6171.470058: sched_switch: prev_comm=perf prev_pid=1383 prev_prio=120 prev_state=R+ ==> next_comm=kworker/1:1 next_pid=21 next_prio=120
   kworker/1:1    21 [001]  6171.470082: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=perf next_pid=1383 next_prio=120
          perf  1383 [001]  6171.480035: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
  778. .. _perf-filtering:
  779. Filtering
  780. ^^^^^^^^^
  781. Notice that there are a lot of events that don't really have anything to
  782. do with what we're interested in, namely events that schedule 'perf'
  783. itself in and out or that wake perf up. We can get rid of those by using
  784. the '--filter' option - for each event we specify using -e, we can add a
  785. --filter after that to filter out trace events that contain fields with
  786. specific values: ::
   root@crownbay:~# perf record -a -e sched:sched_switch --filter 'next_comm != perf && prev_comm != perf' -e sched:sched_wakeup --filter 'comm != perf'
   ^C[ perf record: Woken up 38 times to write data ]
   [ perf record: Captured and wrote 9.688 MB perf.data (~423279 samples) ]

   root@crownbay:~# perf script

   swapper 0 [000] 7932.162180: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120
   kworker/0:3 1209 [000] 7932.162236: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120
   perf 1407 [001] 7932.170048: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
   perf 1407 [001] 7932.180044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
   perf 1407 [001] 7932.190038: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
   perf 1407 [001] 7932.200044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
   perf 1407 [001] 7932.210044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
   perf 1407 [001] 7932.220044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
   swapper 0 [001] 7932.230111: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001
   swapper 0 [001] 7932.230146: sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/1:1 next_pid=21 next_prio=120
   kworker/1:1 21 [001] 7932.230205: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=swapper/1 next_pid=0 next_prio=120
   swapper 0 [000] 7932.326109: sched_wakeup: comm=kworker/0:3 pid=1209 prio=120 success=1 target_cpu=000
   swapper 0 [000] 7932.326171: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120
   kworker/0:3 1209 [000] 7932.326214: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120

In this case, we've filtered out all events that have
'perf' in their 'comm', 'prev_comm', or 'next_comm' fields. Notice that
there are still events recorded for perf, but that those events
don't have values of 'perf' for the filtered fields. Completely
filtering out anything from perf would require a bit more work, but for the
purpose of demonstrating how to use filters, it's close enough.

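The filter expressions aren't limited to string equality - numeric
comparisons and boolean combinations work too. As a minimal sketch
(using the sched_wakeup fields visible in the output above), this
records only the wakeups that target CPU 0: ::

   root@crownbay:~# perf record -a -e sched:sched_wakeup --filter 'comm != perf && target_cpu == 0'
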
.. admonition:: Tying it Together

   These are exactly the same set of event filters defined by the trace
   event subsystem. See the ftrace/tracecmd/kernelshark section for more
   discussion about these event filters.

.. admonition:: Tying it Together

   These event filters are implemented by a special-purpose
   pseudo-interpreter in the kernel and are an integral and
   indispensable part of the perf design as it relates to tracing.
   Kernel-based event filters provide a mechanism to precisely throttle
   the event stream that appears in user space, where it makes sense to
   provide bindings to real programming languages for postprocessing the
   event stream. This architecture allows for the intelligent and
   flexible partitioning of processing between the kernel and user
   space. Contrast this with other tools such as SystemTap, which does
   all of its processing in the kernel and as such requires a special
   project-defined language in order to accommodate that design, or
   LTTng, where everything is sent to userspace and as such requires a
   super-efficient kernel-to-userspace transport mechanism in order to
   function properly. While perf can certainly benefit from, for instance,
   advances in the design of the transport, it doesn't fundamentally
   depend on them. Basically, if you find that your perf tracing
   application is causing buffer I/O overruns, it probably means that
   you aren't taking enough advantage of the kernel filtering engine.

Using Dynamic Tracepoints
~~~~~~~~~~~~~~~~~~~~~~~~~

perf isn't restricted to the fixed set of static tracepoints listed by
'perf list'. Users can also add their own 'dynamic' tracepoints anywhere
in the kernel. For instance, suppose we want to define our own
tracepoint on do_fork(). We can do that using the 'perf probe'
subcommand: ::

   root@crownbay:~# perf probe do_fork
   Added new event:
     probe:do_fork        (on do_fork)

   You can now use it in all perf tools, such as:

       perf record -e probe:do_fork -aR sleep 1

Adding a new tracepoint via
'perf probe' results in an event with all the expected files and format
in /sys/kernel/debug/tracing/events, just the same as for static
tracepoints (as discussed in more detail in the trace events subsystem
section): ::

   root@crownbay:/sys/kernel/debug/tracing/events/probe/do_fork# ls -al
   drwxr-xr-x 2 root root 0 Oct 28 11:42 .
   drwxr-xr-x 3 root root 0 Oct 28 11:42 ..
   -rw-r--r-- 1 root root 0 Oct 28 11:42 enable
   -rw-r--r-- 1 root root 0 Oct 28 11:42 filter
   -r--r--r-- 1 root root 0 Oct 28 11:42 format
   -r--r--r-- 1 root root 0 Oct 28 11:42 id

   root@crownbay:/sys/kernel/debug/tracing/events/probe/do_fork# cat format
   name: do_fork
   ID: 944
   format:
       field:unsigned short common_type; offset:0; size:2; signed:0;
       field:unsigned char common_flags; offset:2; size:1; signed:0;
       field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
       field:int common_pid; offset:4; size:4; signed:1;
       field:int common_padding; offset:8; size:4; signed:1;

       field:unsigned long __probe_ip; offset:12; size:4; signed:0;

   print fmt: "(%lx)", REC->__probe_ip

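Note that 'perf probe' can also record the values of function arguments
and local variables at the probe point if the kernel was built with
debuginfo; the added fields then show up in the 'format' file just like
'__probe_ip' above. A sketch, assuming do_fork() takes a 'clone_flags'
parameter in this kernel: ::

   root@crownbay:~# perf probe 'do_fork clone_flags'
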
We can list all dynamic tracepoints currently in
existence: ::

   root@crownbay:~# perf probe -l
    probe:do_fork (on do_fork)
    probe:schedule (on schedule)

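A dynamic tracepoint can also be deleted again when you're done with
it. As a sketch, this removes the do_fork probe we added above: ::

   root@crownbay:~# perf probe -d probe:do_fork
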
Let's record system-wide ('sleep 30' is a trick for recording
system-wide: the 'sleep' workload does essentially nothing itself, but
gives perf a 30-second window in which to record every do_fork event in
the system): ::

   root@crownbay:~# perf record -g -a -e probe:do_fork sleep 30
   [ perf record: Woken up 1 times to write data ]
   [ perf record: Captured and wrote 0.087 MB perf.data (~3812 samples) ]

Using 'perf script' we can see each do_fork event that fired: ::

   root@crownbay:~# perf script

   # ========
   # captured on: Sun Oct 28 11:55:18 2012
   # hostname : crownbay
   # os release : 3.4.11-yocto-standard
   # perf version : 3.4.11
   # arch : i686
   # nrcpus online : 2
   # nrcpus avail : 2
   # cpudesc : Intel(R) Atom(TM) CPU E660 @ 1.30GHz
   # cpuid : GenuineIntel,6,38,1
   # total memory : 1017184 kB
   # cmdline : /usr/bin/perf record -g -a -e probe:do_fork sleep 30
   # event : name = probe:do_fork, type = 2, config = 0x3b0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 5, 6 }
   # HEADER_CPU_TOPOLOGY info available, use -I to display
   # ========
   #
   matchbox-deskto 1197 [001] 34211.378318: do_fork: (c1028460)
   matchbox-deskto 1295 [001] 34211.380388: do_fork: (c1028460)
   pcmanfm 1296 [000] 34211.632350: do_fork: (c1028460)
   pcmanfm 1296 [000] 34211.639917: do_fork: (c1028460)
   matchbox-deskto 1197 [001] 34217.541603: do_fork: (c1028460)
   matchbox-deskto 1299 [001] 34217.543584: do_fork: (c1028460)
   gthumb 1300 [001] 34217.697451: do_fork: (c1028460)
   gthumb 1300 [001] 34219.085734: do_fork: (c1028460)
   gthumb 1300 [000] 34219.121351: do_fork: (c1028460)
   gthumb 1300 [001] 34219.264551: do_fork: (c1028460)
   pcmanfm 1296 [000] 34219.590380: do_fork: (c1028460)
   matchbox-deskto 1197 [001] 34224.955965: do_fork: (c1028460)
   matchbox-deskto 1306 [001] 34224.957972: do_fork: (c1028460)
   matchbox-termin 1307 [000] 34225.038214: do_fork: (c1028460)
   matchbox-termin 1307 [001] 34225.044218: do_fork: (c1028460)
   matchbox-termin 1307 [000] 34225.046442: do_fork: (c1028460)
   matchbox-deskto 1197 [001] 34237.112138: do_fork: (c1028460)
   matchbox-deskto 1311 [001] 34237.114106: do_fork: (c1028460)
   gaku 1312 [000] 34237.202388: do_fork: (c1028460)

And using 'perf report' on the same file, we can see the
callgraphs from starting a few programs during those 30 seconds:

.. image:: figures/perf-probe-do_fork-profile.png
   :align: center

.. admonition:: Tying it Together

   The trace events subsystem accommodates static and dynamic tracepoints
   in exactly the same way - there's no difference as far as the
   infrastructure is concerned. See the ftrace section for more details
   on the trace event subsystem.

.. admonition:: Tying it Together

   Dynamic tracepoints are implemented under the covers by kprobes and
   uprobes. kprobes and uprobes are also used by and in fact are the
   main focus of SystemTap.

.. _perf-documentation:

Perf Documentation
------------------

Online versions of the man pages for the commands discussed in this
section can be found here:

-  The `'perf stat' manpage <http://linux.die.net/man/1/perf-stat>`__.

-  The `'perf record' manpage <http://linux.die.net/man/1/perf-record>`__.

-  The `'perf report' manpage <http://linux.die.net/man/1/perf-report>`__.

-  The `'perf probe' manpage <http://linux.die.net/man/1/perf-probe>`__.

-  The `'perf script' manpage <http://linux.die.net/man/1/perf-script>`__.

-  Documentation on using the `'perf script' python
   binding <http://linux.die.net/man/1/perf-script-python>`__.

-  The top-level `perf(1) manpage <http://linux.die.net/man/1/perf>`__.

Normally, you should be able to invoke the man pages via perf itself
e.g. 'perf help' or 'perf help record'.

By default, however, Yocto doesn't install man pages, even though perf
relies on them for most of its help functionality. This is being
addressed by a Yocto bug: `Bug 3388 - perf: enable man pages for basic
'help'
functionality <https://bugzilla.yoctoproject.org/show_bug.cgi?id=3388>`__.

The man pages in text form, along with some other files, such as a set
of examples, can be found in the 'perf' directory of the kernel tree: ::

   tools/perf/Documentation

There's also a nice perf tutorial on the perf
wiki that goes into more detail than we do here in certain areas: `Perf
Tutorial <https://perf.wiki.kernel.org/index.php/Tutorial>`__

.. _profile-manual-ftrace:

ftrace
======

'ftrace' literally refers to the 'ftrace function tracer' but in reality
this encompasses a number of related tracers along with the
infrastructure that they all make use of.

.. _ftrace-setup:

ftrace Setup
------------

For this section, we'll assume you've already performed the basic setup
outlined in the ":ref:`profile-manual/profile-manual-intro:General Setup`" section.

ftrace, trace-cmd, and kernelshark run on the target system, and are
ready to go out-of-the-box - no additional setup is necessary. For the
rest of this section we assume you've ssh'ed to the host and will be
running ftrace on the target. kernelshark is a GUI application and if
you use the '-X' option to ssh you can have the kernelshark GUI run on
the target but display remotely on the host if you want.

Basic ftrace usage
------------------

'ftrace' essentially refers to everything included in the /tracing
directory of the mounted debugfs filesystem (Yocto follows the standard
convention and mounts it at /sys/kernel/debug). Here's a listing of all
the files found in /sys/kernel/debug/tracing on a Yocto system: ::

   root@sugarbay:/sys/kernel/debug/tracing# ls
   README                      kprobe_events        trace
   available_events            kprobe_profile       trace_clock
   available_filter_functions  options              trace_marker
   available_tracers           per_cpu              trace_options
   buffer_size_kb              printk_formats       trace_pipe
   buffer_total_size_kb        saved_cmdlines       tracing_cpumask
   current_tracer              set_event            tracing_enabled
   dyn_ftrace_total_info       set_ftrace_filter    tracing_on
   enabled_functions           set_ftrace_notrace   tracing_thresh
   events                      set_ftrace_pid
   free_buffer                 set_graph_function

The files listed above are used for various purposes
- some relate directly to the tracers themselves, others are used to set
tracing options, and yet others actually contain the tracing output when
a tracer is in effect. The purpose of some of the files can be guessed from
their names, others need explanation; in any case, we'll cover some of the
files we see here below but for an explanation of the others, please see
the ftrace documentation.

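For example, the 'buffer_size_kb' file in the listing above controls the
size of the per-cpu trace buffers; reading it shows the current size,
and echoing a new value into it resizes the buffers. A minimal sketch
(the 4096 here is an arbitrary choice): ::

   root@sugarbay:/sys/kernel/debug/tracing# cat buffer_size_kb
   root@sugarbay:/sys/kernel/debug/tracing# echo 4096 > buffer_size_kb
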
We'll start by looking at some of the available built-in tracers.
cat'ing the 'available_tracers' file lists the set of available tracers: ::

   root@sugarbay:/sys/kernel/debug/tracing# cat available_tracers
   blk function_graph function nop

The 'current_tracer' file contains the tracer currently in effect: ::

   root@sugarbay:/sys/kernel/debug/tracing# cat current_tracer
   nop

The above listing of current_tracer shows that the
'nop' tracer is in effect, which is just another way of saying that
there's actually no tracer currently in effect.

echo'ing one of the available_tracers into current_tracer makes the
specified tracer the current tracer: ::

   root@sugarbay:/sys/kernel/debug/tracing# echo function > current_tracer
   root@sugarbay:/sys/kernel/debug/tracing# cat current_tracer
   function

The above sets the current tracer to be the 'function' tracer. This tracer
traces every function call in the kernel and makes it available as the
contents of the 'trace' file. Reading the 'trace' file lists the
currently buffered function calls that have been traced by the function
tracer: ::

   root@sugarbay:/sys/kernel/debug/tracing# cat trace | less

   # tracer: function
   #
   # entries-in-buffer/entries-written: 310629/766471   #P:8
   #
   #  _-----=> irqs-off
   # / _----=> need-resched
   # | / _---=> hardirq/softirq
   # || / _--=> preempt-depth
   # ||| / delay
   # TASK-PID CPU# |||| TIMESTAMP FUNCTION
   # | | | |||| | |
   <idle>-0 [004] d..1 470.867169: ktime_get_real <-intel_idle
   <idle>-0 [004] d..1 470.867170: getnstimeofday <-ktime_get_real
   <idle>-0 [004] d..1 470.867171: ns_to_timeval <-intel_idle
   <idle>-0 [004] d..1 470.867171: ns_to_timespec <-ns_to_timeval
   <idle>-0 [004] d..1 470.867172: smp_apic_timer_interrupt <-apic_timer_interrupt
   <idle>-0 [004] d..1 470.867172: native_apic_mem_write <-smp_apic_timer_interrupt
   <idle>-0 [004] d..1 470.867172: irq_enter <-smp_apic_timer_interrupt
   <idle>-0 [004] d..1 470.867172: rcu_irq_enter <-irq_enter
   <idle>-0 [004] d..1 470.867173: rcu_idle_exit_common.isra.33 <-rcu_irq_enter
   <idle>-0 [004] d..1 470.867173: local_bh_disable <-irq_enter
   <idle>-0 [004] d..1 470.867173: add_preempt_count <-local_bh_disable
   <idle>-0 [004] d.s1 470.867174: tick_check_idle <-irq_enter
   <idle>-0 [004] d.s1 470.867174: tick_check_oneshot_broadcast <-tick_check_idle
   <idle>-0 [004] d.s1 470.867174: ktime_get <-tick_check_idle
   <idle>-0 [004] d.s1 470.867174: tick_nohz_stop_idle <-tick_check_idle
   <idle>-0 [004] d.s1 470.867175: update_ts_time_stats <-tick_nohz_stop_idle
   <idle>-0 [004] d.s1 470.867175: nr_iowait_cpu <-update_ts_time_stats
   <idle>-0 [004] d.s1 470.867175: tick_do_update_jiffies64 <-tick_check_idle
   <idle>-0 [004] d.s1 470.867175: _raw_spin_lock <-tick_do_update_jiffies64
   <idle>-0 [004] d.s1 470.867176: add_preempt_count <-_raw_spin_lock
   <idle>-0 [004] d.s2 470.867176: do_timer <-tick_do_update_jiffies64
   <idle>-0 [004] d.s2 470.867176: _raw_spin_lock <-do_timer
   <idle>-0 [004] d.s2 470.867176: add_preempt_count <-_raw_spin_lock
   <idle>-0 [004] d.s3 470.867177: ntp_tick_length <-do_timer
   <idle>-0 [004] d.s3 470.867177: _raw_spin_lock_irqsave <-ntp_tick_length
   .
   .
   .

Each line in the trace above shows what was happening in the kernel on a given
cpu, to the level of detail of function calls. Each entry shows the function
called, followed by its caller (after the arrow).

The function tracer gives you an extremely detailed idea of what the
kernel was doing at the point in time the trace was taken, and is a
great way to learn about how the kernel code works in a dynamic sense.

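If tracing every kernel function is too much, the 'set_ftrace_filter'
file from the listing above narrows the function tracer to a subset of
functions; it accepts glob patterns, and 'available_filter_functions'
lists everything that can be traced. A minimal sketch that traces only
the kmalloc-related functions (echoing nothing into the file clears the
filter again): ::

   root@sugarbay:/sys/kernel/debug/tracing# echo '*kmalloc*' > set_ftrace_filter
   root@sugarbay:/sys/kernel/debug/tracing# echo > set_ftrace_filter
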
.. admonition:: Tying it Together

   The ftrace function tracer is also available from within perf, as the
   ftrace:function tracepoint.

It is a little more difficult to follow the call chains than it needs to
be - luckily there's a variant of the function tracer that displays the
callchains explicitly, called the 'function_graph' tracer: ::

   root@sugarbay:/sys/kernel/debug/tracing# echo function_graph > current_tracer
   root@sugarbay:/sys/kernel/debug/tracing# cat trace | less

   tracer: function_graph

   CPU DURATION             FUNCTION CALLS
   |   |        |           |   |   |   |
   7) 0.046 us    | pick_next_task_fair();
   7) 0.043 us    | pick_next_task_stop();
   7) 0.042 us    | pick_next_task_rt();
   7) 0.032 us    | pick_next_task_fair();
   7) 0.030 us    | pick_next_task_idle();
   7)             | _raw_spin_unlock_irq() {
   7) 0.033 us    | sub_preempt_count();
   7) 0.258 us    | }
   7) 0.032 us    | sub_preempt_count();
   7) + 13.341 us | } /* __schedule */
   7) 0.095 us    | } /* sub_preempt_count */
   7)             | schedule() {
   7)             | __schedule() {
   7) 0.060 us    | add_preempt_count();
   7) 0.044 us    | rcu_note_context_switch();
   7)             | _raw_spin_lock_irq() {
   7) 0.033 us    | add_preempt_count();
   7) 0.247 us    | }
   7)             | idle_balance() {
   7)             | _raw_spin_unlock() {
   7) 0.031 us    | sub_preempt_count();
   7) 0.246 us    | }
   7)             | update_shares() {
   7) 0.030 us    | __rcu_read_lock();
   7) 0.029 us    | __rcu_read_unlock();
   7) 0.484 us    | }
   7) 0.030 us    | __rcu_read_lock();
   7)             | load_balance() {
   7)             | find_busiest_group() {
   7) 0.031 us    | idle_cpu();
   7) 0.029 us    | idle_cpu();
   7) 0.035 us    | idle_cpu();
   7) 0.906 us    | }
   7) 1.141 us    | }
   7) 0.022 us    | msecs_to_jiffies();
   7)             | load_balance() {
   7)             | find_busiest_group() {
   7) 0.031 us    | idle_cpu();
   .
   .
   .
   4) 0.062 us    | msecs_to_jiffies();
   4) 0.062 us    | __rcu_read_unlock();
   4)             | _raw_spin_lock() {
   4) 0.073 us    | add_preempt_count();
   4) 0.562 us    | }
   4) + 17.452 us | }
   4) 0.108 us    | put_prev_task_fair();
   4) 0.102 us    | pick_next_task_fair();
   4) 0.084 us    | pick_next_task_stop();
   4) 0.075 us    | pick_next_task_rt();
   4) 0.062 us    | pick_next_task_fair();
   4) 0.066 us    | pick_next_task_idle();
   ------------------------------------------
   4) kworker-74 => <idle>-0
   ------------------------------------------
   4)             | finish_task_switch() {
   4)             | _raw_spin_unlock_irq() {
   4) 0.100 us    | sub_preempt_count();
   4) 0.582 us    | }
   4) 1.105 us    | }
   4) 0.088 us    | sub_preempt_count();
   4) ! 100.066 us | }
   .
   .
   .
   3)             | sys_ioctl() {
   3) 0.083 us    | fget_light();
   3)             | security_file_ioctl() {
   3) 0.066 us    | cap_file_ioctl();
   3) 0.562 us    | }
   3)             | do_vfs_ioctl() {
   3)             | drm_ioctl() {
   3) 0.075 us    | drm_ut_debug_printk();
   3)             | i915_gem_pwrite_ioctl() {
   3)             | i915_mutex_lock_interruptible() {
   3) 0.070 us    | mutex_lock_interruptible();
   3) 0.570 us    | }
   3)             | drm_gem_object_lookup() {
   3)             | _raw_spin_lock() {
   3) 0.080 us    | add_preempt_count();
   3) 0.620 us    | }
   3)             | _raw_spin_unlock() {
   3) 0.085 us    | sub_preempt_count();
   3) 0.562 us    | }
   3) 2.149 us    | }
   3) 0.133 us    | i915_gem_object_pin();
   3)             | i915_gem_object_set_to_gtt_domain() {
   3) 0.065 us    | i915_gem_object_flush_gpu_write_domain();
   3) 0.065 us    | i915_gem_object_wait_rendering();
   3) 0.062 us    | i915_gem_object_flush_cpu_write_domain();
   3) 1.612 us    | }
   3)             | i915_gem_object_put_fence() {
   3) 0.097 us    | i915_gem_object_flush_fence.constprop.36();
   3) 0.645 us    | }
   3) 0.070 us    | add_preempt_count();
   3) 0.070 us    | sub_preempt_count();
   3) 0.073 us    | i915_gem_object_unpin();
   3) 0.068 us    | mutex_unlock();
   3) 9.924 us    | }
   3) + 11.236 us | }
   3) + 11.770 us | }
   3) + 13.784 us | }
   3)             | sys_ioctl() {

As you can see, the function_graph display is much easier
to follow. Also note that in addition to the function calls and
associated braces, other events such as scheduler events are displayed
in context. In fact, you can freely include any tracepoint available in
the trace events subsystem described in the next section by simply
enabling those events, and they'll appear in context in the function
graph display. Quite a powerful tool for understanding kernel dynamics.

Also notice that there are various annotations on the left hand side of
the display. For example if the total time it took for a given function
to execute is above a certain threshold, an exclamation point or plus
sign appears on the left hand side. Please see the ftrace documentation
for details on all these fields.

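Relatedly, if you only care about the call graph beneath a particular
function, the 'set_graph_function' file from the earlier listing
restricts the function_graph tracer to the subtrees rooted at the
functions you name. A minimal sketch using sys_ioctl, which appears in
the output above: ::

   root@sugarbay:/sys/kernel/debug/tracing# echo sys_ioctl > set_graph_function
   root@sugarbay:/sys/kernel/debug/tracing# echo function_graph > current_tracer
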
The 'trace events' Subsystem
----------------------------

One especially important directory contained within the
/sys/kernel/debug/tracing directory is the 'events' subdirectory, which
contains representations of every tracepoint in the system. Listing out
the contents of the 'events' subdirectory, we see mainly another set of
subdirectories: ::

   root@sugarbay:/sys/kernel/debug/tracing# cd events
   root@sugarbay:/sys/kernel/debug/tracing/events# ls -al
   drwxr-xr-x 38 root root 0 Nov 14 23:19 .
   drwxr-xr-x 5 root root 0 Nov 14 23:19 ..
   drwxr-xr-x 19 root root 0 Nov 14 23:19 block
   drwxr-xr-x 32 root root 0 Nov 14 23:19 btrfs
   drwxr-xr-x 5 root root 0 Nov 14 23:19 drm
   -rw-r--r-- 1 root root 0 Nov 14 23:19 enable
   drwxr-xr-x 40 root root 0 Nov 14 23:19 ext3
   drwxr-xr-x 79 root root 0 Nov 14 23:19 ext4
   drwxr-xr-x 14 root root 0 Nov 14 23:19 ftrace
   drwxr-xr-x 8 root root 0 Nov 14 23:19 hda
   -r--r--r-- 1 root root 0 Nov 14 23:19 header_event
   -r--r--r-- 1 root root 0 Nov 14 23:19 header_page
   drwxr-xr-x 25 root root 0 Nov 14 23:19 i915
   drwxr-xr-x 7 root root 0 Nov 14 23:19 irq
   drwxr-xr-x 12 root root 0 Nov 14 23:19 jbd
   drwxr-xr-x 14 root root 0 Nov 14 23:19 jbd2
   drwxr-xr-x 14 root root 0 Nov 14 23:19 kmem
   drwxr-xr-x 7 root root 0 Nov 14 23:19 module
   drwxr-xr-x 3 root root 0 Nov 14 23:19 napi
   drwxr-xr-x 6 root root 0 Nov 14 23:19 net
   drwxr-xr-x 3 root root 0 Nov 14 23:19 oom
   drwxr-xr-x 12 root root 0 Nov 14 23:19 power
   drwxr-xr-x 3 root root 0 Nov 14 23:19 printk
   drwxr-xr-x 8 root root 0 Nov 14 23:19 random
   drwxr-xr-x 4 root root 0 Nov 14 23:19 raw_syscalls
   drwxr-xr-x 3 root root 0 Nov 14 23:19 rcu
   drwxr-xr-x 6 root root 0 Nov 14 23:19 rpm
   drwxr-xr-x 20 root root 0 Nov 14 23:19 sched
   drwxr-xr-x 7 root root 0 Nov 14 23:19 scsi
   drwxr-xr-x 4 root root 0 Nov 14 23:19 signal
   drwxr-xr-x 5 root root 0 Nov 14 23:19 skb
   drwxr-xr-x 4 root root 0 Nov 14 23:19 sock
   drwxr-xr-x 10 root root 0 Nov 14 23:19 sunrpc
   drwxr-xr-x 538 root root 0 Nov 14 23:19 syscalls
   drwxr-xr-x 4 root root 0 Nov 14 23:19 task
   drwxr-xr-x 14 root root 0 Nov 14 23:19 timer
   drwxr-xr-x 3 root root 0 Nov 14 23:19 udp
   drwxr-xr-x 21 root root 0 Nov 14 23:19 vmscan
   drwxr-xr-x 3 root root 0 Nov 14 23:19 vsyscall
   drwxr-xr-x 6 root root 0 Nov 14 23:19 workqueue
   drwxr-xr-x 26 root root 0 Nov 14 23:19 writeback

Each one of these subdirectories
corresponds to a 'subsystem' and contains yet again more subdirectories,
each one of those finally corresponding to a tracepoint. For example,
here are the contents of the 'kmem' subsystem: ::

   root@sugarbay:/sys/kernel/debug/tracing/events# cd kmem
   root@sugarbay:/sys/kernel/debug/tracing/events/kmem# ls -al
   drwxr-xr-x 14 root root 0 Nov 14 23:19 .
   drwxr-xr-x 38 root root 0 Nov 14 23:19 ..
   -rw-r--r-- 1 root root 0 Nov 14 23:19 enable
   -rw-r--r-- 1 root root 0 Nov 14 23:19 filter
   drwxr-xr-x 2 root root 0 Nov 14 23:19 kfree
   drwxr-xr-x 2 root root 0 Nov 14 23:19 kmalloc
   drwxr-xr-x 2 root root 0 Nov 14 23:19 kmalloc_node
   drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_alloc
   drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_alloc_node
   drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_free
   drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc
   drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc_extfrag
   drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc_zone_locked
   drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_free
   drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_free_batched
   drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_pcpu_drain

Let's see what's inside the subdirectory for a
specific tracepoint, in this case the one for kmalloc: ::

   root@sugarbay:/sys/kernel/debug/tracing/events/kmem# cd kmalloc
   root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# ls -al
   drwxr-xr-x 2 root root 0 Nov 14 23:19 .
   drwxr-xr-x 14 root root 0 Nov 14 23:19 ..
   -rw-r--r-- 1 root root 0 Nov 14 23:19 enable
   -rw-r--r-- 1 root root 0 Nov 14 23:19 filter
   -r--r--r-- 1 root root 0 Nov 14 23:19 format
   -r--r--r-- 1 root root 0 Nov 14 23:19 id

The 'format' file for the
tracepoint describes the event in memory, which is used by the various
tracing tools that now make use of these tracepoints to parse the event
and make sense of it, along with a 'print fmt' field that allows tools
like ftrace to display the event as text. Here's what the format of the
kmalloc event looks like: ::

   root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# cat format
   name: kmalloc
   ID: 313
   format:
       field:unsigned short common_type; offset:0; size:2; signed:0;
       field:unsigned char common_flags; offset:2; size:1; signed:0;
       field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
       field:int common_pid; offset:4; size:4; signed:1;
       field:int common_padding; offset:8; size:4; signed:1;

       field:unsigned long call_site; offset:16; size:8; signed:0;
       field:const void * ptr; offset:24; size:8; signed:0;
       field:size_t bytes_req; offset:32; size:8; signed:0;
       field:size_t bytes_alloc; offset:40; size:8; signed:0;
       field:gfp_t gfp_flags; offset:48; size:4; signed:0;

   print fmt: "call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", REC->call_site, REC->ptr, REC->bytes_req, REC->bytes_alloc,
   (REC->gfp_flags) ? __print_flags(REC->gfp_flags, "|", {(unsigned long)(((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | ((
   gfp_t)0x20000u) | (( gfp_t)0x02u) | (( gfp_t)0x08u)) | (( gfp_t)0x4000u) | (( gfp_t)0x10000u) | (( gfp_t)0x1000u) | (( gfp_t)0x200u) | ((
   gfp_t)0x400000u)), "GFP_TRANSHUGE"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( gfp_t)0x20000u) | ((
   gfp_t)0x02u) | (( gfp_t)0x08u)), "GFP_HIGHUSER_MOVABLE"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | ((
   gfp_t)0x20000u) | (( gfp_t)0x02u)), "GFP_HIGHUSER"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | ((
   gfp_t)0x20000u)), "GFP_USER"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( gfp_t)0x80000u)), "GFP_TEMPORARY"},
   {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u)), "GFP_KERNEL"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u)),
   "GFP_NOFS"}, {(unsigned long)((( gfp_t)0x20u)), "GFP_ATOMIC"}, {(unsigned long)((( gfp_t)0x10u)), "GFP_NOIO"}, {(unsigned long)((
   gfp_t)0x20u), "GFP_HIGH"}, {(unsigned long)(( gfp_t)0x10u), "GFP_WAIT"}, {(unsigned long)(( gfp_t)0x40u), "GFP_IO"}, {(unsigned long)((
   gfp_t)0x100u), "GFP_COLD"}, {(unsigned long)(( gfp_t)0x200u), "GFP_NOWARN"}, {(unsigned long)(( gfp_t)0x400u), "GFP_REPEAT"}, {(unsigned
   long)(( gfp_t)0x800u), "GFP_NOFAIL"}, {(unsigned long)(( gfp_t)0x1000u), "GFP_NORETRY"}, {(unsigned long)(( gfp_t)0x4000u), "GFP_COMP"},
   {(unsigned long)(( gfp_t)0x8000u), "GFP_ZERO"}, {(unsigned long)(( gfp_t)0x10000u), "GFP_NOMEMALLOC"}, {(unsigned long)(( gfp_t)0x20000u),
   "GFP_HARDWALL"}, {(unsigned long)(( gfp_t)0x40000u), "GFP_THISNODE"}, {(unsigned long)(( gfp_t)0x80000u), "GFP_RECLAIMABLE"}, {(unsigned
   long)(( gfp_t)0x08u), "GFP_MOVABLE"}, {(unsigned long)(( gfp_t)0), "GFP_NOTRACK"}, {(unsigned long)(( gfp_t)0x400000u), "GFP_NO_KSWAPD"},
   {(unsigned long)(( gfp_t)0x800000u), "GFP_OTHER_NODE"} ) : "GFP_NOWAIT"

The 'enable' file
in the tracepoint directory is what allows the user (or tools such as
trace-cmd) to actually turn the tracepoint on and off. When enabled, the
corresponding tracepoint will start appearing in the ftrace 'trace' file
described previously. For example, this turns on the kmalloc tracepoint: ::

   root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# echo 1 > enable

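The 'filter' file alongside 'enable' accepts the same filter
expressions we used with perf earlier, applied here by ftrace itself
before an event reaches the trace buffer. As a minimal sketch, this
restricts the kmalloc events to the larger allocations (echoing '0'
into the file clears the filter again): ::

   root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# echo 'bytes_req > 256' > filter
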
At the moment, we're not interested in the function tracer or
some other tracer that might be in effect, so we first turn it off, but
if we do that, we still need to turn tracing on in order to see the
events in the output buffer: ::

   root@sugarbay:/sys/kernel/debug/tracing# echo nop > current_tracer
   root@sugarbay:/sys/kernel/debug/tracing# echo 1 > tracing_on

Now, if we look at the 'trace' file, we see nothing
but the kmalloc events we just turned on: ::

   root@sugarbay:/sys/kernel/debug/tracing# cat trace | less

   # tracer: nop
   #
   # entries-in-buffer/entries-written: 1897/1897   #P:8
   #
   #  _-----=> irqs-off
   # / _----=> need-resched
   # | / _---=> hardirq/softirq
   # || / _--=> preempt-depth
   # ||| / delay
   # TASK-PID CPU# |||| TIMESTAMP FUNCTION
   # | | | |||| | |
   dropbear-1465 [000] ...1 18154.620753: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL
   <idle>-0 [000] ..s3 18154.621640: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   <idle>-0 [000] ..s3 18154.621656: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   matchbox-termin-1361 [001] ...1 18154.755472: kmalloc: call_site=ffffffff81614050 ptr=ffff88006d5f0e00 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_KERNEL|GFP_REPEAT
   Xorg-1264 [002] ...1 18154.755581: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY
   Xorg-1264 [002] ...1 18154.755583: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO
   Xorg-1264 [002] ...1 18154.755589: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO
   matchbox-termin-1361 [001] ...1 18155.354594: kmalloc: call_site=ffffffff81614050 ptr=ffff88006db35400 bytes_req=576 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_REPEAT
   Xorg-1264 [002] ...1 18155.354703: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY
   Xorg-1264 [002] ...1 18155.354705: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO
   Xorg-1264 [002] ...1 18155.354711: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO
   <idle>-0 [000] ..s3 18155.673319: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   dropbear-1465 [000] ...1 18155.673525: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL
   <idle>-0 [000] ..s3 18155.674821: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   <idle>-0 [000] ..s3 18155.793014: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   dropbear-1465 [000] ...1 18155.793219: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL
   <idle>-0 [000] ..s3 18155.794147: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   <idle>-0 [000] ..s3 18155.936705: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   dropbear-1465 [000] ...1 18155.936910: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL
   <idle>-0 [000] ..s3 18155.937869: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   matchbox-termin-1361 [001] ...1 18155.953667: kmalloc: call_site=ffffffff81614050 ptr=ffff88006d5f2000 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_KERNEL|GFP_REPEAT
   Xorg-1264 [002] ...1 18155.953775: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY
   Xorg-1264 [002] ...1 18155.953777: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO
   Xorg-1264 [002] ...1 18155.953783: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO
   <idle>-0 [000] ..s3 18156.176053: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   dropbear-1465 [000] ...1 18156.176257: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL
   <idle>-0 [000] ..s3 18156.177717: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   <idle>-0 [000] ..s3 18156.399229: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   dropbear-1465 [000] ...1 18156.399434: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL
   <idle>-0 [000] ..s3 18156.400660: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC
   matchbox-termin-1361 [001] ...1 18156.552800: kmalloc: call_site=ffffffff81614050 ptr=ffff88006db34800 bytes_req=576 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_REPEAT

To disable the kmalloc event again, we send a '0' to the 'enable' file: ::

   root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# echo 0 > enable

You can enable any number of events or complete subsystems (by
using the 'enable' file in the subsystem directory) and get an
arbitrarily fine-grained idea of what's going on in the system by
enabling as many of the appropriate tracepoints as applicable (see the
sketch below).

A number of the tools described in this HOWTO do just that, including
trace-cmd and kernelshark in the next section.

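As a minimal sketch of subsystem-level enabling, this turns on every
event in the 'kmem' subsystem at once via the subsystem's 'enable'
file; the 'set_event' file from the earlier listing offers an
equivalent wildcard interface: ::

   root@sugarbay:/sys/kernel/debug/tracing# echo 1 > events/kmem/enable
   root@sugarbay:/sys/kernel/debug/tracing# echo 'kmem:*' > set_event
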
.. admonition:: Tying it Together

   These tracepoints and their representation are used not only by
   ftrace, but by many of the other tools covered in this document and
   they form a central point of integration for the various tracers
   available in Linux. They form a central part of the instrumentation
   for the following tools: perf, lttng, ftrace, blktrace and SystemTap

.. admonition:: Tying it Together

   Eventually all the special-purpose tracers currently available in
   /sys/kernel/debug/tracing will be removed and replaced with
   equivalent tracers based on the 'trace events' subsystem.

.. _trace-cmd-kernelshark:

trace-cmd/kernelshark
---------------------

trace-cmd is essentially an extensive command-line 'wrapper' interface
that hides the details of all the individual files in
/sys/kernel/debug/tracing, allowing users to specify particular
events within the /sys/kernel/debug/tracing/events/ subdirectory and to
collect traces without having to deal with those details directly.

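For instance, a command-line session equivalent to the kmalloc exercise
above might look like this (a sketch - trace-cmd writes its results to
a trace.dat file, which 'trace-cmd report' then displays): ::

   root@sugarbay:~# trace-cmd record -e kmem:kmalloc sleep 10
   root@sugarbay:~# trace-cmd report | less
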
As yet another layer on top of that, kernelshark provides a GUI that
allows users to start and stop traces and specify sets of events using
an intuitive interface, and view the output as both trace events and as
a per-CPU graphical display. It directly uses 'trace-cmd' as the
plumbing that accomplishes all that underneath the covers (and actually
displays the trace-cmd command it uses, as we'll see).

To start a trace using kernelshark, first start kernelshark: ::

   root@sugarbay:~# kernelshark

Then bring up the 'Capture' dialog by
choosing from the kernelshark menu: ::

   Capture | Record

That will display the following dialog, which allows you to choose one or more
events (or even one or more complete subsystems) to trace:

.. image:: figures/kernelshark-choose-events.png
   :align: center

Note that these are exactly the same sets of events described in the
previous trace events subsystem section, and that is in fact where
trace-cmd gets them for kernelshark.

In the above screenshot, we've decided to explore the graphics subsystem
a bit and so have chosen to trace all the tracepoints contained within
the 'i915' and 'drm' subsystems.

After doing that, we can start and stop the trace using the 'Run'
button on the lower right corner of the dialog (the same button
turns into a 'Stop' button once the trace has started):

.. image:: figures/kernelshark-output-display.png
   :align: center

Notice that the right-hand pane shows the exact trace-cmd command-line
that's used to run the trace, along with the results of the trace-cmd
run.

Once the 'Stop' button is pressed, the graphical view magically fills up
with a colorful per-cpu display of the trace data, along with the
detailed event listing below that:

.. image:: figures/kernelshark-i915-display.png
   :align: center

Here's another example, this time a display resulting from tracing 'all
events':

.. image:: figures/kernelshark-all.png
   :align: center

The tool is pretty self-explanatory, but for more detailed information
on navigating through the data, see the `kernelshark
website <http://rostedt.homelinux.com/kernelshark/>`__.

.. _ftrace-documentation:

ftrace Documentation
--------------------

The documentation for ftrace can be found in the kernel Documentation
directory: ::

   Documentation/trace/ftrace.txt

The documentation for the trace event subsystem can also be found in the kernel
Documentation directory: ::

   Documentation/trace/events.txt

There is a nice series of articles on using ftrace and trace-cmd at LWN:

-  `Debugging the kernel using Ftrace - part
   1 <http://lwn.net/Articles/365835/>`__

-  `Debugging the kernel using Ftrace - part
   2 <http://lwn.net/Articles/366796/>`__

-  `Secrets of the Ftrace function
   tracer <http://lwn.net/Articles/370423/>`__

-  `trace-cmd: A front-end for
   Ftrace <https://lwn.net/Articles/410200/>`__

There's more detailed documentation on kernelshark usage here:
`KernelShark <http://rostedt.homelinux.com/kernelshark/>`__

An amusing yet useful README (a tracing mini-HOWTO) can be found in
``/sys/kernel/debug/tracing/README``.

.. _profile-manual-systemtap:

systemtap
=========

SystemTap is a system-wide script-based tracing and profiling tool.
SystemTap scripts are C-like programs that are executed in the kernel to
gather/print/aggregate data extracted from the context they end up being
invoked under.

For example, this probe from the `SystemTap
tutorial <http://sourceware.org/systemtap/tutorial/>`__ simply prints a
line every time any process on the system open()s a file. For each line,
it prints the executable name of the program that opened the file, along
with its PID, and the name of the file it opened (or tried to open),
which it extracts from the open syscall's argstr.

.. code-block:: none

   probe syscall.open
   {
           printf ("%s(%d) open (%s)\n", execname(), pid(), argstr)
   }

   probe timer.ms(4000) # after 4 seconds
   {
           exit ()
   }

Normally, to execute this
probe, you'd simply install systemtap on the system you want to probe,
and directly run the probe on that system e.g. assuming the name of the
file containing the above text is trace_open.stp: ::

   # stap trace_open.stp

What systemtap does under the covers to run this probe is 1) parse and
convert the probe to an equivalent 'C' form, 2) compile the 'C' form
into a kernel module, 3) insert the module into the kernel, which arms
it, and 4) collect the data generated by the probe and display it to the
user.

In order to accomplish steps 1 and 2, the 'stap' program needs access to
the kernel build system that produced the kernel that the probed system
is running. In the case of a typical embedded system (the 'target'), the
kernel build system unfortunately isn't typically part of the image
running on the target. It is normally available on the 'host' system
that produced the target image however; in such cases, steps 1 and 2 are
executed on the host system, and steps 3 and 4 are executed on the
target system, using only the systemtap 'runtime'.

The systemtap support in Yocto assumes that only steps 3 and 4 are run
on the target; it is possible to do everything on the target, but this
section assumes only the typical embedded use-case.

So basically what you need to do in order to run a systemtap script on
the target is to 1) on the host system, compile the probe into a kernel
module that makes sense to the target, 2) copy the module onto the
target system and 3) insert the module into the target kernel, which
arms it, and 4) collect the data generated by the probe and display it
to the user.

.. _systemtap-setup:

systemtap Setup
---------------

Those are a lot of steps and a lot of details, but fortunately Yocto
includes a script called 'crosstap' that will take care of those
details, allowing you to simply execute a systemtap script on the remote
target, with arguments if necessary.

In order to do this from a remote host, however, you need to have access
to the build for the image you booted. The 'crosstap' script provides
details on how to do this if you run the script on the host without
having done a build: ::

   $ crosstap root@192.168.1.88 trace_open.stp

   Error: No target kernel build found.
   Did you forget to create a local build of your image?

   'crosstap' requires a local sdk build of the target system
   (or a build that includes 'tools-profile') in order to build
   kernel modules that can probe the target system.

   Practically speaking, that means you need to do the following:
    - If you're running a pre-built image, download the release
      and/or BSP tarballs used to build the image.
    - If you're working from git sources, just clone the metadata
      and BSP layers needed to build the image you'll be booting.
    - Make sure you're properly set up to build a new image (see
      the BSP README and/or the widely available basic documentation
      that discusses how to build images).
    - Build an -sdk version of the image e.g.:
          $ bitbake core-image-sato-sdk
    OR
    - Build a non-sdk image but include the profiling tools:
          [ edit local.conf and add 'tools-profile' to the end of
            the EXTRA_IMAGE_FEATURES variable ]
          $ bitbake core-image-sato

   Once you've build the image on the host system, you're ready to
   boot it (or the equivalent pre-built image) and use 'crosstap'
   to probe it (you need to source the environment as usual first):

      $ source oe-init-build-env
      $ cd ~/my/systemtap/scripts
      $ crosstap root@192.168.1.xxx myscript.stp

.. note::

   SystemTap, which uses 'crosstap', assumes you can establish an ssh
   connection to the remote target. Please refer to the crosstap wiki
   page for details on verifying ssh connections. Also, the ability to
   ssh into the target system is not enabled by default in \*-minimal
   images.

So essentially what you need to
do is build an SDK image or image with 'tools-profile' as detailed in
the ":ref:`profile-manual/profile-manual-intro:General Setup`" section of this
manual, and boot the resulting target image.

.. note::

   If you have a build directory containing multiple machines, you need
   to have the MACHINE you're connecting to selected in local.conf, and
   the kernel in that machine's build directory must match the kernel on
   the booted system exactly, or you'll get the above 'crosstap' message
   when you try to invoke a script.

Running a Script on a Target
----------------------------

Once you've done that, you should be able to run a systemtap script on
the target: ::

   $ cd /path/to/yocto
   $ source oe-init-build-env

   ### Shell environment set up for builds. ###

   You can now run 'bitbake <target>'

   Common targets are:
       core-image-minimal
       core-image-sato
       meta-toolchain
       meta-ide-support

   You can also run generated qemu images with a command like 'runqemu qemux86-64'

Once you've done that, you can cd to whatever
directory contains your scripts and use 'crosstap' to run the script: ::

   $ cd /path/to/my/systemtap/script
   $ crosstap root@192.168.7.2 trace_open.stp

If you get an error connecting to the target e.g.: ::

   $ crosstap root@192.168.7.2 trace_open.stp
   error establishing ssh connection on remote 'root@192.168.7.2'

Try ssh'ing to the target and see what happens: ::

   $ ssh root@192.168.7.2

A lot of the time, connection
problems are due to specifying a wrong IP address or having a 'host key
verification error'.

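In the host key case, which typically shows up after the target has
been reflashed and its ssh key has changed, removing the stale key for
the target's address from your known_hosts file usually clears things
up: ::

   $ ssh-keygen -R 192.168.7.2
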
If everything worked as planned, you should see something like this
(enter the password when prompted, or press enter if it's set up to use
no password):

.. code-block:: none

   $ crosstap root@192.168.7.2 trace_open.stp
   root@192.168.7.2's password:
   matchbox-termin(1036) open ("/tmp/vte3FS2LW", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0600)
   matchbox-termin(1036) open ("/tmp/vteJMC7LW", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0600)

.. _systemtap-documentation:

systemtap Documentation
-----------------------

The SystemTap language reference can be found here: `SystemTap Language
Reference <http://sourceware.org/systemtap/langref/>`__

Links to other SystemTap documents, tutorials, and examples can be found
here: `SystemTap documentation
page <http://sourceware.org/systemtap/documentation.html>`__

.. _profile-manual-sysprof:

Sysprof
=======

Sysprof is a very easy to use system-wide profiler that consists of a
single window with three panes and a few buttons which allow you to
start, stop, and view the profile from one place.

.. _sysprof-setup:

Sysprof Setup
-------------

For this section, we'll assume you've already performed the basic setup
outlined in the ":ref:`profile-manual/profile-manual-intro:General Setup`" section.

Sysprof is a GUI-based application that runs on the target system. For
the rest of this document we assume you've ssh'ed to the host and will
be running Sysprof on the target (you can use the '-X' option to ssh and
have the Sysprof GUI run on the target but display remotely on the host
if you want).

.. _sysprof-basic-usage:

Basic Sysprof Usage
-------------------

To start profiling the system, you simply press the 'Start' button. To
stop profiling and to start viewing the profile data in one easy step,
press the 'Profile' button.

Once you've pressed the profile button, the three panes will fill up
with profiling data:

.. image:: figures/sysprof-copy-to-user.png
   :align: center

The left pane shows a list of functions and processes. Selecting one of
those expands that function in the right pane, showing all its callees.
Note that this caller-oriented display is essentially the inverse of
perf's default callee-oriented callchain display.

In the screenshot above, we're focusing on ``__copy_to_user_ll()`` and
looking up the callchain we can see that one of the callers of
``__copy_to_user_ll`` is sys_read() and the complete callpath between them.

Notice that this is essentially a portion of the same information we saw
in the perf display shown in the perf section of this page.

.. image:: figures/sysprof-copy-from-user.png
   :align: center

Similarly, the above is a snapshot of the Sysprof display of a
copy-from-user callchain.

Finally, looking at the third Sysprof pane in the lower left, we can see
a list of all the callers of a particular function selected in the top
left pane. In this case, the lower pane is showing all the callers of
``__mark_inode_dirty``:

.. image:: figures/sysprof-callers.png
   :align: center

Double-clicking on one of those functions will in turn change the focus
to the selected function, and so on.

.. admonition:: Tying it Together

   If you like sysprof's 'caller-oriented' display, you may be able to
   approximate it in other tools as well. For example, 'perf report' has
   the -g (--call-graph) option that you can experiment with; one of the
   options is 'caller' for an inverted caller-based callgraph display.

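   As a sketch (the exact '-g' syntax differs between perf versions, so
   check 'perf help report' on your system first), requesting an
   inverted, caller-based callgraph might look like this: ::

      root@sugarbay:~# perf report -g graph,0.5,caller
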
.. _sysprof-documentation:

Sysprof Documentation
---------------------

There doesn't seem to be any documentation for Sysprof, but maybe that's
because it's pretty self-explanatory. The Sysprof website, however, is
here: `Sysprof, System-wide Performance Profiler for
Linux <http://sysprof.com/>`__

LTTng (Linux Trace Toolkit, next generation)
============================================

.. _lttng-setup:

LTTng Setup
-----------

For this section, we'll assume you've already performed the basic setup
outlined in the ":ref:`profile-manual/profile-manual-intro:General Setup`" section.
LTTng is run on the target system by ssh'ing to it.

Collecting and Viewing Traces
-----------------------------

Once you've applied the above commits and built and booted your image
(you need to build the core-image-sato-sdk image or use one of the other
methods described in the ":ref:`profile-manual/profile-manual-intro:General Setup`" section), you're ready to start
tracing.

Collecting and viewing a trace on the target (inside a shell)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

First, from the host, ssh to the target: ::

   $ ssh -l root 192.168.1.47
   The authenticity of host '192.168.1.47 (192.168.1.47)' can't be established.
   RSA key fingerprint is 23:bd:c8:b1:a8:71:52:00:ee:00:4f:64:9e:10:b9:7e.
   Are you sure you want to continue connecting (yes/no)? yes
   Warning: Permanently added '192.168.1.47' (RSA) to the list of known hosts.
   root@192.168.1.47's password:

Once on the target, use these steps to create a trace: ::

   root@crownbay:~# lttng create
   Spawning a session daemon
   Session auto-20121015-232120 created.
   Traces will be written in /home/root/lttng-traces/auto-20121015-232120

Enable the events you want to trace (in this case all kernel events): ::

   root@crownbay:~# lttng enable-event --kernel --all
   All kernel events are enabled in channel channel0

Start the trace: ::

   root@crownbay:~# lttng start
   Tracing started for session auto-20121015-232120

And then stop the trace after a while, or after running a particular workload that
you want to trace: ::

   root@crownbay:~# lttng stop
   Tracing stopped for session auto-20121015-232120

You can now view the trace in text form on the target: ::

   root@crownbay:~# lttng view

   [23:21:56.989270399] (+?.?????????) sys_geteuid: { 1 }, { }
   [23:21:56.989278081] (+0.000007682) exit_syscall: { 1 }, { ret = 0 }
   [23:21:56.989286043] (+0.000007962) sys_pipe: { 1 }, { fildes = 0xB77B9E8C }
   [23:21:56.989321802] (+0.000035759) exit_syscall: { 1 }, { ret = 0 }
   [23:21:56.989329345] (+0.000007543) sys_mmap_pgoff: { 1 }, { addr = 0x0, len = 10485760, prot = 3, flags = 131362, fd = 4294967295, pgoff = 0 }
   [23:21:56.989351694] (+0.000022349) exit_syscall: { 1 }, { ret = -1247805440 }
   [23:21:56.989432989] (+0.000081295) sys_clone: { 1 }, { clone_flags = 0x411, newsp = 0xB5EFFFE4, parent_tid = 0xFFFFFFFF, child_tid = 0x0 }
   [23:21:56.989477129] (+0.000044140) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 681660, vruntime = 43367983388 }
   [23:21:56.989486697] (+0.000009568) sched_migrate_task: { 1 }, { comm = "lttng-consumerd", tid = 1193, prio = 20, orig_cpu = 1, dest_cpu = 1 }
   [23:21:56.989508418] (+0.000021721) hrtimer_init: { 1 }, { hrtimer = 3970832076, clockid = 1, mode = 1 }
   [23:21:56.989770462] (+0.000262044) hrtimer_cancel: { 1 }, { hrtimer = 3993865440 }
   [23:21:56.989771580] (+0.000001118) hrtimer_cancel: { 0 }, { hrtimer = 3993812192 }
   [23:21:56.989776957] (+0.000005377) hrtimer_expire_entry: { 1 }, { hrtimer = 3993865440, now = 79815980007057, function = 3238465232 }
   [23:21:56.989778145] (+0.000001188) hrtimer_expire_entry: { 0 }, { hrtimer = 3993812192, now = 79815980008174, function = 3238465232 }
   [23:21:56.989791695] (+0.000013550) softirq_raise: { 1 }, { vec = 1 }
   [23:21:56.989795396] (+0.000003701) softirq_raise: { 0 }, { vec = 1 }
   [23:21:56.989800635] (+0.000005239) softirq_raise: { 0 }, { vec = 9 }
   [23:21:56.989807130] (+0.000006495) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 330710, vruntime = 43368314098 }
   [23:21:56.989809993] (+0.000002863) sched_stat_runtime: { 0 }, { comm = "lttng-sessiond", tid = 1181, runtime = 1015313, vruntime = 36976733240 }
   [23:21:56.989818514] (+0.000008521) hrtimer_expire_exit: { 0 }, { hrtimer = 3993812192 }
   [23:21:56.989819631] (+0.000001117) hrtimer_expire_exit: { 1 }, { hrtimer = 3993865440 }
   [23:21:56.989821866] (+0.000002235) hrtimer_start: { 0 }, { hrtimer = 3993812192, function = 3238465232, expires = 79815981000000, softexpires = 79815981000000 }
   [23:21:56.989822984] (+0.000001118) hrtimer_start: { 1 }, { hrtimer = 3993865440, function = 3238465232, expires = 79815981000000, softexpires = 79815981000000 }
   [23:21:56.989832762] (+0.000009778) softirq_entry: { 1 }, { vec = 1 }
   [23:21:56.989833879] (+0.000001117) softirq_entry: { 0 }, { vec = 1 }
   [23:21:56.989838069] (+0.000004190) timer_cancel: { 1 }, { timer = 3993871956 }
   [23:21:56.989839187] (+0.000001118) timer_cancel: { 0 }, { timer = 3993818708 }
   [23:21:56.989841492] (+0.000002305) timer_expire_entry: { 1 }, { timer = 3993871956, now = 79515980, function = 3238277552 }
   [23:21:56.989842819] (+0.000001327) timer_expire_entry: { 0 }, { timer = 3993818708, now = 79515980, function = 3238277552 }
   [23:21:56.989854831] (+0.000012012) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 49237, vruntime = 43368363335 }
   [23:21:56.989855949] (+0.000001118) sched_stat_runtime: { 0 }, { comm = "lttng-sessiond", tid = 1181, runtime = 45121, vruntime = 36976778361 }
   [23:21:56.989861257] (+0.000005308) sched_stat_sleep: { 1 }, { comm = "kworker/1:1", tid = 21, delay = 9451318 }
   [23:21:56.989862374] (+0.000001117) sched_stat_sleep: { 0 }, { comm = "kworker/0:0", tid = 4, delay = 9958820 }
   [23:21:56.989868241] (+0.000005867) sched_wakeup: { 0 }, { comm = "kworker/0:0", tid = 4, prio = 120, success = 1, target_cpu = 0 }
   [23:21:56.989869358] (+0.000001117) sched_wakeup: { 1 }, { comm = "kworker/1:1", tid = 21, prio = 120, success = 1, target_cpu = 1 }
   [23:21:56.989877460] (+0.000008102) timer_expire_exit: { 1 }, { timer = 3993871956 }
   [23:21:56.989878577] (+0.000001117) timer_expire_exit: { 0 }, { timer = 3993818708 }
   .
   .
   .

You can now safely destroy the trace session (note that this doesn't delete
the trace - it's still there in ~/lttng-traces): ::

   root@crownbay:~# lttng destroy
   Session auto-20121015-232120 destroyed at /home/root

Note that the trace is saved in a directory of the same name as returned by
'lttng create', under the ~/lttng-traces directory (you can change this by
supplying your own name to 'lttng create', as shown below): ::

   root@crownbay:~# ls -al ~/lttng-traces
   drwxrwx--- 3 root root 1024 Oct 15 23:21 .
   drwxr-xr-x 5 root root 1024 Oct 15 23:57 ..
   drwxrwx--- 3 root root 1024 Oct 15 23:21 auto-20121015-232120
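For example, to give the session (and thus the trace directory) a more
meaningful name, pass one to 'lttng create'; the session name below is just
an illustration: ::

   root@crownbay:~# lttng create my-kernel-session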
Collecting and viewing a userspace trace on the target (inside a shell)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For LTTng userspace tracing, you need to have a properly instrumented
userspace program. For this example, we'll use the 'hello' test program
generated by the lttng-ust build.

The 'hello' test program isn't installed on the rootfs by the lttng-ust
build, so we need to copy it over manually. First cd into the build
directory that contains the hello executable: ::

   $ cd build/tmp/work/core2_32-poky-linux/lttng-ust/2.0.5-r0/git/tests/hello/.libs

Copy that over to the target machine: ::

   $ scp hello root@192.168.1.47:

You now have the instrumented lttng 'hello world' test program on the
target, ready to test.
First, from the host, ssh to the target: ::

   $ ssh -l root 192.168.1.47
   The authenticity of host '192.168.1.47 (192.168.1.47)' can't be established.
   RSA key fingerprint is 23:bd:c8:b1:a8:71:52:00:ee:00:4f:64:9e:10:b9:7e.
   Are you sure you want to continue connecting (yes/no)? yes
   Warning: Permanently added '192.168.1.47' (RSA) to the list of known hosts.
   root@192.168.1.47's password:

Once on the target, use these steps to create a trace: ::

   root@crownbay:~# lttng create
   Session auto-20190303-021943 created.
   Traces will be written in /home/root/lttng-traces/auto-20190303-021943

Enable the events you want to trace (in this case all userspace events): ::

   root@crownbay:~# lttng enable-event --userspace --all
   All UST events are enabled in channel channel0
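If you only care about the test program's tracepoint, you could instead
enable it by name - a sketch, using the provider:event name that appears in
the trace output below: ::

   root@crownbay:~# lttng enable-event --userspace ust_tests_hello:tptest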
Start the trace: ::

   root@crownbay:~# lttng start
   Tracing started for session auto-20190303-021943

Run the instrumented hello world program: ::

   root@crownbay:~# ./hello
   Hello, World!
   Tracing... done.
And then stop the trace after a while, or after running a particular workload
that you want to trace: ::

   root@crownbay:~# lttng stop
   Tracing stopped for session auto-20190303-021943
You can now view the trace in text form on the target: ::

   root@crownbay:~# lttng view
   [02:31:14.906146544] (+?.?????????) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 0, intfield2 = 0x0, longfield = 0, netintfield = 0, netintfieldhex = 0x0, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 }
   [02:31:14.906170360] (+0.000023816) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 1, intfield2 = 0x1, longfield = 1, netintfield = 1, netintfieldhex = 0x1, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 }
   [02:31:14.906183140] (+0.000012780) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 2, intfield2 = 0x2, longfield = 2, netintfield = 2, netintfieldhex = 0x2, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 }
   [02:31:14.906194385] (+0.000011245) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 3, intfield2 = 0x3, longfield = 3, netintfield = 3, netintfieldhex = 0x3, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 }
   .
   .
   .
You can now safely destroy the trace session (note that this doesn't delete
the trace - it's still there in ~/lttng-traces): ::

   root@crownbay:~# lttng destroy
   Session auto-20190303-021943 destroyed at /home/root
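Since the trace is just a directory of CTF files, you can also copy it back
to the host and inspect it there. A minimal sketch, assuming the babeltrace
trace reader (which 'lttng view' uses under the covers) is installed on the
host: ::

   $ scp -r root@192.168.1.47:lttng-traces/auto-20190303-021943 .
   $ babeltrace auto-20190303-021943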
.. _lttng-documentation:
LTTng Documentation
-------------------

You can find the primary LTTng Documentation on the `LTTng
Documentation <https://lttng.org/docs/>`__ site. The documentation on
this site is appropriate for intermediate to advanced software
developers who are working in a Linux environment and are interested in
efficient software tracing.

For information on LTTng in general, visit the `LTTng
Project <http://lttng.org/lttng2.0>`__ site. You can find a "Getting
Started" link on this site that takes you to an LTTng Quick Start.
.. _profile-manual-blktrace:

blktrace
========

blktrace is a tool for tracing and reporting low-level disk I/O.
blktrace provides the tracing half of the equation; its output can be
piped into the blkparse program, which renders the data in a
human-readable form and does some basic analysis.
.. _blktrace-setup:

blktrace Setup
--------------

For this section, we'll assume you've already performed the basic setup
outlined in the ":ref:`profile-manual/profile-manual-intro:General Setup`"
section.

blktrace is an application that runs on the target system. You can run
the entire blktrace and blkparse pipeline on the target, or you can run
blktrace in 'listen' mode on the target and have blktrace and blkparse
collect and analyze the data on the host (see the
":ref:`profile-manual/profile-manual-usage:Using blktrace Remotely`" section
below). For the rest of this section we assume you've ssh'ed from the host
to the target and will be running blktrace on the target.
.. _blktrace-basic-usage:

Basic blktrace Usage
--------------------

To record a trace, simply run the 'blktrace' command, giving it the name
of the block device you want to trace activity on: ::

   root@crownbay:~# blktrace /dev/sdc
In another shell, execute a workload you want to trace: ::

   root@crownbay:/media/sdc# rm linux-2.6.19.2.tar.bz2; wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2; sync
   Connecting to downloads.yoctoproject.org (140.211.169.59:80)
   linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA
Press Ctrl-C in the blktrace shell to stop the trace. It
will display how many events were logged, along with the per-cpu file
sizes (blktrace records traces in per-cpu kernel buffers and simply
dumps them to userspace for blkparse to merge and sort later): ::

   ^C=== sdc ===
   CPU 0: 7082 events, 332 KiB data
   CPU 1: 1578 events, 74 KiB data
   Total: 8660 events (dropped 0), 406 KiB data
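If you'd rather not stop the trace by hand, blktrace can also stop itself
after a fixed interval via its -w option. For example, this sketch would
trace for 30 seconds and then exit on its own: ::

   root@crownbay:~# blktrace -w 30 /dev/sdc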
If you examine the files saved to disk, you see multiple files, one per CPU
and with the device name as the first part of the filename: ::

   root@crownbay:~# ls -al
   drwxr-xr-x 6 root root 1024 Oct 27 22:39 .
   drwxr-sr-x 4 root root 1024 Oct 26 18:24 ..
   -rw-r--r-- 1 root root 339938 Oct 27 22:40 sdc.blktrace.0
   -rw-r--r-- 1 root root 75753 Oct 27 22:40 sdc.blktrace.1
To view the trace events, simply invoke 'blkparse' in the directory
containing the trace files, giving it the device name that forms the
first part of the filenames: ::

   root@crownbay:~# blkparse sdc
   8,32 1 1 0.000000000 1225 Q WS 3417048 + 8 [jbd2/sdc-8]
   8,32 1 2 0.000025213 1225 G WS 3417048 + 8 [jbd2/sdc-8]
   8,32 1 3 0.000033384 1225 P N [jbd2/sdc-8]
   8,32 1 4 0.000043301 1225 I WS 3417048 + 8 [jbd2/sdc-8]
   8,32 1 0 0.000057270 0 m N cfq1225 insert_request
   8,32 1 0 0.000064813 0 m N cfq1225 add_to_rr
   8,32 1 5 0.000076336 1225 U N [jbd2/sdc-8] 1
   8,32 1 0 0.000088559 0 m N cfq workload slice:150
   8,32 1 0 0.000097359 0 m N cfq1225 set_active wl_prio:0 wl_type:1
   8,32 1 0 0.000104063 0 m N cfq1225 Not idling. st->count:1
   8,32 1 0 0.000112584 0 m N cfq1225 fifo= (null)
   8,32 1 0 0.000118730 0 m N cfq1225 dispatch_insert
   8,32 1 0 0.000127390 0 m N cfq1225 dispatched a request
   8,32 1 0 0.000133536 0 m N cfq1225 activate rq, drv=1
   8,32 1 6 0.000136889 1225 D WS 3417048 + 8 [jbd2/sdc-8]
   8,32 1 7 0.000360381 1225 Q WS 3417056 + 8 [jbd2/sdc-8]
   8,32 1 8 0.000377422 1225 G WS 3417056 + 8 [jbd2/sdc-8]
   8,32 1 9 0.000388876 1225 P N [jbd2/sdc-8]
   8,32 1 10 0.000397886 1225 Q WS 3417064 + 8 [jbd2/sdc-8]
   8,32 1 11 0.000404800 1225 M WS 3417064 + 8 [jbd2/sdc-8]
   8,32 1 12 0.000412343 1225 Q WS 3417072 + 8 [jbd2/sdc-8]
   8,32 1 13 0.000416533 1225 M WS 3417072 + 8 [jbd2/sdc-8]
   8,32 1 14 0.000422121 1225 Q WS 3417080 + 8 [jbd2/sdc-8]
   8,32 1 15 0.000425194 1225 M WS 3417080 + 8 [jbd2/sdc-8]
   8,32 1 16 0.000431968 1225 Q WS 3417088 + 8 [jbd2/sdc-8]
   8,32 1 17 0.000435251 1225 M WS 3417088 + 8 [jbd2/sdc-8]
   8,32 1 18 0.000440279 1225 Q WS 3417096 + 8 [jbd2/sdc-8]
   8,32 1 19 0.000443911 1225 M WS 3417096 + 8 [jbd2/sdc-8]
   8,32 1 20 0.000450336 1225 Q WS 3417104 + 8 [jbd2/sdc-8]
   8,32 1 21 0.000454038 1225 M WS 3417104 + 8 [jbd2/sdc-8]
   8,32 1 22 0.000462070 1225 Q WS 3417112 + 8 [jbd2/sdc-8]
   8,32 1 23 0.000465422 1225 M WS 3417112 + 8 [jbd2/sdc-8]
   8,32 1 24 0.000474222 1225 I WS 3417056 + 64 [jbd2/sdc-8]
   8,32 1 0 0.000483022 0 m N cfq1225 insert_request
   8,32 1 25 0.000489727 1225 U N [jbd2/sdc-8] 1
   8,32 1 0 0.000498457 0 m N cfq1225 Not idling. st->count:1
   8,32 1 0 0.000503765 0 m N cfq1225 dispatch_insert
   8,32 1 0 0.000512914 0 m N cfq1225 dispatched a request
   8,32 1 0 0.000518851 0 m N cfq1225 activate rq, drv=2
   .
   .
   .
   8,32 0 0 58.515006138 0 m N cfq3551 complete rqnoidle 1
   8,32 0 2024 58.516603269 3 C WS 3156992 + 16 [0]
   8,32 0 0 58.516626736 0 m N cfq3551 complete rqnoidle 1
   8,32 0 0 58.516634558 0 m N cfq3551 arm_idle: 8 group_idle: 0
   8,32 0 0 58.516636933 0 m N cfq schedule dispatch
   8,32 1 0 58.516971613 0 m N cfq3551 slice expired t=0
   8,32 1 0 58.516982089 0 m N cfq3551 sl_used=13 disp=6 charge=13 iops=0 sect=80
   8,32 1 0 58.516985511 0 m N cfq3551 del_from_rr
   8,32 1 0 58.516990819 0 m N cfq3551 put_queue
   CPU0 (sdc):
    Reads Queued: 0, 0KiB  Writes Queued: 331, 26,284KiB
    Read Dispatches: 0, 0KiB  Write Dispatches: 485, 40,484KiB
    Reads Requeued: 0  Writes Requeued: 0
    Reads Completed: 0, 0KiB  Writes Completed: 511, 41,000KiB
    Read Merges: 0, 0KiB  Write Merges: 13, 160KiB
    Read depth: 0  Write depth: 2
    IO unplugs: 23  Timer unplugs: 0
   CPU1 (sdc):
    Reads Queued: 0, 0KiB  Writes Queued: 249, 15,800KiB
    Read Dispatches: 0, 0KiB  Write Dispatches: 42, 1,600KiB
    Reads Requeued: 0  Writes Requeued: 0
    Reads Completed: 0, 0KiB  Writes Completed: 16, 1,084KiB
    Read Merges: 0, 0KiB  Write Merges: 40, 276KiB
    Read depth: 0  Write depth: 2
    IO unplugs: 30  Timer unplugs: 1
   Total (sdc):
    Reads Queued: 0, 0KiB  Writes Queued: 580, 42,084KiB
    Read Dispatches: 0, 0KiB  Write Dispatches: 527, 42,084KiB
    Reads Requeued: 0  Writes Requeued: 0
    Reads Completed: 0, 0KiB  Writes Completed: 527, 42,084KiB
    Read Merges: 0, 0KiB  Write Merges: 53, 436KiB
    IO unplugs: 53  Timer unplugs: 1

   Throughput (R/W): 0KiB/s / 719KiB/s
   Events (sdc): 6,592 entries
   Skips: 0 forward (0 - 0.0%)
   Input file sdc.blktrace.0 added
   Input file sdc.blktrace.1 added
The report shows each event that was
found in the blktrace data, along with a summary of the overall block
I/O traffic during the run. You can look at the
`blkparse <http://linux.die.net/man/1/blkparse>`__ manpage to learn the
meaning of each field displayed in the trace listing.
.. _blktrace-live-mode:

Live Mode
~~~~~~~~~

blktrace and blkparse are designed from the ground up to be able to
operate together in a 'pipe mode' where the stdout of blktrace can be
fed directly into the stdin of blkparse: ::

   root@crownbay:~# blktrace /dev/sdc -o - | blkparse -i -
This enables long-lived tracing sessions
to run without writing anything to disk, and allows the user to look for
certain conditions in the trace data in 'real-time' by viewing the trace
output as it scrolls by on the screen, or by passing it along to yet
another program in the pipeline, such as grep, to identify and capture
conditions of interest.
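For example, here's a sketch of such a pipeline that watches only the 'D'
(dispatch) events as they occur; the grep pattern is illustrative, not part
of the original example: ::

   root@crownbay:~# blktrace /dev/sdc -o - | blkparse -i - | grep ' D '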
There's actually another blktrace command that implements the above
pipeline as a single command, so the user doesn't have to bother typing
in the above command sequence: ::

   root@crownbay:~# btrace /dev/sdc
Using blktrace Remotely
~~~~~~~~~~~~~~~~~~~~~~~

blktrace traces block I/O while normally writing its trace data to a block
device, and it's generally a bad idea to make the device being traced the
same as the device the tracer writes to. To let you trace without perturbing
the traced device at all, blktrace provides native support for sending all
trace data over the network.
To have blktrace operate in this mode, start blktrace on the target
system being traced with the -l option, along with the device to trace: ::

   root@crownbay:~# blktrace -l /dev/sdc
   server: waiting for connections...

On the host system, use the -h option to connect to the target system,
also passing it the device to trace: ::

   $ blktrace -d /dev/sdc -h 192.168.1.43
   blktrace: connecting to 192.168.1.43
   blktrace: connected!

On the target system, you should see this: ::

   server: connection from 192.168.1.43

In another shell, execute a workload you want to trace: ::

   root@crownbay:/media/sdc# rm linux-2.6.19.2.tar.bz2; wget http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2; sync
   Connecting to downloads.yoctoproject.org (140.211.169.59:80)
   linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA
When it's done, do a Ctrl-C on the host system to stop the
trace: ::

   ^C=== sdc ===
   CPU 0: 7691 events, 361 KiB data
   CPU 1: 4109 events, 193 KiB data
   Total: 11800 events (dropped 0), 554 KiB data

On the target system, you should also see a trace summary for the trace
just ended: ::

   server: end of run for 192.168.1.43:sdc
   === sdc ===
   CPU 0: 7691 events, 361 KiB data
   CPU 1: 4109 events, 193 KiB data
   Total: 11800 events (dropped 0), 554 KiB data
The blktrace instance on the host will
save the target output inside a hostname-timestamp directory: ::

   $ ls -al
   drwxr-xr-x 10 root root 1024 Oct 28 02:40 .
   drwxr-sr-x 4 root root 1024 Oct 26 18:24 ..
   drwxr-xr-x 2 root root 1024 Oct 28 02:40 192.168.1.43-2012-10-28-02:40:56

cd into that directory to see the output files: ::

   $ ls -l
   -rw-r--r-- 1 root root 369193 Oct 28 02:44 sdc.blktrace.0
   -rw-r--r-- 1 root root 197278 Oct 28 02:44 sdc.blktrace.1
And run blkparse on the host system using the device name: ::

   $ blkparse sdc
   8,32 1 1 0.000000000 1263 Q RM 6016 + 8 [ls]
   8,32 1 0 0.000036038 0 m N cfq1263 alloced
   8,32 1 2 0.000039390 1263 G RM 6016 + 8 [ls]
   8,32 1 3 0.000049168 1263 I RM 6016 + 8 [ls]
   8,32 1 0 0.000056152 0 m N cfq1263 insert_request
   8,32 1 0 0.000061600 0 m N cfq1263 add_to_rr
   8,32 1 0 0.000075498 0 m N cfq workload slice:300
   .
   .
   .
   8,32 0 0 177.266385696 0 m N cfq1267 arm_idle: 8 group_idle: 0
   8,32 0 0 177.266388140 0 m N cfq schedule dispatch
   8,32 1 0 177.266679239 0 m N cfq1267 slice expired t=0
   8,32 1 0 177.266689297 0 m N cfq1267 sl_used=9 disp=6 charge=9 iops=0 sect=56
   8,32 1 0 177.266692649 0 m N cfq1267 del_from_rr
   8,32 1 0 177.266696560 0 m N cfq1267 put_queue
   CPU0 (sdc):
    Reads Queued: 0, 0KiB  Writes Queued: 270, 21,708KiB
    Read Dispatches: 59, 2,628KiB  Write Dispatches: 495, 39,964KiB
    Reads Requeued: 0  Writes Requeued: 0
    Reads Completed: 90, 2,752KiB  Writes Completed: 543, 41,596KiB
    Read Merges: 0, 0KiB  Write Merges: 9, 344KiB
    Read depth: 2  Write depth: 2
    IO unplugs: 20  Timer unplugs: 1
   CPU1 (sdc):
    Reads Queued: 688, 2,752KiB  Writes Queued: 381, 20,652KiB
    Read Dispatches: 31, 124KiB  Write Dispatches: 59, 2,396KiB
    Reads Requeued: 0  Writes Requeued: 0
    Reads Completed: 0, 0KiB  Writes Completed: 11, 764KiB
    Read Merges: 598, 2,392KiB  Write Merges: 88, 448KiB
    Read depth: 2  Write depth: 2
    IO unplugs: 52  Timer unplugs: 0
   Total (sdc):
    Reads Queued: 688, 2,752KiB  Writes Queued: 651, 42,360KiB
    Read Dispatches: 90, 2,752KiB  Write Dispatches: 554, 42,360KiB
    Reads Requeued: 0  Writes Requeued: 0
    Reads Completed: 90, 2,752KiB  Writes Completed: 554, 42,360KiB
    Read Merges: 598, 2,392KiB  Write Merges: 97, 792KiB
    IO unplugs: 72  Timer unplugs: 1

   Throughput (R/W): 15KiB/s / 238KiB/s
   Events (sdc): 9,301 entries
   Skips: 0 forward (0 - 0.0%)
You should see the trace events and summary just as you would have if you'd
run the same command on the target.
Tracing Block I/O via 'ftrace'
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It's also possible to trace block I/O using only
:ref:`profile-manual/profile-manual-usage:The 'trace events' Subsystem`, which
can be useful for casual tracing if you don't want to bother dealing with the
userspace tools.
To enable tracing for a given device, use /sys/block/xxx/trace/enable,
where xxx is the device name. This for example enables tracing for
/dev/sdc: ::

   root@crownbay:/sys/kernel/debug/tracing# echo 1 > /sys/block/sdc/trace/enable
Once you've selected the device(s) you want to trace, select the 'blk'
tracer to turn block tracing on: ::

   root@crownbay:/sys/kernel/debug/tracing# cat available_tracers
   blk function_graph function nop

   root@crownbay:/sys/kernel/debug/tracing# echo blk > current_tracer
Execute the workload you're interested in: ::

   root@crownbay:/sys/kernel/debug/tracing# cat /media/sdc/testfile.txt
And look at the output (note that here we're reading from 'trace_pipe'
instead of 'trace' to capture this trace - trace_pipe allows us to wait on
the pipe for data to appear): ::

   root@crownbay:/sys/kernel/debug/tracing# cat trace_pipe
   cat-3587 [001] d..1 3023.276361: 8,32 Q R 1699848 + 8 [cat]
   cat-3587 [001] d..1 3023.276410: 8,32 m N cfq3587 alloced
   cat-3587 [001] d..1 3023.276415: 8,32 G R 1699848 + 8 [cat]
   cat-3587 [001] d..1 3023.276424: 8,32 P N [cat]
   cat-3587 [001] d..2 3023.276432: 8,32 I R 1699848 + 8 [cat]
   cat-3587 [001] d..1 3023.276439: 8,32 m N cfq3587 insert_request
   cat-3587 [001] d..1 3023.276445: 8,32 m N cfq3587 add_to_rr
   cat-3587 [001] d..2 3023.276454: 8,32 U N [cat] 1
   cat-3587 [001] d..1 3023.276464: 8,32 m N cfq workload slice:150
   cat-3587 [001] d..1 3023.276471: 8,32 m N cfq3587 set_active wl_prio:0 wl_type:2
   cat-3587 [001] d..1 3023.276478: 8,32 m N cfq3587 fifo= (null)
   cat-3587 [001] d..1 3023.276483: 8,32 m N cfq3587 dispatch_insert
   cat-3587 [001] d..1 3023.276490: 8,32 m N cfq3587 dispatched a request
   cat-3587 [001] d..1 3023.276497: 8,32 m N cfq3587 activate rq, drv=1
   cat-3587 [001] d..2 3023.276500: 8,32 D R 1699848 + 8 [cat]
And this turns off tracing for the specified device: ::

   root@crownbay:/sys/kernel/debug/tracing# echo 0 > /sys/block/sdc/trace/enable
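As a cleanup step (not shown in the transcript above), you may also want to
switch the current tracer back to 'nop' once you're done: ::

   root@crownbay:/sys/kernel/debug/tracing# echo nop > current_tracer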
.. _blktrace-documentation:

blktrace Documentation
----------------------

Online versions of the man pages for the commands discussed in this
section can be found here:

- http://linux.die.net/man/8/blktrace

- http://linux.die.net/man/1/blkparse

- http://linux.die.net/man/8/btrace

The above manpages, along with manpages for the other blktrace utilities
(btt, blkiomon, etc.) can be found in the /doc directory of the blktrace
tools git repo: ::

   $ git clone git://git.kernel.dk/blktrace.git
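As a pointer for further exploration, note that btt consumes a binary dump
rather than blkparse's text output. A minimal sketch of that workflow, run
in a directory containing the sdc.blktrace.* files (the .bin filename is
arbitrary): ::

   $ blkparse -i sdc -d sdc.blktrace.bin
   $ btt -i sdc.blktrace.bin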