573 lines
40 KiB
HTML
573 lines
40 KiB
HTML
<!DOCTYPE html>
|
||
|
||
<html lang="en" data-content_root="../">
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
<meta property="og:title" content="Python support for the Linux perf profiler" />
|
||
<meta property="og:type" content="website" />
|
||
<meta property="og:url" content="https://docs.python.org/3/howto/perf_profiling.html" />
|
||
<meta property="og:site_name" content="Python documentation" />
|
||
<meta property="og:description" content="author, Pablo Galindo,. The Linux perf profiler is a very powerful tool that allows you to profile and obtain information about the performance of your application. perf also has a very vibrant eco..." />
|
||
<meta property="og:image" content="https://docs.python.org/3/_static/og-image.png" />
|
||
<meta property="og:image:alt" content="Python documentation" />
|
||
<meta name="description" content="author, Pablo Galindo,. The Linux perf profiler is a very powerful tool that allows you to profile and obtain information about the performance of your application. perf also has a very vibrant eco..." />
|
||
<meta property="og:image:width" content="200">
|
||
<meta property="og:image:height" content="200">
|
||
<meta name="theme-color" content="#3776ab">
|
||
|
||
<title>Python support for the Linux perf profiler — Python 3.13.3 documentation</title><meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
|
||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/pydoctheme.css?v=23252803" />
|
||
<link id="pygments_dark_css" media="(prefers-color-scheme: dark)" rel="stylesheet" type="text/css" href="../_static/pygments_dark.css?v=5349f25f" />
|
||
|
||
<script src="../_static/documentation_options.js?v=5d57ca2d"></script>
|
||
<script src="../_static/doctools.js?v=9bcbadda"></script>
|
||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
|
||
<script src="../_static/sidebar.js"></script>
|
||
|
||
<link rel="search" type="application/opensearchdescription+xml"
|
||
title="Search within Python 3.13.3 documentation"
|
||
href="../_static/opensearch.xml"/>
|
||
<link rel="author" title="About these documents" href="../about.html" />
|
||
<link rel="index" title="Index" href="../genindex.html" />
|
||
<link rel="search" title="Search" href="../search.html" />
|
||
<link rel="copyright" title="Copyright" href="../copyright.html" />
|
||
<link rel="next" title="Annotations Best Practices" href="annotations.html" />
|
||
<link rel="prev" title="Instrumenting CPython with DTrace and SystemTap" href="instrumentation.html" />
|
||
|
||
<link rel="canonical" href="https://docs.python.org/3/howto/perf_profiling.html">
|
||
|
||
|
||
|
||
|
||
|
||
<style>
|
||
@media only screen {
|
||
table.full-width-table {
|
||
width: 100%;
|
||
}
|
||
}
|
||
</style>
|
||
<link rel="stylesheet" href="../_static/pydoctheme_dark.css" media="(prefers-color-scheme: dark)" id="pydoctheme_dark_css">
|
||
<link rel="shortcut icon" type="image/png" href="../_static/py.svg" />
|
||
<script type="text/javascript" src="../_static/copybutton.js"></script>
|
||
<script type="text/javascript" src="../_static/menu.js"></script>
|
||
<script type="text/javascript" src="../_static/search-focus.js"></script>
|
||
<script type="text/javascript" src="../_static/themetoggle.js"></script>
|
||
<script type="text/javascript" src="../_static/rtd_switcher.js"></script>
|
||
<meta name="readthedocs-addons-api-version" content="1">
|
||
|
||
</head>
|
||
<body>
|
||
<div class="mobile-nav">
|
||
<input type="checkbox" id="menuToggler" class="toggler__input" aria-controls="navigation"
|
||
aria-pressed="false" aria-expanded="false" role="button" aria-label="Menu" />
|
||
<nav class="nav-content" role="navigation">
|
||
<label for="menuToggler" class="toggler__label">
|
||
<span></span>
|
||
</label>
|
||
<span class="nav-items-wrapper">
|
||
<a href="https://www.python.org/" class="nav-logo">
|
||
<img src="../_static/py.svg" alt="Python logo"/>
|
||
</a>
|
||
<span class="version_switcher_placeholder"></span>
|
||
<form role="search" class="search" action="../search.html" method="get">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" class="search-icon">
|
||
<path fill-rule="nonzero" fill="currentColor" d="M15.5 14h-.79l-.28-.27a6.5 6.5 0 001.48-5.34c-.47-2.78-2.79-5-5.59-5.34a6.505 6.505 0 00-7.27 7.27c.34 2.8 2.56 5.12 5.34 5.59a6.5 6.5 0 005.34-1.48l.27.28v.79l4.25 4.25c.41.41 1.08.41 1.49 0 .41-.41.41-1.08 0-1.49L15.5 14zm-6 0C7.01 14 5 11.99 5 9.5S7.01 5 9.5 5 14 7.01 14 9.5 11.99 14 9.5 14z"></path>
|
||
</svg>
|
||
<input placeholder="Quick search" aria-label="Quick search" type="search" name="q" />
|
||
<input type="submit" value="Go"/>
|
||
</form>
|
||
</span>
|
||
</nav>
|
||
<div class="menu-wrapper">
|
||
<nav class="menu" role="navigation" aria-label="main navigation">
|
||
<div class="language_switcher_placeholder"></div>
|
||
|
||
<label class="theme-selector-label">
|
||
Theme
|
||
<select class="theme-selector" oninput="activateTheme(this.value)">
|
||
<option value="auto" selected>Auto</option>
|
||
<option value="light">Light</option>
|
||
<option value="dark">Dark</option>
|
||
</select>
|
||
</label>
|
||
<div>
|
||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||
<ul>
|
||
<li><a class="reference internal" href="#">Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler</a><ul>
|
||
<li><a class="reference internal" href="#how-to-enable-perf-profiling-support">How to enable <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiling support</a></li>
|
||
<li><a class="reference internal" href="#how-to-obtain-the-best-results">How to obtain the best results</a></li>
|
||
<li><a class="reference internal" href="#how-to-work-without-frame-pointers">How to work without frame pointers</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
</div>
|
||
<div>
|
||
<h4>Previous topic</h4>
|
||
<p class="topless"><a href="instrumentation.html"
|
||
title="previous chapter">Instrumenting CPython with DTrace and SystemTap</a></p>
|
||
</div>
|
||
<div>
|
||
<h4>Next topic</h4>
|
||
<p class="topless"><a href="annotations.html"
|
||
title="next chapter">Annotations Best Practices</a></p>
|
||
</div>
|
||
<div role="note" aria-label="source link">
|
||
<h3>This Page</h3>
|
||
<ul class="this-page-menu">
|
||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||
<li>
|
||
<a href="https://github.com/python/cpython/blob/main/Doc/howto/perf_profiling.rst"
|
||
rel="nofollow">Show Source
|
||
</a>
|
||
</li>
|
||
</ul>
|
||
</div>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<div class="related" role="navigation" aria-label="Related">
|
||
<h3>Navigation</h3>
|
||
<ul>
|
||
<li class="right" style="margin-right: 10px">
|
||
<a href="../genindex.html" title="General Index"
|
||
accesskey="I">index</a></li>
|
||
<li class="right" >
|
||
<a href="../py-modindex.html" title="Python Module Index"
|
||
>modules</a> |</li>
|
||
<li class="right" >
|
||
<a href="annotations.html" title="Annotations Best Practices"
|
||
accesskey="N">next</a> |</li>
|
||
<li class="right" >
|
||
<a href="instrumentation.html" title="Instrumenting CPython with DTrace and SystemTap"
|
||
accesskey="P">previous</a> |</li>
|
||
|
||
<li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
|
||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||
<li class="switchers">
|
||
<div class="language_switcher_placeholder"></div>
|
||
<div class="version_switcher_placeholder"></div>
|
||
</li>
|
||
<li>
|
||
|
||
</li>
|
||
<li id="cpython-language-and-version">
|
||
<a href="../index.html">3.13.3 Documentation</a> »
|
||
</li>
|
||
|
||
<li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
|
||
<li class="nav-item nav-item-this"><a href="">Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler</a></li>
|
||
<li class="right">
|
||
|
||
|
||
<div class="inline-search" role="search">
|
||
<form class="inline-search" action="../search.html" method="get">
|
||
<input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
|
||
<input type="submit" value="Go" />
|
||
</form>
|
||
</div>
|
||
|
|
||
</li>
|
||
<li class="right">
|
||
<label class="theme-selector-label">
|
||
Theme
|
||
<select class="theme-selector" oninput="activateTheme(this.value)">
|
||
<option value="auto" selected>Auto</option>
|
||
<option value="light">Light</option>
|
||
<option value="dark">Dark</option>
|
||
</select>
|
||
</label> |</li>
|
||
|
||
</ul>
|
||
</div>
|
||
|
||
<div class="document">
|
||
<div class="documentwrapper">
|
||
<div class="bodywrapper">
|
||
<div class="body" role="main">
|
||
|
||
<section id="python-support-for-the-linux-perf-profiler">
|
||
<span id="perf-profiling"></span><h1>Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler<a class="headerlink" href="#python-support-for-the-linux-perf-profiler" title="Link to this heading">¶</a></h1>
|
||
<dl class="field-list simple">
|
||
<dt class="field-odd">author<span class="colon">:</span></dt>
|
||
<dd class="field-odd"><p>Pablo Galindo</p>
|
||
</dd>
|
||
</dl>
|
||
<p><a class="reference external" href="https://perf.wiki.kernel.org">The Linux perf profiler</a>
|
||
is a very powerful tool that allows you to profile and obtain
|
||
information about the performance of your application.
|
||
<code class="docutils literal notranslate"><span class="pre">perf</span></code> also has a very vibrant ecosystem of tools
|
||
that aid with the analysis of the data that it produces.</p>
|
||
<p>The main problem with using the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler with Python applications is that
|
||
<code class="docutils literal notranslate"><span class="pre">perf</span></code> only gets information about native symbols, that is, the names of
|
||
functions and procedures written in C. This means that the names and file names
|
||
of Python functions in your code will not appear in the output of <code class="docutils literal notranslate"><span class="pre">perf</span></code>.</p>
|
||
<p>Since Python 3.12, the interpreter can run in a special mode that allows Python
|
||
functions to appear in the output of the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler. When this mode is
|
||
enabled, the interpreter will interpose a small piece of code compiled on the
|
||
fly before the execution of every Python function and it will teach <code class="docutils literal notranslate"><span class="pre">perf</span></code> the
|
||
relationship between this piece of code and the associated Python function using
|
||
<a class="reference internal" href="../c-api/perfmaps.html"><span class="doc">perf map files</span></a>.</p>
|
||
<div class="admonition note">
|
||
<p class="admonition-title">Note</p>
|
||
<p>Support for the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler is currently only available for Linux on
|
||
select architectures. Check the output of the <code class="docutils literal notranslate"><span class="pre">configure</span></code> build step or
|
||
check the output of <code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">-m</span> <span class="pre">sysconfig</span> <span class="pre">|</span> <span class="pre">grep</span> <span class="pre">HAVE_PERF_TRAMPOLINE</span></code>
|
||
to see if your system is supported.</p>
|
||
</div>
|
||
<p>For example, consider the following script:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">foo</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
||
<span class="n">result</span> <span class="o">=</span> <span class="mi">0</span>
|
||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
||
<span class="n">result</span> <span class="o">+=</span> <span class="mi">1</span>
|
||
<span class="k">return</span> <span class="n">result</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">bar</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
||
<span class="n">foo</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">baz</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
||
<span class="n">bar</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
|
||
|
||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
|
||
<span class="n">baz</span><span class="p">(</span><span class="mi">1000000</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>We can run <code class="docutils literal notranslate"><span class="pre">perf</span></code> to sample CPU stack traces at 9999 hertz:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>my_script.py
|
||
</pre></div>
|
||
</div>
|
||
<p>Then we can use <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">report</span></code> to analyze the data:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>--stdio<span class="w"> </span>-n<span class="w"> </span>-g
|
||
|
||
<span class="gp"># </span>Children<span class="w"> </span>Self<span class="w"> </span>Samples<span class="w"> </span>Command<span class="w"> </span>Shared<span class="w"> </span>Object<span class="w"> </span>Symbol
|
||
<span class="gp"># </span>........<span class="w"> </span>........<span class="w"> </span>............<span class="w"> </span>..........<span class="w"> </span>..................<span class="w"> </span>..........................................
|
||
<span class="gp">#</span>
|
||
<span class="go"> 91.08% 0.00% 0 python.exe python.exe [.] _start</span>
|
||
<span class="go"> |</span>
|
||
<span class="go"> ---_start</span>
|
||
<span class="go"> |</span>
|
||
<span class="go"> --90.71%--__libc_start_main</span>
|
||
<span class="go"> Py_BytesMain</span>
|
||
<span class="go"> |</span>
|
||
<span class="go"> |--56.88%--pymain_run_python.constprop.0</span>
|
||
<span class="go"> | |</span>
|
||
<span class="go"> | |--56.13%--_PyRun_AnyFileObject</span>
|
||
<span class="go"> | | _PyRun_SimpleFileObject</span>
|
||
<span class="go"> | | |</span>
|
||
<span class="go"> | | |--55.02%--run_mod</span>
|
||
<span class="go"> | | | |</span>
|
||
<span class="go"> | | | --54.65%--PyEval_EvalCode</span>
|
||
<span class="go"> | | | _PyEval_EvalFrameDefault</span>
|
||
<span class="go"> | | | PyObject_Vectorcall</span>
|
||
<span class="go"> | | | _PyEval_Vector</span>
|
||
<span class="go"> | | | _PyEval_EvalFrameDefault</span>
|
||
<span class="go"> | | | PyObject_Vectorcall</span>
|
||
<span class="go"> | | | _PyEval_Vector</span>
|
||
<span class="go"> | | | _PyEval_EvalFrameDefault</span>
|
||
<span class="go"> | | | PyObject_Vectorcall</span>
|
||
<span class="go"> | | | _PyEval_Vector</span>
|
||
<span class="go"> | | | |</span>
|
||
<span class="go"> | | | |--51.67%--_PyEval_EvalFrameDefault</span>
|
||
<span class="go"> | | | | |</span>
|
||
<span class="go"> | | | | |--11.52%--_PyLong_Add</span>
|
||
<span class="go"> | | | | | |</span>
|
||
<span class="go"> | | | | | |--2.97%--_PyObject_Malloc</span>
|
||
<span class="go">...</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>As you can see, the Python functions are not shown in the output, only <code class="docutils literal notranslate"><span class="pre">_PyEval_EvalFrameDefault</span></code>
|
||
(the function that evaluates the Python bytecode) shows up. Unfortunately that’s not very useful because all Python
|
||
functions use the same C function to evaluate bytecode so we cannot know which Python function corresponds to which
|
||
bytecode-evaluating function.</p>
|
||
<p>Instead, if we run the same experiment with <code class="docutils literal notranslate"><span class="pre">perf</span></code> support enabled we get:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>--stdio<span class="w"> </span>-n<span class="w"> </span>-g
|
||
|
||
<span class="gp"># </span>Children<span class="w"> </span>Self<span class="w"> </span>Samples<span class="w"> </span>Command<span class="w"> </span>Shared<span class="w"> </span>Object<span class="w"> </span>Symbol
|
||
<span class="gp"># </span>........<span class="w"> </span>........<span class="w"> </span>............<span class="w"> </span>..........<span class="w"> </span>..................<span class="w"> </span>.....................................................................
|
||
<span class="gp">#</span>
|
||
<span class="go"> 90.58% 0.36% 1 python.exe python.exe [.] _start</span>
|
||
<span class="go"> |</span>
|
||
<span class="go"> ---_start</span>
|
||
<span class="go"> |</span>
|
||
<span class="go"> --89.86%--__libc_start_main</span>
|
||
<span class="go"> Py_BytesMain</span>
|
||
<span class="go"> |</span>
|
||
<span class="go"> |--55.43%--pymain_run_python.constprop.0</span>
|
||
<span class="go"> | |</span>
|
||
<span class="go"> | |--54.71%--_PyRun_AnyFileObject</span>
|
||
<span class="go"> | | _PyRun_SimpleFileObject</span>
|
||
<span class="go"> | | |</span>
|
||
<span class="go"> | | |--53.62%--run_mod</span>
|
||
<span class="go"> | | | |</span>
|
||
<span class="go"> | | | --53.26%--PyEval_EvalCode</span>
|
||
<span class="go"> | | | py::<module>:/src/script.py</span>
|
||
<span class="go"> | | | _PyEval_EvalFrameDefault</span>
|
||
<span class="go"> | | | PyObject_Vectorcall</span>
|
||
<span class="go"> | | | _PyEval_Vector</span>
|
||
<span class="go"> | | | py::baz:/src/script.py</span>
|
||
<span class="go"> | | | _PyEval_EvalFrameDefault</span>
|
||
<span class="go"> | | | PyObject_Vectorcall</span>
|
||
<span class="go"> | | | _PyEval_Vector</span>
|
||
<span class="go"> | | | py::bar:/src/script.py</span>
|
||
<span class="go"> | | | _PyEval_EvalFrameDefault</span>
|
||
<span class="go"> | | | PyObject_Vectorcall</span>
|
||
<span class="go"> | | | _PyEval_Vector</span>
|
||
<span class="go"> | | | py::foo:/src/script.py</span>
|
||
<span class="go"> | | | |</span>
|
||
<span class="go"> | | | |--51.81%--_PyEval_EvalFrameDefault</span>
|
||
<span class="go"> | | | | |</span>
|
||
<span class="go"> | | | | |--13.77%--_PyLong_Add</span>
|
||
<span class="go"> | | | | | |</span>
|
||
<span class="go"> | | | | | |--3.26%--_PyObject_Malloc</span>
|
||
</pre></div>
|
||
</div>
|
||
<section id="how-to-enable-perf-profiling-support">
|
||
<h2>How to enable <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiling support<a class="headerlink" href="#how-to-enable-perf-profiling-support" title="Link to this heading">¶</a></h2>
|
||
<p><code class="docutils literal notranslate"><span class="pre">perf</span></code> profiling support can be enabled either from the start using
|
||
the environment variable <span class="target" id="index-0"></span><a class="reference internal" href="../using/cmdline.html#envvar-PYTHONPERFSUPPORT"><code class="xref std std-envvar docutils literal notranslate"><span class="pre">PYTHONPERFSUPPORT</span></code></a> or the
|
||
<a class="reference internal" href="../using/cmdline.html#cmdoption-X"><code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span> <span class="pre">perf</span></code></a> option,
|
||
or dynamically using <a class="reference internal" href="../library/sys.html#sys.activate_stack_trampoline" title="sys.activate_stack_trampoline"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.activate_stack_trampoline()</span></code></a> and
|
||
<a class="reference internal" href="../library/sys.html#sys.deactivate_stack_trampoline" title="sys.deactivate_stack_trampoline"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.deactivate_stack_trampoline()</span></code></a>.</p>
|
||
<p>The <code class="xref py py-mod docutils literal notranslate"><span class="pre">sys</span></code> functions take precedence over the <code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span></code> option,
|
||
the <code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span></code> option takes precedence over the environment variable.</p>
|
||
<p>Example, using the environment variable:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nv">PYTHONPERFSUPPORT</span><span class="o">=</span><span class="m">1</span><span class="w"> </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>my_script.py
|
||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.data
|
||
</pre></div>
|
||
</div>
|
||
<p>Example, using the <code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span></code> option:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>-X<span class="w"> </span>perf<span class="w"> </span>my_script.py
|
||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.data
|
||
</pre></div>
|
||
</div>
|
||
<p>Example, using the <a class="reference internal" href="../library/sys.html#module-sys" title="sys: Access system-specific parameters and functions."><code class="xref py py-mod docutils literal notranslate"><span class="pre">sys</span></code></a> APIs in file <code class="file docutils literal notranslate"><span class="pre">example.py</span></code>:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">sys</span>
|
||
|
||
<span class="n">sys</span><span class="o">.</span><span class="n">activate_stack_trampoline</span><span class="p">(</span><span class="s2">"perf"</span><span class="p">)</span>
|
||
<span class="n">do_profiled_stuff</span><span class="p">()</span>
|
||
<span class="n">sys</span><span class="o">.</span><span class="n">deactivate_stack_trampoline</span><span class="p">()</span>
|
||
|
||
<span class="n">non_profiled_stuff</span><span class="p">()</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>…then:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>./example.py
|
||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.data
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="how-to-obtain-the-best-results">
|
||
<h2>How to obtain the best results<a class="headerlink" href="#how-to-obtain-the-best-results" title="Link to this heading">¶</a></h2>
|
||
<p>For best results, Python should be compiled with
|
||
<code class="docutils literal notranslate"><span class="pre">CFLAGS="-fno-omit-frame-pointer</span> <span class="pre">-mno-omit-leaf-frame-pointer"</span></code> as this allows
|
||
profilers to unwind using only the frame pointer and not on DWARF debug
|
||
information. This is because as the code that is interposed to allow <code class="docutils literal notranslate"><span class="pre">perf</span></code>
|
||
support is dynamically generated it doesn’t have any DWARF debugging information
|
||
available.</p>
|
||
<p>You can check if your system has been compiled with this flag by running:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>sysconfig<span class="w"> </span><span class="p">|</span><span class="w"> </span>grep<span class="w"> </span><span class="s1">'no-omit-frame-pointer'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>If you don’t see any output it means that your interpreter has not been compiled with
|
||
frame pointers and therefore it may not be able to show Python functions in the output
|
||
of <code class="docutils literal notranslate"><span class="pre">perf</span></code>.</p>
|
||
</section>
|
||
<section id="how-to-work-without-frame-pointers">
|
||
<h2>How to work without frame pointers<a class="headerlink" href="#how-to-work-without-frame-pointers" title="Link to this heading">¶</a></h2>
|
||
<p>If you are working with a Python interpreter that has been compiled without
|
||
frame pointers, you can still use the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler, but the overhead will be
|
||
a bit higher because Python needs to generate unwinding information for every
|
||
Python function call on the fly. Additionally, <code class="docutils literal notranslate"><span class="pre">perf</span></code> will take more time to
|
||
process the data because it will need to use the DWARF debugging information to
|
||
unwind the stack and this is a slow process.</p>
|
||
<p>To enable this mode, you can use the environment variable
|
||
<span class="target" id="index-1"></span><a class="reference internal" href="../using/cmdline.html#envvar-PYTHON_PERF_JIT_SUPPORT"><code class="xref std std-envvar docutils literal notranslate"><span class="pre">PYTHON_PERF_JIT_SUPPORT</span></code></a> or the <a class="reference internal" href="../using/cmdline.html#cmdoption-X"><code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span> <span class="pre">perf_jit</span></code></a> option,
|
||
which will enable the JIT mode for the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler.</p>
|
||
<div class="admonition note">
|
||
<p class="admonition-title">Note</p>
|
||
<p>Due to a bug in the <code class="docutils literal notranslate"><span class="pre">perf</span></code> tool, only <code class="docutils literal notranslate"><span class="pre">perf</span></code> versions higher than v6.8
|
||
will work with the JIT mode. The fix was also backported to the v6.7.2
|
||
version of the tool.</p>
|
||
<p>Note that when checking the version of the <code class="docutils literal notranslate"><span class="pre">perf</span></code> tool (which can be done
|
||
by running <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">version</span></code>) you must take into account that some distros
|
||
add some custom version numbers including a <code class="docutils literal notranslate"><span class="pre">-</span></code> character. This means
|
||
that <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">6.7-3</span></code> is not necessarily <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">6.7.3</span></code>.</p>
|
||
</div>
|
||
<p>When using the perf JIT mode, you need an extra step before you can run <code class="docutils literal notranslate"><span class="pre">perf</span>
|
||
<span class="pre">report</span></code>. You need to call the <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">inject</span></code> command to inject the JIT
|
||
information into the <code class="docutils literal notranslate"><span class="pre">perf.data</span></code> file.:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-k<span class="w"> </span><span class="m">1</span><span class="w"> </span>--call-graph<span class="w"> </span>dwarf<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>-Xperf_jit<span class="w"> </span>my_script.py
|
||
<span class="gp">$ </span>perf<span class="w"> </span>inject<span class="w"> </span>-i<span class="w"> </span>perf.data<span class="w"> </span>--jit<span class="w"> </span>--output<span class="w"> </span>perf.jit.data
|
||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.jit.data
|
||
</pre></div>
|
||
</div>
|
||
<p>or using the environment variable:</p>
|
||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nv">PYTHON_PERF_JIT_SUPPORT</span><span class="o">=</span><span class="m">1</span><span class="w"> </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>--call-graph<span class="w"> </span>dwarf<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>my_script.py
|
||
<span class="gp">$ </span>perf<span class="w"> </span>inject<span class="w"> </span>-i<span class="w"> </span>perf.data<span class="w"> </span>--jit<span class="w"> </span>--output<span class="w"> </span>perf.jit.data
|
||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.jit.data
|
||
</pre></div>
|
||
</div>
|
||
<p><code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">inject</span> <span class="pre">--jit</span></code> command will read <code class="docutils literal notranslate"><span class="pre">perf.data</span></code>,
|
||
automatically pick up the perf dump file that Python creates (in
|
||
<code class="docutils literal notranslate"><span class="pre">/tmp/perf-$PID.dump</span></code>), and then create <code class="docutils literal notranslate"><span class="pre">perf.jit.data</span></code> which merges all the
|
||
JIT information together. It should also create a lot of <code class="docutils literal notranslate"><span class="pre">jitted-XXXX-N.so</span></code>
|
||
files in the current directory which are ELF images for all the JIT trampolines
|
||
that were created by Python.</p>
|
||
<div class="admonition warning">
|
||
<p class="admonition-title">Warning</p>
|
||
<p>Notice that when using <code class="docutils literal notranslate"><span class="pre">--call-graph</span> <span class="pre">dwarf</span></code> the <code class="docutils literal notranslate"><span class="pre">perf</span></code> tool will take
|
||
snapshots of the stack of the process being profiled and save the
|
||
information in the <code class="docutils literal notranslate"><span class="pre">perf.data</span></code> file. By default the size of the stack dump
|
||
is 8192 bytes but the user can change the size by passing the size after
|
||
comma like <code class="docutils literal notranslate"><span class="pre">--call-graph</span> <span class="pre">dwarf,4096</span></code>. The size of the stack dump is
|
||
important because if the size is too small <code class="docutils literal notranslate"><span class="pre">perf</span></code> will not be able to
|
||
unwind the stack and the output will be incomplete. On the other hand, if
|
||
the size is too big, then <code class="docutils literal notranslate"><span class="pre">perf</span></code> won’t be able to sample the process as
|
||
frequently as it would like as the overhead will be higher.</p>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
|
||
|
||
<div class="clearer"></div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<div class="sphinxsidebar" role="navigation" aria-label="Main">
|
||
<div class="sphinxsidebarwrapper">
|
||
<div>
|
||
<h3><a href="../contents.html">Table of Contents</a></h3>
|
||
<ul>
|
||
<li><a class="reference internal" href="#">Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler</a><ul>
|
||
<li><a class="reference internal" href="#how-to-enable-perf-profiling-support">How to enable <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiling support</a></li>
|
||
<li><a class="reference internal" href="#how-to-obtain-the-best-results">How to obtain the best results</a></li>
|
||
<li><a class="reference internal" href="#how-to-work-without-frame-pointers">How to work without frame pointers</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
</div>
|
||
<div>
|
||
<h4>Previous topic</h4>
|
||
<p class="topless"><a href="instrumentation.html"
|
||
title="previous chapter">Instrumenting CPython with DTrace and SystemTap</a></p>
|
||
</div>
|
||
<div>
|
||
<h4>Next topic</h4>
|
||
<p class="topless"><a href="annotations.html"
|
||
title="next chapter">Annotations Best Practices</a></p>
|
||
</div>
|
||
<div role="note" aria-label="source link">
|
||
<h3>This Page</h3>
|
||
<ul class="this-page-menu">
|
||
<li><a href="../bugs.html">Report a Bug</a></li>
|
||
<li>
|
||
<a href="https://github.com/python/cpython/blob/main/Doc/howto/perf_profiling.rst"
|
||
rel="nofollow">Show Source
|
||
</a>
|
||
</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
<div id="sidebarbutton" title="Collapse sidebar">
|
||
<span>«</span>
|
||
</div>
|
||
|
||
</div>
|
||
<div class="clearer"></div>
|
||
</div>
|
||
<div class="related" role="navigation" aria-label="Related">
|
||
<h3>Navigation</h3>
|
||
<ul>
|
||
<li class="right" style="margin-right: 10px">
|
||
<a href="../genindex.html" title="General Index"
|
||
>index</a></li>
|
||
<li class="right" >
|
||
<a href="../py-modindex.html" title="Python Module Index"
|
||
>modules</a> |</li>
|
||
<li class="right" >
|
||
<a href="annotations.html" title="Annotations Best Practices"
|
||
>next</a> |</li>
|
||
<li class="right" >
|
||
<a href="instrumentation.html" title="Instrumenting CPython with DTrace and SystemTap"
|
||
>previous</a> |</li>
|
||
|
||
<li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
|
||
<li><a href="https://www.python.org/">Python</a> »</li>
|
||
<li class="switchers">
|
||
<div class="language_switcher_placeholder"></div>
|
||
<div class="version_switcher_placeholder"></div>
|
||
</li>
|
||
<li>
|
||
|
||
</li>
|
||
<li id="cpython-language-and-version">
|
||
<a href="../index.html">3.13.3 Documentation</a> »
|
||
</li>
|
||
|
||
<li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
|
||
<li class="nav-item nav-item-this"><a href="">Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler</a></li>
|
||
<li class="right">
|
||
|
||
|
||
<div class="inline-search" role="search">
|
||
<form class="inline-search" action="../search.html" method="get">
|
||
<input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
|
||
<input type="submit" value="Go" />
|
||
</form>
|
||
</div>
|
||
|
|
||
</li>
|
||
<li class="right">
|
||
<label class="theme-selector-label">
|
||
Theme
|
||
<select class="theme-selector" oninput="activateTheme(this.value)">
|
||
<option value="auto" selected>Auto</option>
|
||
<option value="light">Light</option>
|
||
<option value="dark">Dark</option>
|
||
</select>
|
||
</label> |</li>
|
||
|
||
</ul>
|
||
</div>
|
||
<div class="footer">
|
||
©
|
||
<a href="../copyright.html">
|
||
|
||
Copyright
|
||
|
||
</a>
|
||
2001-2025, Python Software Foundation.
|
||
<br />
|
||
This page is licensed under the Python Software Foundation License Version 2.
|
||
<br />
|
||
Examples, recipes, and other code in the documentation are additionally licensed under the Zero Clause BSD License.
|
||
<br />
|
||
|
||
See <a href="/license.html">History and License</a> for more information.<br />
|
||
|
||
|
||
<br />
|
||
|
||
The Python Software Foundation is a non-profit corporation.
|
||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
|
||
<br />
|
||
<br />
|
||
Last updated on Apr 08, 2025 (14:33 UTC).
|
||
|
||
<a href="/bugs.html">Found a bug</a>?
|
||
|
||
<br />
|
||
|
||
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.2.3.
|
||
</div>
|
||
|
||
</body>
|
||
</html> |