mirror of
				https://github.com/bunny-lab-io/Borealis.git
				synced 2025-11-03 19:41:57 -07:00 
			
		
		
		
	
		
			
				
	
	
		
			573 lines
		
	
	
		
			40 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			573 lines
		
	
	
		
			40 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
<!DOCTYPE html>
 | 
						||
 | 
						||
<html lang="en" data-content_root="../">
 | 
						||
  <head>
 | 
						||
    <meta charset="utf-8" />
 | 
						||
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 | 
						||
<meta property="og:title" content="Python support for the Linux perf profiler" />
 | 
						||
<meta property="og:type" content="website" />
 | 
						||
<meta property="og:url" content="https://docs.python.org/3/howto/perf_profiling.html" />
 | 
						||
<meta property="og:site_name" content="Python documentation" />
 | 
						||
<meta property="og:description" content="Author: Pablo Galindo. The Linux perf profiler is a very powerful tool that allows you to profile and obtain information about the performance of your application. perf also has a very vibrant eco..." />
 | 
						||
<meta property="og:image" content="https://docs.python.org/3/_static/og-image.png" />
 | 
						||
<meta property="og:image:alt" content="Python documentation" />
 | 
						||
<meta name="description" content="Author: Pablo Galindo. The Linux perf profiler is a very powerful tool that allows you to profile and obtain information about the performance of your application. perf also has a very vibrant eco..." />
 | 
						||
<meta property="og:image:width" content="200">
 | 
						||
<meta property="og:image:height" content="200">
 | 
						||
<meta name="theme-color" content="#3776ab">
 | 
						||
 | 
						||
    <title>Python support for the Linux perf profiler — Python 3.13.3 documentation</title>
 | 
						||
    
 | 
						||
    <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" />
 | 
						||
    <link rel="stylesheet" type="text/css" href="../_static/pydoctheme.css?v=23252803" />
 | 
						||
    <link id="pygments_dark_css" media="(prefers-color-scheme: dark)" rel="stylesheet" type="text/css" href="../_static/pygments_dark.css?v=5349f25f" />
 | 
						||
    
 | 
						||
    <script src="../_static/documentation_options.js?v=5d57ca2d"></script>
 | 
						||
    <script src="../_static/doctools.js?v=9bcbadda"></script>
 | 
						||
    <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
 | 
						||
    
 | 
						||
    <script src="../_static/sidebar.js"></script>
 | 
						||
    
 | 
						||
    <link rel="search" type="application/opensearchdescription+xml"
 | 
						||
          title="Search within Python 3.13.3 documentation"
 | 
						||
          href="../_static/opensearch.xml"/>
 | 
						||
    <link rel="author" title="About these documents" href="../about.html" />
 | 
						||
    <link rel="index" title="Index" href="../genindex.html" />
 | 
						||
    <link rel="search" title="Search" href="../search.html" />
 | 
						||
    <link rel="copyright" title="Copyright" href="../copyright.html" />
 | 
						||
    <link rel="next" title="Annotations Best Practices" href="annotations.html" />
 | 
						||
    <link rel="prev" title="Instrumenting CPython with DTrace and SystemTap" href="instrumentation.html" />
 | 
						||
    
 | 
						||
    <link rel="canonical" href="https://docs.python.org/3/howto/perf_profiling.html">
 | 
						||
    
 | 
						||
      
 | 
						||
    
 | 
						||
 | 
						||
    
 | 
						||
    <style>
 | 
						||
      @media only screen {
 | 
						||
        table.full-width-table {
 | 
						||
            width: 100%;
 | 
						||
        }
 | 
						||
      }
 | 
						||
    </style>
 | 
						||
<link rel="stylesheet" href="../_static/pydoctheme_dark.css" media="(prefers-color-scheme: dark)" id="pydoctheme_dark_css">
 | 
						||
    <link rel="shortcut icon" type="image/svg+xml" href="../_static/py.svg" />
 | 
						||
            <script type="text/javascript" src="../_static/copybutton.js"></script>
 | 
						||
            <script type="text/javascript" src="../_static/menu.js"></script>
 | 
						||
            <script type="text/javascript" src="../_static/search-focus.js"></script>
 | 
						||
            <script type="text/javascript" src="../_static/themetoggle.js"></script> 
 | 
						||
            <script type="text/javascript" src="../_static/rtd_switcher.js"></script>
 | 
						||
            <meta name="readthedocs-addons-api-version" content="1">
 | 
						||
 | 
						||
  </head>
 | 
						||
<body>
 | 
						||
<div class="mobile-nav">
 | 
						||
    <input type="checkbox" id="menuToggler" class="toggler__input" aria-controls="navigation"
 | 
						||
           aria-pressed="false" aria-expanded="false" role="button" aria-label="Menu" />
 | 
						||
    <nav class="nav-content" role="navigation">
 | 
						||
        <label for="menuToggler" class="toggler__label">
 | 
						||
            <span></span>
 | 
						||
        </label>
 | 
						||
        <span class="nav-items-wrapper">
 | 
						||
            <a href="https://www.python.org/" class="nav-logo">
 | 
						||
                <img src="../_static/py.svg" alt="Python logo"/>
 | 
						||
            </a>
 | 
						||
            <span class="version_switcher_placeholder"></span>
 | 
						||
            <form role="search" class="search" action="../search.html" method="get">
 | 
						||
                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" class="search-icon">
 | 
						||
                    <path fill-rule="nonzero" fill="currentColor" d="M15.5 14h-.79l-.28-.27a6.5 6.5 0 001.48-5.34c-.47-2.78-2.79-5-5.59-5.34a6.505 6.505 0 00-7.27 7.27c.34 2.8 2.56 5.12 5.34 5.59a6.5 6.5 0 005.34-1.48l.27.28v.79l4.25 4.25c.41.41 1.08.41 1.49 0 .41-.41.41-1.08 0-1.49L15.5 14zm-6 0C7.01 14 5 11.99 5 9.5S7.01 5 9.5 5 14 7.01 14 9.5 11.99 14 9.5 14z"></path>
 | 
						||
                </svg>
 | 
						||
                <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" />
 | 
						||
                <input type="submit" value="Go"/>
 | 
						||
            </form>
 | 
						||
        </span>
 | 
						||
    </nav>
 | 
						||
    <div class="menu-wrapper">
 | 
						||
        <nav class="menu" role="navigation" aria-label="main navigation">
 | 
						||
            <div class="language_switcher_placeholder"></div>
 | 
						||
            
 | 
						||
<label class="theme-selector-label">
 | 
						||
    Theme
 | 
						||
    <select class="theme-selector" oninput="activateTheme(this.value)">
 | 
						||
        <option value="auto" selected>Auto</option>
 | 
						||
        <option value="light">Light</option>
 | 
						||
        <option value="dark">Dark</option>
 | 
						||
    </select>
 | 
						||
</label>
 | 
						||
  <div>
 | 
						||
    <h3><a href="../contents.html">Table of Contents</a></h3>
 | 
						||
    <ul>
 | 
						||
<li><a class="reference internal" href="#">Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler</a><ul>
 | 
						||
<li><a class="reference internal" href="#how-to-enable-perf-profiling-support">How to enable <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiling support</a></li>
 | 
						||
<li><a class="reference internal" href="#how-to-obtain-the-best-results">How to obtain the best results</a></li>
 | 
						||
<li><a class="reference internal" href="#how-to-work-without-frame-pointers">How to work without frame pointers</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
</ul>
 | 
						||
 | 
						||
  </div>
 | 
						||
  <div>
 | 
						||
    <h4>Previous topic</h4>
 | 
						||
    <p class="topless"><a href="instrumentation.html"
 | 
						||
                          title="previous chapter">Instrumenting CPython with DTrace and SystemTap</a></p>
 | 
						||
  </div>
 | 
						||
  <div>
 | 
						||
    <h4>Next topic</h4>
 | 
						||
    <p class="topless"><a href="annotations.html"
 | 
						||
                          title="next chapter">Annotations Best Practices</a></p>
 | 
						||
  </div>
 | 
						||
  <div role="note" aria-label="source link">
 | 
						||
    <h3>This Page</h3>
 | 
						||
    <ul class="this-page-menu">
 | 
						||
      <li><a href="../bugs.html">Report a Bug</a></li>
 | 
						||
      <li>
 | 
						||
        <a href="https://github.com/python/cpython/blob/main/Doc/howto/perf_profiling.rst"
 | 
						||
            rel="nofollow">Show Source
 | 
						||
        </a>
 | 
						||
      </li>
 | 
						||
    </ul>
 | 
						||
  </div>
 | 
						||
        </nav>
 | 
						||
    </div>
 | 
						||
</div>
 | 
						||
 | 
						||
  
 | 
						||
    <div class="related" role="navigation" aria-label="Related">
 | 
						||
      <h3>Navigation</h3>
 | 
						||
      <ul>
 | 
						||
        <li class="right" style="margin-right: 10px">
 | 
						||
          <a href="../genindex.html" title="General Index"
 | 
						||
             accesskey="I">index</a></li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="../py-modindex.html" title="Python Module Index"
 | 
						||
             >modules</a> |</li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="annotations.html" title="Annotations Best Practices"
 | 
						||
             accesskey="N">next</a> |</li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="instrumentation.html" title="Instrumenting CPython with DTrace and SystemTap"
 | 
						||
             accesskey="P">previous</a> |</li>
 | 
						||
 | 
						||
          <li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
 | 
						||
          <li><a href="https://www.python.org/">Python</a> »</li>
 | 
						||
          <li class="switchers">
 | 
						||
            <div class="language_switcher_placeholder"></div>
 | 
						||
            <div class="version_switcher_placeholder"></div>
 | 
						||
          </li>
 | 
						||
          <li>
 | 
						||
              
 | 
						||
          </li>
 | 
						||
    <li id="cpython-language-and-version">
 | 
						||
      <a href="../index.html">3.13.3 Documentation</a> »
 | 
						||
    </li>
 | 
						||
 | 
						||
          <li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
 | 
						||
        <li class="nav-item nav-item-this"><a href="">Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler</a></li>
 | 
						||
                <li class="right">
 | 
						||
                    
 | 
						||
 | 
						||
    <div class="inline-search" role="search">
 | 
						||
        <form class="inline-search" action="../search.html" method="get">
 | 
						||
          <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
 | 
						||
          <input type="submit" value="Go" />
 | 
						||
        </form>
 | 
						||
    </div>
 | 
						||
                     |
 | 
						||
                </li>
 | 
						||
            <li class="right">
 | 
						||
<label class="theme-selector-label">
 | 
						||
    Theme
 | 
						||
    <select class="theme-selector" oninput="activateTheme(this.value)">
 | 
						||
        <option value="auto" selected>Auto</option>
 | 
						||
        <option value="light">Light</option>
 | 
						||
        <option value="dark">Dark</option>
 | 
						||
    </select>
 | 
						||
</label> |</li>
 | 
						||
            
 | 
						||
      </ul>
 | 
						||
    </div>    
 | 
						||
 | 
						||
    <div class="document">
 | 
						||
      <div class="documentwrapper">
 | 
						||
        <div class="bodywrapper">
 | 
						||
          <div class="body" role="main">
 | 
						||
            
 | 
						||
  <section id="python-support-for-the-linux-perf-profiler">
 | 
						||
<span id="perf-profiling"></span><h1>Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler<a class="headerlink" href="#python-support-for-the-linux-perf-profiler" title="Link to this heading">¶</a></h1>
 | 
						||
<dl class="field-list simple">
 | 
						||
<dt class="field-odd">author<span class="colon">:</span></dt>
 | 
						||
<dd class="field-odd"><p>Pablo Galindo</p>
 | 
						||
</dd>
 | 
						||
</dl>
 | 
						||
<p><a class="reference external" href="https://perf.wiki.kernel.org">The Linux perf profiler</a>
 | 
						||
is a very powerful tool that allows you to profile and obtain
 | 
						||
information about the performance of your application.
 | 
						||
<code class="docutils literal notranslate"><span class="pre">perf</span></code> also has a very vibrant ecosystem of tools
 | 
						||
that aid with the analysis of the data that it produces.</p>
 | 
						||
<p>The main problem with using the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler with Python applications is that
 | 
						||
<code class="docutils literal notranslate"><span class="pre">perf</span></code> only gets information about native symbols, that is, the names of
 | 
						||
functions and procedures written in C. This means that the names and file names
 | 
						||
of Python functions in your code will not appear in the output of <code class="docutils literal notranslate"><span class="pre">perf</span></code>.</p>
 | 
						||
<p>Since Python 3.12, the interpreter can run in a special mode that allows Python
 | 
						||
functions to appear in the output of the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler. When this mode is
 | 
						||
enabled, the interpreter will interpose a small piece of code compiled on the
 | 
						||
fly before the execution of every Python function and it will teach <code class="docutils literal notranslate"><span class="pre">perf</span></code> the
 | 
						||
relationship between this piece of code and the associated Python function using
 | 
						||
<a class="reference internal" href="../c-api/perfmaps.html"><span class="doc">perf map files</span></a>.</p>
 | 
						||
<div class="admonition note">
 | 
						||
<p class="admonition-title">Note</p>
 | 
						||
<p>Support for the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler is currently only available for Linux on
 | 
						||
select architectures. Check the output of the <code class="docutils literal notranslate"><span class="pre">configure</span></code> build step or
 | 
						||
check the output of <code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">-m</span> <span class="pre">sysconfig</span> <span class="pre">|</span> <span class="pre">grep</span> <span class="pre">HAVE_PERF_TRAMPOLINE</span></code>
 | 
						||
to see if your system is supported.</p>
 | 
						||
</div>
 | 
						||
<p>For example, consider the following script:</p>
 | 
						||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">foo</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
 | 
						||
    <span class="n">result</span> <span class="o">=</span> <span class="mi">0</span>
 | 
						||
    <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
 | 
						||
        <span class="n">result</span> <span class="o">+=</span> <span class="mi">1</span>
 | 
						||
    <span class="k">return</span> <span class="n">result</span>
 | 
						||
 | 
						||
<span class="k">def</span><span class="w"> </span><span class="nf">bar</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
 | 
						||
    <span class="n">foo</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
 | 
						||
 | 
						||
<span class="k">def</span><span class="w"> </span><span class="nf">baz</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
 | 
						||
    <span class="n">bar</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
 | 
						||
 | 
						||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
 | 
						||
    <span class="n">baz</span><span class="p">(</span><span class="mi">1000000</span><span class="p">)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>We can run <code class="docutils literal notranslate"><span class="pre">perf</span></code> to sample CPU stack traces at 9999 hertz:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>my_script.py
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Then we can use <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">report</span></code> to analyze the data:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>--stdio<span class="w"> </span>-n<span class="w"> </span>-g
 | 
						||
 | 
						||
<span class="gp"># </span>Children<span class="w">      </span>Self<span class="w">       </span>Samples<span class="w">  </span>Command<span class="w">     </span>Shared<span class="w"> </span>Object<span class="w">       </span>Symbol
 | 
						||
<span class="gp"># </span>........<span class="w">  </span>........<span class="w">  </span>............<span class="w">  </span>..........<span class="w">  </span>..................<span class="w">  </span>..........................................
 | 
						||
<span class="gp">#</span>
 | 
						||
<span class="go">    91.08%     0.00%             0  python.exe  python.exe          [.] _start</span>
 | 
						||
<span class="go">            |</span>
 | 
						||
<span class="go">            ---_start</span>
 | 
						||
<span class="go">            |</span>
 | 
						||
<span class="go">                --90.71%--__libc_start_main</span>
 | 
						||
<span class="go">                        Py_BytesMain</span>
 | 
						||
<span class="go">                        |</span>
 | 
						||
<span class="go">                        |--56.88%--pymain_run_python.constprop.0</span>
 | 
						||
<span class="go">                        |          |</span>
 | 
						||
<span class="go">                        |          |--56.13%--_PyRun_AnyFileObject</span>
 | 
						||
<span class="go">                        |          |          _PyRun_SimpleFileObject</span>
 | 
						||
<span class="go">                        |          |          |</span>
 | 
						||
<span class="go">                        |          |          |--55.02%--run_mod</span>
 | 
						||
<span class="go">                        |          |          |          |</span>
 | 
						||
<span class="go">                        |          |          |           --54.65%--PyEval_EvalCode</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_EvalFrameDefault</span>
 | 
						||
<span class="go">                        |          |          |                     PyObject_Vectorcall</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_Vector</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_EvalFrameDefault</span>
 | 
						||
<span class="go">                        |          |          |                     PyObject_Vectorcall</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_Vector</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_EvalFrameDefault</span>
 | 
						||
<span class="go">                        |          |          |                     PyObject_Vectorcall</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_Vector</span>
 | 
						||
<span class="go">                        |          |          |                     |</span>
 | 
						||
<span class="go">                        |          |          |                     |--51.67%--_PyEval_EvalFrameDefault</span>
 | 
						||
<span class="go">                        |          |          |                     |          |</span>
 | 
						||
<span class="go">                        |          |          |                     |          |--11.52%--_PyLong_Add</span>
 | 
						||
<span class="go">                        |          |          |                     |          |          |</span>
 | 
						||
<span class="go">                        |          |          |                     |          |          |--2.97%--_PyObject_Malloc</span>
 | 
						||
<span class="go">...</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>As you can see, the Python functions are not shown in the output; only <code class="docutils literal notranslate"><span class="pre">_PyEval_EvalFrameDefault</span></code>
 | 
						||
(the function that evaluates the Python bytecode) shows up. Unfortunately that’s not very useful because all Python
 | 
						||
functions use the same C function to evaluate bytecode so we cannot know which Python function corresponds to which
 | 
						||
bytecode-evaluating function.</p>
 | 
						||
<p>Instead, if we run the same experiment with <code class="docutils literal notranslate"><span class="pre">perf</span></code> support enabled, we get:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>--stdio<span class="w"> </span>-n<span class="w"> </span>-g
 | 
						||
 | 
						||
<span class="gp"># </span>Children<span class="w">      </span>Self<span class="w">       </span>Samples<span class="w">  </span>Command<span class="w">     </span>Shared<span class="w"> </span>Object<span class="w">       </span>Symbol
 | 
						||
<span class="gp"># </span>........<span class="w">  </span>........<span class="w">  </span>............<span class="w">  </span>..........<span class="w">  </span>..................<span class="w">  </span>.....................................................................
 | 
						||
<span class="gp">#</span>
 | 
						||
<span class="go">    90.58%     0.36%             1  python.exe  python.exe          [.] _start</span>
 | 
						||
<span class="go">            |</span>
 | 
						||
<span class="go">            ---_start</span>
 | 
						||
<span class="go">            |</span>
 | 
						||
<span class="go">                --89.86%--__libc_start_main</span>
 | 
						||
<span class="go">                        Py_BytesMain</span>
 | 
						||
<span class="go">                        |</span>
 | 
						||
<span class="go">                        |--55.43%--pymain_run_python.constprop.0</span>
 | 
						||
<span class="go">                        |          |</span>
 | 
						||
<span class="go">                        |          |--54.71%--_PyRun_AnyFileObject</span>
 | 
						||
<span class="go">                        |          |          _PyRun_SimpleFileObject</span>
 | 
						||
<span class="go">                        |          |          |</span>
 | 
						||
<span class="go">                        |          |          |--53.62%--run_mod</span>
 | 
						||
<span class="go">                        |          |          |          |</span>
 | 
						||
<span class="go">                        |          |          |           --53.26%--PyEval_EvalCode</span>
 | 
						||
<span class="go">                        |          |          |                     py::&lt;module&gt;:/src/script.py</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_EvalFrameDefault</span>
 | 
						||
<span class="go">                        |          |          |                     PyObject_Vectorcall</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_Vector</span>
 | 
						||
<span class="go">                        |          |          |                     py::baz:/src/script.py</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_EvalFrameDefault</span>
 | 
						||
<span class="go">                        |          |          |                     PyObject_Vectorcall</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_Vector</span>
 | 
						||
<span class="go">                        |          |          |                     py::bar:/src/script.py</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_EvalFrameDefault</span>
 | 
						||
<span class="go">                        |          |          |                     PyObject_Vectorcall</span>
 | 
						||
<span class="go">                        |          |          |                     _PyEval_Vector</span>
 | 
						||
<span class="go">                        |          |          |                     py::foo:/src/script.py</span>
 | 
						||
<span class="go">                        |          |          |                     |</span>
 | 
						||
<span class="go">                        |          |          |                     |--51.81%--_PyEval_EvalFrameDefault</span>
 | 
						||
<span class="go">                        |          |          |                     |          |</span>
 | 
						||
<span class="go">                        |          |          |                     |          |--13.77%--_PyLong_Add</span>
 | 
						||
<span class="go">                        |          |          |                     |          |          |</span>
 | 
						||
<span class="go">                        |          |          |                     |          |          |--3.26%--_PyObject_Malloc</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<section id="how-to-enable-perf-profiling-support">
 | 
						||
<h2>How to enable <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiling support<a class="headerlink" href="#how-to-enable-perf-profiling-support" title="Link to this heading">¶</a></h2>
 | 
						||
<p><code class="docutils literal notranslate"><span class="pre">perf</span></code> profiling support can be enabled either from the start using
 | 
						||
the environment variable <span class="target" id="index-0"></span><a class="reference internal" href="../using/cmdline.html#envvar-PYTHONPERFSUPPORT"><code class="xref std std-envvar docutils literal notranslate"><span class="pre">PYTHONPERFSUPPORT</span></code></a> or the
 | 
						||
<a class="reference internal" href="../using/cmdline.html#cmdoption-X"><code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span> <span class="pre">perf</span></code></a> option,
 | 
						||
or dynamically using <a class="reference internal" href="../library/sys.html#sys.activate_stack_trampoline" title="sys.activate_stack_trampoline"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.activate_stack_trampoline()</span></code></a> and
 | 
						||
<a class="reference internal" href="../library/sys.html#sys.deactivate_stack_trampoline" title="sys.deactivate_stack_trampoline"><code class="xref py py-func docutils literal notranslate"><span class="pre">sys.deactivate_stack_trampoline()</span></code></a>.</p>
 | 
						||
<p>The <code class="xref py py-mod docutils literal notranslate"><span class="pre">sys</span></code> functions take precedence over the <code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span></code> option,
 | 
						||
and the <code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span></code> option takes precedence over the environment variable.</p>
 | 
						||
<p>Example, using the environment variable:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nv">PYTHONPERFSUPPORT</span><span class="o">=</span><span class="m">1</span><span class="w"> </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>my_script.py
 | 
						||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.data
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Example, using the <code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span></code> option:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>-X<span class="w"> </span>perf<span class="w"> </span>my_script.py
 | 
						||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.data
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Example, using the <a class="reference internal" href="../library/sys.html#module-sys" title="sys: Access system-specific parameters and functions."><code class="xref py py-mod docutils literal notranslate"><span class="pre">sys</span></code></a> APIs in file <code class="file docutils literal notranslate"><span class="pre">example.py</span></code>:</p>
 | 
						||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">sys</span>
 | 
						||
 | 
						||
<span class="n">sys</span><span class="o">.</span><span class="n">activate_stack_trampoline</span><span class="p">(</span><span class="s2">"perf"</span><span class="p">)</span>
 | 
						||
<span class="n">do_profiled_stuff</span><span class="p">()</span>
 | 
						||
<span class="n">sys</span><span class="o">.</span><span class="n">deactivate_stack_trampoline</span><span class="p">()</span>
 | 
						||
 | 
						||
<span class="n">non_profiled_stuff</span><span class="p">()</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>…then:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>./example.py
 | 
						||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.data
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
</section>
 | 
						||
<section id="how-to-obtain-the-best-results">
 | 
						||
<h2>How to obtain the best results<a class="headerlink" href="#how-to-obtain-the-best-results" title="Link to this heading">¶</a></h2>
 | 
						||
<p>For best results, Python should be compiled with
 | 
						||
<code class="docutils literal notranslate"><span class="pre">CFLAGS="-fno-omit-frame-pointer</span> <span class="pre">-mno-omit-leaf-frame-pointer"</span></code> as this allows
 | 
						||
profilers to unwind using only the frame pointer, without relying on DWARF debug
 | 
						||
information. This is because as the code that is interposed to allow <code class="docutils literal notranslate"><span class="pre">perf</span></code>
 | 
						||
support is dynamically generated, it doesn’t have any DWARF debugging information
 | 
						||
available.</p>
 | 
						||
<p>You can check if your interpreter has been compiled with this flag by running:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>python<span class="w"> </span>-m<span class="w"> </span>sysconfig<span class="w"> </span><span class="p">|</span><span class="w"> </span>grep<span class="w"> </span><span class="s1">'no-omit-frame-pointer'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>If you don’t see any output it means that your interpreter has not been compiled with
 | 
						||
frame pointers and therefore it may not be able to show Python functions in the output
 | 
						||
of <code class="docutils literal notranslate"><span class="pre">perf</span></code>.</p>
 | 
						||
</section>
 | 
						||
<section id="how-to-work-without-frame-pointers">
 | 
						||
<h2>How to work without frame pointers<a class="headerlink" href="#how-to-work-without-frame-pointers" title="Link to this heading">¶</a></h2>
 | 
						||
<p>If you are working with a Python interpreter that has been compiled without
 | 
						||
frame pointers, you can still use the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler, but the overhead will be
 | 
						||
a bit higher because Python needs to generate unwinding information for every
 | 
						||
Python function call on the fly. Additionally, <code class="docutils literal notranslate"><span class="pre">perf</span></code> will take more time to
 | 
						||
process the data because it will need to use the DWARF debugging information to
 | 
						||
unwind the stack and this is a slow process.</p>
 | 
						||
<p>To enable this mode, you can use the environment variable
 | 
						||
<span class="target" id="index-1"></span><a class="reference internal" href="../using/cmdline.html#envvar-PYTHON_PERF_JIT_SUPPORT"><code class="xref std std-envvar docutils literal notranslate"><span class="pre">PYTHON_PERF_JIT_SUPPORT</span></code></a> or the <a class="reference internal" href="../using/cmdline.html#cmdoption-X"><code class="xref std std-option docutils literal notranslate"><span class="pre">-X</span> <span class="pre">perf_jit</span></code></a> option,
 | 
						||
which will enable the JIT mode for the <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler.</p>
 | 
						||
<div class="admonition note">
 | 
						||
<p class="admonition-title">Note</p>
 | 
						||
<p>Due to a bug in the <code class="docutils literal notranslate"><span class="pre">perf</span></code> tool, only <code class="docutils literal notranslate"><span class="pre">perf</span></code> versions higher than v6.8
 | 
						||
will work with the JIT mode.  The fix was also backported to the v6.7.2
 | 
						||
version of the tool.</p>
 | 
						||
<p>Note that when checking the version of the <code class="docutils literal notranslate"><span class="pre">perf</span></code> tool (which can be done
 | 
						||
by running <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">version</span></code>) you must take into account that some distros
 | 
						||
add some custom version numbers including a <code class="docutils literal notranslate"><span class="pre">-</span></code> character.  This means
 | 
						||
that <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">6.7-3</span></code> is not necessarily <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">6.7.3</span></code>.</p>
 | 
						||
</div>
 | 
						||
<p>When using the perf JIT mode, you need an extra step before you can run <code class="docutils literal notranslate"><span class="pre">perf</span>
 | 
						||
<span class="pre">report</span></code>. You need to call the <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">inject</span></code> command to inject the JIT
 | 
						||
information into the <code class="docutils literal notranslate"><span class="pre">perf.data</span></code> file:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-k<span class="w"> </span><span class="m">1</span><span class="w"> </span>--call-graph<span class="w"> </span>dwarf<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>-Xperf_jit<span class="w"> </span>my_script.py
 | 
						||
<span class="gp">$ </span>perf<span class="w"> </span>inject<span class="w"> </span>-i<span class="w"> </span>perf.data<span class="w"> </span>--jit<span class="w"> </span>--output<span class="w"> </span>perf.jit.data
 | 
						||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.jit.data
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>or using the environment variable:</p>
 | 
						||
<div class="highlight-shell-session notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span><span class="nv">PYTHON_PERF_JIT_SUPPORT</span><span class="o">=</span><span class="m">1</span><span class="w"> </span>perf<span class="w"> </span>record<span class="w"> </span>-F<span class="w"> </span><span class="m">9999</span><span class="w"> </span>-g<span class="w"> </span>-k<span class="w"> </span><span class="m">1</span><span class="w"> </span>--call-graph<span class="w"> </span>dwarf<span class="w"> </span>-o<span class="w"> </span>perf.data<span class="w"> </span>python<span class="w"> </span>my_script.py
 | 
						||
<span class="gp">$ </span>perf<span class="w"> </span>inject<span class="w"> </span>-i<span class="w"> </span>perf.data<span class="w"> </span>--jit<span class="w"> </span>--output<span class="w"> </span>perf.jit.data
 | 
						||
<span class="gp">$ </span>perf<span class="w"> </span>report<span class="w"> </span>-g<span class="w"> </span>-i<span class="w"> </span>perf.jit.data
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>The <code class="docutils literal notranslate"><span class="pre">perf</span> <span class="pre">inject</span> <span class="pre">--jit</span></code> command will read <code class="docutils literal notranslate"><span class="pre">perf.data</span></code>,
 | 
						||
automatically pick up the perf dump file that Python creates (in
 | 
						||
<code class="docutils literal notranslate"><span class="pre">/tmp/perf-$PID.dump</span></code>), and then create <code class="docutils literal notranslate"><span class="pre">perf.jit.data</span></code> which merges all the
 | 
						||
JIT information together. It should also create a lot of <code class="docutils literal notranslate"><span class="pre">jitted-XXXX-N.so</span></code>
 | 
						||
files in the current directory which are ELF images for all the JIT trampolines
 | 
						||
that were created by Python.</p>
 | 
						||
<div class="admonition warning">
 | 
						||
<p class="admonition-title">Warning</p>
 | 
						||
<p>Notice that when using <code class="docutils literal notranslate"><span class="pre">--call-graph</span> <span class="pre">dwarf</span></code> the <code class="docutils literal notranslate"><span class="pre">perf</span></code> tool will take
 | 
						||
snapshots of the stack of the process being profiled and save the
 | 
						||
information in the <code class="docutils literal notranslate"><span class="pre">perf.data</span></code> file. By default the size of the stack dump
 | 
						||
is 8192 bytes but the user can change the size by passing the size after
 | 
						||
a comma, like <code class="docutils literal notranslate"><span class="pre">--call-graph</span> <span class="pre">dwarf,4096</span></code>. The size of the stack dump is
 | 
						||
important because if the size is too small <code class="docutils literal notranslate"><span class="pre">perf</span></code> will not be able to
 | 
						||
unwind the stack and the output will be incomplete. On the other hand, if
 | 
						||
the size is too big, then <code class="docutils literal notranslate"><span class="pre">perf</span></code> won’t be able to sample the process as
 | 
						||
frequently as it would like as the overhead will be higher.</p>
 | 
						||
</div>
 | 
						||
</section>
 | 
						||
</section>
 | 
						||
 | 
						||
 | 
						||
            <div class="clearer"></div>
 | 
						||
          </div>
 | 
						||
        </div>
 | 
						||
      </div>
 | 
						||
      <div class="sphinxsidebar" role="navigation" aria-label="Main">
 | 
						||
        <div class="sphinxsidebarwrapper">
 | 
						||
  <div>
 | 
						||
    <h3><a href="../contents.html">Table of Contents</a></h3>
 | 
						||
    <ul>
 | 
						||
<li><a class="reference internal" href="#">Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler</a><ul>
 | 
						||
<li><a class="reference internal" href="#how-to-enable-perf-profiling-support">How to enable <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiling support</a></li>
 | 
						||
<li><a class="reference internal" href="#how-to-obtain-the-best-results">How to obtain the best results</a></li>
 | 
						||
<li><a class="reference internal" href="#how-to-work-without-frame-pointers">How to work without frame pointers</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
</ul>
 | 
						||
 | 
						||
  </div>
 | 
						||
  <div>
 | 
						||
    <h4>Previous topic</h4>
 | 
						||
    <p class="topless"><a href="instrumentation.html"
 | 
						||
                          title="previous chapter">Instrumenting CPython with DTrace and SystemTap</a></p>
 | 
						||
  </div>
 | 
						||
  <div>
 | 
						||
    <h4>Next topic</h4>
 | 
						||
    <p class="topless"><a href="annotations.html"
 | 
						||
                          title="next chapter">Annotations Best Practices</a></p>
 | 
						||
  </div>
 | 
						||
  <div role="note" aria-label="source link">
 | 
						||
    <h3>This Page</h3>
 | 
						||
    <ul class="this-page-menu">
 | 
						||
      <li><a href="../bugs.html">Report a Bug</a></li>
 | 
						||
      <li>
 | 
						||
        <a href="https://github.com/python/cpython/blob/main/Doc/howto/perf_profiling.rst"
 | 
						||
            rel="nofollow">Show Source
 | 
						||
        </a>
 | 
						||
      </li>
 | 
						||
    </ul>
 | 
						||
  </div>
 | 
						||
        </div>
 | 
						||
<div id="sidebarbutton" title="Collapse sidebar">
 | 
						||
<span>«</span>
 | 
						||
</div>
 | 
						||
 | 
						||
      </div>
 | 
						||
      <div class="clearer"></div>
 | 
						||
    </div>  
 | 
						||
    <div class="related" role="navigation" aria-label="Related">
 | 
						||
      <h3>Navigation</h3>
 | 
						||
      <ul>
 | 
						||
        <li class="right" style="margin-right: 10px">
 | 
						||
          <a href="../genindex.html" title="General Index"
 | 
						||
             >index</a></li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="../py-modindex.html" title="Python Module Index"
 | 
						||
             >modules</a> |</li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="annotations.html" title="Annotations Best Practices"
 | 
						||
             >next</a> |</li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="instrumentation.html" title="Instrumenting CPython with DTrace and SystemTap"
 | 
						||
             >previous</a> |</li>
 | 
						||
 | 
						||
          <li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
 | 
						||
          <li><a href="https://www.python.org/">Python</a> »</li>
 | 
						||
          <li class="switchers">
 | 
						||
            <div class="language_switcher_placeholder"></div>
 | 
						||
            <div class="version_switcher_placeholder"></div>
 | 
						||
          </li>
 | 
						||
          <li>
 | 
						||
              
 | 
						||
          </li>
 | 
						||
    <li id="cpython-language-and-version">
 | 
						||
      <a href="../index.html">3.13.3 Documentation</a> »
 | 
						||
    </li>
 | 
						||
 | 
						||
          <li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
 | 
						||
        <li class="nav-item nav-item-this"><a href="">Python support for the Linux <code class="docutils literal notranslate"><span class="pre">perf</span></code> profiler</a></li>
 | 
						||
                <li class="right">
 | 
						||
                    
 | 
						||
 | 
						||
    <div class="inline-search" role="search">
 | 
						||
        <form class="inline-search" action="../search.html" method="get">
 | 
						||
          <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
 | 
						||
          <input type="submit" value="Go" />
 | 
						||
        </form>
 | 
						||
    </div>
 | 
						||
                     |
 | 
						||
                </li>
 | 
						||
            <li class="right">
 | 
						||
<label class="theme-selector-label">
 | 
						||
    Theme
 | 
						||
    <select class="theme-selector" oninput="activateTheme(this.value)">
 | 
						||
        <option value="auto" selected>Auto</option>
 | 
						||
        <option value="light">Light</option>
 | 
						||
        <option value="dark">Dark</option>
 | 
						||
    </select>
 | 
						||
</label> |</li>
 | 
						||
            
 | 
						||
      </ul>
 | 
						||
    </div>  
 | 
						||
    <div class="footer">
 | 
						||
    © 
 | 
						||
      <a href="../copyright.html">
 | 
						||
    
 | 
						||
    Copyright
 | 
						||
    
 | 
						||
      </a>
 | 
						||
     2001-2025, Python Software Foundation.
 | 
						||
    <br />
 | 
						||
    This page is licensed under the Python Software Foundation License Version 2.
 | 
						||
    <br />
 | 
						||
    Examples, recipes, and other code in the documentation are additionally licensed under the Zero Clause BSD License.
 | 
						||
    <br />
 | 
						||
    
 | 
						||
      See <a href="../license.html">History and License</a> for more information.<br />
 | 
						||
    
 | 
						||
    
 | 
						||
    <br />
 | 
						||
 | 
						||
    The Python Software Foundation is a non-profit corporation.
 | 
						||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
 | 
						||
<br />
 | 
						||
    <br />
 | 
						||
      Last updated on Apr 08, 2025 (14:33 UTC).
 | 
						||
    
 | 
						||
      <a href="../bugs.html">Found a bug</a>?
 | 
						||
    
 | 
						||
    <br />
 | 
						||
 | 
						||
    Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.2.3.
 | 
						||
    </div>
 | 
						||
 | 
						||
  </body>
 | 
						||
</html> |