mirror of
				https://github.com/bunny-lab-io/Borealis.git
				synced 2025-10-28 07:41:58 -06:00 
			
		
		
		
	
		
			
				
	
	
		
			1449 lines
		
	
	
		
			148 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			1449 lines
		
	
	
		
			148 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
| <!DOCTYPE html>
 | ||
| 
 | ||
| <html lang="en" data-content_root="../">
 | ||
|   <head>
 | ||
|     <meta charset="utf-8" />
 | ||
|     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 | ||
| <meta property="og:title" content="statistics — Mathematical statistics functions" />
 | ||
| <meta property="og:type" content="website" />
 | ||
| <meta property="og:url" content="https://docs.python.org/3/library/statistics.html" />
 | ||
| <meta property="og:site_name" content="Python documentation" />
 | ||
| <meta property="og:description" content="Source code: Lib/statistics.py This module provides functions for calculating mathematical statistics of numeric (Real-valued) data. The module is not intended to be a competitor to third-party li..." />
 | ||
| <meta property="og:image" content="https://docs.python.org/3/_static/og-image.png" />
 | ||
| <meta property="og:image:alt" content="Python documentation" />
 | ||
| <meta name="description" content="Source code: Lib/statistics.py This module provides functions for calculating mathematical statistics of numeric (Real-valued) data. The module is not intended to be a competitor to third-party li..." />
 | ||
| <meta property="og:image:width" content="200">
 | ||
| <meta property="og:image:height" content="200">
 | ||
| <meta name="theme-color" content="#3776ab">
 | ||
| 
 | ||
|     <title>statistics — Mathematical statistics functions — Python 3.13.3 documentation</title>
 | ||
|     
 | ||
|     <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" />
 | ||
|     <link rel="stylesheet" type="text/css" href="../_static/pydoctheme.css?v=23252803" />
 | ||
|     <link id="pygments_dark_css" media="(prefers-color-scheme: dark)" rel="stylesheet" type="text/css" href="../_static/pygments_dark.css?v=5349f25f" />
 | ||
|     
 | ||
|     <script src="../_static/documentation_options.js?v=5d57ca2d"></script>
 | ||
|     <script src="../_static/doctools.js?v=9bcbadda"></script>
 | ||
|     <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
 | ||
|     
 | ||
|     <script src="../_static/sidebar.js"></script>
 | ||
|     
 | ||
|     <link rel="search" type="application/opensearchdescription+xml"
 | ||
|           title="Search within Python 3.13.3 documentation"
 | ||
|           href="../_static/opensearch.xml"/>
 | ||
|     <link rel="author" title="About these documents" href="../about.html" />
 | ||
|     <link rel="index" title="Index" href="../genindex.html" />
 | ||
|     <link rel="search" title="Search" href="../search.html" />
 | ||
|     <link rel="copyright" title="Copyright" href="../copyright.html" />
 | ||
|     <link rel="next" title="Functional Programming Modules" href="functional.html" />
 | ||
|     <link rel="prev" title="random — Generate pseudo-random numbers" href="random.html" />
 | ||
|     
 | ||
|     <link rel="canonical" href="https://docs.python.org/3/library/statistics.html">
 | ||
|     
 | ||
|       
 | ||
|     
 | ||
| 
 | ||
|     
 | ||
|     <style>
 | ||
|       @media only screen {
 | ||
|         table.full-width-table {
 | ||
|             width: 100%;
 | ||
|         }
 | ||
|       }
 | ||
|     </style>
 | ||
| <link rel="stylesheet" href="../_static/pydoctheme_dark.css" media="(prefers-color-scheme: dark)" id="pydoctheme_dark_css">
 | ||
|     <link rel="shortcut icon" type="image/svg+xml" href="../_static/py.svg" />
 | ||
|             <script type="text/javascript" src="../_static/copybutton.js"></script>
 | ||
|             <script type="text/javascript" src="../_static/menu.js"></script>
 | ||
|             <script type="text/javascript" src="../_static/search-focus.js"></script>
 | ||
|             <script type="text/javascript" src="../_static/themetoggle.js"></script> 
 | ||
|             <script type="text/javascript" src="../_static/rtd_switcher.js"></script>
 | ||
|             <meta name="readthedocs-addons-api-version" content="1">
 | ||
| 
 | ||
|   </head>
 | ||
| <body>
 | ||
| <div class="mobile-nav">
 | ||
|     <input type="checkbox" id="menuToggler" class="toggler__input" aria-controls="navigation"
 | ||
|            aria-pressed="false" aria-expanded="false" role="button" aria-label="Menu" />
 | ||
|     <nav class="nav-content" role="navigation">
 | ||
|         <label for="menuToggler" class="toggler__label">
 | ||
|             <span></span>
 | ||
|         </label>
 | ||
|         <span class="nav-items-wrapper">
 | ||
|             <a href="https://www.python.org/" class="nav-logo">
 | ||
|                 <img src="../_static/py.svg" alt="Python logo"/>
 | ||
|             </a>
 | ||
|             <span class="version_switcher_placeholder"></span>
 | ||
|             <form role="search" class="search" action="../search.html" method="get">
 | ||
|                 <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" class="search-icon">
 | ||
|                     <path fill-rule="nonzero" fill="currentColor" d="M15.5 14h-.79l-.28-.27a6.5 6.5 0 001.48-5.34c-.47-2.78-2.79-5-5.59-5.34a6.505 6.505 0 00-7.27 7.27c.34 2.8 2.56 5.12 5.34 5.59a6.5 6.5 0 005.34-1.48l.27.28v.79l4.25 4.25c.41.41 1.08.41 1.49 0 .41-.41.41-1.08 0-1.49L15.5 14zm-6 0C7.01 14 5 11.99 5 9.5S7.01 5 9.5 5 14 7.01 14 9.5 11.99 14 9.5 14z"></path>
 | ||
|                 </svg>
 | ||
|                 <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" />
 | ||
|                 <input type="submit" value="Go"/>
 | ||
|             </form>
 | ||
|         </span>
 | ||
|     </nav>
 | ||
|     <div class="menu-wrapper">
 | ||
|         <nav class="menu" role="navigation" aria-label="main navigation">
 | ||
|             <div class="language_switcher_placeholder"></div>
 | ||
|             
 | ||
| <label class="theme-selector-label">
 | ||
|     Theme
 | ||
|     <select class="theme-selector" oninput="activateTheme(this.value)">
 | ||
|         <option value="auto" selected>Auto</option>
 | ||
|         <option value="light">Light</option>
 | ||
|         <option value="dark">Dark</option>
 | ||
|     </select>
 | ||
| </label>
 | ||
|   <div>
 | ||
|     <h3><a href="../contents.html">Table of Contents</a></h3>
 | ||
|     <ul>
 | ||
| <li><a class="reference internal" href="#"><code class="xref py py-mod docutils literal notranslate"><span class="pre">statistics</span></code> — Mathematical statistics functions</a><ul>
 | ||
| <li><a class="reference internal" href="#averages-and-measures-of-central-location">Averages and measures of central location</a></li>
 | ||
| <li><a class="reference internal" href="#measures-of-spread">Measures of spread</a></li>
 | ||
| <li><a class="reference internal" href="#statistics-for-relations-between-two-inputs">Statistics for relations between two inputs</a></li>
 | ||
| <li><a class="reference internal" href="#function-details">Function details</a></li>
 | ||
| <li><a class="reference internal" href="#exceptions">Exceptions</a></li>
 | ||
| <li><a class="reference internal" href="#normaldist-objects"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code> objects</a></li>
 | ||
| <li><a class="reference internal" href="#examples-and-recipes">Examples and Recipes</a><ul>
 | ||
| <li><a class="reference internal" href="#classic-probability-problems">Classic probability problems</a></li>
 | ||
| <li><a class="reference internal" href="#monte-carlo-inputs-for-simulations">Monte Carlo inputs for simulations</a></li>
 | ||
| <li><a class="reference internal" href="#approximating-binomial-distributions">Approximating binomial distributions</a></li>
 | ||
| <li><a class="reference internal" href="#naive-bayesian-classifier">Naive bayesian classifier</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| </ul>
 | ||
| 
 | ||
|   </div>
 | ||
|   <div>
 | ||
|     <h4>Previous topic</h4>
 | ||
|     <p class="topless"><a href="random.html"
 | ||
|                           title="previous chapter"><code class="xref py py-mod docutils literal notranslate"><span class="pre">random</span></code> — Generate pseudo-random numbers</a></p>
 | ||
|   </div>
 | ||
|   <div>
 | ||
|     <h4>Next topic</h4>
 | ||
|     <p class="topless"><a href="functional.html"
 | ||
|                           title="next chapter">Functional Programming Modules</a></p>
 | ||
|   </div>
 | ||
|   <div role="note" aria-label="source link">
 | ||
|     <h3>This Page</h3>
 | ||
|     <ul class="this-page-menu">
 | ||
|       <li><a href="../bugs.html">Report a Bug</a></li>
 | ||
|       <li>
 | ||
|         <a href="https://github.com/python/cpython/blob/main/Doc/library/statistics.rst"
 | ||
|             rel="nofollow">Show Source
 | ||
|         </a>
 | ||
|       </li>
 | ||
|     </ul>
 | ||
|   </div>
 | ||
|         </nav>
 | ||
|     </div>
 | ||
| </div>
 | ||
| 
 | ||
|   
 | ||
|     <div class="related" role="navigation" aria-label="Related">
 | ||
|       <h3>Navigation</h3>
 | ||
|       <ul>
 | ||
|         <li class="right" style="margin-right: 10px">
 | ||
|           <a href="../genindex.html" title="General Index"
 | ||
|              accesskey="I">index</a></li>
 | ||
|         <li class="right" >
 | ||
|           <a href="../py-modindex.html" title="Python Module Index"
 | ||
|              >modules</a> |</li>
 | ||
|         <li class="right" >
 | ||
|           <a href="functional.html" title="Functional Programming Modules"
 | ||
|              accesskey="N">next</a> |</li>
 | ||
|         <li class="right" >
 | ||
|           <a href="random.html" title="random — Generate pseudo-random numbers"
 | ||
|              accesskey="P">previous</a> |</li>
 | ||
| 
 | ||
|           <li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
 | ||
|           <li><a href="https://www.python.org/">Python</a> »</li>
 | ||
|           <li class="switchers">
 | ||
|             <div class="language_switcher_placeholder"></div>
 | ||
|             <div class="version_switcher_placeholder"></div>
 | ||
|           </li>
 | ||
|           <li>
 | ||
|               
 | ||
|           </li>
 | ||
|     <li id="cpython-language-and-version">
 | ||
|       <a href="../index.html">3.13.3 Documentation</a> »
 | ||
|     </li>
 | ||
| 
 | ||
|           <li class="nav-item nav-item-1"><a href="index.html" >The Python Standard Library</a> »</li>
 | ||
|           <li class="nav-item nav-item-2"><a href="numeric.html" accesskey="U">Numeric and Mathematical Modules</a> »</li>
 | ||
|         <li class="nav-item nav-item-this"><a href=""><code class="xref py py-mod docutils literal notranslate"><span class="pre">statistics</span></code> — Mathematical statistics functions</a></li>
 | ||
|                 <li class="right">
 | ||
|                     
 | ||
| 
 | ||
|     <div class="inline-search" role="search">
 | ||
|         <form class="inline-search" action="../search.html" method="get">
 | ||
|           <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
 | ||
|           <input type="submit" value="Go" />
 | ||
|         </form>
 | ||
|     </div>
 | ||
|                      |
 | ||
|                 </li>
 | ||
|             <li class="right">
 | ||
| <label class="theme-selector-label">
 | ||
|     Theme
 | ||
|     <select class="theme-selector" oninput="activateTheme(this.value)">
 | ||
|         <option value="auto" selected>Auto</option>
 | ||
|         <option value="light">Light</option>
 | ||
|         <option value="dark">Dark</option>
 | ||
|     </select>
 | ||
| </label> |</li>
 | ||
|             
 | ||
|       </ul>
 | ||
|     </div>    
 | ||
| 
 | ||
|     <div class="document">
 | ||
|       <div class="documentwrapper">
 | ||
|         <div class="bodywrapper">
 | ||
|           <div class="body" role="main">
 | ||
|             
 | ||
|   <section id="module-statistics">
 | ||
| <span id="statistics-mathematical-statistics-functions"></span><h1><code class="xref py py-mod docutils literal notranslate"><span class="pre">statistics</span></code> — Mathematical statistics functions<a class="headerlink" href="#module-statistics" title="Link to this heading">¶</a></h1>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.4.</span></p>
 | ||
| </div>
 | ||
| <p><strong>Source code:</strong> <a class="extlink-source reference external" href="https://github.com/python/cpython/tree/3.13/Lib/statistics.py">Lib/statistics.py</a></p>
 | ||
| <hr class="docutils" />
 | ||
| <p>This module provides functions for calculating mathematical statistics of
 | ||
| numeric (<a class="reference internal" href="numbers.html#numbers.Real" title="numbers.Real"><code class="xref py py-class docutils literal notranslate"><span class="pre">Real</span></code></a>-valued) data.</p>
 | ||
| <p>The module is not intended to be a competitor to third-party libraries such
 | ||
| as <a class="reference external" href="https://numpy.org">NumPy</a>, <a class="reference external" href="https://scipy.org/">SciPy</a>, or
 | ||
| proprietary full-featured statistics packages aimed at professional
 | ||
| statisticians such as Minitab, SAS and Matlab. It is aimed at the level of
 | ||
| graphing and scientific calculators.</p>
 | ||
| <p>Unless explicitly noted, these functions support <a class="reference internal" href="functions.html#int" title="int"><code class="xref py py-class docutils literal notranslate"><span class="pre">int</span></code></a>,
 | ||
| <a class="reference internal" href="functions.html#float" title="float"><code class="xref py py-class docutils literal notranslate"><span class="pre">float</span></code></a>, <a class="reference internal" href="decimal.html#decimal.Decimal" title="decimal.Decimal"><code class="xref py py-class docutils literal notranslate"><span class="pre">Decimal</span></code></a> and <a class="reference internal" href="fractions.html#fractions.Fraction" title="fractions.Fraction"><code class="xref py py-class docutils literal notranslate"><span class="pre">Fraction</span></code></a>.
 | ||
| Behaviour with other types (whether in the numeric tower or not) is
 | ||
| currently unsupported.  Collections with a mix of types are also undefined
 | ||
| and implementation-dependent.  If your input data consists of mixed types,
 | ||
| you may be able to use <a class="reference internal" href="functions.html#map" title="map"><code class="xref py py-func docutils literal notranslate"><span class="pre">map()</span></code></a> to ensure a consistent result, for
 | ||
| example: <code class="docutils literal notranslate"><span class="pre">map(float,</span> <span class="pre">input_data)</span></code>.</p>
 | ||
| <p>Some datasets use <code class="docutils literal notranslate"><span class="pre">NaN</span></code> (not a number) values to represent missing data.
 | ||
| Since NaNs have unusual comparison semantics, they cause surprising or
 | ||
| undefined behaviors in the statistics functions that sort data or that count
 | ||
| occurrences.  The functions affected are <code class="docutils literal notranslate"><span class="pre">median()</span></code>, <code class="docutils literal notranslate"><span class="pre">median_low()</span></code>,
 | ||
| <code class="docutils literal notranslate"><span class="pre">median_high()</span></code>, <code class="docutils literal notranslate"><span class="pre">median_grouped()</span></code>, <code class="docutils literal notranslate"><span class="pre">mode()</span></code>, <code class="docutils literal notranslate"><span class="pre">multimode()</span></code>, and
 | ||
| <code class="docutils literal notranslate"><span class="pre">quantiles()</span></code>.  The <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values should be stripped before calling these
 | ||
| functions:</p>
 | ||
| <div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">statistics</span><span class="w"> </span><span class="kn">import</span> <span class="n">median</span>
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">math</span><span class="w"> </span><span class="kn">import</span> <span class="n">isnan</span>
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">itertools</span><span class="w"> </span><span class="kn">import</span> <span class="n">filterfalse</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="mf">20.7</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="s1">'NaN'</span><span class="p">),</span> <span class="mf">19.2</span><span class="p">,</span> <span class="mf">18.3</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="s1">'NaN'</span><span class="p">),</span> <span class="mf">14.4</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>  <span class="c1"># This has surprising behavior</span>
 | ||
| <span class="go">[20.7, nan, 14.4, 18.3, 19.2, nan]</span>
 | ||
| <span class="gp">>>> </span><span class="n">median</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>  <span class="c1"># This result is unexpected</span>
 | ||
| <span class="go">16.35</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="nb">sum</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="n">isnan</span><span class="p">,</span> <span class="n">data</span><span class="p">))</span>    <span class="c1"># Number of missing values</span>
 | ||
| <span class="go">2</span>
 | ||
| <span class="gp">>>> </span><span class="n">clean</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">filterfalse</span><span class="p">(</span><span class="n">isnan</span><span class="p">,</span> <span class="n">data</span><span class="p">))</span>  <span class="c1"># Strip NaN values</span>
 | ||
| <span class="gp">>>> </span><span class="n">clean</span>
 | ||
| <span class="go">[20.7, 19.2, 18.3, 14.4]</span>
 | ||
| <span class="gp">>>> </span><span class="nb">sorted</span><span class="p">(</span><span class="n">clean</span><span class="p">)</span>  <span class="c1"># Sorting now works as expected</span>
 | ||
| <span class="go">[14.4, 18.3, 19.2, 20.7]</span>
 | ||
| <span class="gp">>>> </span><span class="n">median</span><span class="p">(</span><span class="n">clean</span><span class="p">)</span>       <span class="c1"># This result is now well defined</span>
 | ||
| <span class="go">18.75</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <section id="averages-and-measures-of-central-location">
 | ||
| <h2>Averages and measures of central location<a class="headerlink" href="#averages-and-measures-of-central-location" title="Link to this heading">¶</a></h2>
 | ||
| <p>These functions calculate an average or typical value from a population
 | ||
| or sample.</p>
 | ||
| <table class="docutils align-default">
 | ||
| <tbody>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.mean" title="statistics.mean"><code class="xref py py-func docutils literal notranslate"><span class="pre">mean()</span></code></a></p></td>
 | ||
| <td><p>Arithmetic mean (“average”) of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.fmean" title="statistics.fmean"><code class="xref py py-func docutils literal notranslate"><span class="pre">fmean()</span></code></a></p></td>
 | ||
| <td><p>Fast, floating-point arithmetic mean, with optional weighting.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.geometric_mean" title="statistics.geometric_mean"><code class="xref py py-func docutils literal notranslate"><span class="pre">geometric_mean()</span></code></a></p></td>
 | ||
| <td><p>Geometric mean of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.harmonic_mean" title="statistics.harmonic_mean"><code class="xref py py-func docutils literal notranslate"><span class="pre">harmonic_mean()</span></code></a></p></td>
 | ||
| <td><p>Harmonic mean of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.kde" title="statistics.kde"><code class="xref py py-func docutils literal notranslate"><span class="pre">kde()</span></code></a></p></td>
 | ||
| <td><p>Estimate the probability density distribution of the data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.kde_random" title="statistics.kde_random"><code class="xref py py-func docutils literal notranslate"><span class="pre">kde_random()</span></code></a></p></td>
 | ||
| <td><p>Random sampling from the PDF generated by kde().</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.median" title="statistics.median"><code class="xref py py-func docutils literal notranslate"><span class="pre">median()</span></code></a></p></td>
 | ||
| <td><p>Median (middle value) of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.median_low" title="statistics.median_low"><code class="xref py py-func docutils literal notranslate"><span class="pre">median_low()</span></code></a></p></td>
 | ||
| <td><p>Low median of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.median_high" title="statistics.median_high"><code class="xref py py-func docutils literal notranslate"><span class="pre">median_high()</span></code></a></p></td>
 | ||
| <td><p>High median of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.median_grouped" title="statistics.median_grouped"><code class="xref py py-func docutils literal notranslate"><span class="pre">median_grouped()</span></code></a></p></td>
 | ||
| <td><p>Median (50th percentile) of grouped data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.mode" title="statistics.mode"><code class="xref py py-func docutils literal notranslate"><span class="pre">mode()</span></code></a></p></td>
 | ||
| <td><p>Single mode (most common value) of discrete or nominal data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.multimode" title="statistics.multimode"><code class="xref py py-func docutils literal notranslate"><span class="pre">multimode()</span></code></a></p></td>
 | ||
| <td><p>List of modes (most common values) of discrete or nominal data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.quantiles" title="statistics.quantiles"><code class="xref py py-func docutils literal notranslate"><span class="pre">quantiles()</span></code></a></p></td>
 | ||
| <td><p>Divide data into intervals with equal probability.</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| </section>
 | ||
| <section id="measures-of-spread">
 | ||
| <h2>Measures of spread<a class="headerlink" href="#measures-of-spread" title="Link to this heading">¶</a></h2>
 | ||
| <p>These functions calculate a measure of how much the population or sample
 | ||
| tends to deviate from the typical or average values.</p>
 | ||
| <table class="docutils align-default">
 | ||
| <tbody>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.pstdev" title="statistics.pstdev"><code class="xref py py-func docutils literal notranslate"><span class="pre">pstdev()</span></code></a></p></td>
 | ||
| <td><p>Population standard deviation of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.pvariance" title="statistics.pvariance"><code class="xref py py-func docutils literal notranslate"><span class="pre">pvariance()</span></code></a></p></td>
 | ||
| <td><p>Population variance of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.stdev" title="statistics.stdev"><code class="xref py py-func docutils literal notranslate"><span class="pre">stdev()</span></code></a></p></td>
 | ||
| <td><p>Sample standard deviation of data.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.variance" title="statistics.variance"><code class="xref py py-func docutils literal notranslate"><span class="pre">variance()</span></code></a></p></td>
 | ||
| <td><p>Sample variance of data.</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| </section>
 | ||
| <section id="statistics-for-relations-between-two-inputs">
 | ||
| <h2>Statistics for relations between two inputs<a class="headerlink" href="#statistics-for-relations-between-two-inputs" title="Link to this heading">¶</a></h2>
 | ||
| <p>These functions calculate statistics regarding relations between two inputs.</p>
 | ||
| <table class="docutils align-default">
 | ||
| <tbody>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.covariance" title="statistics.covariance"><code class="xref py py-func docutils literal notranslate"><span class="pre">covariance()</span></code></a></p></td>
 | ||
| <td><p>Sample covariance for two variables.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><a class="reference internal" href="#statistics.correlation" title="statistics.correlation"><code class="xref py py-func docutils literal notranslate"><span class="pre">correlation()</span></code></a></p></td>
 | ||
| <td><p>Pearson and Spearman’s correlation coefficients.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><a class="reference internal" href="#statistics.linear_regression" title="statistics.linear_regression"><code class="xref py py-func docutils literal notranslate"><span class="pre">linear_regression()</span></code></a></p></td>
 | ||
| <td><p>Slope and intercept for simple linear regression.</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| </section>
 | ||
| <section id="function-details">
 | ||
| <h2>Function details<a class="headerlink" href="#function-details" title="Link to this heading">¶</a></h2>
 | ||
| <p>Note: The functions do not require the data given to them to be sorted.
 | ||
| However, for reading convenience, most of the examples show sorted sequences.</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.mean">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">mean</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.mean" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the sample arithmetic mean of <em>data</em> which can be a sequence or iterable.</p>
 | ||
| <p>The arithmetic mean is the sum of the data divided by the number of data
 | ||
| points.  It is commonly called “the average”, although it is only one of many
 | ||
| different mathematical averages.  It is a measure of the central location of
 | ||
| the data.</p>
 | ||
| <p>If <em>data</em> is empty, <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> will be raised.</p>
 | ||
| <p>Some examples of use:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">mean</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
 | ||
| <span class="go">2.8</span>
 | ||
| <span class="gp">>>> </span><span class="n">mean</span><span class="p">([</span><span class="o">-</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">,</span> <span class="mf">3.25</span><span class="p">,</span> <span class="mf">5.75</span><span class="p">])</span>
 | ||
| <span class="go">2.625</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">fractions</span><span class="w"> </span><span class="kn">import</span> <span class="n">Fraction</span> <span class="k">as</span> <span class="n">F</span>
 | ||
| <span class="gp">>>> </span><span class="n">mean</span><span class="p">([</span><span class="n">F</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">7</span><span class="p">),</span> <span class="n">F</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">),</span> <span class="n">F</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> <span class="n">F</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)])</span>
 | ||
| <span class="go">Fraction(13, 21)</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">decimal</span><span class="w"> </span><span class="kn">import</span> <span class="n">Decimal</span> <span class="k">as</span> <span class="n">D</span>
 | ||
| <span class="gp">>>> </span><span class="n">mean</span><span class="p">([</span><span class="n">D</span><span class="p">(</span><span class="s2">"0.5"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"0.75"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"0.625"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"0.375"</span><span class="p">)])</span>
 | ||
| <span class="go">Decimal('0.5625')</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="admonition note">
 | ||
| <p class="admonition-title">Note</p>
 | ||
| <p>The mean is strongly affected by <a class="reference external" href="https://en.wikipedia.org/wiki/Outlier">outliers</a> and is not necessarily a
 | ||
| typical example of the data points. For a more robust, although less
 | ||
| efficient, measure of <a class="reference external" href="https://en.wikipedia.org/wiki/Central_tendency">central tendency</a>, see <a class="reference internal" href="#statistics.median" title="statistics.median"><code class="xref py py-func docutils literal notranslate"><span class="pre">median()</span></code></a>.</p>
 | ||
| <p>The sample mean gives an unbiased estimate of the true population mean,
 | ||
| so that when taken on average over all the possible samples,
 | ||
| <code class="docutils literal notranslate"><span class="pre">mean(sample)</span></code> converges on the true mean of the entire population.  If
 | ||
| <em>data</em> represents the entire population rather than a sample, then
 | ||
| <code class="docutils literal notranslate"><span class="pre">mean(data)</span></code> is equivalent to calculating the true population mean μ.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.fmean">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">fmean</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weights</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.fmean" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Convert <em>data</em> to floats and compute the arithmetic mean.</p>
 | ||
| <p>This runs faster than the <a class="reference internal" href="#statistics.mean" title="statistics.mean"><code class="xref py py-func docutils literal notranslate"><span class="pre">mean()</span></code></a> function and it always returns a
 | ||
| <a class="reference internal" href="functions.html#float" title="float"><code class="xref py py-class docutils literal notranslate"><span class="pre">float</span></code></a>.  The <em>data</em> may be a sequence or iterable.  If the input
 | ||
| dataset is empty, raises a <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a>.</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">fmean</span><span class="p">([</span><span class="mf">3.5</span><span class="p">,</span> <span class="mf">4.0</span><span class="p">,</span> <span class="mf">5.25</span><span class="p">])</span>
 | ||
| <span class="go">4.25</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Optional weighting is supported.  For example, a professor assigns a
 | ||
| grade for a course by weighting quizzes at 20%, homework at 20%, a
 | ||
| midterm exam at 30%, and a final exam at 30%:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">grades</span> <span class="o">=</span> <span class="p">[</span><span class="mi">85</span><span class="p">,</span> <span class="mi">92</span><span class="p">,</span> <span class="mi">83</span><span class="p">,</span> <span class="mi">91</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">weights</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.20</span><span class="p">,</span> <span class="mf">0.20</span><span class="p">,</span> <span class="mf">0.30</span><span class="p">,</span> <span class="mf">0.30</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">fmean</span><span class="p">(</span><span class="n">grades</span><span class="p">,</span> <span class="n">weights</span><span class="p">)</span>
 | ||
| <span class="go">87.6</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>If <em>weights</em> is supplied, it must be the same length as the <em>data</em> or
 | ||
| a <a class="reference internal" href="exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> will be raised.</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.8.</span></p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.11: </span>Added support for <em>weights</em>.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.geometric_mean">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">geometric_mean</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.geometric_mean" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Convert <em>data</em> to floats and compute the geometric mean.</p>
 | ||
| <p>The geometric mean indicates the central tendency or typical value of the
 | ||
| <em>data</em> using the product of the values (as opposed to the arithmetic mean
 | ||
| which uses their sum).</p>
 | ||
| <p>Raises a <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> if the input dataset is empty,
 | ||
| if it contains a zero, or if it contains a negative value.
 | ||
| The <em>data</em> may be a sequence or iterable.</p>
 | ||
| <p>No special efforts are made to achieve exact results.
 | ||
| (However, this may change in the future.)</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">geometric_mean</span><span class="p">([</span><span class="mi">54</span><span class="p">,</span> <span class="mi">24</span><span class="p">,</span> <span class="mi">36</span><span class="p">]),</span> <span class="mi">1</span><span class="p">)</span>
 | ||
| <span class="go">36.0</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.8.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.harmonic_mean">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">harmonic_mean</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weights</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.harmonic_mean" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the harmonic mean of <em>data</em>, a sequence or iterable of
 | ||
| real-valued numbers.  If <em>weights</em> is omitted or <code class="docutils literal notranslate"><span class="pre">None</span></code>, then
 | ||
| equal weighting is assumed.</p>
 | ||
| <p>The harmonic mean is the reciprocal of the arithmetic <a class="reference internal" href="#statistics.mean" title="statistics.mean"><code class="xref py py-func docutils literal notranslate"><span class="pre">mean()</span></code></a> of the
 | ||
| reciprocals of the data. For example, the harmonic mean of three values <em>a</em>,
 | ||
| <em>b</em> and <em>c</em> will be equivalent to <code class="docutils literal notranslate"><span class="pre">3/(1/a</span> <span class="pre">+</span> <span class="pre">1/b</span> <span class="pre">+</span> <span class="pre">1/c)</span></code>.  If one of the
 | ||
| values is zero, the result will be zero.</p>
 | ||
| <p>The harmonic mean is a type of average, a measure of the central
 | ||
| location of the data.  It is often appropriate when averaging
 | ||
| ratios or rates, for example speeds.</p>
 | ||
| <p>Suppose a car travels 10 km at 40 km/hr, then another 10 km at 60 km/hr.
 | ||
| What is the average speed?</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">harmonic_mean</span><span class="p">([</span><span class="mi">40</span><span class="p">,</span> <span class="mi">60</span><span class="p">])</span>
 | ||
| <span class="go">48.0</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Suppose a car travels 40 km/hr for 5 km, and when traffic clears,
 | ||
| speeds up to 60 km/hr for the remaining 30 km of the journey. What
 | ||
| is the average speed?</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">harmonic_mean</span><span class="p">([</span><span class="mi">40</span><span class="p">,</span> <span class="mi">60</span><span class="p">],</span> <span class="n">weights</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">30</span><span class="p">])</span>
 | ||
| <span class="go">56.0</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p><a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> is raised if <em>data</em> is empty, any element
 | ||
| is less than zero, or if the weighted sum isn’t positive.</p>
 | ||
| <p>The current algorithm has an early-out when it encounters a zero
 | ||
| in the input.  This means that the subsequent inputs are not tested
 | ||
| for validity.  (This behavior may change in the future.)</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.6.</span></p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.10: </span>Added support for <em>weights</em>.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.kde">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">kde</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">h</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kernel</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'normal'</span></span></em>, <em class="sig-param"><span class="keyword-only-separator o"><abbr title="Keyword-only parameters separator (PEP 3102)"><span class="pre">*</span></abbr></span></em>, <em class="sig-param"><span class="n"><span class="pre">cumulative</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.kde" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p><a class="reference external" href="https://www.itm-conferences.org/articles/itmconf/pdf/2018/08/itmconf_sam2018_00037.pdf">Kernel Density Estimation (KDE)</a>:
 | ||
| Create a continuous probability density function or cumulative
 | ||
| distribution function from discrete samples.</p>
 | ||
| <p>The basic idea is to smooth the data using <a class="reference external" href="https://en.wikipedia.org/wiki/Kernel_(statistics)">a kernel function</a>
 | ||
| to help draw inferences about a population from a sample.</p>
 | ||
| <p>The degree of smoothing is controlled by the scaling parameter <em>h</em>
 | ||
| which is called the bandwidth.  Smaller values emphasize local
 | ||
| features while larger values give smoother results.</p>
 | ||
| <p>The <em>kernel</em> determines the relative weights of the sample data
 | ||
| points.  Generally, the choice of kernel shape does not matter
 | ||
| as much as the more influential bandwidth smoothing parameter.</p>
 | ||
| <p>Kernels that give some weight to every sample point include
 | ||
| <em>normal</em> (<em>gauss</em>), <em>logistic</em>, and <em>sigmoid</em>.</p>
 | ||
| <p>Kernels that only give weight to sample points within the bandwidth
 | ||
| include <em>rectangular</em> (<em>uniform</em>), <em>triangular</em>, <em>parabolic</em>
 | ||
| (<em>epanechnikov</em>), <em>quartic</em> (<em>biweight</em>), <em>triweight</em>, and <em>cosine</em>.</p>
 | ||
| <p>If <em>cumulative</em> is true, a cumulative distribution function is returned instead of a probability density function.</p>
 | ||
| <p>A <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> will be raised if the <em>data</em> sequence is empty.</p>
 | ||
| <p><a class="reference external" href="https://en.wikipedia.org/wiki/Kernel_density_estimation#Example">Wikipedia has an example</a>
 | ||
| where we can use <a class="reference internal" href="#statistics.kde" title="statistics.kde"><code class="xref py py-func docutils literal notranslate"><span class="pre">kde()</span></code></a> to generate and plot a probability
 | ||
| density function estimated from a small sample:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sample</span> <span class="o">=</span> <span class="p">[</span><span class="o">-</span><span class="mf">2.1</span><span class="p">,</span> <span class="o">-</span><span class="mf">1.3</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.4</span><span class="p">,</span> <span class="mf">1.9</span><span class="p">,</span> <span class="mf">5.1</span><span class="p">,</span> <span class="mf">6.2</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">f_hat</span> <span class="o">=</span> <span class="n">kde</span><span class="p">(</span><span class="n">sample</span><span class="p">,</span> <span class="n">h</span><span class="o">=</span><span class="mf">1.5</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">xarr</span> <span class="o">=</span> <span class="p">[</span><span class="n">i</span><span class="o">/</span><span class="mi">100</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="o">-</span><span class="mi">750</span><span class="p">,</span> <span class="mi">1100</span><span class="p">)]</span>
 | ||
| <span class="gp">>>> </span><span class="n">yarr</span> <span class="o">=</span> <span class="p">[</span><span class="n">f_hat</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">xarr</span><span class="p">]</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>The points in <code class="docutils literal notranslate"><span class="pre">xarr</span></code> and <code class="docutils literal notranslate"><span class="pre">yarr</span></code> can be used to make a PDF plot:</p>
 | ||
| <img alt="Scatter plot of the estimated probability density function." src="../_images/kde_example.png" />
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.13.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.kde_random">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">kde_random</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">h</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kernel</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'normal'</span></span></em>, <em class="sig-param"><span class="keyword-only-separator o"><abbr title="Keyword-only parameters separator (PEP 3102)"><span class="pre">*</span></abbr></span></em>, <em class="sig-param"><span class="n"><span class="pre">seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.kde_random" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return a function that makes a random selection from the estimated
 | ||
| probability density function produced by <code class="docutils literal notranslate"><span class="pre">kde(data,</span> <span class="pre">h,</span> <span class="pre">kernel)</span></code>.</p>
 | ||
| <p>Providing a <em>seed</em> allows reproducible selections. In the future, the
 | ||
| values may change slightly as more accurate kernel inverse CDF estimates
 | ||
| are implemented.  The seed may be an integer, float, str, or bytes.</p>
 | ||
| <p>A <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> will be raised if the <em>data</em> sequence is empty.</p>
 | ||
| <p>Continuing the example for <a class="reference internal" href="#statistics.kde" title="statistics.kde"><code class="xref py py-func docutils literal notranslate"><span class="pre">kde()</span></code></a>, we can use
 | ||
| <a class="reference internal" href="#statistics.kde_random" title="statistics.kde_random"><code class="xref py py-func docutils literal notranslate"><span class="pre">kde_random()</span></code></a> to generate new random selections from an
 | ||
| estimated probability density function:</p>
 | ||
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="o">-</span><span class="mf">2.1</span><span class="p">,</span> <span class="o">-</span><span class="mf">1.3</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.4</span><span class="p">,</span> <span class="mf">1.9</span><span class="p">,</span> <span class="mf">5.1</span><span class="p">,</span> <span class="mf">6.2</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">rand</span> <span class="o">=</span> <span class="n">kde_random</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">h</span><span class="o">=</span><span class="mf">1.5</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">8675309</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">new_selections</span> <span class="o">=</span> <span class="p">[</span><span class="n">rand</span><span class="p">()</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">)]</span>
 | ||
| <span class="gp">>>> </span><span class="p">[</span><span class="nb">round</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">new_selections</span><span class="p">]</span>
 | ||
| <span class="go">[0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6]</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.13.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.median">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">median</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.median" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the median (middle value) of numeric data, using the common “mean of
 | ||
| middle two” method.  If <em>data</em> is empty, <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> is raised.
 | ||
| <em>data</em> can be a sequence or iterable.</p>
 | ||
| <p>The median is a robust measure of central location and is less affected by
 | ||
| the presence of outliers.  When the number of data points is odd, the
 | ||
| middle data point is returned:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">median</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">])</span>
 | ||
| <span class="go">3</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>When the number of data points is even, the median is interpolated by taking
 | ||
| the average of the two middle values:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">median</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">])</span>
 | ||
| <span class="go">4.0</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>This is suited for when your data is discrete, and you don’t mind that the
 | ||
| median may not be an actual data point.</p>
 | ||
| <p>If the data is ordinal (supports order operations) but not numeric (doesn’t
 | ||
| support addition), consider using <a class="reference internal" href="#statistics.median_low" title="statistics.median_low"><code class="xref py py-func docutils literal notranslate"><span class="pre">median_low()</span></code></a> or <a class="reference internal" href="#statistics.median_high" title="statistics.median_high"><code class="xref py py-func docutils literal notranslate"><span class="pre">median_high()</span></code></a>
 | ||
| instead.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.median_low">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">median_low</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.median_low" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the low median of numeric data.  If <em>data</em> is empty,
 | ||
| <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> is raised.  <em>data</em> can be a sequence or iterable.</p>
 | ||
| <p>The low median is always a member of the data set.  When the number of data
 | ||
| points is odd, the middle value is returned.  When it is even, the smaller of
 | ||
| the two middle values is returned.</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">median_low</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">])</span>
 | ||
| <span class="go">3</span>
 | ||
| <span class="gp">>>> </span><span class="n">median_low</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">])</span>
 | ||
| <span class="go">3</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Use the low median when your data are discrete and you prefer the median to
 | ||
| be an actual data point rather than interpolated.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.median_high">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">median_high</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.median_high" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the high median of data.  If <em>data</em> is empty, <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a>
 | ||
| is raised.  <em>data</em> can be a sequence or iterable.</p>
 | ||
| <p>The high median is always a member of the data set.  When the number of data
 | ||
| points is odd, the middle value is returned.  When it is even, the larger of
 | ||
| the two middle values is returned.</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">median_high</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">])</span>
 | ||
| <span class="go">3</span>
 | ||
| <span class="gp">>>> </span><span class="n">median_high</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">])</span>
 | ||
| <span class="go">5</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Use the high median when your data are discrete and you prefer the median to
 | ||
| be an actual data point rather than interpolated.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.median_grouped">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">median_grouped</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">interval</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1.0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.median_grouped" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Estimates the median for numeric data that has been <a class="reference external" href="https://en.wikipedia.org/wiki/Data_binning">grouped or binned</a> around the midpoints
 | ||
| of consecutive, fixed-width intervals.</p>
 | ||
| <p>The <em>data</em> can be any iterable of numeric data with each value being
 | ||
| exactly the midpoint of a bin.  At least one value must be present.</p>
 | ||
| <p>The <em>interval</em> is the width of each bin.</p>
 | ||
| <p>For example, demographic information may have been summarized into
 | ||
| consecutive ten-year age groups with each group being represented
 | ||
| by the 5-year midpoints of the intervals:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">collections</span><span class="w"> </span><span class="kn">import</span> <span class="n">Counter</span>
 | ||
| <span class="gp">>>> </span><span class="n">demographics</span> <span class="o">=</span> <span class="n">Counter</span><span class="p">({</span>
 | ||
| <span class="gp">... </span>   <span class="mi">25</span><span class="p">:</span> <span class="mi">172</span><span class="p">,</span>   <span class="c1"># 20 to 30 years old</span>
 | ||
| <span class="gp">... </span>   <span class="mi">35</span><span class="p">:</span> <span class="mi">484</span><span class="p">,</span>   <span class="c1"># 30 to 40 years old</span>
 | ||
| <span class="gp">... </span>   <span class="mi">45</span><span class="p">:</span> <span class="mi">387</span><span class="p">,</span>   <span class="c1"># 40 to 50 years old</span>
 | ||
| <span class="gp">... </span>   <span class="mi">55</span><span class="p">:</span>  <span class="mi">22</span><span class="p">,</span>   <span class="c1"># 50 to 60 years old</span>
 | ||
| <span class="gp">... </span>   <span class="mi">65</span><span class="p">:</span>   <span class="mi">6</span><span class="p">,</span>   <span class="c1"># 60 to 70 years old</span>
 | ||
| <span class="gp">... </span><span class="p">})</span>
 | ||
| <span class="gp">...</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>The 50th percentile (median) is the 536th person out of the 1071
 | ||
| member cohort.  That person is in the 30 to 40 year old age group.</p>
 | ||
| <p>The regular <a class="reference internal" href="#statistics.median" title="statistics.median"><code class="xref py py-func docutils literal notranslate"><span class="pre">median()</span></code></a> function would assume that everyone in the
 | ||
| tricenarian age group was exactly 35 years old.  A more tenable
 | ||
| assumption is that the 484 members of that age group are evenly
 | ||
| distributed between 30 and 40.  For that, we use
 | ||
| <a class="reference internal" href="#statistics.median_grouped" title="statistics.median_grouped"><code class="xref py py-func docutils literal notranslate"><span class="pre">median_grouped()</span></code></a>:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">demographics</span><span class="o">.</span><span class="n">elements</span><span class="p">())</span>
 | ||
| <span class="gp">>>> </span><span class="n">median</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
 | ||
| <span class="go">35</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">median_grouped</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">interval</span><span class="o">=</span><span class="mi">10</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span>
 | ||
| <span class="go">37.5</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>The caller is responsible for making sure the data points are separated
 | ||
| by exact multiples of <em>interval</em>.  This is essential for getting a
 | ||
| correct result.  The function does not check this precondition.</p>
 | ||
| <p>Inputs may be any numeric type that can be coerced to a float during
 | ||
| the interpolation step.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.mode">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">mode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.mode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the single most common data point from discrete or nominal <em>data</em>.
 | ||
| The mode (when it exists) is the most typical value and serves as a
 | ||
| measure of central location.</p>
 | ||
| <p>If there are multiple modes with the same frequency, returns the first one
 | ||
| encountered in the <em>data</em>.  If the smallest or largest of those is
 | ||
| desired instead, use <code class="docutils literal notranslate"><span class="pre">min(multimode(data))</span></code> or <code class="docutils literal notranslate"><span class="pre">max(multimode(data))</span></code>.
 | ||
| If the input <em>data</em> is empty, <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> is raised.</p>
 | ||
| <p><code class="docutils literal notranslate"><span class="pre">mode</span></code> assumes discrete data and returns a single value. This is the
 | ||
| standard treatment of the mode as commonly taught in schools:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">mode</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
 | ||
| <span class="go">3</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>The mode is unique in that it is the only statistic in this package that
 | ||
| also applies to nominal (non-numeric) data:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">mode</span><span class="p">([</span><span class="s2">"red"</span><span class="p">,</span> <span class="s2">"blue"</span><span class="p">,</span> <span class="s2">"blue"</span><span class="p">,</span> <span class="s2">"red"</span><span class="p">,</span> <span class="s2">"green"</span><span class="p">,</span> <span class="s2">"red"</span><span class="p">,</span> <span class="s2">"red"</span><span class="p">])</span>
 | ||
| <span class="go">'red'</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Only hashable inputs are supported.  To handle type <a class="reference internal" href="stdtypes.html#set" title="set"><code class="xref py py-class docutils literal notranslate"><span class="pre">set</span></code></a>,
 | ||
| consider casting to <a class="reference internal" href="stdtypes.html#frozenset" title="frozenset"><code class="xref py py-class docutils literal notranslate"><span class="pre">frozenset</span></code></a>.  To handle type <a class="reference internal" href="stdtypes.html#list" title="list"><code class="xref py py-class docutils literal notranslate"><span class="pre">list</span></code></a>,
 | ||
| consider casting to <a class="reference internal" href="stdtypes.html#tuple" title="tuple"><code class="xref py py-class docutils literal notranslate"><span class="pre">tuple</span></code></a>.  For mixed or nested inputs, consider
 | ||
| using this slower quadratic algorithm that only depends on equality tests:
 | ||
| <code class="docutils literal notranslate"><span class="pre">max(data,</span> <span class="pre">key=data.count)</span></code>.</p>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.8: </span>Now handles multimodal datasets by returning the first mode encountered.
 | ||
| Formerly, it raised <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> when more than one mode was
 | ||
| found.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.multimode">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">multimode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.multimode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return a list of the most frequently occurring values in the order they
 | ||
| were first encountered in the <em>data</em>.  Will return more than one result if
 | ||
| there are multiple modes or an empty list if the <em>data</em> is empty:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">multimode</span><span class="p">(</span><span class="s1">'aabbbbccddddeeffffgg'</span><span class="p">)</span>
 | ||
| <span class="go">['b', 'd', 'f']</span>
 | ||
| <span class="gp">>>> </span><span class="n">multimode</span><span class="p">(</span><span class="s1">''</span><span class="p">)</span>
 | ||
| <span class="go">[]</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.8.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.pstdev">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">pstdev</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mu</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.pstdev" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the population standard deviation (the square root of the population
 | ||
| variance).  See <a class="reference internal" href="#statistics.pvariance" title="statistics.pvariance"><code class="xref py py-func docutils literal notranslate"><span class="pre">pvariance()</span></code></a> for arguments and other details.</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">pstdev</span><span class="p">([</span><span class="mf">1.5</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">,</span> <span class="mf">2.75</span><span class="p">,</span> <span class="mf">3.25</span><span class="p">,</span> <span class="mf">4.75</span><span class="p">])</span>
 | ||
| <span class="go">0.986893273527251</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.pvariance">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">pvariance</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mu</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.pvariance" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the population variance of <em>data</em>, a non-empty sequence or iterable
 | ||
| of real-valued numbers.  Variance, or second moment about the mean, is a
 | ||
| measure of the variability (spread or dispersion) of data.  A large
 | ||
| variance indicates that the data is spread out; a small variance indicates
 | ||
| it is clustered closely around the mean.</p>
 | ||
| <p>If the optional second argument <em>mu</em> is given, it should be the <em>population</em>
 | ||
| mean of the <em>data</em>.  It can also be used to compute the second moment around
 | ||
| a point that is not the mean.  If it is missing or <code class="docutils literal notranslate"><span class="pre">None</span></code> (the default),
 | ||
| the arithmetic mean is automatically calculated.</p>
 | ||
| <p>Use this function to calculate the variance from the entire population.  To
 | ||
| estimate the variance from a sample, the <a class="reference internal" href="#statistics.variance" title="statistics.variance"><code class="xref py py-func docutils literal notranslate"><span class="pre">variance()</span></code></a> function is usually
 | ||
| a better choice.</p>
 | ||
| <p>Raises <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> if <em>data</em> is empty.</p>
 | ||
| <p>Examples:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">,</span> <span class="mf">1.25</span><span class="p">,</span> <span class="mf">1.5</span><span class="p">,</span> <span class="mf">1.75</span><span class="p">,</span> <span class="mf">2.75</span><span class="p">,</span> <span class="mf">3.25</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">pvariance</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
 | ||
| <span class="go">1.25</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>If you have already calculated the mean of your data, you can pass it as the
 | ||
| optional second argument <em>mu</em> to avoid recalculation:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">mu</span> <span class="o">=</span> <span class="n">mean</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">pvariance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">mu</span><span class="p">)</span>
 | ||
| <span class="go">1.25</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Decimals and Fractions are supported:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">decimal</span><span class="w"> </span><span class="kn">import</span> <span class="n">Decimal</span> <span class="k">as</span> <span class="n">D</span>
 | ||
| <span class="gp">>>> </span><span class="n">pvariance</span><span class="p">([</span><span class="n">D</span><span class="p">(</span><span class="s2">"27.5"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"30.25"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"30.25"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"34.5"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"41.75"</span><span class="p">)])</span>
 | ||
| <span class="go">Decimal('24.815')</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">fractions</span><span class="w"> </span><span class="kn">import</span> <span class="n">Fraction</span> <span class="k">as</span> <span class="n">F</span>
 | ||
| <span class="gp">>>> </span><span class="n">pvariance</span><span class="p">([</span><span class="n">F</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">),</span> <span class="n">F</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">),</span> <span class="n">F</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">)])</span>
 | ||
| <span class="go">Fraction(13, 72)</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="admonition note">
 | ||
| <p class="admonition-title">Note</p>
 | ||
| <p>When called with the entire population, this gives the population variance
 | ||
| σ².  When called on a sample instead, this is the biased sample variance
 | ||
| s², also known as variance with N degrees of freedom.</p>
 | ||
| <p>If you somehow know the true population mean μ, you may use this
 | ||
| function to calculate the variance of a sample, giving the known
 | ||
| population mean as the second argument.  Provided the data points are a
 | ||
| random sample of the population, the result will be an unbiased estimate
 | ||
| of the population variance.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.stdev">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">stdev</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">xbar</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.stdev" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the sample standard deviation (the square root of the sample
 | ||
| variance).  See <a class="reference internal" href="#statistics.variance" title="statistics.variance"><code class="xref py py-func docutils literal notranslate"><span class="pre">variance()</span></code></a> for arguments and other details.</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">stdev</span><span class="p">([</span><span class="mf">1.5</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">,</span> <span class="mf">2.75</span><span class="p">,</span> <span class="mf">3.25</span><span class="p">,</span> <span class="mf">4.75</span><span class="p">])</span>
 | ||
| <span class="go">1.0810874155219827</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.variance">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">variance</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">xbar</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.variance" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the sample variance of <em>data</em>, an iterable of at least two real-valued
 | ||
| numbers.  Variance, or second moment about the mean, is a measure of the
 | ||
| variability (spread or dispersion) of data.  A large variance indicates that
 | ||
| the data is spread out; a small variance indicates it is clustered closely
 | ||
| around the mean.</p>
 | ||
| <p>If the optional second argument <em>xbar</em> is given, it should be the <em>sample</em>
 | ||
| mean of <em>data</em>.  If it is missing or <code class="docutils literal notranslate"><span class="pre">None</span></code> (the default), the mean is
 | ||
| automatically calculated.</p>
 | ||
| <p>Use this function when your data is a sample from a population. To calculate
 | ||
| the variance from the entire population, see <a class="reference internal" href="#statistics.pvariance" title="statistics.pvariance"><code class="xref py py-func docutils literal notranslate"><span class="pre">pvariance()</span></code></a>.</p>
 | ||
| <p>Raises <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> if <em>data</em> has fewer than two values.</p>
 | ||
| <p>Examples:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="mf">2.75</span><span class="p">,</span> <span class="mf">1.75</span><span class="p">,</span> <span class="mf">1.25</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">1.25</span><span class="p">,</span> <span class="mf">3.5</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">variance</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
 | ||
| <span class="go">1.3720238095238095</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>If you have already calculated the sample mean of your data, you can pass it
 | ||
| as the optional second argument <em>xbar</em> to avoid recalculation:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">mean</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">variance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">m</span><span class="p">)</span>
 | ||
| <span class="go">1.3720238095238095</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>This function does not attempt to verify that you have passed the actual mean
 | ||
| as <em>xbar</em>.  Using arbitrary values for <em>xbar</em> can lead to invalid or
 | ||
| impossible results.</p>
 | ||
| <p>Decimal and Fraction values are supported:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">decimal</span><span class="w"> </span><span class="kn">import</span> <span class="n">Decimal</span> <span class="k">as</span> <span class="n">D</span>
 | ||
| <span class="gp">>>> </span><span class="n">variance</span><span class="p">([</span><span class="n">D</span><span class="p">(</span><span class="s2">"27.5"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"30.25"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"30.25"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"34.5"</span><span class="p">),</span> <span class="n">D</span><span class="p">(</span><span class="s2">"41.75"</span><span class="p">)])</span>
 | ||
| <span class="go">Decimal('31.01875')</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">fractions</span><span class="w"> </span><span class="kn">import</span> <span class="n">Fraction</span> <span class="k">as</span> <span class="n">F</span>
 | ||
| <span class="gp">>>> </span><span class="n">variance</span><span class="p">([</span><span class="n">F</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">6</span><span class="p">),</span> <span class="n">F</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="n">F</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">)])</span>
 | ||
| <span class="go">Fraction(67, 108)</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="admonition note">
 | ||
| <p class="admonition-title">Note</p>
 | ||
| <p>This is the sample variance s² with Bessel’s correction, also known as
 | ||
| variance with N-1 degrees of freedom.  Provided that the data points are
 | ||
| representative (e.g. independent and identically distributed), the result
 | ||
| should be an unbiased estimate of the true population variance.</p>
 | ||
| <p>If you somehow know the actual population mean μ, you should pass it to the
 | ||
| <a class="reference internal" href="#statistics.pvariance" title="statistics.pvariance"><code class="xref py py-func docutils literal notranslate"><span class="pre">pvariance()</span></code></a> function as the <em>mu</em> parameter to get the variance of a
 | ||
| sample.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.quantiles">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">quantiles</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em>, <em class="sig-param"><span class="keyword-only-separator o"><abbr title="Keyword-only parameters separator (PEP 3102)"><span class="pre">*</span></abbr></span></em>, <em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">4</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'exclusive'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.quantiles" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Divide <em>data</em> into <em>n</em> continuous intervals with equal probability.
 | ||
| Returns a list of <code class="docutils literal notranslate"><span class="pre">n</span> <span class="pre">-</span> <span class="pre">1</span></code> cut points separating the intervals.</p>
 | ||
| <p>Set <em>n</em> to 4 for quartiles (the default).  Set <em>n</em> to 10 for deciles.  Set
 | ||
| <em>n</em> to 100 for percentiles, which gives the 99 cut points that separate
 | ||
| <em>data</em> into 100 equal sized groups.  Raises <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> if <em>n</em>
 | ||
| is not at least 1.</p>
 | ||
| <p>The <em>data</em> can be any iterable containing sample data.  For meaningful
 | ||
| results, the number of data points in <em>data</em> should be larger than <em>n</em>.
 | ||
| Raises <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> if there is not at least one data point.</p>
 | ||
| <p>The cut points are linearly interpolated from the
 | ||
| two nearest data points.  For example, if a cut point falls one-third
 | ||
| of the distance between two sample values, <code class="docutils literal notranslate"><span class="pre">100</span></code> and <code class="docutils literal notranslate"><span class="pre">112</span></code>, the
 | ||
| cut point will evaluate to <code class="docutils literal notranslate"><span class="pre">104</span></code>.</p>
 | ||
| <p>The <em>method</em> for computing quantiles can be varied depending on
 | ||
| whether the <em>data</em> includes or excludes the lowest and
 | ||
| highest possible values from the population.</p>
 | ||
| <p>The default <em>method</em> is “exclusive” and is used for data sampled from
 | ||
| a population that can have more extreme values than found in the
 | ||
| samples.  The portion of the population falling below the <em>i-th</em> of
 | ||
| <em>m</em> sorted data points is computed as <code class="docutils literal notranslate"><span class="pre">i</span> <span class="pre">/</span> <span class="pre">(m</span> <span class="pre">+</span> <span class="pre">1)</span></code>.  Given nine
 | ||
| sample values, the method sorts them and assigns the following
 | ||
| percentiles: 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%.</p>
 | ||
| <p>Setting the <em>method</em> to “inclusive” is used for describing population
 | ||
| data or for samples that are known to include the most extreme values
 | ||
| from the population.  The minimum value in <em>data</em> is treated as the 0th
 | ||
| percentile and the maximum value is treated as the 100th percentile.
 | ||
| The portion of the population falling below the <em>i-th</em> of <em>m</em> sorted
 | ||
| data points is computed as <code class="docutils literal notranslate"><span class="pre">(i</span> <span class="pre">-</span> <span class="pre">1)</span> <span class="pre">/</span> <span class="pre">(m</span> <span class="pre">-</span> <span class="pre">1)</span></code>.  Given 11 sample
 | ||
| values, the method sorts them and assigns the following percentiles:
 | ||
| 0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%.</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go"># Decile cut points for empirically sampled data</span>
 | ||
| <span class="gp">>>> </span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="mi">105</span><span class="p">,</span> <span class="mi">129</span><span class="p">,</span> <span class="mi">87</span><span class="p">,</span> <span class="mi">86</span><span class="p">,</span> <span class="mi">111</span><span class="p">,</span> <span class="mi">111</span><span class="p">,</span> <span class="mi">89</span><span class="p">,</span> <span class="mi">81</span><span class="p">,</span> <span class="mi">108</span><span class="p">,</span> <span class="mi">92</span><span class="p">,</span> <span class="mi">110</span><span class="p">,</span>
 | ||
| <span class="gp">... </span>        <span class="mi">100</span><span class="p">,</span> <span class="mi">75</span><span class="p">,</span> <span class="mi">105</span><span class="p">,</span> <span class="mi">103</span><span class="p">,</span> <span class="mi">109</span><span class="p">,</span> <span class="mi">76</span><span class="p">,</span> <span class="mi">119</span><span class="p">,</span> <span class="mi">99</span><span class="p">,</span> <span class="mi">91</span><span class="p">,</span> <span class="mi">103</span><span class="p">,</span> <span class="mi">129</span><span class="p">,</span>
 | ||
| <span class="gp">... </span>        <span class="mi">106</span><span class="p">,</span> <span class="mi">101</span><span class="p">,</span> <span class="mi">84</span><span class="p">,</span> <span class="mi">111</span><span class="p">,</span> <span class="mi">74</span><span class="p">,</span> <span class="mi">87</span><span class="p">,</span> <span class="mi">86</span><span class="p">,</span> <span class="mi">103</span><span class="p">,</span> <span class="mi">103</span><span class="p">,</span> <span class="mi">106</span><span class="p">,</span> <span class="mi">86</span><span class="p">,</span>
 | ||
| <span class="gp">... </span>        <span class="mi">111</span><span class="p">,</span> <span class="mi">75</span><span class="p">,</span> <span class="mi">87</span><span class="p">,</span> <span class="mi">102</span><span class="p">,</span> <span class="mi">121</span><span class="p">,</span> <span class="mi">111</span><span class="p">,</span> <span class="mi">88</span><span class="p">,</span> <span class="mi">89</span><span class="p">,</span> <span class="mi">101</span><span class="p">,</span> <span class="mi">106</span><span class="p">,</span> <span class="mi">95</span><span class="p">,</span>
 | ||
| <span class="gp">... </span>        <span class="mi">103</span><span class="p">,</span> <span class="mi">107</span><span class="p">,</span> <span class="mi">101</span><span class="p">,</span> <span class="mi">81</span><span class="p">,</span> <span class="mi">109</span><span class="p">,</span> <span class="mi">104</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="p">[</span><span class="nb">round</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">q</span> <span class="ow">in</span> <span class="n">quantiles</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">n</span><span class="o">=</span><span class="mi">10</span><span class="p">)]</span>
 | ||
| <span class="go">[81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0]</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.8.</span></p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.13: </span>No longer raises an exception for an input with only a single data point.
 | ||
| This allows quantile estimates to be built up one sample point
 | ||
| at a time, becoming gradually more refined with each new data point.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.covariance">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">covariance</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em>, <em class="sig-param"><span class="positional-only-separator o"><abbr title="Positional-only parameter separator (PEP 570)"><span class="pre">/</span></abbr></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.covariance" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the sample covariance of two inputs <em>x</em> and <em>y</em>. Covariance
 | ||
| is a measure of the joint variability of two inputs.</p>
 | ||
| <p>Both inputs must be of the same length (no less than two), otherwise
 | ||
| <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> is raised.</p>
 | ||
| <p>Examples:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">x</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">y</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">covariance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
 | ||
| <span class="go">0.75</span>
 | ||
| <span class="gp">>>> </span><span class="n">z</span> <span class="o">=</span> <span class="p">[</span><span class="mi">9</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">covariance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">z</span><span class="p">)</span>
 | ||
| <span class="go">-7.5</span>
 | ||
| <span class="gp">>>> </span><span class="n">covariance</span><span class="p">(</span><span class="n">z</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
 | ||
| <span class="go">-7.5</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.10.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.correlation">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">correlation</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em>, <em class="sig-param"><span class="positional-only-separator o"><abbr title="Positional-only parameter separator (PEP 570)"><span class="pre">/</span></abbr></span></em>, <em class="sig-param"><span class="keyword-only-separator o"><abbr title="Keyword-only parameters separator (PEP 3102)"><span class="pre">*</span></abbr></span></em>, <em class="sig-param"><span class="n"><span class="pre">method</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'linear'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.correlation" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the <a class="reference external" href="https://en.wikipedia.org/wiki/Pearson_correlation_coefficient">Pearson’s correlation coefficient</a>
 | ||
| for two inputs. Pearson’s correlation coefficient <em>r</em> takes values
 | ||
| between -1 and +1. It measures the strength and direction of a linear
 | ||
| relationship.</p>
 | ||
| <p>If <em>method</em> is “ranked”, computes <a class="reference external" href="https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient">Spearman’s rank correlation coefficient</a>
 | ||
| for two inputs. The data is replaced by ranks.  Ties are averaged so that
 | ||
| equal values receive the same rank.  The resulting coefficient measures the
 | ||
| strength of a monotonic relationship.</p>
 | ||
| <p>Spearman’s correlation coefficient is appropriate for ordinal data or for
 | ||
| continuous data that doesn’t meet the linear proportion requirement for
 | ||
| Pearson’s correlation coefficient.</p>
 | ||
| <p>Both inputs must be of the same length (no less than two), and neither
 | ||
| may be constant; otherwise <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> is raised.</p>
 | ||
| <p>Example with <a class="reference external" href="https://en.wikipedia.org/wiki/Kepler's_laws_of_planetary_motion">Kepler’s laws of planetary motion</a>:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="c1"># Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and  Neptune</span>
 | ||
| <span class="gp">>>> </span><span class="n">orbital_period</span> <span class="o">=</span> <span class="p">[</span><span class="mi">88</span><span class="p">,</span> <span class="mi">225</span><span class="p">,</span> <span class="mi">365</span><span class="p">,</span> <span class="mi">687</span><span class="p">,</span> <span class="mi">4331</span><span class="p">,</span> <span class="mi">10_756</span><span class="p">,</span> <span class="mi">30_687</span><span class="p">,</span> <span class="mi">60_190</span><span class="p">]</span>    <span class="c1"># days</span>
 | ||
| <span class="gp">>>> </span><span class="n">dist_from_sun</span> <span class="o">=</span> <span class="p">[</span><span class="mi">58</span><span class="p">,</span> <span class="mi">108</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">228</span><span class="p">,</span> <span class="mi">778</span><span class="p">,</span> <span class="mi">1_400</span><span class="p">,</span> <span class="mi">2_900</span><span class="p">,</span> <span class="mi">4_500</span><span class="p">]</span> <span class="c1"># million km</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="c1"># Show that a perfect monotonic relationship exists</span>
 | ||
| <span class="gp">>>> </span><span class="n">correlation</span><span class="p">(</span><span class="n">orbital_period</span><span class="p">,</span> <span class="n">dist_from_sun</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'ranked'</span><span class="p">)</span>
 | ||
| <span class="go">1.0</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="c1"># Observe that a linear relationship is imperfect</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">correlation</span><span class="p">(</span><span class="n">orbital_period</span><span class="p">,</span> <span class="n">dist_from_sun</span><span class="p">),</span> <span class="mi">4</span><span class="p">)</span>
 | ||
| <span class="go">0.9882</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="c1"># Demonstrate Kepler's third law: There is a linear correlation</span>
 | ||
| <span class="gp">>>> </span><span class="c1"># between the square of the orbital period and the cube of the</span>
 | ||
| <span class="gp">>>> </span><span class="c1"># distance from the sun.</span>
 | ||
| <span class="gp">>>> </span><span class="n">period_squared</span> <span class="o">=</span> <span class="p">[</span><span class="n">p</span> <span class="o">*</span> <span class="n">p</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">orbital_period</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">dist_cubed</span> <span class="o">=</span> <span class="p">[</span><span class="n">d</span> <span class="o">*</span> <span class="n">d</span> <span class="o">*</span> <span class="n">d</span> <span class="k">for</span> <span class="n">d</span> <span class="ow">in</span> <span class="n">dist_from_sun</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">correlation</span><span class="p">(</span><span class="n">period_squared</span><span class="p">,</span> <span class="n">dist_cubed</span><span class="p">),</span> <span class="mi">4</span><span class="p">)</span>
 | ||
| <span class="go">1.0</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.10.</span></p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.12: </span>Added support for Spearman’s rank correlation coefficient.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="statistics.linear_regression">
 | ||
| <span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">linear_regression</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em>, <em class="sig-param"><span class="positional-only-separator o"><abbr title="Positional-only parameter separator (PEP 570)"><span class="pre">/</span></abbr></span></em>, <em class="sig-param"><span class="keyword-only-separator o"><abbr title="Keyword-only parameters separator (PEP 3102)"><span class="pre">*</span></abbr></span></em>, <em class="sig-param"><span class="n"><span class="pre">proportional</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.linear_regression" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the slope and intercept of <a class="reference external" href="https://en.wikipedia.org/wiki/Simple_linear_regression">simple linear regression</a>
 | ||
| parameters estimated using ordinary least squares. Simple linear
 | ||
| regression describes the relationship between an independent variable <em>x</em> and
 | ||
| a dependent variable <em>y</em> in terms of this linear function:</p>
 | ||
| <blockquote>
 | ||
| <div><p><em>y = slope * x + intercept + noise</em></p>
 | ||
| </div></blockquote>
 | ||
| <p>where <code class="docutils literal notranslate"><span class="pre">slope</span></code> and <code class="docutils literal notranslate"><span class="pre">intercept</span></code> are the regression parameters that are
 | ||
| estimated, and <code class="docutils literal notranslate"><span class="pre">noise</span></code> represents the
 | ||
| variability of the data that was not explained by the linear regression
 | ||
| (it is equal to the difference between predicted and actual values
 | ||
| of the dependent variable).</p>
 | ||
| <p>Both inputs must be of the same length (no less than two), and
 | ||
| the independent variable <em>x</em> cannot be constant;
 | ||
| otherwise a <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> is raised.</p>
 | ||
| <p>For example, we can use the <a class="reference external" href="https://en.wikipedia.org/wiki/Monty_Python#Films">release dates of the Monty
 | ||
| Python films</a>
 | ||
| to predict the cumulative number of Monty Python films
 | ||
| that would have been produced by 2019
 | ||
| assuming that they had kept the pace.</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">year</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1971</span><span class="p">,</span> <span class="mi">1975</span><span class="p">,</span> <span class="mi">1979</span><span class="p">,</span> <span class="mi">1982</span><span class="p">,</span> <span class="mi">1983</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">films_total</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="n">slope</span><span class="p">,</span> <span class="n">intercept</span> <span class="o">=</span> <span class="n">linear_regression</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">films_total</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">slope</span> <span class="o">*</span> <span class="mi">2019</span> <span class="o">+</span> <span class="n">intercept</span><span class="p">)</span>
 | ||
| <span class="go">16</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>If <em>proportional</em> is true, the independent variable <em>x</em> and the
 | ||
| dependent variable <em>y</em> are assumed to be directly proportional.
 | ||
| The data is fit to a line passing through the origin.
 | ||
| Since the <em>intercept</em> will always be 0.0, the underlying linear
 | ||
| function simplifies to:</p>
 | ||
| <blockquote>
 | ||
| <div><p><em>y = slope * x + noise</em></p>
 | ||
| </div></blockquote>
 | ||
| <p>Continuing the example from <a class="reference internal" href="#statistics.correlation" title="statistics.correlation"><code class="xref py py-func docutils literal notranslate"><span class="pre">correlation()</span></code></a>, we look to see
 | ||
| how well a model based on major planets can predict the orbital
 | ||
| distances for dwarf planets:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">model</span> <span class="o">=</span> <span class="n">linear_regression</span><span class="p">(</span><span class="n">period_squared</span><span class="p">,</span> <span class="n">dist_cubed</span><span class="p">,</span> <span class="n">proportional</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">slope</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">slope</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="c1"># Dwarf planets:   Pluto,  Eris,    Makemake, Haumea, Ceres</span>
 | ||
| <span class="gp">>>> </span><span class="n">orbital_periods</span> <span class="o">=</span> <span class="p">[</span><span class="mi">90_560</span><span class="p">,</span> <span class="mi">204_199</span><span class="p">,</span> <span class="mi">111_845</span><span class="p">,</span> <span class="mi">103_410</span><span class="p">,</span> <span class="mi">1_680</span><span class="p">]</span>  <span class="c1"># days</span>
 | ||
| <span class="gp">>>> </span><span class="n">predicted_dist</span> <span class="o">=</span> <span class="p">[</span><span class="n">math</span><span class="o">.</span><span class="n">cbrt</span><span class="p">(</span><span class="n">slope</span> <span class="o">*</span> <span class="p">(</span><span class="n">p</span> <span class="o">*</span> <span class="n">p</span><span class="p">))</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">orbital_periods</span><span class="p">]</span>
 | ||
| <span class="gp">>>> </span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">round</span><span class="p">,</span> <span class="n">predicted_dist</span><span class="p">))</span>
 | ||
| <span class="go">[5912, 10166, 6806, 6459, 414]</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="p">[</span><span class="mi">5_906</span><span class="p">,</span> <span class="mi">10_152</span><span class="p">,</span> <span class="mi">6_796</span><span class="p">,</span> <span class="mi">6_450</span><span class="p">,</span> <span class="mi">414</span><span class="p">]</span>  <span class="c1"># actual distance in million km</span>
 | ||
| <span class="go">[5906, 10152, 6796, 6450, 414]</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.10.</span></p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.11: </span>Added support for <em>proportional</em>.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </section>
 | ||
| <section id="exceptions">
 | ||
| <h2>Exceptions<a class="headerlink" href="#exceptions" title="Link to this heading">¶</a></h2>
 | ||
| <p>A single exception is defined:</p>
 | ||
| <dl class="py exception">
 | ||
| <dt class="sig sig-object py" id="statistics.StatisticsError">
 | ||
| <em class="property"><span class="k"><span class="pre">exception</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">StatisticsError</span></span><a class="headerlink" href="#statistics.StatisticsError" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Subclass of <a class="reference internal" href="exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> for statistics-related exceptions.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </section>
 | ||
| <section id="normaldist-objects">
 | ||
| <h2><a class="reference internal" href="#statistics.NormalDist" title="statistics.NormalDist"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code></a> objects<a class="headerlink" href="#normaldist-objects" title="Link to this heading">¶</a></h2>
 | ||
| <p><a class="reference internal" href="#statistics.NormalDist" title="statistics.NormalDist"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code></a> is a tool for creating and manipulating normal
 | ||
| distributions of a <a class="reference external" href="http://www.stat.yale.edu/Courses/1997-98/101/ranvar.htm">random variable</a>.  It is a
 | ||
| class that treats the mean and standard deviation of data
 | ||
| measurements as a single entity.</p>
 | ||
| <p>Normal distributions arise from the <a class="reference external" href="https://en.wikipedia.org/wiki/Central_limit_theorem">Central Limit Theorem</a> and have a wide range
 | ||
| of applications in statistics.</p>
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">statistics.</span></span><span class="sig-name descname"><span class="pre">NormalDist</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">mu</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sigma</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1.0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Returns a new <em>NormalDist</em> object where <em>mu</em> represents the <a class="reference external" href="https://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic
 | ||
| mean</a> and <em>sigma</em>
 | ||
| represents the <a class="reference external" href="https://en.wikipedia.org/wiki/Standard_deviation">standard deviation</a>.</p>
 | ||
| <p>If <em>sigma</em> is negative, raises <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a>.</p>
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.mean">
 | ||
| <span class="sig-name descname"><span class="pre">mean</span></span><a class="headerlink" href="#statistics.NormalDist.mean" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>A read-only property for the <a class="reference external" href="https://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of a normal
 | ||
| distribution.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.median">
 | ||
| <span class="sig-name descname"><span class="pre">median</span></span><a class="headerlink" href="#statistics.NormalDist.median" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>A read-only property for the <a class="reference external" href="https://en.wikipedia.org/wiki/Median">median</a> of a normal
 | ||
| distribution.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.mode">
 | ||
| <span class="sig-name descname"><span class="pre">mode</span></span><a class="headerlink" href="#statistics.NormalDist.mode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>A read-only property for the <a class="reference external" href="https://en.wikipedia.org/wiki/Mode_(statistics)">mode</a> of a normal
 | ||
| distribution.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.stdev">
 | ||
| <span class="sig-name descname"><span class="pre">stdev</span></span><a class="headerlink" href="#statistics.NormalDist.stdev" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>A read-only property for the <a class="reference external" href="https://en.wikipedia.org/wiki/Standard_deviation">standard deviation</a> of a normal
 | ||
| distribution.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.variance">
 | ||
| <span class="sig-name descname"><span class="pre">variance</span></span><a class="headerlink" href="#statistics.NormalDist.variance" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>A read-only property for the <a class="reference external" href="https://en.wikipedia.org/wiki/Variance">variance</a> of a normal
 | ||
| distribution. Equal to the square of the standard deviation.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.from_samples">
 | ||
| <em class="property"><span class="k"><span class="pre">classmethod</span></span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_samples</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist.from_samples" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Makes a normal distribution instance with <em>mu</em> and <em>sigma</em> parameters
 | ||
| estimated from the <em>data</em> using <a class="reference internal" href="#statistics.fmean" title="statistics.fmean"><code class="xref py py-func docutils literal notranslate"><span class="pre">fmean()</span></code></a> and <a class="reference internal" href="#statistics.stdev" title="statistics.stdev"><code class="xref py py-func docutils literal notranslate"><span class="pre">stdev()</span></code></a>.</p>
 | ||
| <p>The <em>data</em> can be any <a class="reference internal" href="../glossary.html#term-iterable"><span class="xref std std-term">iterable</span></a> and should consist of values
 | ||
| that can be converted to type <a class="reference internal" href="functions.html#float" title="float"><code class="xref py py-class docutils literal notranslate"><span class="pre">float</span></code></a>.  If <em>data</em> does not
 | ||
| contain at least two elements, raises <a class="reference internal" href="#statistics.StatisticsError" title="statistics.StatisticsError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">StatisticsError</span></code></a> because it
 | ||
| takes at least one point to estimate a central value and at least two
 | ||
| points to estimate dispersion.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.samples">
 | ||
| <span class="sig-name descname"><span class="pre">samples</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n</span></span></em>, <em class="sig-param"><span class="keyword-only-separator o"><abbr title="Keyword-only parameters separator (PEP 3102)"><span class="pre">*</span></abbr></span></em>, <em class="sig-param"><span class="n"><span class="pre">seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist.samples" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Generates <em>n</em> random samples for a given mean and standard deviation.
 | ||
| Returns a <a class="reference internal" href="stdtypes.html#list" title="list"><code class="xref py py-class docutils literal notranslate"><span class="pre">list</span></code></a> of <a class="reference internal" href="functions.html#float" title="float"><code class="xref py py-class docutils literal notranslate"><span class="pre">float</span></code></a> values.</p>
 | ||
| <p>If <em>seed</em> is given, creates a new instance of the underlying random
 | ||
| number generator.  This is useful for creating reproducible results,
 | ||
| even in a multi-threading context.</p>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.13: </span>Switched to a faster algorithm.  To reproduce samples from previous
 | ||
| versions, use <a class="reference internal" href="random.html#random.seed" title="random.seed"><code class="xref py py-func docutils literal notranslate"><span class="pre">random.seed()</span></code></a> and <a class="reference internal" href="random.html#random.gauss" title="random.gauss"><code class="xref py py-func docutils literal notranslate"><span class="pre">random.gauss()</span></code></a>.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.pdf">
 | ||
| <span class="sig-name descname"><span class="pre">pdf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist.pdf" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Using a <a class="reference external" href="https://en.wikipedia.org/wiki/Probability_density_function">probability density function (pdf)</a>, compute
 | ||
| the relative likelihood that a random variable <em>X</em> will be near the
 | ||
| given value <em>x</em>.  Mathematically, it is the limit of the ratio <code class="docutils literal notranslate"><span class="pre">P(x</span> <span class="pre">&lt;=</span>
 | ||
| <span class="pre">X</span> <span class="pre">&lt;</span> <span class="pre">x+dx)</span> <span class="pre">/</span> <span class="pre">dx</span></code> as <em>dx</em> approaches zero.</p>
 | ||
| <p>The relative likelihood is computed as the probability of a sample
 | ||
| occurring in a narrow range divided by the width of the range (hence
 | ||
| the word “density”).  Since the likelihood is relative to other points,
 | ||
| its value can be greater than <code class="docutils literal notranslate"><span class="pre">1.0</span></code>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.cdf">
 | ||
| <span class="sig-name descname"><span class="pre">cdf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist.cdf" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Using a <a class="reference external" href="https://en.wikipedia.org/wiki/Cumulative_distribution_function">cumulative distribution function (cdf)</a>,
 | ||
| compute the probability that a random variable <em>X</em> will be less than or
 | ||
| equal to <em>x</em>.  Mathematically, it is written <code class="docutils literal notranslate"><span class="pre">P(X</span> <span class="pre">&lt;=</span> <span class="pre">x)</span></code>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.inv_cdf">
 | ||
| <span class="sig-name descname"><span class="pre">inv_cdf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist.inv_cdf" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Compute the inverse cumulative distribution function, also known as the
 | ||
| <a class="reference external" href="https://en.wikipedia.org/wiki/Quantile_function">quantile function</a>
 | ||
| or the <a class="reference external" href="https://web.archive.org/web/20190203145224/https://www.statisticshowto.datasciencecentral.com/inverse-distribution-function/">percent-point</a>
 | ||
| function.  Mathematically, it is written <code class="docutils literal notranslate"><span class="pre">x</span> <span class="pre">:</span> <span class="pre">P(X</span> <span class="pre">&lt;=</span> <span class="pre">x)</span> <span class="pre">=</span> <span class="pre">p</span></code>.</p>
 | ||
| <p>Finds the value <em>x</em> of the random variable <em>X</em> such that the
 | ||
| probability of the variable being less than or equal to that value
 | ||
| equals the given probability <em>p</em>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.overlap">
 | ||
| <span class="sig-name descname"><span class="pre">overlap</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">other</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist.overlap" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Measures the agreement between two normal probability distributions.
 | ||
| Returns a value between 0.0 and 1.0 giving <a class="reference external" href="https://www.rasch.org/rmt/rmt101r.htm">the overlapping area for
 | ||
| the two probability density functions</a>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.quantiles">
 | ||
| <span class="sig-name descname"><span class="pre">quantiles</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">4</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist.quantiles" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Divide the normal distribution into <em>n</em> continuous intervals with
 | ||
| equal probability.  Returns a list of (n - 1) cut points separating
 | ||
| the intervals.</p>
 | ||
| <p>Set <em>n</em> to 4 for quartiles (the default).  Set <em>n</em> to 10 for deciles.
 | ||
| Set <em>n</em> to 100 for percentiles which gives the 99 cut points that
 | ||
| separate the normal distribution into 100 equal sized groups.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="statistics.NormalDist.zscore">
 | ||
| <span class="sig-name descname"><span class="pre">zscore</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#statistics.NormalDist.zscore" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Compute the
 | ||
| <a class="reference external" href="https://www.statisticshowto.com/probability-and-statistics/z-score/">Standard Score</a>
 | ||
| describing <em>x</em> in terms of the number of standard deviations
 | ||
| above or below the mean of the normal distribution:
 | ||
| <code class="docutils literal notranslate"><span class="pre">(x</span> <span class="pre">-</span> <span class="pre">mean)</span> <span class="pre">/</span> <span class="pre">stdev</span></code>.</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.9.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>Instances of <a class="reference internal" href="#statistics.NormalDist" title="statistics.NormalDist"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code></a> support addition, subtraction,
 | ||
| multiplication and division by a constant.  These operations
 | ||
| are used for translation and scaling.  For example:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">temperature_february</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">)</span>             <span class="c1"># Celsius</span>
 | ||
| <span class="gp">>>> </span><span class="n">temperature_february</span> <span class="o">*</span> <span class="p">(</span><span class="mi">9</span><span class="o">/</span><span class="mi">5</span><span class="p">)</span> <span class="o">+</span> <span class="mi">32</span>                     <span class="c1"># Fahrenheit</span>
 | ||
| <span class="go">NormalDist(mu=41.0, sigma=4.5)</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Dividing a constant by an instance of <a class="reference internal" href="#statistics.NormalDist" title="statistics.NormalDist"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code></a> is not supported
 | ||
| because the result wouldn’t be normally distributed.</p>
 | ||
| <p>Since normal distributions arise from additive effects of independent
 | ||
| variables, it is possible to <a class="reference external" href="https://en.wikipedia.org/wiki/Sum_of_normally_distributed_random_variables">add and subtract two independent normally
 | ||
| distributed random variables</a>
 | ||
| represented as instances of <a class="reference internal" href="#statistics.NormalDist" title="statistics.NormalDist"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code></a>.  For example:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">birth_weights</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="o">.</span><span class="n">from_samples</span><span class="p">([</span><span class="mf">2.5</span><span class="p">,</span> <span class="mf">3.1</span><span class="p">,</span> <span class="mf">2.1</span><span class="p">,</span> <span class="mf">2.4</span><span class="p">,</span> <span class="mf">2.7</span><span class="p">,</span> <span class="mf">3.5</span><span class="p">])</span>
 | ||
| <span class="gp">>>> </span><span class="n">drug_effects</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="p">(</span><span class="mf">0.4</span><span class="p">,</span> <span class="mf">0.15</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">combined</span> <span class="o">=</span> <span class="n">birth_weights</span> <span class="o">+</span> <span class="n">drug_effects</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">combined</span><span class="o">.</span><span class="n">mean</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
 | ||
| <span class="go">3.1</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">combined</span><span class="o">.</span><span class="n">stdev</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
 | ||
| <span class="go">0.5</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.8.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </section>
 | ||
| <section id="examples-and-recipes">
 | ||
| <h2>Examples and Recipes<a class="headerlink" href="#examples-and-recipes" title="Link to this heading">¶</a></h2>
 | ||
| <section id="classic-probability-problems">
 | ||
| <h3>Classic probability problems<a class="headerlink" href="#classic-probability-problems" title="Link to this heading">¶</a></h3>
 | ||
| <p><a class="reference internal" href="#statistics.NormalDist" title="statistics.NormalDist"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code></a> readily solves classic probability problems.</p>
 | ||
| <p>For example, given <a class="reference external" href="https://nces.ed.gov/programs/digest/d17/tables/dt17_226.40.asp">historical data for SAT exams</a> showing
 | ||
| that scores are normally distributed with a mean of 1060 and a standard
 | ||
| deviation of 195, determine the percentage of students with test scores
 | ||
| between 1100 and 1200, after rounding to the nearest whole number:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">sat</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="p">(</span><span class="mi">1060</span><span class="p">,</span> <span class="mi">195</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">fraction</span> <span class="o">=</span> <span class="n">sat</span><span class="o">.</span><span class="n">cdf</span><span class="p">(</span><span class="mi">1200</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span> <span class="o">-</span> <span class="n">sat</span><span class="o">.</span><span class="n">cdf</span><span class="p">(</span><span class="mi">1100</span> <span class="o">-</span> <span class="mf">0.5</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">fraction</span> <span class="o">*</span> <span class="mf">100.0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
 | ||
| <span class="go">18.4</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Find the <a class="reference external" href="https://en.wikipedia.org/wiki/Quartile">quartiles</a> and <a class="reference external" href="https://en.wikipedia.org/wiki/Decile">deciles</a> for the SAT scores:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">round</span><span class="p">,</span> <span class="n">sat</span><span class="o">.</span><span class="n">quantiles</span><span class="p">()))</span>
 | ||
| <span class="go">[928, 1060, 1192]</span>
 | ||
| <span class="gp">>>> </span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">round</span><span class="p">,</span> <span class="n">sat</span><span class="o">.</span><span class="n">quantiles</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="mi">10</span><span class="p">)))</span>
 | ||
| <span class="go">[810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310]</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| </section>
 | ||
| <section id="monte-carlo-inputs-for-simulations">
 | ||
| <h3>Monte Carlo inputs for simulations<a class="headerlink" href="#monte-carlo-inputs-for-simulations" title="Link to this heading">¶</a></h3>
 | ||
| <p>To estimate the distribution for a model that isn’t easy to solve
 | ||
| analytically, <a class="reference internal" href="#statistics.NormalDist" title="statistics.NormalDist"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code></a> can generate input samples for a <a class="reference external" href="https://en.wikipedia.org/wiki/Monte_Carlo_method">Monte
 | ||
| Carlo simulation</a>:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="k">def</span><span class="w"> </span><span class="nf">model</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">z</span><span class="p">):</span>
 | ||
| <span class="gp">... </span>    <span class="k">return</span> <span class="p">(</span><span class="mi">3</span><span class="o">*</span><span class="n">x</span> <span class="o">+</span> <span class="mi">7</span><span class="o">*</span><span class="n">x</span><span class="o">*</span><span class="n">y</span> <span class="o">-</span> <span class="mi">5</span><span class="o">*</span><span class="n">y</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="mi">11</span> <span class="o">*</span> <span class="n">z</span><span class="p">)</span>
 | ||
| <span class="gp">...</span>
 | ||
| <span class="gp">>>> </span><span class="n">n</span> <span class="o">=</span> <span class="mi">100_000</span>
 | ||
| <span class="gp">>>> </span><span class="n">X</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mf">2.5</span><span class="p">)</span><span class="o">.</span><span class="n">samples</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">3652260728</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">Y</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="p">(</span><span class="mi">15</span><span class="p">,</span> <span class="mf">1.75</span><span class="p">)</span><span class="o">.</span><span class="n">samples</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">4582495471</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">Z</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="mf">1.25</span><span class="p">)</span><span class="o">.</span><span class="n">samples</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="mi">6582483453</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">quantiles</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">,</span> <span class="n">Z</span><span class="p">))</span>
 | ||
| <span class="go">[1.4591308524824727, 1.8035946855390597, 2.175091447274739]</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| </section>
 | ||
| <section id="approximating-binomial-distributions">
 | ||
| <h3>Approximating binomial distributions<a class="headerlink" href="#approximating-binomial-distributions" title="Link to this heading">¶</a></h3>
 | ||
| <p>Normal distributions can be used to approximate <a class="reference external" href="https://mathworld.wolfram.com/BinomialDistribution.html">Binomial
 | ||
| distributions</a>
 | ||
| when the sample size is large and when the probability of a successful
 | ||
| trial is near 50%.</p>
 | ||
| <p>For example, an open source conference has 750 attendees and two rooms with a
 | ||
| 500 person capacity.  There is a talk about Python and another about Ruby.
 | ||
| In previous conferences, 65% of the attendees preferred to listen to Python
 | ||
| talks.  Assuming the population preferences haven’t changed, what is the
 | ||
| probability that the Python room will stay within its capacity limits?</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">n</span> <span class="o">=</span> <span class="mi">750</span>             <span class="c1"># Sample size</span>
 | ||
| <span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="mf">0.65</span>            <span class="c1"># Preference for Python</span>
 | ||
| <span class="gp">>>> </span><span class="n">q</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">-</span> <span class="n">p</span>         <span class="c1"># Preference for Ruby</span>
 | ||
| <span class="gp">>>> </span><span class="n">k</span> <span class="o">=</span> <span class="mi">500</span>             <span class="c1"># Room capacity</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="c1"># Approximation using the cumulative normal distribution</span>
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">math</span><span class="w"> </span><span class="kn">import</span> <span class="n">sqrt</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">NormalDist</span><span class="p">(</span><span class="n">mu</span><span class="o">=</span><span class="n">n</span><span class="o">*</span><span class="n">p</span><span class="p">,</span> <span class="n">sigma</span><span class="o">=</span><span class="n">sqrt</span><span class="p">(</span><span class="n">n</span><span class="o">*</span><span class="n">p</span><span class="o">*</span><span class="n">q</span><span class="p">))</span><span class="o">.</span><span class="n">cdf</span><span class="p">(</span><span class="n">k</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">),</span> <span class="mi">4</span><span class="p">)</span>
 | ||
| <span class="go">0.8402</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="c1"># Exact solution using the cumulative binomial distribution</span>
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">math</span><span class="w"> </span><span class="kn">import</span> <span class="n">comb</span><span class="p">,</span> <span class="n">fsum</span>
 | ||
| <span class="gp">>>> </span><span class="nb">round</span><span class="p">(</span><span class="n">fsum</span><span class="p">(</span><span class="n">comb</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">r</span><span class="p">)</span> <span class="o">*</span> <span class="n">p</span><span class="o">**</span><span class="n">r</span> <span class="o">*</span> <span class="n">q</span><span class="o">**</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="n">r</span><span class="p">)</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">k</span><span class="o">+</span><span class="mi">1</span><span class="p">)),</span> <span class="mi">4</span><span class="p">)</span>
 | ||
| <span class="go">0.8402</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="c1"># Approximation using a simulation</span>
 | ||
| <span class="gp">>>> </span><span class="kn">from</span><span class="w"> </span><span class="nn">random</span><span class="w"> </span><span class="kn">import</span> <span class="n">seed</span><span class="p">,</span> <span class="n">binomialvariate</span>
 | ||
| <span class="gp">>>> </span><span class="n">seed</span><span class="p">(</span><span class="mi">8675309</span><span class="p">)</span>
 | ||
| <span class="gp">>>> </span><span class="n">mean</span><span class="p">(</span><span class="n">binomialvariate</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">p</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="n">k</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10_000</span><span class="p">))</span>
 | ||
| <span class="go">0.8406</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| </section>
 | ||
| <section id="naive-bayesian-classifier">
 | ||
| <h3>Naive Bayesian classifier<a class="headerlink" href="#naive-bayesian-classifier" title="Link to this heading">¶</a></h3>
 | ||
| <p>Normal distributions commonly arise in machine learning problems.</p>
 | ||
| <p>Wikipedia has a <a class="reference external" href="https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Person_classification">nice example of a Naive Bayesian Classifier</a>.
 | ||
| The challenge is to predict a person’s gender from measurements of normally
 | ||
| distributed features including height, weight, and foot size.</p>
 | ||
| <p>We’re given a training dataset with measurements for eight people.  The
 | ||
| measurements are assumed to be normally distributed, so we summarize the data
 | ||
| with <a class="reference internal" href="#statistics.NormalDist" title="statistics.NormalDist"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code></a>:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">height_male</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="o">.</span><span class="n">from_samples</span><span class="p">([</span><span class="mi">6</span><span class="p">,</span> <span class="mf">5.92</span><span class="p">,</span> <span class="mf">5.58</span><span class="p">,</span> <span class="mf">5.92</span><span class="p">])</span>
 | ||
| <span class="gp">>>> </span><span class="n">height_female</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="o">.</span><span class="n">from_samples</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mf">5.5</span><span class="p">,</span> <span class="mf">5.42</span><span class="p">,</span> <span class="mf">5.75</span><span class="p">])</span>
 | ||
| <span class="gp">>>> </span><span class="n">weight_male</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="o">.</span><span class="n">from_samples</span><span class="p">([</span><span class="mi">180</span><span class="p">,</span> <span class="mi">190</span><span class="p">,</span> <span class="mi">170</span><span class="p">,</span> <span class="mi">165</span><span class="p">])</span>
 | ||
| <span class="gp">>>> </span><span class="n">weight_female</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="o">.</span><span class="n">from_samples</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">130</span><span class="p">,</span> <span class="mi">150</span><span class="p">])</span>
 | ||
| <span class="gp">>>> </span><span class="n">foot_size_male</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="o">.</span><span class="n">from_samples</span><span class="p">([</span><span class="mi">12</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">12</span><span class="p">,</span> <span class="mi">10</span><span class="p">])</span>
 | ||
| <span class="gp">>>> </span><span class="n">foot_size_female</span> <span class="o">=</span> <span class="n">NormalDist</span><span class="o">.</span><span class="n">from_samples</span><span class="p">([</span><span class="mi">6</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">9</span><span class="p">])</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Next, we encounter a new person whose feature measurements are known but whose
 | ||
| gender is unknown:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">ht</span> <span class="o">=</span> <span class="mf">6.0</span>        <span class="c1"># height</span>
 | ||
| <span class="gp">>>> </span><span class="n">wt</span> <span class="o">=</span> <span class="mi">130</span>        <span class="c1"># weight</span>
 | ||
| <span class="gp">>>> </span><span class="n">fs</span> <span class="o">=</span> <span class="mi">8</span>          <span class="c1"># foot size</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>Starting with a 50% <a class="reference external" href="https://en.wikipedia.org/wiki/Prior_probability">prior probability</a> of being male or female,
 | ||
| we compute the posterior as the prior times the product of likelihoods for the
 | ||
| feature measurements given the gender:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">prior_male</span> <span class="o">=</span> <span class="mf">0.5</span>
 | ||
| <span class="gp">>>> </span><span class="n">prior_female</span> <span class="o">=</span> <span class="mf">0.5</span>
 | ||
| <span class="gp">>>> </span><span class="n">posterior_male</span> <span class="o">=</span> <span class="p">(</span><span class="n">prior_male</span> <span class="o">*</span> <span class="n">height_male</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">ht</span><span class="p">)</span> <span class="o">*</span>
 | ||
| <span class="gp">... </span>                  <span class="n">weight_male</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">wt</span><span class="p">)</span> <span class="o">*</span> <span class="n">foot_size_male</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">fs</span><span class="p">))</span>
 | ||
| 
 | ||
| <span class="gp">>>> </span><span class="n">posterior_female</span> <span class="o">=</span> <span class="p">(</span><span class="n">prior_female</span> <span class="o">*</span> <span class="n">height_female</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">ht</span><span class="p">)</span> <span class="o">*</span>
 | ||
| <span class="gp">... </span>                    <span class="n">weight_female</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">wt</span><span class="p">)</span> <span class="o">*</span> <span class="n">foot_size_female</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">fs</span><span class="p">))</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p>The final prediction goes to the largest posterior. This is known as the
 | ||
| <a class="reference external" href="https://en.wikipedia.org/wiki/Maximum_a_posteriori_estimation">maximum a posteriori</a> or MAP:</p>
 | ||
| <div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="s1">'male'</span> <span class="k">if</span> <span class="n">posterior_male</span> <span class="o">></span> <span class="n">posterior_female</span> <span class="k">else</span> <span class="s1">'female'</span>
 | ||
| <span class="go">'female'</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| </section>
 | ||
| </section>
 | ||
| </section>
 | ||
| 
 | ||
| 
 | ||
|             <div class="clearer"></div>
 | ||
|           </div>
 | ||
|         </div>
 | ||
|       </div>
 | ||
|       <div class="sphinxsidebar" role="navigation" aria-label="Main">
 | ||
|         <div class="sphinxsidebarwrapper">
 | ||
|   <div>
 | ||
|     <h3><a href="../contents.html">Table of Contents</a></h3>
 | ||
|     <ul>
 | ||
| <li><a class="reference internal" href="#"><code class="xref py py-mod docutils literal notranslate"><span class="pre">statistics</span></code> — Mathematical statistics functions</a><ul>
 | ||
| <li><a class="reference internal" href="#averages-and-measures-of-central-location">Averages and measures of central location</a></li>
 | ||
| <li><a class="reference internal" href="#measures-of-spread">Measures of spread</a></li>
 | ||
| <li><a class="reference internal" href="#statistics-for-relations-between-two-inputs">Statistics for relations between two inputs</a></li>
 | ||
| <li><a class="reference internal" href="#function-details">Function details</a></li>
 | ||
| <li><a class="reference internal" href="#exceptions">Exceptions</a></li>
 | ||
| <li><a class="reference internal" href="#normaldist-objects"><code class="xref py py-class docutils literal notranslate"><span class="pre">NormalDist</span></code> objects</a></li>
 | ||
| <li><a class="reference internal" href="#examples-and-recipes">Examples and Recipes</a><ul>
 | ||
| <li><a class="reference internal" href="#classic-probability-problems">Classic probability problems</a></li>
 | ||
| <li><a class="reference internal" href="#monte-carlo-inputs-for-simulations">Monte Carlo inputs for simulations</a></li>
 | ||
| <li><a class="reference internal" href="#approximating-binomial-distributions">Approximating binomial distributions</a></li>
 | ||
| <li><a class="reference internal" href="#naive-bayesian-classifier">Naive Bayesian classifier</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| </ul>
 | ||
| 
 | ||
|   </div>
 | ||
|   <div>
 | ||
|     <h4>Previous topic</h4>
 | ||
|     <p class="topless"><a href="random.html"
 | ||
|                           title="previous chapter"><code class="xref py py-mod docutils literal notranslate"><span class="pre">random</span></code> — Generate pseudo-random numbers</a></p>
 | ||
|   </div>
 | ||
|   <div>
 | ||
|     <h4>Next topic</h4>
 | ||
|     <p class="topless"><a href="functional.html"
 | ||
|                           title="next chapter">Functional Programming Modules</a></p>
 | ||
|   </div>
 | ||
|   <div role="note" aria-label="source link">
 | ||
|     <h3>This Page</h3>
 | ||
|     <ul class="this-page-menu">
 | ||
|       <li><a href="../bugs.html">Report a Bug</a></li>
 | ||
|       <li>
 | ||
|         <a href="https://github.com/python/cpython/blob/main/Doc/library/statistics.rst"
 | ||
|             rel="nofollow">Show Source
 | ||
|         </a>
 | ||
|       </li>
 | ||
|     </ul>
 | ||
|   </div>
 | ||
|         </div>
 | ||
| <div id="sidebarbutton" title="Collapse sidebar">
 | ||
| <span>«</span>
 | ||
| </div>
 | ||
| 
 | ||
|       </div>
 | ||
|       <div class="clearer"></div>
 | ||
|     </div>  
 | ||
|     <div class="related" role="navigation" aria-label="Related">
 | ||
|       <h3>Navigation</h3>
 | ||
|       <ul>
 | ||
|         <li class="right" style="margin-right: 10px">
 | ||
|           <a href="../genindex.html" title="General Index"
 | ||
|              >index</a></li>
 | ||
|         <li class="right" >
 | ||
|           <a href="../py-modindex.html" title="Python Module Index"
 | ||
|              >modules</a> |</li>
 | ||
|         <li class="right" >
 | ||
|           <a href="functional.html" title="Functional Programming Modules"
 | ||
|              >next</a> |</li>
 | ||
|         <li class="right" >
 | ||
|           <a href="random.html" title="random — Generate pseudo-random numbers"
 | ||
|              >previous</a> |</li>
 | ||
| 
 | ||
|           <li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
 | ||
|           <li><a href="https://www.python.org/">Python</a> »</li>
 | ||
|           <li class="switchers">
 | ||
|             <div class="language_switcher_placeholder"></div>
 | ||
|             <div class="version_switcher_placeholder"></div>
 | ||
|           </li>
 | ||
|           <li>
 | ||
|               
 | ||
|           </li>
 | ||
|     <li id="cpython-language-and-version">
 | ||
|       <a href="../index.html">3.13.3 Documentation</a> »
 | ||
|     </li>
 | ||
| 
 | ||
|           <li class="nav-item nav-item-1"><a href="index.html" >The Python Standard Library</a> »</li>
 | ||
|           <li class="nav-item nav-item-2"><a href="numeric.html" >Numeric and Mathematical Modules</a> »</li>
 | ||
|         <li class="nav-item nav-item-this"><a href=""><code class="xref py py-mod docutils literal notranslate"><span class="pre">statistics</span></code> — Mathematical statistics functions</a></li>
 | ||
|                 <li class="right">
 | ||
|                     
 | ||
| 
 | ||
|     <div class="inline-search" role="search">
 | ||
|         <form class="inline-search" action="../search.html" method="get">
 | ||
|           <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
 | ||
|           <input type="submit" value="Go" />
 | ||
|         </form>
 | ||
|     </div>
 | ||
|                      |
 | ||
|                 </li>
 | ||
|             <li class="right">
 | ||
| <label class="theme-selector-label">
 | ||
|     Theme
 | ||
|     <select class="theme-selector" oninput="activateTheme(this.value)">
 | ||
|         <option value="auto" selected>Auto</option>
 | ||
|         <option value="light">Light</option>
 | ||
|         <option value="dark">Dark</option>
 | ||
|     </select>
 | ||
| </label> |</li>
 | ||
|             
 | ||
|       </ul>
 | ||
|     </div>  
 | ||
|     <div class="footer">
 | ||
|     © 
 | ||
|       <a href="../copyright.html">
 | ||
|     
 | ||
|     Copyright
 | ||
|     
 | ||
|       </a>
 | ||
|      2001-2025, Python Software Foundation.
 | ||
|     <br />
 | ||
|     This page is licensed under the Python Software Foundation License Version 2.
 | ||
|     <br />
 | ||
|     Examples, recipes, and other code in the documentation are additionally licensed under the Zero Clause BSD License.
 | ||
|     <br />
 | ||
|     
 | ||
|       See <a href="../license.html">History and License</a> for more information.<br />
 | ||
|     
 | ||
|     
 | ||
|     <br />
 | ||
| 
 | ||
|     The Python Software Foundation is a non-profit corporation.
 | ||
| <a href="https://www.python.org/psf/donations/">Please donate.</a>
 | ||
| <br />
 | ||
|     <br />
 | ||
|       Last updated on Apr 08, 2025 (14:33 UTC).
 | ||
|     
 | ||
|       <a href="../bugs.html">Found a bug</a>?
 | ||
|     
 | ||
|     <br />
 | ||
| 
 | ||
|     Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.2.3.
 | ||
|     </div>
 | ||
| 
 | ||
|   </body>
 | ||
| </html> |