mirror of
				https://github.com/bunny-lab-io/Borealis.git
				synced 2025-11-03 19:41:57 -07:00 
			
		
		
		
	
		
			
				
	
	
		
			1715 lines
		
	
	
		
			162 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			1715 lines
		
	
	
		
			162 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
<!DOCTYPE html>
 | 
						||
 | 
						||
<html lang="en" data-content_root="../">
 | 
						||
  <head>
 | 
						||
    <meta charset="utf-8" />
 | 
						||
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 | 
						||
<meta property="og:title" content="Regular Expression HOWTO" />
 | 
						||
<meta property="og:type" content="website" />
 | 
						||
<meta property="og:url" content="https://docs.python.org/3/howto/regex.html" />
 | 
						||
<meta property="og:site_name" content="Python documentation" />
 | 
						||
<meta property="og:description" content="Author: A.M. Kuchling &lt;amk@amk.ca&gt;. Abstract: This document is an introductory tutorial to using regular expressions in Python with the re module. It provides a gentler introduction than the corr..." />
 | 
						||
<meta property="og:image" content="https://docs.python.org/3/_static/og-image.png" />
 | 
						||
<meta property="og:image:alt" content="Python documentation" />
 | 
						||
<meta name="description" content="Author: A.M. Kuchling &lt;amk@amk.ca&gt;. Abstract: This document is an introductory tutorial to using regular expressions in Python with the re module. It provides a gentler introduction than the corr..." />
 | 
						||
<meta property="og:image:width" content="200">
 | 
						||
<meta property="og:image:height" content="200">
 | 
						||
<meta name="theme-color" content="#3776ab">
 | 
						||
 | 
						||
    <title>Regular Expression HOWTO — Python 3.13.3 documentation</title>
 | 
						||
    
 | 
						||
    <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" />
 | 
						||
    <link rel="stylesheet" type="text/css" href="../_static/pydoctheme.css?v=23252803" />
 | 
						||
    <link id="pygments_dark_css" media="(prefers-color-scheme: dark)" rel="stylesheet" type="text/css" href="../_static/pygments_dark.css?v=5349f25f" />
 | 
						||
    
 | 
						||
    <script src="../_static/documentation_options.js?v=5d57ca2d"></script>
 | 
						||
    <script src="../_static/doctools.js?v=9bcbadda"></script>
 | 
						||
    <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
 | 
						||
    
 | 
						||
    <script src="../_static/sidebar.js"></script>
 | 
						||
    
 | 
						||
    <link rel="search" type="application/opensearchdescription+xml"
 | 
						||
          title="Search within Python 3.13.3 documentation"
 | 
						||
          href="../_static/opensearch.xml"/>
 | 
						||
    <link rel="author" title="About these documents" href="../about.html" />
 | 
						||
    <link rel="index" title="Index" href="../genindex.html" />
 | 
						||
    <link rel="search" title="Search" href="../search.html" />
 | 
						||
    <link rel="copyright" title="Copyright" href="../copyright.html" />
 | 
						||
    <link rel="next" title="Socket Programming HOWTO" href="sockets.html" />
 | 
						||
    <link rel="prev" title="Logging Cookbook" href="logging-cookbook.html" />
 | 
						||
    
 | 
						||
    <link rel="canonical" href="https://docs.python.org/3/howto/regex.html">
 | 
						||
    
 | 
						||
      
 | 
						||
    
 | 
						||
 | 
						||
    
 | 
						||
    <style>
 | 
						||
      @media only screen {
 | 
						||
        table.full-width-table {
 | 
						||
            width: 100%;
 | 
						||
        }
 | 
						||
      }
 | 
						||
    </style>
 | 
						||
<link rel="stylesheet" href="../_static/pydoctheme_dark.css" media="(prefers-color-scheme: dark)" id="pydoctheme_dark_css">
 | 
						||
    <link rel="shortcut icon" type="image/svg+xml" href="../_static/py.svg" />
 | 
						||
            <script type="text/javascript" src="../_static/copybutton.js"></script>
 | 
						||
            <script type="text/javascript" src="../_static/menu.js"></script>
 | 
						||
            <script type="text/javascript" src="../_static/search-focus.js"></script>
 | 
						||
            <script type="text/javascript" src="../_static/themetoggle.js"></script> 
 | 
						||
            <script type="text/javascript" src="../_static/rtd_switcher.js"></script>
 | 
						||
            <meta name="readthedocs-addons-api-version" content="1">
 | 
						||
 | 
						||
  </head>
 | 
						||
<body>
 | 
						||
<div class="mobile-nav">
 | 
						||
    <input type="checkbox" id="menuToggler" class="toggler__input" aria-controls="navigation"
 | 
						||
           aria-pressed="false" aria-expanded="false" role="button" aria-label="Menu" />
 | 
						||
    <nav class="nav-content" role="navigation">
 | 
						||
        <label for="menuToggler" class="toggler__label">
 | 
						||
            <span></span>
 | 
						||
        </label>
 | 
						||
        <span class="nav-items-wrapper">
 | 
						||
            <a href="https://www.python.org/" class="nav-logo">
 | 
						||
                <img src="../_static/py.svg" alt="Python logo"/>
 | 
						||
            </a>
 | 
						||
            <span class="version_switcher_placeholder"></span>
 | 
						||
            <form role="search" class="search" action="../search.html" method="get">
 | 
						||
                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" class="search-icon">
 | 
						||
                    <path fill-rule="nonzero" fill="currentColor" d="M15.5 14h-.79l-.28-.27a6.5 6.5 0 001.48-5.34c-.47-2.78-2.79-5-5.59-5.34a6.505 6.505 0 00-7.27 7.27c.34 2.8 2.56 5.12 5.34 5.59a6.5 6.5 0 005.34-1.48l.27.28v.79l4.25 4.25c.41.41 1.08.41 1.49 0 .41-.41.41-1.08 0-1.49L15.5 14zm-6 0C7.01 14 5 11.99 5 9.5S7.01 5 9.5 5 14 7.01 14 9.5 11.99 14 9.5 14z"></path>
 | 
						||
                </svg>
 | 
						||
                <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" />
 | 
						||
                <input type="submit" value="Go"/>
 | 
						||
            </form>
 | 
						||
        </span>
 | 
						||
    </nav>
 | 
						||
    <div class="menu-wrapper">
 | 
						||
        <nav class="menu" role="navigation" aria-label="main navigation">
 | 
						||
            <div class="language_switcher_placeholder"></div>
 | 
						||
            
 | 
						||
<label class="theme-selector-label">
 | 
						||
    Theme
 | 
						||
    <select class="theme-selector" oninput="activateTheme(this.value)">
 | 
						||
        <option value="auto" selected>Auto</option>
 | 
						||
        <option value="light">Light</option>
 | 
						||
        <option value="dark">Dark</option>
 | 
						||
    </select>
 | 
						||
</label>
 | 
						||
  <div>
 | 
						||
    <h3><a href="../contents.html">Table of Contents</a></h3>
 | 
						||
    <ul>
 | 
						||
<li><a class="reference internal" href="#">Regular Expression HOWTO</a><ul>
 | 
						||
<li><a class="reference internal" href="#introduction">Introduction</a></li>
 | 
						||
<li><a class="reference internal" href="#simple-patterns">Simple Patterns</a><ul>
 | 
						||
<li><a class="reference internal" href="#matching-characters">Matching Characters</a></li>
 | 
						||
<li><a class="reference internal" href="#repeating-things">Repeating Things</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#using-regular-expressions">Using Regular Expressions</a><ul>
 | 
						||
<li><a class="reference internal" href="#compiling-regular-expressions">Compiling Regular Expressions</a></li>
 | 
						||
<li><a class="reference internal" href="#the-backslash-plague">The Backslash Plague</a></li>
 | 
						||
<li><a class="reference internal" href="#performing-matches">Performing Matches</a></li>
 | 
						||
<li><a class="reference internal" href="#module-level-functions">Module-Level Functions</a></li>
 | 
						||
<li><a class="reference internal" href="#compilation-flags">Compilation Flags</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#more-pattern-power">More Pattern Power</a><ul>
 | 
						||
<li><a class="reference internal" href="#more-metacharacters">More Metacharacters</a></li>
 | 
						||
<li><a class="reference internal" href="#grouping">Grouping</a></li>
 | 
						||
<li><a class="reference internal" href="#non-capturing-and-named-groups">Non-capturing and Named Groups</a></li>
 | 
						||
<li><a class="reference internal" href="#lookahead-assertions">Lookahead Assertions</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#modifying-strings">Modifying Strings</a><ul>
 | 
						||
<li><a class="reference internal" href="#splitting-strings">Splitting Strings</a></li>
 | 
						||
<li><a class="reference internal" href="#search-and-replace">Search and Replace</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#common-problems">Common Problems</a><ul>
 | 
						||
<li><a class="reference internal" href="#use-string-methods">Use String Methods</a></li>
 | 
						||
<li><a class="reference internal" href="#match-versus-search">match() versus search()</a></li>
 | 
						||
<li><a class="reference internal" href="#greedy-versus-non-greedy">Greedy versus Non-Greedy</a></li>
 | 
						||
<li><a class="reference internal" href="#using-re-verbose">Using re.VERBOSE</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#feedback">Feedback</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
</ul>
 | 
						||
 | 
						||
  </div>
 | 
						||
  <div>
 | 
						||
    <h4>Previous topic</h4>
 | 
						||
    <p class="topless"><a href="logging-cookbook.html"
 | 
						||
                          title="previous chapter">Logging Cookbook</a></p>
 | 
						||
  </div>
 | 
						||
  <div>
 | 
						||
    <h4>Next topic</h4>
 | 
						||
    <p class="topless"><a href="sockets.html"
 | 
						||
                          title="next chapter">Socket Programming HOWTO</a></p>
 | 
						||
  </div>
 | 
						||
  <div role="note" aria-label="source link">
 | 
						||
    <h3>This Page</h3>
 | 
						||
    <ul class="this-page-menu">
 | 
						||
      <li><a href="../bugs.html">Report a Bug</a></li>
 | 
						||
      <li>
 | 
						||
        <a href="https://github.com/python/cpython/blob/main/Doc/howto/regex.rst"
 | 
						||
            rel="nofollow">Show Source
 | 
						||
        </a>
 | 
						||
      </li>
 | 
						||
    </ul>
 | 
						||
  </div>
 | 
						||
        </nav>
 | 
						||
    </div>
 | 
						||
</div>
 | 
						||
 | 
						||
  
 | 
						||
    <div class="related" role="navigation" aria-label="Related">
 | 
						||
      <h3>Navigation</h3>
 | 
						||
      <ul>
 | 
						||
        <li class="right" style="margin-right: 10px">
 | 
						||
          <a href="../genindex.html" title="General Index"
 | 
						||
             accesskey="I">index</a></li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="../py-modindex.html" title="Python Module Index"
 | 
						||
             >modules</a> |</li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="sockets.html" title="Socket Programming HOWTO"
 | 
						||
             accesskey="N">next</a> |</li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="logging-cookbook.html" title="Logging Cookbook"
 | 
						||
             accesskey="P">previous</a> |</li>
 | 
						||
 | 
						||
          <li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
 | 
						||
          <li><a href="https://www.python.org/">Python</a> »</li>
 | 
						||
          <li class="switchers">
 | 
						||
            <div class="language_switcher_placeholder"></div>
 | 
						||
            <div class="version_switcher_placeholder"></div>
 | 
						||
          </li>
 | 
						||
          <li>
 | 
						||
              
 | 
						||
          </li>
 | 
						||
    <li id="cpython-language-and-version">
 | 
						||
      <a href="../index.html">3.13.3 Documentation</a> »
 | 
						||
    </li>
 | 
						||
 | 
						||
          <li class="nav-item nav-item-1"><a href="index.html" accesskey="U">Python HOWTOs</a> »</li>
 | 
						||
        <li class="nav-item nav-item-this"><a href="">Regular Expression HOWTO</a></li>
 | 
						||
                <li class="right">
 | 
						||
                    
 | 
						||
 | 
						||
    <div class="inline-search" role="search">
 | 
						||
        <form class="inline-search" action="../search.html" method="get">
 | 
						||
          <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
 | 
						||
          <input type="submit" value="Go" />
 | 
						||
        </form>
 | 
						||
    </div>
 | 
						||
                     |
 | 
						||
                </li>
 | 
						||
            <li class="right">
 | 
						||
<label class="theme-selector-label">
 | 
						||
    Theme
 | 
						||
    <select class="theme-selector" oninput="activateTheme(this.value)">
 | 
						||
        <option value="auto" selected>Auto</option>
 | 
						||
        <option value="light">Light</option>
 | 
						||
        <option value="dark">Dark</option>
 | 
						||
    </select>
 | 
						||
</label> |</li>
 | 
						||
            
 | 
						||
      </ul>
 | 
						||
    </div>    
 | 
						||
 | 
						||
    <div class="document">
 | 
						||
      <div class="documentwrapper">
 | 
						||
        <div class="bodywrapper">
 | 
						||
          <div class="body" role="main">
 | 
						||
            
 | 
						||
  <section id="regular-expression-howto">
 | 
						||
<span id="regex-howto"></span><h1>Regular Expression HOWTO<a class="headerlink" href="#regular-expression-howto" title="Link to this heading">¶</a></h1>
 | 
						||
<dl class="field-list simple">
 | 
						||
<dt class="field-odd">Author<span class="colon">:</span></dt>
 | 
						||
<dd class="field-odd"><p>A.M. Kuchling &lt;<a class="reference external" href="mailto:amk%40amk.ca">amk<span>@</span>amk<span>.</span>ca</a>&gt;</p>
 | 
						||
</dd>
 | 
						||
</dl>
 | 
						||
<aside class="topic">
 | 
						||
<p class="topic-title">Abstract</p>
 | 
						||
<p>This document is an introductory tutorial to using regular expressions in Python
 | 
						||
with the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module.  It provides a gentler introduction than the
 | 
						||
corresponding section in the Library Reference.</p>
 | 
						||
</aside>
 | 
						||
<section id="introduction">
 | 
						||
<h2>Introduction<a class="headerlink" href="#introduction" title="Link to this heading">¶</a></h2>
 | 
						||
<p>Regular expressions (called REs, or regexes, or regex patterns) are essentially
 | 
						||
a tiny, highly specialized programming language embedded inside Python and made
 | 
						||
available through the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module. Using this little language, you specify
 | 
						||
the rules for the set of possible strings that you want to match; this set might
 | 
						||
contain English sentences, or e-mail addresses, or TeX commands, or anything you
 | 
						||
like.  You can then ask questions such as “Does this string match the pattern?”,
 | 
						||
or “Is there a match for the pattern anywhere in this string?”.  You can also
 | 
						||
use REs to modify a string or to split it apart in various ways.</p>
 | 
						||
<p>Regular expression patterns are compiled into a series of bytecodes which are
 | 
						||
then executed by a matching engine written in C.  For advanced use, it may be
 | 
						||
necessary to pay careful attention to how the engine will execute a given RE,
 | 
						||
and write the RE in a certain way in order to produce bytecode that runs faster.
 | 
						||
Optimization isn’t covered in this document, because it requires that you have a
 | 
						||
good understanding of the matching engine’s internals.</p>
 | 
						||
<p>The regular expression language is relatively small and restricted, so not all
 | 
						||
possible string processing tasks can be done using regular expressions.  There
 | 
						||
are also tasks that <em>can</em> be done with regular expressions, but the expressions
 | 
						||
turn out to be very complicated.  In these cases, you may be better off writing
 | 
						||
Python code to do the processing; while Python code will be slower than an
 | 
						||
elaborate regular expression, it will also probably be more understandable.</p>
 | 
						||
</section>
 | 
						||
<section id="simple-patterns">
 | 
						||
<h2>Simple Patterns<a class="headerlink" href="#simple-patterns" title="Link to this heading">¶</a></h2>
 | 
						||
<p>We’ll start by learning about the simplest possible regular expressions.  Since
 | 
						||
regular expressions are used to operate on strings, we’ll begin with the most
 | 
						||
common task: matching characters.</p>
 | 
						||
<p>For a detailed explanation of the computer science underlying regular
 | 
						||
expressions (deterministic and non-deterministic finite automata), you can refer
 | 
						||
to almost any textbook on writing compilers.</p>
 | 
						||
<section id="matching-characters">
 | 
						||
<h3>Matching Characters<a class="headerlink" href="#matching-characters" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Most letters and characters will simply match themselves.  For example, the
 | 
						||
regular expression <code class="docutils literal notranslate"><span class="pre">test</span></code> will match the string <code class="docutils literal notranslate"><span class="pre">test</span></code> exactly.  (You can
 | 
						||
enable a case-insensitive mode that would let this RE match <code class="docutils literal notranslate"><span class="pre">Test</span></code> or <code class="docutils literal notranslate"><span class="pre">TEST</span></code>
 | 
						||
as well; more about this later.)</p>
 | 
						||
<p>There are exceptions to this rule; some characters are special
 | 
						||
<em class="dfn">metacharacters</em>, and don’t match themselves.  Instead, they signal that
 | 
						||
some out-of-the-ordinary thing should be matched, or they affect other portions
 | 
						||
of the RE by repeating them or changing their meaning.  Much of this document is
 | 
						||
devoted to discussing various metacharacters and what they do.</p>
 | 
						||
<p>Here’s a complete list of the metacharacters; their meanings will be discussed
 | 
						||
in the rest of this HOWTO.</p>
 | 
						||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>. ^ $ * + ? { } [ ] \ | ( )
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>The first metacharacters we’ll look at are <code class="docutils literal notranslate"><span class="pre">[</span></code> and <code class="docutils literal notranslate"><span class="pre">]</span></code>. They’re used for
 | 
						||
specifying a character class, which is a set of characters that you wish to
 | 
						||
match.  Characters can be listed individually, or a range of characters can be
 | 
						||
indicated by giving two characters and separating them by a <code class="docutils literal notranslate"><span class="pre">'-'</span></code>.  For
 | 
						||
example, <code class="docutils literal notranslate"><span class="pre">[abc]</span></code> will match any of the characters <code class="docutils literal notranslate"><span class="pre">a</span></code>, <code class="docutils literal notranslate"><span class="pre">b</span></code>, or <code class="docutils literal notranslate"><span class="pre">c</span></code>; this
 | 
						||
is the same as <code class="docutils literal notranslate"><span class="pre">[a-c]</span></code>, which uses a range to express the same set of
 | 
						||
characters.  If you wanted to match only lowercase letters, your RE would be
 | 
						||
<code class="docutils literal notranslate"><span class="pre">[a-z]</span></code>.</p>
 | 
						||
<p>Metacharacters (except <code class="docutils literal notranslate"><span class="pre">\</span></code>) are not active inside classes.  For example, <code class="docutils literal notranslate"><span class="pre">[akm$]</span></code> will
 | 
						||
match any of the characters <code class="docutils literal notranslate"><span class="pre">'a'</span></code>, <code class="docutils literal notranslate"><span class="pre">'k'</span></code>, <code class="docutils literal notranslate"><span class="pre">'m'</span></code>, or <code class="docutils literal notranslate"><span class="pre">'$'</span></code>; <code class="docutils literal notranslate"><span class="pre">'$'</span></code> is
 | 
						||
usually a metacharacter, but inside a character class it’s stripped of its
 | 
						||
special nature.</p>
 | 
						||
<p>You can match the characters not listed within the class by <em class="dfn">complementing</em>
 | 
						||
the set.  This is indicated by including a <code class="docutils literal notranslate"><span class="pre">'^'</span></code> as the first character of the
 | 
						||
class. For example, <code class="docutils literal notranslate"><span class="pre">[^5]</span></code> will match any character except <code class="docutils literal notranslate"><span class="pre">'5'</span></code>.  If the
 | 
						||
caret appears elsewhere in a character class, it does not have special meaning.
 | 
						||
For example: <code class="docutils literal notranslate"><span class="pre">[5^]</span></code> will match either a <code class="docutils literal notranslate"><span class="pre">'5'</span></code> or a <code class="docutils literal notranslate"><span class="pre">'^'</span></code>.</p>
 | 
						||
<p>Perhaps the most important metacharacter is the backslash, <code class="docutils literal notranslate"><span class="pre">\</span></code>.   As in Python
 | 
						||
string literals, the backslash can be followed by various characters to signal
 | 
						||
various special sequences.  It’s also used to escape all the metacharacters so
 | 
						||
you can still match them in patterns; for example, if you need to match a <code class="docutils literal notranslate"><span class="pre">[</span></code>
 | 
						||
or  <code class="docutils literal notranslate"><span class="pre">\</span></code>, you can precede them with a backslash to remove their special
 | 
						||
meaning: <code class="docutils literal notranslate"><span class="pre">\[</span></code> or <code class="docutils literal notranslate"><span class="pre">\\</span></code>.</p>
 | 
						||
<p>Some of the special sequences beginning with <code class="docutils literal notranslate"><span class="pre">'\'</span></code> represent
 | 
						||
predefined sets of characters that are often useful, such as the set
 | 
						||
of digits, the set of letters, or the set of anything that isn’t
 | 
						||
whitespace.</p>
 | 
						||
<p>Let’s take an example: <code class="docutils literal notranslate"><span class="pre">\w</span></code> matches any alphanumeric character.  If
 | 
						||
the regex pattern is expressed in bytes, this is equivalent to the
 | 
						||
class <code class="docutils literal notranslate"><span class="pre">[a-zA-Z0-9_]</span></code>.  If the regex pattern is a string, <code class="docutils literal notranslate"><span class="pre">\w</span></code> will
 | 
						||
match all the characters marked as letters in the Unicode database
 | 
						||
provided by the <a class="reference internal" href="../library/unicodedata.html#module-unicodedata" title="unicodedata: Access the Unicode Database."><code class="xref py py-mod docutils literal notranslate"><span class="pre">unicodedata</span></code></a> module.  You can use the more
 | 
						||
restricted definition of <code class="docutils literal notranslate"><span class="pre">\w</span></code> in a string pattern by supplying the
 | 
						||
<a class="reference internal" href="../library/re.html#re.ASCII" title="re.ASCII"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.ASCII</span></code></a> flag when compiling the regular expression.</p>
 | 
						||
<p>The following list of special sequences isn’t complete. For a complete
 | 
						||
list of sequences and expanded class definitions for Unicode string
 | 
						||
patterns, see the last part of <a class="reference internal" href="../library/re.html#re-syntax"><span class="std std-ref">Regular Expression Syntax</span></a> in the Standard Library reference.  In general, the
 | 
						||
Unicode versions match any character that’s in the appropriate
 | 
						||
category in the Unicode database.</p>
 | 
						||
<dl class="simple">
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\d</span></code></dt><dd><p>Matches any decimal digit; this is equivalent to the class <code class="docutils literal notranslate"><span class="pre">[0-9]</span></code>.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\D</span></code></dt><dd><p>Matches any non-digit character; this is equivalent to the class <code class="docutils literal notranslate"><span class="pre">[^0-9]</span></code>.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\s</span></code></dt><dd><p>Matches any whitespace character; this is equivalent to the class <code class="docutils literal notranslate"><span class="pre">[</span>
 | 
						||
<span class="pre">\t\n\r\f\v]</span></code>.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\S</span></code></dt><dd><p>Matches any non-whitespace character; this is equivalent to the class <code class="docutils literal notranslate"><span class="pre">[^</span>
 | 
						||
<span class="pre">\t\n\r\f\v]</span></code>.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\w</span></code></dt><dd><p>Matches any alphanumeric character; this is equivalent to the class
 | 
						||
<code class="docutils literal notranslate"><span class="pre">[a-zA-Z0-9_]</span></code>.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\W</span></code></dt><dd><p>Matches any non-alphanumeric character; this is equivalent to the class
 | 
						||
<code class="docutils literal notranslate"><span class="pre">[^a-zA-Z0-9_]</span></code>.</p>
 | 
						||
</dd>
 | 
						||
</dl>
 | 
						||
<p>These sequences can be included inside a character class.  For example,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">[\s,.]</span></code> is a character class that will match any whitespace character, or
 | 
						||
<code class="docutils literal notranslate"><span class="pre">','</span></code> or <code class="docutils literal notranslate"><span class="pre">'.'</span></code>.</p>
 | 
						||
<p>The final metacharacter in this section is <code class="docutils literal notranslate"><span class="pre">.</span></code>.  It matches anything except a
 | 
						||
newline character, and there’s an alternate mode (<a class="reference internal" href="../library/re.html#re.DOTALL" title="re.DOTALL"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.DOTALL</span></code></a>) where it will
 | 
						||
match even a newline.  <code class="docutils literal notranslate"><span class="pre">.</span></code> is often used where you want to match “any
 | 
						||
character”.</p>
 | 
						||
</section>
 | 
						||
<section id="repeating-things">
 | 
						||
<h3>Repeating Things<a class="headerlink" href="#repeating-things" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Being able to match varying sets of characters is the first thing regular
 | 
						||
expressions can do that isn’t already possible with the methods available on
 | 
						||
strings.  However, if that was the only additional capability of regexes, they
 | 
						||
wouldn’t be much of an advance. Another capability is that you can specify that
 | 
						||
portions of the RE must be repeated a certain number of times.</p>
 | 
						||
<p>The first metacharacter for repeating things that we’ll look at is <code class="docutils literal notranslate"><span class="pre">*</span></code>.  <code class="docutils literal notranslate"><span class="pre">*</span></code>
 | 
						||
doesn’t match the literal character <code class="docutils literal notranslate"><span class="pre">'*'</span></code>; instead, it specifies that the
 | 
						||
previous character can be matched zero or more times, instead of exactly once.</p>
 | 
						||
<p>For example, <code class="docutils literal notranslate"><span class="pre">ca*t</span></code> will match <code class="docutils literal notranslate"><span class="pre">'ct'</span></code> (0 <code class="docutils literal notranslate"><span class="pre">'a'</span></code> characters), <code class="docutils literal notranslate"><span class="pre">'cat'</span></code> (1 <code class="docutils literal notranslate"><span class="pre">'a'</span></code>),
 | 
						||
<code class="docutils literal notranslate"><span class="pre">'caaat'</span></code> (3 <code class="docutils literal notranslate"><span class="pre">'a'</span></code> characters), and so forth.</p>
 | 
						||
<p>Repetitions such as <code class="docutils literal notranslate"><span class="pre">*</span></code> are <em class="dfn">greedy</em>; when repeating a RE, the matching
 | 
						||
engine will try to repeat it as many times as possible. If later portions of the
 | 
						||
pattern don’t match, the matching engine will then back up and try again with
 | 
						||
fewer repetitions.</p>
 | 
						||
<p>A step-by-step example will make this more obvious.  Let’s consider the
 | 
						||
expression <code class="docutils literal notranslate"><span class="pre">a[bcd]*b</span></code>.  This matches the letter <code class="docutils literal notranslate"><span class="pre">'a'</span></code>, zero or more letters
 | 
						||
from the class <code class="docutils literal notranslate"><span class="pre">[bcd]</span></code>, and finally ends with a <code class="docutils literal notranslate"><span class="pre">'b'</span></code>.  Now imagine matching
 | 
						||
this RE against the string <code class="docutils literal notranslate"><span class="pre">'abcbd'</span></code>.</p>
 | 
						||
<table class="docutils align-default">
 | 
						||
<thead>
 | 
						||
<tr class="row-odd"><th class="head"><p>Step</p></th>
 | 
						||
<th class="head"><p>Matched</p></th>
 | 
						||
<th class="head"><p>Explanation</p></th>
 | 
						||
</tr>
 | 
						||
</thead>
 | 
						||
<tbody>
 | 
						||
<tr class="row-even"><td><p>1</p></td>
 | 
						||
<td><p><code class="docutils literal notranslate"><span class="pre">a</span></code></p></td>
 | 
						||
<td><p>The <code class="docutils literal notranslate"><span class="pre">a</span></code> in the RE matches.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p>2</p></td>
 | 
						||
<td><p><code class="docutils literal notranslate"><span class="pre">abcbd</span></code></p></td>
 | 
						||
<td><p>The engine matches <code class="docutils literal notranslate"><span class="pre">[bcd]*</span></code>,
 | 
						||
going as far as it can, which
 | 
						||
is to the end of the string.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p>3</p></td>
 | 
						||
<td><p><em>Failure</em></p></td>
 | 
						||
<td><p>The engine tries to match
 | 
						||
<code class="docutils literal notranslate"><span class="pre">b</span></code>, but the current position
 | 
						||
is at the end of the string, so
 | 
						||
it fails.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p>4</p></td>
 | 
						||
<td><p><code class="docutils literal notranslate"><span class="pre">abcb</span></code></p></td>
 | 
						||
<td><p>Back up, so that <code class="docutils literal notranslate"><span class="pre">[bcd]*</span></code>
 | 
						||
matches one less character.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p>5</p></td>
 | 
						||
<td><p><em>Failure</em></p></td>
 | 
						||
<td><p>Try <code class="docutils literal notranslate"><span class="pre">b</span></code> again, but the
 | 
						||
current position is at the last
 | 
						||
character, which is a <code class="docutils literal notranslate"><span class="pre">'d'</span></code>.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p>6</p></td>
 | 
						||
<td><p><code class="docutils literal notranslate"><span class="pre">abc</span></code></p></td>
 | 
						||
<td><p>Back up again, so that
 | 
						||
<code class="docutils literal notranslate"><span class="pre">[bcd]*</span></code> is only matching
 | 
						||
<code class="docutils literal notranslate"><span class="pre">bc</span></code>.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p>7</p></td>
 | 
						||
<td><p><code class="docutils literal notranslate"><span class="pre">abcb</span></code></p></td>
 | 
						||
<td><p>Try <code class="docutils literal notranslate"><span class="pre">b</span></code> again.  This time
 | 
						||
the character at the
 | 
						||
current position is <code class="docutils literal notranslate"><span class="pre">'b'</span></code>, so
 | 
						||
it succeeds.</p></td>
 | 
						||
</tr>
 | 
						||
</tbody>
 | 
						||
</table>
 | 
						||
<p>The end of the RE has now been reached, and it has matched <code class="docutils literal notranslate"><span class="pre">'abcb'</span></code>.  This
 | 
						||
demonstrates how the matching engine goes as far as it can at first, and if no
 | 
						||
match is found it will then progressively back up and retry the rest of the RE
 | 
						||
again and again.  It will back up until it has tried zero matches for
 | 
						||
<code class="docutils literal notranslate"><span class="pre">[bcd]*</span></code>, and if that subsequently fails, the engine will conclude that the
 | 
						||
string doesn’t match the RE at all.</p>
 | 
						||
<p>Another repeating metacharacter is <code class="docutils literal notranslate"><span class="pre">+</span></code>, which matches one or more times.  Pay
 | 
						||
careful attention to the difference between <code class="docutils literal notranslate"><span class="pre">*</span></code> and <code class="docutils literal notranslate"><span class="pre">+</span></code>; <code class="docutils literal notranslate"><span class="pre">*</span></code> matches
 | 
						||
<em>zero</em> or more times, so whatever’s being repeated may not be present at all,
 | 
						||
while <code class="docutils literal notranslate"><span class="pre">+</span></code> requires at least <em>one</em> occurrence.  To use a similar example,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">ca+t</span></code> will match <code class="docutils literal notranslate"><span class="pre">'cat'</span></code> (1 <code class="docutils literal notranslate"><span class="pre">'a'</span></code>), <code class="docutils literal notranslate"><span class="pre">'caaat'</span></code> (3 <code class="docutils literal notranslate"><span class="pre">'a'</span></code>s), but won’t
 | 
						||
match <code class="docutils literal notranslate"><span class="pre">'ct'</span></code>.</p>
 | 
						||
<p>There are two more repeating operators or quantifiers.  The question mark character, <code class="docutils literal notranslate"><span class="pre">?</span></code>,
 | 
						||
matches either once or zero times; you can think of it as marking something as
 | 
						||
being optional.  For example, <code class="docutils literal notranslate"><span class="pre">home-?brew</span></code> matches either <code class="docutils literal notranslate"><span class="pre">'homebrew'</span></code> or
 | 
						||
<code class="docutils literal notranslate"><span class="pre">'home-brew'</span></code>.</p>
 | 
						||
<p>The most complicated quantifier is <code class="docutils literal notranslate"><span class="pre">{m,n}</span></code>, where <em>m</em> and <em>n</em> are
 | 
						||
decimal integers.  This quantifier means there must be at least <em>m</em> repetitions,
 | 
						||
and at most <em>n</em>.  For example, <code class="docutils literal notranslate"><span class="pre">a/{1,3}b</span></code> will match <code class="docutils literal notranslate"><span class="pre">'a/b'</span></code>, <code class="docutils literal notranslate"><span class="pre">'a//b'</span></code>, and
 | 
						||
<code class="docutils literal notranslate"><span class="pre">'a///b'</span></code>.  It won’t match <code class="docutils literal notranslate"><span class="pre">'ab'</span></code>, which has no slashes, or <code class="docutils literal notranslate"><span class="pre">'a////b'</span></code>, which
 | 
						||
has four.</p>
 | 
						||
<p>You can omit either <em>m</em> or <em>n</em>; in that case, a reasonable value is assumed for
 | 
						||
the missing value.  Omitting <em>m</em> is interpreted as a lower limit of 0, while
 | 
						||
omitting <em>n</em> results in an upper bound of infinity.</p>
 | 
						||
<p>The simplest case <code class="docutils literal notranslate"><span class="pre">{m}</span></code> matches the preceding item exactly <em>m</em> times.
 | 
						||
For example, <code class="docutils literal notranslate"><span class="pre">a/{2}b</span></code> will only match <code class="docutils literal notranslate"><span class="pre">'a//b'</span></code>.</p>
 | 
						||
<p>Readers of a reductionist bent may notice that the three other quantifiers can
 | 
						||
all be expressed using this notation.  <code class="docutils literal notranslate"><span class="pre">{0,}</span></code> is the same as <code class="docutils literal notranslate"><span class="pre">*</span></code>, <code class="docutils literal notranslate"><span class="pre">{1,}</span></code>
 | 
						||
is equivalent to <code class="docutils literal notranslate"><span class="pre">+</span></code>, and <code class="docutils literal notranslate"><span class="pre">{0,1}</span></code> is the same as <code class="docutils literal notranslate"><span class="pre">?</span></code>.  It’s better to use
 | 
						||
<code class="docutils literal notranslate"><span class="pre">*</span></code>, <code class="docutils literal notranslate"><span class="pre">+</span></code>, or <code class="docutils literal notranslate"><span class="pre">?</span></code> when you can, simply because they’re shorter and easier
 | 
						||
to read.</p>
 | 
						||
</section>
 | 
						||
</section>
 | 
						||
<section id="using-regular-expressions">
 | 
						||
<h2>Using Regular Expressions<a class="headerlink" href="#using-regular-expressions" title="Link to this heading">¶</a></h2>
 | 
						||
<p>Now that we’ve looked at some simple regular expressions, how do we actually use
 | 
						||
them in Python?  The <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module provides an interface to the regular
 | 
						||
expression engine, allowing you to compile REs into objects and then perform
 | 
						||
matches with them.</p>
 | 
						||
<section id="compiling-regular-expressions">
 | 
						||
<h3>Compiling Regular Expressions<a class="headerlink" href="#compiling-regular-expressions" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Regular expressions are compiled into pattern objects, which have
 | 
						||
methods for various operations such as searching for pattern matches or
 | 
						||
performing string substitutions.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'ab*'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span>
 | 
						||
<span class="go">re.compile('ab*')</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p><a class="reference internal" href="../library/re.html#re.compile" title="re.compile"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.compile()</span></code></a> also accepts an optional <em>flags</em> argument, used to enable
 | 
						||
various special features and syntax variations.  We’ll go over the available
 | 
						||
settings later, but for now a single example will do:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'ab*'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">IGNORECASE</span><span class="p">)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>The RE is passed to <a class="reference internal" href="../library/re.html#re.compile" title="re.compile"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.compile()</span></code></a> as a string.  REs are handled as strings
 | 
						||
because regular expressions aren’t part of the core Python language, and no
 | 
						||
special syntax was created for expressing them.  (There are applications that
 | 
						||
don’t need REs at all, so there’s no need to bloat the language specification by
 | 
						||
including them.) Instead, the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module is simply a C extension module
 | 
						||
included with Python, just like the <a class="reference internal" href="../library/socket.html#module-socket" title="socket: Low-level networking interface."><code class="xref py py-mod docutils literal notranslate"><span class="pre">socket</span></code></a> or <a class="reference internal" href="../library/zlib.html#module-zlib" title="zlib: Low-level interface to compression and decompression routines compatible with gzip."><code class="xref py py-mod docutils literal notranslate"><span class="pre">zlib</span></code></a> modules.</p>
 | 
						||
<p>Putting REs in strings keeps the Python language simpler, but has one
 | 
						||
disadvantage which is the topic of the next section.</p>
 | 
						||
</section>
 | 
						||
<section id="the-backslash-plague">
 | 
						||
<span id="id1"></span><h3>The Backslash Plague<a class="headerlink" href="#the-backslash-plague" title="Link to this heading">¶</a></h3>
 | 
						||
<p>As stated earlier, regular expressions use the backslash character (<code class="docutils literal notranslate"><span class="pre">'\'</span></code>) to
 | 
						||
indicate special forms or to allow special characters to be used without
 | 
						||
invoking their special meaning. This conflicts with Python’s usage of the same
 | 
						||
character for the same purpose in string literals.</p>
 | 
						||
<p>Let’s say you want to write a RE that matches the string <code class="docutils literal notranslate"><span class="pre">\section</span></code>, which
 | 
						||
might be found in a LaTeX file.  To figure out what to write in the program
 | 
						||
code, start with the desired string to be matched.  Next, you must escape any
 | 
						||
backslashes and other metacharacters by preceding them with a backslash,
 | 
						||
resulting in the string <code class="docutils literal notranslate"><span class="pre">\\section</span></code>.  The string that must be passed
 | 
						||
to <a class="reference internal" href="../library/re.html#re.compile" title="re.compile"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.compile()</span></code></a> must be <code class="docutils literal notranslate"><span class="pre">\\section</span></code>.  However, to express this as a
 | 
						||
Python string literal, both backslashes must be escaped <em>again</em>.</p>
 | 
						||
<table class="docutils align-default">
 | 
						||
<thead>
 | 
						||
<tr class="row-odd"><th class="head"><p>Characters</p></th>
 | 
						||
<th class="head"><p>Stage</p></th>
 | 
						||
</tr>
 | 
						||
</thead>
 | 
						||
<tbody>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">\section</span></code></p></td>
 | 
						||
<td><p>Text string to be matched</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">\\section</span></code></p></td>
 | 
						||
<td><p>Escaped backslash for <a class="reference internal" href="../library/re.html#re.compile" title="re.compile"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.compile()</span></code></a></p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">"\\\\section"</span></code></p></td>
 | 
						||
<td><p>Escaped backslashes for a string literal</p></td>
 | 
						||
</tr>
 | 
						||
</tbody>
 | 
						||
</table>
 | 
						||
<p>In short, to match a literal backslash, one has to write <code class="docutils literal notranslate"><span class="pre">'\\\\'</span></code> as the RE
 | 
						||
string, because the regular expression must be <code class="docutils literal notranslate"><span class="pre">\\</span></code>, and each backslash must
 | 
						||
be expressed as <code class="docutils literal notranslate"><span class="pre">\\</span></code> inside a regular Python string literal.  In REs that
 | 
						||
feature backslashes repeatedly, this leads to lots of repeated backslashes and
 | 
						||
makes the resulting strings difficult to understand.</p>
 | 
						||
<p>The solution is to use Python’s raw string notation for regular expressions;
 | 
						||
backslashes are not handled in any special way in a string literal prefixed with
 | 
						||
<code class="docutils literal notranslate"><span class="pre">'r'</span></code>, so <code class="docutils literal notranslate"><span class="pre">r"\n"</span></code> is a two-character string containing <code class="docutils literal notranslate"><span class="pre">'\'</span></code> and <code class="docutils literal notranslate"><span class="pre">'n'</span></code>,
 | 
						||
while <code class="docutils literal notranslate"><span class="pre">"\n"</span></code> is a one-character string containing a newline. Regular
 | 
						||
expressions will often be written in Python code using this raw string notation.</p>
 | 
						||
<p>In addition, special escape sequences that are valid in regular expressions,
 | 
						||
but not valid as Python string literals, now result in a
 | 
						||
<a class="reference internal" href="../library/exceptions.html#DeprecationWarning" title="DeprecationWarning"><code class="xref py py-exc docutils literal notranslate"><span class="pre">DeprecationWarning</span></code></a> and will eventually become a <a class="reference internal" href="../library/exceptions.html#SyntaxError" title="SyntaxError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">SyntaxError</span></code></a>,
 | 
						||
which means the sequences will be invalid if raw string notation or escaping
 | 
						||
the backslashes isn’t used.</p>
 | 
						||
<table class="docutils align-default">
 | 
						||
<thead>
 | 
						||
<tr class="row-odd"><th class="head"><p>Regular String</p></th>
 | 
						||
<th class="head"><p>Raw string</p></th>
 | 
						||
</tr>
 | 
						||
</thead>
 | 
						||
<tbody>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">"ab*"</span></code></p></td>
 | 
						||
<td><p><code class="docutils literal notranslate"><span class="pre">r"ab*"</span></code></p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">"\\\\section"</span></code></p></td>
 | 
						||
<td><p><code class="docutils literal notranslate"><span class="pre">r"\\section"</span></code></p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">"\\w+\\s+\\1"</span></code></p></td>
 | 
						||
<td><p><code class="docutils literal notranslate"><span class="pre">r"\w+\s+\1"</span></code></p></td>
 | 
						||
</tr>
 | 
						||
</tbody>
 | 
						||
</table>
 | 
						||
</section>
 | 
						||
<section id="performing-matches">
 | 
						||
<h3>Performing Matches<a class="headerlink" href="#performing-matches" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Once you have an object representing a compiled regular expression, what do you
 | 
						||
do with it?  Pattern objects have several methods and attributes.
 | 
						||
Only the most significant ones will be covered here; consult the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> docs
 | 
						||
for a complete listing.</p>
 | 
						||
<table class="docutils align-default">
 | 
						||
<thead>
 | 
						||
<tr class="row-odd"><th class="head"><p>Method/Attribute</p></th>
 | 
						||
<th class="head"><p>Purpose</p></th>
 | 
						||
</tr>
 | 
						||
</thead>
 | 
						||
<tbody>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">match()</span></code></p></td>
 | 
						||
<td><p>Determine if the RE matches at the beginning
 | 
						||
of the string.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">search()</span></code></p></td>
 | 
						||
<td><p>Scan through a string, looking for any
 | 
						||
location where this RE matches.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">findall()</span></code></p></td>
 | 
						||
<td><p>Find all substrings where the RE matches, and
 | 
						||
return them as a list.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">finditer()</span></code></p></td>
 | 
						||
<td><p>Find all substrings where the RE matches, and
 | 
						||
return them as an <a class="reference internal" href="../glossary.html#term-iterator"><span class="xref std std-term">iterator</span></a>.</p></td>
 | 
						||
</tr>
 | 
						||
</tbody>
 | 
						||
</table>
 | 
						||
<p><a class="reference internal" href="../library/re.html#re.Pattern.match" title="re.Pattern.match"><code class="xref py py-meth docutils literal notranslate"><span class="pre">match()</span></code></a> and <a class="reference internal" href="../library/re.html#re.Pattern.search" title="re.Pattern.search"><code class="xref py py-meth docutils literal notranslate"><span class="pre">search()</span></code></a> return <code class="docutils literal notranslate"><span class="pre">None</span></code> if no match can be found.  If
 | 
						||
they’re successful, a <a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a> instance is returned,
 | 
						||
containing information about the match: where it starts and ends, the substring
 | 
						||
it matched, and more.</p>
 | 
						||
<p>You can learn about this by interactively experimenting with the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a>
 | 
						||
module.</p>
 | 
						||
<p>This HOWTO uses the standard Python interpreter for its examples. First, run the
 | 
						||
Python interpreter, import the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module, and compile a RE:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'[a-z]+'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span>
 | 
						||
<span class="go">re.compile('[a-z]+')</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Now, you can try matching various strings against the RE <code class="docutils literal notranslate"><span class="pre">[a-z]+</span></code>.  An empty
 | 
						||
string shouldn’t match at all, since <code class="docutils literal notranslate"><span class="pre">+</span></code> means ‘one or more repetitions’.
 | 
						||
<a class="reference internal" href="../library/re.html#re.Pattern.match" title="re.Pattern.match"><code class="xref py py-meth docutils literal notranslate"><span class="pre">match()</span></code></a> should return <code class="docutils literal notranslate"><span class="pre">None</span></code> in this case, which will cause the
 | 
						||
interpreter to print no output.  You can explicitly print the result of
 | 
						||
<code class="xref py py-meth docutils literal notranslate"><span class="pre">match()</span></code> to make this clear.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s2">""</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s2">""</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Now, let’s try it on a string that it should match, such as <code class="docutils literal notranslate"><span class="pre">tempo</span></code>.  In this
 | 
						||
case, <a class="reference internal" href="../library/re.html#re.Pattern.match" title="re.Pattern.match"><code class="xref py py-meth docutils literal notranslate"><span class="pre">match()</span></code></a> will return a <a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a>, so you
 | 
						||
should store the result in a variable for later use.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'tempo'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span>
 | 
						||
<span class="go">&lt;re.Match object; span=(0, 5), match='tempo'&gt;</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Now you can query the <a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a> for information
 | 
						||
about the matching string.  Match object instances
 | 
						||
also have several methods and attributes; the most important ones are:</p>
 | 
						||
<table class="docutils align-default">
 | 
						||
<thead>
 | 
						||
<tr class="row-odd"><th class="head"><p>Method/Attribute</p></th>
 | 
						||
<th class="head"><p>Purpose</p></th>
 | 
						||
</tr>
 | 
						||
</thead>
 | 
						||
<tbody>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">group()</span></code></p></td>
 | 
						||
<td><p>Return the string matched by the RE</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">start()</span></code></p></td>
 | 
						||
<td><p>Return the starting position of the match</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">end()</span></code></p></td>
 | 
						||
<td><p>Return the ending position of the match</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">span()</span></code></p></td>
 | 
						||
<td><p>Return a tuple containing the (start, end)
 | 
						||
positions of the match</p></td>
 | 
						||
</tr>
 | 
						||
</tbody>
 | 
						||
</table>
 | 
						||
<p>Trying these methods will soon clarify their meaning:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">()</span>
 | 
						||
<span class="go">'tempo'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">start</span><span class="p">(),</span> <span class="n">m</span><span class="o">.</span><span class="n">end</span><span class="p">()</span>
 | 
						||
<span class="go">(0, 5)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">span</span><span class="p">()</span>
 | 
						||
<span class="go">(0, 5)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p><a class="reference internal" href="../library/re.html#re.Match.group" title="re.Match.group"><code class="xref py py-meth docutils literal notranslate"><span class="pre">group()</span></code></a> returns the substring that was matched by the RE.  <a class="reference internal" href="../library/re.html#re.Match.start" title="re.Match.start"><code class="xref py py-meth docutils literal notranslate"><span class="pre">start()</span></code></a>
 | 
						||
and <a class="reference internal" href="../library/re.html#re.Match.end" title="re.Match.end"><code class="xref py py-meth docutils literal notranslate"><span class="pre">end()</span></code></a> return the starting and ending index of the match. <a class="reference internal" href="../library/re.html#re.Match.span" title="re.Match.span"><code class="xref py py-meth docutils literal notranslate"><span class="pre">span()</span></code></a>
 | 
						||
returns both start and end indexes in a single tuple.  Since the <a class="reference internal" href="../library/re.html#re.Pattern.match" title="re.Pattern.match"><code class="xref py py-meth docutils literal notranslate"><span class="pre">match()</span></code></a>
 | 
						||
method only checks if the RE matches at the start of a string, <code class="xref py py-meth docutils literal notranslate"><span class="pre">start()</span></code>
 | 
						||
will always be zero.  However, the <a class="reference internal" href="../library/re.html#re.Pattern.search" title="re.Pattern.search"><code class="xref py py-meth docutils literal notranslate"><span class="pre">search()</span></code></a> method of patterns
 | 
						||
scans through the string, so the match may not start at zero in that
 | 
						||
case.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'::: message'</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'::: message'</span><span class="p">);</span> <span class="nb">print</span><span class="p">(</span><span class="n">m</span><span class="p">)</span>
 | 
						||
<span class="go">&lt;re.Match object; span=(4, 11), match='message'&gt;</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">()</span>
 | 
						||
<span class="go">'message'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">span</span><span class="p">()</span>
 | 
						||
<span class="go">(4, 11)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>In actual programs, the most common style is to store the
 | 
						||
<a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a> in a variable, and then check if it was
 | 
						||
<code class="docutils literal notranslate"><span class="pre">None</span></code>.  This usually looks like:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span> <span class="o">...</span> <span class="p">)</span>
 | 
						||
<span class="n">m</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">match</span><span class="p">(</span> <span class="s1">'string goes here'</span> <span class="p">)</span>
 | 
						||
<span class="k">if</span> <span class="n">m</span><span class="p">:</span>
 | 
						||
    <span class="nb">print</span><span class="p">(</span><span class="s1">'Match found: '</span><span class="p">,</span> <span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">())</span>
 | 
						||
<span class="k">else</span><span class="p">:</span>
 | 
						||
    <span class="nb">print</span><span class="p">(</span><span class="s1">'No match'</span><span class="p">)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Two pattern methods return all of the matches for a pattern.
 | 
						||
<a class="reference internal" href="../library/re.html#re.Pattern.findall" title="re.Pattern.findall"><code class="xref py py-meth docutils literal notranslate"><span class="pre">findall()</span></code></a> returns a list of matching strings:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\d+'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="s1">'12 drummers drumming, 11 pipers piping, 10 lords a-leaping'</span><span class="p">)</span>
 | 
						||
<span class="go">['12', '11', '10']</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>The <code class="docutils literal notranslate"><span class="pre">r</span></code> prefix, making the literal a raw string literal, is needed in this
 | 
						||
example because escape sequences in a normal “cooked” string literal that are
 | 
						||
not recognized by Python, as opposed to regular expressions, now result in a
 | 
						||
<a class="reference internal" href="../library/exceptions.html#DeprecationWarning" title="DeprecationWarning"><code class="xref py py-exc docutils literal notranslate"><span class="pre">DeprecationWarning</span></code></a> and will eventually become a <a class="reference internal" href="../library/exceptions.html#SyntaxError" title="SyntaxError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">SyntaxError</span></code></a>.  See
 | 
						||
<a class="reference internal" href="#the-backslash-plague"><span class="std std-ref">The Backslash Plague</span></a>.</p>
 | 
						||
<p><a class="reference internal" href="../library/re.html#re.Pattern.findall" title="re.Pattern.findall"><code class="xref py py-meth docutils literal notranslate"><span class="pre">findall()</span></code></a> has to create the entire list before it can be returned as the
 | 
						||
result.  The <a class="reference internal" href="../library/re.html#re.Pattern.finditer" title="re.Pattern.finditer"><code class="xref py py-meth docutils literal notranslate"><span class="pre">finditer()</span></code></a> method returns a sequence of
 | 
						||
<a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a> instances as an <a class="reference internal" href="../glossary.html#term-iterator"><span class="xref std std-term">iterator</span></a>:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">iterator</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">finditer</span><span class="p">(</span><span class="s1">'12 drummers drumming, 11 ... 10 ...'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">iterator</span>
 | 
						||
<span class="go">&lt;callable_iterator object at 0x...&gt;</span>
 | 
						||
<span class="gp">>>> </span><span class="k">for</span> <span class="n">match</span> <span class="ow">in</span> <span class="n">iterator</span><span class="p">:</span>
 | 
						||
<span class="gp">... </span>    <span class="nb">print</span><span class="p">(</span><span class="n">match</span><span class="o">.</span><span class="n">span</span><span class="p">())</span>
 | 
						||
<span class="gp">...</span>
 | 
						||
<span class="go">(0, 2)</span>
 | 
						||
<span class="go">(22, 24)</span>
 | 
						||
<span class="go">(29, 31)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
</section>
 | 
						||
<section id="module-level-functions">
 | 
						||
<h3>Module-Level Functions<a class="headerlink" href="#module-level-functions" title="Link to this heading">¶</a></h3>
 | 
						||
<p>You don’t have to create a pattern object and call its methods; the
 | 
						||
<a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module also provides top-level functions called <a class="reference internal" href="../library/re.html#re.match" title="re.match"><code class="xref py py-func docutils literal notranslate"><span class="pre">match()</span></code></a>,
 | 
						||
<a class="reference internal" href="../library/re.html#re.search" title="re.search"><code class="xref py py-func docutils literal notranslate"><span class="pre">search()</span></code></a>, <a class="reference internal" href="../library/re.html#re.findall" title="re.findall"><code class="xref py py-func docutils literal notranslate"><span class="pre">findall()</span></code></a>, <a class="reference internal" href="../library/re.html#re.sub" title="re.sub"><code class="xref py py-func docutils literal notranslate"><span class="pre">sub()</span></code></a>, and so forth.  These functions
 | 
						||
take the same arguments as the corresponding pattern method with
 | 
						||
the RE string added as the first argument, and still return either <code class="docutils literal notranslate"><span class="pre">None</span></code> or a
 | 
						||
<a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a> instance.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s1">'From\s+'</span><span class="p">,</span> <span class="s1">'Fromage amk'</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
<span class="gp">>>> </span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s1">'From\s+'</span><span class="p">,</span> <span class="s1">'From amk Thu May 14 19:12:10 1998'</span><span class="p">)</span>
 | 
						||
<span class="go">&lt;re.Match object; span=(0, 5), match='From '&gt;</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Under the hood, these functions simply create a pattern object for you
 | 
						||
and call the appropriate method on it.  They also store the compiled
 | 
						||
object in a cache, so future calls using the same RE won’t need to
 | 
						||
parse the pattern again and again.</p>
 | 
						||
<p>Should you use these module-level functions, or should you get the
 | 
						||
pattern and call its methods yourself?  If you’re accessing a regex
 | 
						||
within a loop, pre-compiling it will save a few function calls.
 | 
						||
Outside of loops, there’s not much difference thanks to the internal
 | 
						||
cache.</p>
 | 
						||
</section>
 | 
						||
<section id="compilation-flags">
 | 
						||
<h3>Compilation Flags<a class="headerlink" href="#compilation-flags" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Compilation flags let you modify some aspects of how regular expressions work.
 | 
						||
Flags are available in the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module under two names, a long name such as
 | 
						||
<a class="reference internal" href="../library/re.html#re.IGNORECASE" title="re.IGNORECASE"><code class="xref py py-const docutils literal notranslate"><span class="pre">IGNORECASE</span></code></a> and a short, one-letter form such as <a class="reference internal" href="../library/re.html#re.I" title="re.I"><code class="xref py py-const docutils literal notranslate"><span class="pre">I</span></code></a>.  (If you’re
 | 
						||
familiar with Perl’s pattern modifiers, the one-letter forms use the same
 | 
						||
letters; the short form of <a class="reference internal" href="../library/re.html#re.VERBOSE" title="re.VERBOSE"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.VERBOSE</span></code></a> is <a class="reference internal" href="../library/re.html#re.X" title="re.X"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.X</span></code></a>, for example.)
 | 
						||
Multiple flags can be specified by bitwise OR-ing them; <code class="docutils literal notranslate"><span class="pre">re.I</span> <span class="pre">|</span> <span class="pre">re.M</span></code> sets
 | 
						||
both the <a class="reference internal" href="../library/re.html#re.I" title="re.I"><code class="xref py py-const docutils literal notranslate"><span class="pre">I</span></code></a> and <a class="reference internal" href="../library/re.html#re.M" title="re.M"><code class="xref py py-const docutils literal notranslate"><span class="pre">M</span></code></a> flags, for example.</p>
 | 
						||
<p>Here’s a table of the available flags, followed by a more detailed explanation
 | 
						||
of each one.</p>
 | 
						||
<table class="docutils align-default">
 | 
						||
<thead>
 | 
						||
<tr class="row-odd"><th class="head"><p>Flag</p></th>
 | 
						||
<th class="head"><p>Meaning</p></th>
 | 
						||
</tr>
 | 
						||
</thead>
 | 
						||
<tbody>
 | 
						||
<tr class="row-even"><td><p><a class="reference internal" href="../library/re.html#re.ASCII" title="re.ASCII"><code class="xref py py-const docutils literal notranslate"><span class="pre">ASCII</span></code></a>, <a class="reference internal" href="../library/re.html#re.A" title="re.A"><code class="xref py py-const docutils literal notranslate"><span class="pre">A</span></code></a></p></td>
 | 
						||
<td><p>Makes several escapes like <code class="docutils literal notranslate"><span class="pre">\w</span></code>, <code class="docutils literal notranslate"><span class="pre">\b</span></code>,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">\s</span></code> and <code class="docutils literal notranslate"><span class="pre">\d</span></code> match only on ASCII
 | 
						||
characters with the respective property.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><a class="reference internal" href="../library/re.html#re.DOTALL" title="re.DOTALL"><code class="xref py py-const docutils literal notranslate"><span class="pre">DOTALL</span></code></a>, <a class="reference internal" href="../library/re.html#re.S" title="re.S"><code class="xref py py-const docutils literal notranslate"><span class="pre">S</span></code></a></p></td>
 | 
						||
<td><p>Make <code class="docutils literal notranslate"><span class="pre">.</span></code> match any character, including
 | 
						||
newlines.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p><a class="reference internal" href="../library/re.html#re.IGNORECASE" title="re.IGNORECASE"><code class="xref py py-const docutils literal notranslate"><span class="pre">IGNORECASE</span></code></a>, <a class="reference internal" href="../library/re.html#re.I" title="re.I"><code class="xref py py-const docutils literal notranslate"><span class="pre">I</span></code></a></p></td>
 | 
						||
<td><p>Do case-insensitive matches.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><a class="reference internal" href="../library/re.html#re.LOCALE" title="re.LOCALE"><code class="xref py py-const docutils literal notranslate"><span class="pre">LOCALE</span></code></a>, <a class="reference internal" href="../library/re.html#re.L" title="re.L"><code class="xref py py-const docutils literal notranslate"><span class="pre">L</span></code></a></p></td>
 | 
						||
<td><p>Do a locale-aware match.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p><a class="reference internal" href="../library/re.html#re.MULTILINE" title="re.MULTILINE"><code class="xref py py-const docutils literal notranslate"><span class="pre">MULTILINE</span></code></a>, <a class="reference internal" href="../library/re.html#re.M" title="re.M"><code class="xref py py-const docutils literal notranslate"><span class="pre">M</span></code></a></p></td>
 | 
						||
<td><p>Multi-line matching, affecting <code class="docutils literal notranslate"><span class="pre">^</span></code> and
 | 
						||
<code class="docutils literal notranslate"><span class="pre">$</span></code>.</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><a class="reference internal" href="../library/re.html#re.VERBOSE" title="re.VERBOSE"><code class="xref py py-const docutils literal notranslate"><span class="pre">VERBOSE</span></code></a>, <a class="reference internal" href="../library/re.html#re.X" title="re.X"><code class="xref py py-const docutils literal notranslate"><span class="pre">X</span></code></a>
 | 
						||
(for ‘extended’)</p></td>
 | 
						||
<td><p>Enable verbose REs, which can be organized
 | 
						||
more cleanly and understandably.</p></td>
 | 
						||
</tr>
 | 
						||
</tbody>
 | 
						||
</table>
 | 
						||
<dl class="py data">
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">I</span></span></dt>
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">IGNORECASE</span></span></dt>
 | 
						||
<dd><p>Perform case-insensitive matching; character classes and literal strings will
 | 
						||
match letters by ignoring case.  For example, <code class="docutils literal notranslate"><span class="pre">[A-Z]</span></code> will match lowercase
 | 
						||
letters, too. Full Unicode matching also works unless the <a class="reference internal" href="../library/re.html#re.ASCII" title="re.ASCII"><code class="xref py py-const docutils literal notranslate"><span class="pre">ASCII</span></code></a>
 | 
						||
flag is used to disable non-ASCII matches.  When the Unicode patterns
 | 
						||
<code class="docutils literal notranslate"><span class="pre">[a-z]</span></code> or <code class="docutils literal notranslate"><span class="pre">[A-Z]</span></code> are used in combination with the <a class="reference internal" href="../library/re.html#re.IGNORECASE" title="re.IGNORECASE"><code class="xref py py-const docutils literal notranslate"><span class="pre">IGNORECASE</span></code></a>
 | 
						||
flag, they will match the 52 ASCII letters and 4 additional non-ASCII
 | 
						||
letters: ‘İ’ (U+0130, Latin capital letter I with dot above), ‘ı’ (U+0131,
 | 
						||
Latin small letter dotless i), ‘ſ’ (U+017F, Latin small letter long s) and
 | 
						||
‘K’ (U+212A, Kelvin sign).  <code class="docutils literal notranslate"><span class="pre">Spam</span></code> will match <code class="docutils literal notranslate"><span class="pre">'Spam'</span></code>, <code class="docutils literal notranslate"><span class="pre">'spam'</span></code>,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">'spAM'</span></code>, or <code class="docutils literal notranslate"><span class="pre">'ſpam'</span></code> (the latter is matched only in Unicode mode).
 | 
						||
This lowercasing doesn’t take the current locale into account;
 | 
						||
it will if you also set the <a class="reference internal" href="../library/re.html#re.LOCALE" title="re.LOCALE"><code class="xref py py-const docutils literal notranslate"><span class="pre">LOCALE</span></code></a> flag.</p>
 | 
						||
</dd></dl>
 | 
						||
 | 
						||
<dl class="py data">
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">L</span></span></dt>
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">LOCALE</span></span></dt>
 | 
						||
<dd><p>Make <code class="docutils literal notranslate"><span class="pre">\w</span></code>, <code class="docutils literal notranslate"><span class="pre">\W</span></code>, <code class="docutils literal notranslate"><span class="pre">\b</span></code>, <code class="docutils literal notranslate"><span class="pre">\B</span></code> and case-insensitive matching dependent
 | 
						||
on the current locale instead of the Unicode database.</p>
 | 
						||
<p>Locales are a feature of the C library intended to help in writing programs
 | 
						||
that take account of language differences.  For example, if you’re
 | 
						||
processing encoded French text, you’d want to be able to write <code class="docutils literal notranslate"><span class="pre">\w+</span></code> to
 | 
						||
match words, but <code class="docutils literal notranslate"><span class="pre">\w</span></code> only matches the character class <code class="docutils literal notranslate"><span class="pre">[A-Za-z0-9_]</span></code> in
 | 
						||
bytes patterns; it won’t match bytes corresponding to <code class="docutils literal notranslate"><span class="pre">é</span></code> or <code class="docutils literal notranslate"><span class="pre">ç</span></code>.
 | 
						||
If your system is configured properly and a French locale is selected,
 | 
						||
certain C functions will tell the program that the byte corresponding to
 | 
						||
<code class="docutils literal notranslate"><span class="pre">é</span></code> should also be considered a letter.
 | 
						||
Setting the <a class="reference internal" href="../library/re.html#re.LOCALE" title="re.LOCALE"><code class="xref py py-const docutils literal notranslate"><span class="pre">LOCALE</span></code></a> flag when compiling a regular expression will cause
 | 
						||
the resulting compiled object to use these C functions for <code class="docutils literal notranslate"><span class="pre">\w</span></code>; this is
 | 
						||
slower, but also enables <code class="docutils literal notranslate"><span class="pre">\w+</span></code> to match French words as you’d expect.
 | 
						||
The use of this flag is discouraged in Python 3 as the locale mechanism
 | 
						||
is very unreliable, it only handles one “culture” at a time, and it only
 | 
						||
works with 8-bit locales.  Unicode matching is already enabled by default
 | 
						||
in Python 3 for Unicode (str) patterns, and it is able to handle different
 | 
						||
locales/languages.</p>
 | 
						||
</dd></dl>
 | 
						||
 | 
						||
<dl class="py data">
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">M</span></span></dt>
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">MULTILINE</span></span></dt>
 | 
						||
<dd><p>(<code class="docutils literal notranslate"><span class="pre">^</span></code> and <code class="docutils literal notranslate"><span class="pre">$</span></code> haven’t been explained yet;  they’ll be introduced in section
 | 
						||
<a class="reference internal" href="#more-metacharacters"><span class="std std-ref">More Metacharacters</span></a>.)</p>
 | 
						||
<p>Usually <code class="docutils literal notranslate"><span class="pre">^</span></code> matches only at the beginning of the string, and <code class="docutils literal notranslate"><span class="pre">$</span></code> matches
 | 
						||
only at the end of the string and immediately before the newline (if any) at the
 | 
						||
end of the string. When this flag is specified, <code class="docutils literal notranslate"><span class="pre">^</span></code> matches at the beginning
 | 
						||
of the string and at the beginning of each line within the string, immediately
 | 
						||
following each newline.  Similarly, the <code class="docutils literal notranslate"><span class="pre">$</span></code> metacharacter matches both at
 | 
						||
the end of the string and at the end of each line (immediately preceding each
 | 
						||
newline).</p>
 | 
						||
</dd></dl>
 | 
						||
 | 
						||
<dl class="py data">
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">S</span></span></dt>
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">DOTALL</span></span></dt>
 | 
						||
<dd><p>Makes the <code class="docutils literal notranslate"><span class="pre">'.'</span></code> special character match any character at all, including a
 | 
						||
newline; without this flag, <code class="docutils literal notranslate"><span class="pre">'.'</span></code> will match anything <em>except</em> a newline.</p>
 | 
						||
</dd></dl>
 | 
						||
 | 
						||
<dl class="py data">
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">A</span></span></dt>
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">ASCII</span></span></dt>
 | 
						||
<dd><p>Make <code class="docutils literal notranslate"><span class="pre">\w</span></code>, <code class="docutils literal notranslate"><span class="pre">\W</span></code>, <code class="docutils literal notranslate"><span class="pre">\b</span></code>, <code class="docutils literal notranslate"><span class="pre">\B</span></code>, <code class="docutils literal notranslate"><span class="pre">\d</span></code>, <code class="docutils literal notranslate"><span class="pre">\D</span></code>, <code class="docutils literal notranslate"><span class="pre">\s</span></code> and <code class="docutils literal notranslate"><span class="pre">\S</span></code> perform ASCII-only
 | 
						||
matching instead of full Unicode matching. This is only meaningful for
 | 
						||
Unicode patterns, and is ignored for bytes patterns.</p>
 | 
						||
</dd></dl>
 | 
						||
 | 
						||
<dl class="py data">
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">X</span></span></dt>
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">re.</span></span><span class="sig-name descname"><span class="pre">VERBOSE</span></span></dt>
 | 
						||
<dd><p>This flag allows you to write regular expressions that are more readable by
 | 
						||
granting you more flexibility in how you can format them.  When this flag has
 | 
						||
been specified, whitespace within the RE string is ignored, except when the
 | 
						||
whitespace is in a character class or preceded by an unescaped backslash; this
 | 
						||
lets you organize and indent the RE more clearly.  This flag also lets you put
 | 
						||
comments within a RE that will be ignored by the engine; comments are marked by
 | 
						||
a <code class="docutils literal notranslate"><span class="pre">'#'</span></code> that’s neither in a character class nor preceded by an unescaped
 | 
						||
backslash.</p>
 | 
						||
<p>For example, here’s a RE that uses <a class="reference internal" href="../library/re.html#re.VERBOSE" title="re.VERBOSE"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.VERBOSE</span></code></a>; see how much easier it
 | 
						||
is to read?</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">charref</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"""</span>
 | 
						||
<span class="s2"> &amp;[#]                # Start of a numeric entity reference</span>
 | 
						||
<span class="s2"> (</span>
 | 
						||
<span class="s2">     0[0-7]+         # Octal form</span>
 | 
						||
<span class="s2">   | [0-9]+          # Decimal form</span>
 | 
						||
<span class="s2">   | x[0-9a-fA-F]+   # Hexadecimal form</span>
 | 
						||
<span class="s2"> )</span>
 | 
						||
<span class="s2"> ;                   # Trailing semicolon</span>
 | 
						||
<span class="s2">"""</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">VERBOSE</span><span class="p">)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Without the verbose setting, the RE would look like this:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">charref</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s2">"&amp;#(0[0-7]+"</span>
 | 
						||
                     <span class="s2">"|[0-9]+"</span>
 | 
						||
                     <span class="s2">"|x[0-9a-fA-F]+);"</span><span class="p">)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>In the above example, Python’s automatic concatenation of string literals has
 | 
						||
been used to break up the RE into smaller pieces, but it’s still more difficult
 | 
						||
to understand than the version using <a class="reference internal" href="../library/re.html#re.VERBOSE" title="re.VERBOSE"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.VERBOSE</span></code></a>.</p>
 | 
						||
</dd></dl>
 | 
						||
 | 
						||
</section>
 | 
						||
</section>
 | 
						||
<section id="more-pattern-power">
 | 
						||
<h2>More Pattern Power<a class="headerlink" href="#more-pattern-power" title="Link to this heading">¶</a></h2>
 | 
						||
<p>So far we’ve only covered a part of the features of regular expressions.  In
 | 
						||
this section, we’ll cover some new metacharacters, and how to use groups to
 | 
						||
retrieve portions of the text that was matched.</p>
 | 
						||
<section id="more-metacharacters">
 | 
						||
<span id="id2"></span><h3>More Metacharacters<a class="headerlink" href="#more-metacharacters" title="Link to this heading">¶</a></h3>
 | 
						||
<p>There are some metacharacters that we haven’t covered yet.  Most of them will be
 | 
						||
covered in this section.</p>
 | 
						||
<p>Some of the remaining metacharacters to be discussed are <em class="dfn">zero-width
 | 
						||
assertions</em>.  They don’t cause the engine to advance through the string;
 | 
						||
instead, they consume no characters at all, and simply succeed or fail.  For
 | 
						||
example, <code class="docutils literal notranslate"><span class="pre">\b</span></code> is an assertion that the current position is located at a word
 | 
						||
boundary; the position isn’t changed by the <code class="docutils literal notranslate"><span class="pre">\b</span></code> at all.  This means that
 | 
						||
zero-width assertions should never be repeated, because if they match once at a
 | 
						||
given location, they can obviously be matched an infinite number of times.</p>
 | 
						||
<dl>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">|</span></code></dt><dd><p>Alternation, or the “or” operator.   If <em>A</em> and <em>B</em> are regular expressions,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">A|B</span></code> will match any string that matches either <em>A</em> or <em>B</em>. <code class="docutils literal notranslate"><span class="pre">|</span></code> has very
 | 
						||
low precedence in order to make it work reasonably when you’re alternating
 | 
						||
multi-character strings. <code class="docutils literal notranslate"><span class="pre">Crow|Servo</span></code> will match either <code class="docutils literal notranslate"><span class="pre">'Crow'</span></code> or <code class="docutils literal notranslate"><span class="pre">'Servo'</span></code>,
 | 
						||
not <code class="docutils literal notranslate"><span class="pre">'Cro'</span></code>, a <code class="docutils literal notranslate"><span class="pre">'w'</span></code> or an <code class="docutils literal notranslate"><span class="pre">'S'</span></code>, and <code class="docutils literal notranslate"><span class="pre">'ervo'</span></code>.</p>
 | 
						||
<p>To match a literal <code class="docutils literal notranslate"><span class="pre">'|'</span></code>, use <code class="docutils literal notranslate"><span class="pre">\|</span></code>, or enclose it inside a character class,
 | 
						||
as in <code class="docutils literal notranslate"><span class="pre">[|]</span></code>.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">^</span></code></dt><dd><p>Matches at the beginning of lines.  Unless the <a class="reference internal" href="../library/re.html#re.MULTILINE" title="re.MULTILINE"><code class="xref py py-const docutils literal notranslate"><span class="pre">MULTILINE</span></code></a> flag has been
 | 
						||
set, this will only match at the beginning of the string.  In <a class="reference internal" href="../library/re.html#re.MULTILINE" title="re.MULTILINE"><code class="xref py py-const docutils literal notranslate"><span class="pre">MULTILINE</span></code></a>
 | 
						||
mode, this also matches immediately after each newline within the string.</p>
 | 
						||
<p>For example, if you wish to match the word <code class="docutils literal notranslate"><span class="pre">From</span></code> only at the beginning of a
 | 
						||
line, the RE to use is <code class="docutils literal notranslate"><span class="pre">^From</span></code>.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'^From'</span><span class="p">,</span> <span class="s1">'From Here to Eternity'</span><span class="p">))</span>
 | 
						||
<span class="go">&lt;re.Match object; span=(0, 4), match='From'&gt;</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'^From'</span><span class="p">,</span> <span class="s1">'Reciting From Memory'</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>To match a literal <code class="docutils literal notranslate"><span class="pre">'^'</span></code>, use <code class="docutils literal notranslate"><span class="pre">\^</span></code>.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">$</span></code></dt><dd><p>Matches at the end of a line, which is defined as either the end of the string,
 | 
						||
or any location followed by a newline character.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'}$'</span><span class="p">,</span> <span class="s1">'</span><span class="si">{block}</span><span class="s1">'</span><span class="p">))</span>
 | 
						||
<span class="go">&lt;re.Match object; span=(6, 7), match='}'&gt;</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'}$'</span><span class="p">,</span> <span class="s1">'</span><span class="si">{block}</span><span class="s1"> '</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'}$'</span><span class="p">,</span> <span class="s1">'</span><span class="si">{block}</span><span class="se">\n</span><span class="s1">'</span><span class="p">))</span>
 | 
						||
<span class="go">&lt;re.Match object; span=(6, 7), match='}'&gt;</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>To match a literal <code class="docutils literal notranslate"><span class="pre">'$'</span></code>, use <code class="docutils literal notranslate"><span class="pre">\$</span></code> or enclose it inside a character class,
 | 
						||
as in  <code class="docutils literal notranslate"><span class="pre">[$]</span></code>.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\A</span></code></dt><dd><p>Matches only at the start of the string.  When not in <a class="reference internal" href="../library/re.html#re.MULTILINE" title="re.MULTILINE"><code class="xref py py-const docutils literal notranslate"><span class="pre">MULTILINE</span></code></a> mode,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">\A</span></code> and <code class="docutils literal notranslate"><span class="pre">^</span></code> are effectively the same.  In <a class="reference internal" href="../library/re.html#re.MULTILINE" title="re.MULTILINE"><code class="xref py py-const docutils literal notranslate"><span class="pre">MULTILINE</span></code></a> mode, they’re
 | 
						||
different: <code class="docutils literal notranslate"><span class="pre">\A</span></code> still matches only at the beginning of the string, but <code class="docutils literal notranslate"><span class="pre">^</span></code>
 | 
						||
may match at any location inside the string that follows a newline character.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\Z</span></code></dt><dd><p>Matches only at the end of the string.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\b</span></code></dt><dd><p>Word boundary.  This is a zero-width assertion that matches only at the
 | 
						||
beginning or end of a word.  A word is defined as a sequence of alphanumeric
 | 
						||
characters, so the end of a word is indicated by whitespace or a
 | 
						||
non-alphanumeric character.</p>
 | 
						||
<p>The following example matches <code class="docutils literal notranslate"><span class="pre">class</span></code> only when it’s a complete word; it won’t
 | 
						||
match when it’s contained inside another word.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\bclass\b'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'no class at all'</span><span class="p">))</span>
 | 
						||
<span class="go">&lt;re.Match object; span=(3, 8), match='class'&gt;</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'the declassified algorithm'</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'one subclass is'</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>There are two subtleties you should remember when using this special sequence.
 | 
						||
First, this is the worst collision between Python’s string literals and regular
 | 
						||
expression sequences.  In Python’s string literals, <code class="docutils literal notranslate"><span class="pre">\b</span></code> is the backspace
 | 
						||
character, ASCII value 8.  If you’re not using raw strings, then Python will
 | 
						||
convert the <code class="docutils literal notranslate"><span class="pre">\b</span></code> to a backspace, and your RE won’t match as you expect it to.
 | 
						||
The following example looks the same as our previous RE, but omits the <code class="docutils literal notranslate"><span class="pre">'r'</span></code>
 | 
						||
in front of the RE string.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'</span><span class="se">\b</span><span class="s1">class</span><span class="se">\b</span><span class="s1">'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'no class at all'</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'</span><span class="se">\b</span><span class="s1">'</span> <span class="o">+</span> <span class="s1">'class'</span> <span class="o">+</span> <span class="s1">'</span><span class="se">\b</span><span class="s1">'</span><span class="p">))</span>
 | 
						||
<span class="go">&lt;re.Match object; span=(0, 7), match='\x08class\x08'&gt;</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Second, inside a character class, where there’s no use for this assertion,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">\b</span></code> represents the backspace character, for compatibility with Python’s
 | 
						||
string literals.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">\B</span></code></dt><dd><p>Another zero-width assertion, this is the opposite of <code class="docutils literal notranslate"><span class="pre">\b</span></code>, only matching when
 | 
						||
the current position is not at a word boundary.</p>
 | 
						||
</dd>
 | 
						||
</dl>
 | 
						||
</section>
 | 
						||
<section id="grouping">
 | 
						||
<h3>Grouping<a class="headerlink" href="#grouping" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Frequently you need to obtain more information than just whether the RE matched
 | 
						||
or not.  Regular expressions are often used to dissect strings by writing a RE
 | 
						||
divided into several subgroups which match different components of interest.
 | 
						||
For example, an RFC-822 header line is divided into a header name and a value,
 | 
						||
separated by a <code class="docutils literal notranslate"><span class="pre">':'</span></code>, like this:</p>
 | 
						||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>From: author@example.com
 | 
						||
User-Agent: Thunderbird 1.5.0.9 (X11/20061227)
 | 
						||
MIME-Version: 1.0
 | 
						||
To: editor@example.com
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>This can be handled by writing a regular expression which matches an entire
 | 
						||
header line, and has one group which matches the header name, and another group
 | 
						||
which matches the header’s value.</p>
 | 
						||
<p>Groups are marked by the <code class="docutils literal notranslate"><span class="pre">'('</span></code>, <code class="docutils literal notranslate"><span class="pre">')'</span></code> metacharacters. <code class="docutils literal notranslate"><span class="pre">'('</span></code> and <code class="docutils literal notranslate"><span class="pre">')'</span></code>
 | 
						||
have much the same meaning as they do in mathematical expressions; they group
 | 
						||
together the expressions contained inside them, and you can repeat the contents
 | 
						||
of a group with a quantifier, such as <code class="docutils literal notranslate"><span class="pre">*</span></code>, <code class="docutils literal notranslate"><span class="pre">+</span></code>, <code class="docutils literal notranslate"><span class="pre">?</span></code>, or
 | 
						||
<code class="docutils literal notranslate"><span class="pre">{m,n}</span></code>.  For example, <code class="docutils literal notranslate"><span class="pre">(ab)*</span></code> will match zero or more repetitions of
 | 
						||
<code class="docutils literal notranslate"><span class="pre">ab</span></code>.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'(ab)*'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'ababababab'</span><span class="p">)</span><span class="o">.</span><span class="n">span</span><span class="p">())</span>
 | 
						||
<span class="go">(0, 10)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Groups indicated with <code class="docutils literal notranslate"><span class="pre">'('</span></code>, <code class="docutils literal notranslate"><span class="pre">')'</span></code> also capture the starting and ending
 | 
						||
index of the text that they match; this can be retrieved by passing an argument
 | 
						||
to <a class="reference internal" href="../library/re.html#re.Match.group" title="re.Match.group"><code class="xref py py-meth docutils literal notranslate"><span class="pre">group()</span></code></a>, <a class="reference internal" href="../library/re.html#re.Match.start" title="re.Match.start"><code class="xref py py-meth docutils literal notranslate"><span class="pre">start()</span></code></a>, <a class="reference internal" href="../library/re.html#re.Match.end" title="re.Match.end"><code class="xref py py-meth docutils literal notranslate"><span class="pre">end()</span></code></a>, and
 | 
						||
<a class="reference internal" href="../library/re.html#re.Match.span" title="re.Match.span"><code class="xref py py-meth docutils literal notranslate"><span class="pre">span()</span></code></a>.  Groups are
 | 
						||
numbered starting with 0.  Group 0 is always present; it’s the whole RE, so
 | 
						||
<a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a> methods all have group 0 as their default
 | 
						||
argument.  Later we’ll see how to express groups that don’t capture the span
 | 
						||
of text that they match.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'(a)b'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'ab'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">()</span>
 | 
						||
<span class="go">'ab'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
 | 
						||
<span class="go">'ab'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Subgroups are numbered from left to right, from 1 upward.  Groups can be nested;
 | 
						||
to determine the number, just count the opening parenthesis characters, going
 | 
						||
from left to right.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'(a(b)c)d'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'abcd'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
 | 
						||
<span class="go">'abcd'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
 | 
						||
<span class="go">'abc'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
 | 
						||
<span class="go">'b'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p><a class="reference internal" href="../library/re.html#re.Match.group" title="re.Match.group"><code class="xref py py-meth docutils literal notranslate"><span class="pre">group()</span></code></a> can be passed multiple group numbers at a time, in which case it
 | 
						||
will return a tuple containing the corresponding values for those groups.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">)</span>
 | 
						||
<span class="go">('b', 'abc', 'b')</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>The <a class="reference internal" href="../library/re.html#re.Match.groups" title="re.Match.groups"><code class="xref py py-meth docutils literal notranslate"><span class="pre">groups()</span></code></a> method returns a tuple containing the strings for all the
 | 
						||
subgroups, from 1 up to however many there are.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">groups</span><span class="p">()</span>
 | 
						||
<span class="go">('abc', 'b')</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Backreferences in a pattern allow you to specify that the contents of an earlier
 | 
						||
capturing group must also be found at the current location in the string.  For
 | 
						||
example, <code class="docutils literal notranslate"><span class="pre">\1</span></code> will succeed if the exact contents of group 1 can be found at
 | 
						||
the current position, and fails otherwise.  Remember that Python’s string
 | 
						||
literals also use a backslash followed by numbers to allow including arbitrary
 | 
						||
characters in a string, so be sure to use a raw string when incorporating
 | 
						||
backreferences in a RE.</p>
 | 
						||
<p>For example, the following RE detects doubled words in a string.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\b(\w+)\s+\1\b'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'Paris in the the spring'</span><span class="p">)</span><span class="o">.</span><span class="n">group</span><span class="p">()</span>
 | 
						||
<span class="go">'the the'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Backreferences like this aren’t often useful for just searching through a string
 | 
						||
— there are few text formats which repeat data in this way — but you’ll soon
 | 
						||
find out that they’re <em>very</em> useful when performing string substitutions.</p>
 | 
						||
</section>
 | 
						||
<section id="non-capturing-and-named-groups">
 | 
						||
<h3>Non-capturing and Named Groups<a class="headerlink" href="#non-capturing-and-named-groups" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Elaborate REs may use many groups, both to capture substrings of interest, and
 | 
						||
to group and structure the RE itself.  In complex REs, it becomes difficult to
 | 
						||
keep track of the group numbers.  There are two features which help with this
 | 
						||
problem.  Both of them use a common syntax for regular expression extensions, so
 | 
						||
we’ll look at that first.</p>
 | 
						||
<p>Perl 5 is well known for its powerful additions to standard regular expressions.
 | 
						||
For these new features the Perl developers couldn’t choose new single-keystroke metacharacters
 | 
						||
or new special sequences beginning with <code class="docutils literal notranslate"><span class="pre">\</span></code> without making Perl’s regular
 | 
						||
expressions confusingly different from standard REs.  If they chose <code class="docutils literal notranslate"><span class="pre">&</span></code> as a
 | 
						||
new metacharacter, for example, old expressions would be assuming that <code class="docutils literal notranslate"><span class="pre">&</span></code> was
 | 
						||
a regular character and wouldn’t have escaped it by writing <code class="docutils literal notranslate"><span class="pre">\&</span></code> or <code class="docutils literal notranslate"><span class="pre">[&]</span></code>.</p>
 | 
						||
<p>The solution chosen by the Perl developers was to use <code class="docutils literal notranslate"><span class="pre">(?...)</span></code> as the
 | 
						||
extension syntax.  <code class="docutils literal notranslate"><span class="pre">?</span></code> immediately after a parenthesis was a syntax error
 | 
						||
because the <code class="docutils literal notranslate"><span class="pre">?</span></code> would have nothing to repeat, so this didn’t introduce any
 | 
						||
compatibility problems.  The characters immediately after the <code class="docutils literal notranslate"><span class="pre">?</span></code>  indicate
 | 
						||
what extension is being used, so <code class="docutils literal notranslate"><span class="pre">(?=foo)</span></code> is one thing (a positive lookahead
 | 
						||
assertion) and <code class="docutils literal notranslate"><span class="pre">(?:foo)</span></code> is something else (a non-capturing group containing
 | 
						||
the subexpression <code class="docutils literal notranslate"><span class="pre">foo</span></code>).</p>
 | 
						||
<p>Python supports several of Perl’s extensions and adds an extension
 | 
						||
syntax to Perl’s extension syntax.  If the first character after the
 | 
						||
question mark is a <code class="docutils literal notranslate"><span class="pre">P</span></code>, you know that it’s an extension that’s
 | 
						||
specific to Python.</p>
 | 
						||
<p>Now that we’ve looked at the general extension syntax, we can return
 | 
						||
to the features that simplify working with groups in complex REs.</p>
 | 
						||
<p>Sometimes you’ll want to use a group to denote a part of a regular expression,
 | 
						||
but aren’t interested in retrieving the group’s contents. You can make this fact
 | 
						||
explicit by using a non-capturing group: <code class="docutils literal notranslate"><span class="pre">(?:...)</span></code>, where you can replace the
 | 
						||
<code class="docutils literal notranslate"><span class="pre">...</span></code> with any other regular expression.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s2">"([abc])+"</span><span class="p">,</span> <span class="s2">"abc"</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">groups</span><span class="p">()</span>
 | 
						||
<span class="go">('c',)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s2">"(?:[abc])+"</span><span class="p">,</span> <span class="s2">"abc"</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">groups</span><span class="p">()</span>
 | 
						||
<span class="go">()</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Except for the fact that you can’t retrieve the contents of what the group
 | 
						||
matched, a non-capturing group behaves exactly the same as a capturing group;
 | 
						||
you can put anything inside it, repeat it with a repetition metacharacter such
 | 
						||
as <code class="docutils literal notranslate"><span class="pre">*</span></code>, and nest it within other groups (capturing or non-capturing).
 | 
						||
<code class="docutils literal notranslate"><span class="pre">(?:...)</span></code> is particularly useful when modifying an existing pattern, since you
 | 
						||
can add new groups without changing how all the other groups are numbered.  It
 | 
						||
should be mentioned that there’s no performance difference in searching between
 | 
						||
capturing and non-capturing groups; neither form is any faster than the other.</p>
 | 
						||
<p>A more significant feature is named groups: instead of referring to them by
 | 
						||
numbers, groups can be referenced by a name.</p>
 | 
						||
<p>The syntax for a named group is one of the Python-specific extensions:
 | 
						||
<code class="docutils literal notranslate"><span class="pre">(?P&lt;name&gt;...)</span></code>.  <em>name</em> is, obviously, the name of the group.  Named groups
 | 
						||
behave exactly like capturing groups, and additionally associate a name
 | 
						||
with a group.  The <a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a> methods that deal with
 | 
						||
capturing groups all accept either integers that refer to the group by number
 | 
						||
or strings that contain the desired group’s name.  Named groups are still
 | 
						||
given numbers, so you can retrieve information about a group in two ways:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'(?P&lt;word&gt;\b\w+\b)'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span> <span class="s1">'(((( Lots of punctuation )))'</span> <span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s1">'word'</span><span class="p">)</span>
 | 
						||
<span class="go">'Lots'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
 | 
						||
<span class="go">'Lots'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Additionally, you can retrieve named groups as a dictionary with
 | 
						||
<a class="reference internal" href="../library/re.html#re.Match.groupdict" title="re.Match.groupdict"><code class="xref py py-meth docutils literal notranslate"><span class="pre">groupdict()</span></code></a>:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">m</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s1">'(?P&lt;first&gt;\w+) (?P&lt;last&gt;\w+)'</span><span class="p">,</span> <span class="s1">'Jane Doe'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">m</span><span class="o">.</span><span class="n">groupdict</span><span class="p">()</span>
 | 
						||
<span class="go">{'first': 'Jane', 'last': 'Doe'}</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Named groups are handy because they let you use easily remembered names, instead
 | 
						||
of having to remember numbers.  Here’s an example RE from the <a class="reference internal" href="../library/imaplib.html#module-imaplib" title="imaplib: IMAP4 protocol client (requires sockets)."><code class="xref py py-mod docutils literal notranslate"><span class="pre">imaplib</span></code></a>
 | 
						||
module:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">InternalDate</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'INTERNALDATE "'</span>
 | 
						||
        <span class="sa">r</span><span class="s1">'(?P&lt;day&gt;[ 123][0-9])-(?P&lt;mon&gt;[A-Z][a-z][a-z])-'</span>
 | 
						||
        <span class="sa">r</span><span class="s1">'(?P&lt;year&gt;[0-9][0-9][0-9][0-9])'</span>
 | 
						||
        <span class="sa">r</span><span class="s1">' (?P&lt;hour&gt;[0-9][0-9]):(?P&lt;min&gt;[0-9][0-9]):(?P&lt;sec&gt;[0-9][0-9])'</span>
 | 
						||
        <span class="sa">r</span><span class="s1">' (?P&lt;zonen&gt;[-+])(?P&lt;zoneh&gt;[0-9][0-9])(?P&lt;zonem&gt;[0-9][0-9])'</span>
 | 
						||
        <span class="sa">r</span><span class="s1">'"'</span><span class="p">)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>It’s obviously much easier to retrieve <code class="docutils literal notranslate"><span class="pre">m.group('zonem')</span></code>, instead of having
 | 
						||
to remember to retrieve group 9.</p>
 | 
						||
<p>The syntax for backreferences in an expression such as <code class="docutils literal notranslate"><span class="pre">(...)\1</span></code> refers to the
 | 
						||
number of the group.  There’s naturally a variant that uses the group name
 | 
						||
instead of the number. This is another Python extension: <code class="docutils literal notranslate"><span class="pre">(?P=name)</span></code> indicates
 | 
						||
that the contents of the group called <em>name</em> should again be matched at the
 | 
						||
current point.  The regular expression for finding doubled words,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">\b(\w+)\s+\1\b</span></code> can also be written as <code class="docutils literal notranslate"><span class="pre">\b(?P&lt;word&gt;\w+)\s+(?P=word)\b</span></code>:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\b(?P&lt;word&gt;\w+)\s+(?P=word)\b'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'Paris in the the spring'</span><span class="p">)</span><span class="o">.</span><span class="n">group</span><span class="p">()</span>
 | 
						||
<span class="go">'the the'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
</section>
 | 
						||
<section id="lookahead-assertions">
 | 
						||
<h3>Lookahead Assertions<a class="headerlink" href="#lookahead-assertions" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Another zero-width assertion is the lookahead assertion.  Lookahead assertions
 | 
						||
are available in both positive and negative form, and  look like this:</p>
 | 
						||
<dl class="simple">
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">(?=...)</span></code></dt><dd><p>Positive lookahead assertion.  This succeeds if the contained regular
 | 
						||
expression, represented here by <code class="docutils literal notranslate"><span class="pre">...</span></code>, successfully matches at the current
 | 
						||
location, and fails otherwise. But, once the contained expression has been
 | 
						||
tried, the matching engine doesn’t advance at all; the rest of the pattern is
 | 
						||
tried right where the assertion started.</p>
 | 
						||
</dd>
 | 
						||
<dt><code class="docutils literal notranslate"><span class="pre">(?!...)</span></code></dt><dd><p>Negative lookahead assertion.  This is the opposite of the positive assertion;
 | 
						||
it succeeds if the contained expression <em>doesn’t</em> match at the current position
 | 
						||
in the string.</p>
 | 
						||
</dd>
 | 
						||
</dl>
 | 
						||
<p>To make this concrete, let’s look at a case where a lookahead is useful.
 | 
						||
Consider a simple pattern to match a filename and split it apart into a base
 | 
						||
name and an extension, separated by a <code class="docutils literal notranslate"><span class="pre">.</span></code>.  For example, in <code class="docutils literal notranslate"><span class="pre">news.rc</span></code>,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">news</span></code> is the base name, and <code class="docutils literal notranslate"><span class="pre">rc</span></code> is the filename’s extension.</p>
 | 
						||
<p>The pattern to match this is quite simple:</p>
 | 
						||
<p><code class="docutils literal notranslate"><span class="pre">.*[.].*$</span></code></p>
 | 
						||
<p>Notice that the <code class="docutils literal notranslate"><span class="pre">.</span></code> needs to be treated specially because it’s a
 | 
						||
metacharacter, so it’s inside a character class to only match that
 | 
						||
specific character.  Also notice the trailing <code class="docutils literal notranslate"><span class="pre">$</span></code>; this is added to
 | 
						||
ensure that all the rest of the string must be included in the
 | 
						||
extension.  This regular expression matches <code class="docutils literal notranslate"><span class="pre">foo.bar</span></code> and
 | 
						||
<code class="docutils literal notranslate"><span class="pre">autoexec.bat</span></code> and <code class="docutils literal notranslate"><span class="pre">sendmail.cf</span></code> and <code class="docutils literal notranslate"><span class="pre">printers.conf</span></code>.</p>
 | 
						||
<p>Now, consider complicating the problem a bit; what if you want to match
 | 
						||
filenames where the extension is not <code class="docutils literal notranslate"><span class="pre">bat</span></code>? Some incorrect attempts:</p>
 | 
						||
<p><code class="docutils literal notranslate"><span class="pre">.*[.][^b].*$</span></code>  The first attempt above tries to exclude <code class="docutils literal notranslate"><span class="pre">bat</span></code> by requiring
 | 
						||
that the first character of the extension is not a <code class="docutils literal notranslate"><span class="pre">b</span></code>.  This is wrong,
 | 
						||
because the pattern also doesn’t match <code class="docutils literal notranslate"><span class="pre">foo.bar</span></code>.</p>
 | 
						||
<p><code class="docutils literal notranslate"><span class="pre">.*[.]([^b]..|.[^a].|..[^t])$</span></code></p>
 | 
						||
<p>The expression gets messier when you try to patch up the first solution by
 | 
						||
requiring one of the following cases to match: the first character of the
 | 
						||
extension isn’t <code class="docutils literal notranslate"><span class="pre">b</span></code>; the second character isn’t <code class="docutils literal notranslate"><span class="pre">a</span></code>; or the third character
 | 
						||
isn’t <code class="docutils literal notranslate"><span class="pre">t</span></code>.  This accepts <code class="docutils literal notranslate"><span class="pre">foo.bar</span></code> and rejects <code class="docutils literal notranslate"><span class="pre">autoexec.bat</span></code>, but it
 | 
						||
requires a three-letter extension and won’t accept a filename with a two-letter
 | 
						||
extension such as <code class="docutils literal notranslate"><span class="pre">sendmail.cf</span></code>.  We’ll complicate the pattern again in an
 | 
						||
effort to fix it.</p>
 | 
						||
<p><code class="docutils literal notranslate"><span class="pre">.*[.]([^b].?.?|.[^a]?.?|..?[^t]?)$</span></code></p>
 | 
						||
<p>In the third attempt, the second and third letters are both made optional in
 | 
						||
order to allow matching extensions shorter than three characters, such as
 | 
						||
<code class="docutils literal notranslate"><span class="pre">sendmail.cf</span></code>.</p>
 | 
						||
<p>The pattern’s getting really complicated now, which makes it hard to read and
 | 
						||
understand.  Worse, if the problem changes and you want to exclude both <code class="docutils literal notranslate"><span class="pre">bat</span></code>
 | 
						||
and <code class="docutils literal notranslate"><span class="pre">exe</span></code> as extensions, the pattern would get even more complicated and
 | 
						||
confusing.</p>
 | 
						||
<p>A negative lookahead cuts through all this confusion:</p>
 | 
						||
<p><code class="docutils literal notranslate"><span class="pre">.*[.](?!bat$)[^.]*$</span></code>  The negative lookahead means: if the expression <code class="docutils literal notranslate"><span class="pre">bat$</span></code>
 | 
						||
doesn’t match at this point, try the rest of the pattern; if <code class="docutils literal notranslate"><span class="pre">bat$</span></code> does
 | 
						||
match, the whole pattern will fail.  The trailing <code class="docutils literal notranslate"><span class="pre">$</span></code> is required to ensure
 | 
						||
that something like <code class="docutils literal notranslate"><span class="pre">sample.batch</span></code>, where the extension only starts with
 | 
						||
<code class="docutils literal notranslate"><span class="pre">bat</span></code>, will be allowed.  The <code class="docutils literal notranslate"><span class="pre">[^.]*</span></code> makes sure that the pattern works
 | 
						||
when there are multiple dots in the filename.</p>
 | 
						||
<p>Excluding another filename extension is now easy; simply add it as an
 | 
						||
alternative inside the assertion.  The following pattern excludes filenames that
 | 
						||
end in either <code class="docutils literal notranslate"><span class="pre">bat</span></code> or <code class="docutils literal notranslate"><span class="pre">exe</span></code>:</p>
 | 
						||
<p><code class="docutils literal notranslate"><span class="pre">.*[.](?!bat$|exe$)[^.]*$</span></code></p>
 | 
						||
</section>
 | 
						||
</section>
 | 
						||
<section id="modifying-strings">
 | 
						||
<h2>Modifying Strings<a class="headerlink" href="#modifying-strings" title="Link to this heading">¶</a></h2>
 | 
						||
<p>Up to this point, we’ve simply performed searches against a static string.
 | 
						||
Regular expressions are also commonly used to modify strings in various ways,
 | 
						||
using the following pattern methods:</p>
 | 
						||
<table class="docutils align-default">
 | 
						||
<thead>
 | 
						||
<tr class="row-odd"><th class="head"><p>Method/Attribute</p></th>
 | 
						||
<th class="head"><p>Purpose</p></th>
 | 
						||
</tr>
 | 
						||
</thead>
 | 
						||
<tbody>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">split()</span></code></p></td>
 | 
						||
<td><p>Split the string into a list, splitting it
 | 
						||
wherever the RE matches</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">sub()</span></code></p></td>
 | 
						||
<td><p>Find all substrings where the RE matches, and
 | 
						||
replace them with a different string</p></td>
 | 
						||
</tr>
 | 
						||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">subn()</span></code></p></td>
 | 
						||
<td><p>Does the same thing as <code class="xref py py-meth docutils literal notranslate"><span class="pre">sub()</span></code>,  but
 | 
						||
returns the new string and the number of
 | 
						||
replacements</p></td>
 | 
						||
</tr>
 | 
						||
</tbody>
 | 
						||
</table>
 | 
						||
<section id="splitting-strings">
 | 
						||
<h3>Splitting Strings<a class="headerlink" href="#splitting-strings" title="Link to this heading">¶</a></h3>
 | 
						||
<p>The <a class="reference internal" href="../library/re.html#re.Pattern.split" title="re.Pattern.split"><code class="xref py py-meth docutils literal notranslate"><span class="pre">split()</span></code></a> method of a pattern splits a string apart
 | 
						||
wherever the RE matches, returning a list of the pieces. It’s similar to the
 | 
						||
<a class="reference internal" href="../library/stdtypes.html#str.split" title="str.split"><code class="xref py py-meth docutils literal notranslate"><span class="pre">split()</span></code></a> method of strings but provides much more generality in the
 | 
						||
delimiters that you can split by; string <code class="xref py py-meth docutils literal notranslate"><span class="pre">split()</span></code> only supports splitting by
 | 
						||
whitespace or by a fixed string.  As you’d expect, there’s a module-level
 | 
						||
<a class="reference internal" href="../library/re.html#re.split" title="re.split"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.split()</span></code></a> function, too.</p>
 | 
						||
<dl class="py method">
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">.</span></span><span class="sig-name descname"><span class="pre">split</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">string</span></span></em><span class="optional">[</span>, <em class="sig-param"><span class="n"><span class="pre">maxsplit=0</span></span></em><span class="optional">]</span><span class="sig-paren">)</span></dt>
 | 
						||
<dd><p>Split <em>string</em> by the matches of the regular expression.  If capturing
 | 
						||
parentheses are used in the RE, then their contents will also be returned as
 | 
						||
part of the resulting list.  If <em>maxsplit</em> is nonzero, at most <em>maxsplit</em> splits
 | 
						||
are performed.</p>
 | 
						||
</dd></dl>
 | 
						||
 | 
						||
<p>You can limit the number of splits made, by passing a value for <em>maxsplit</em>.
 | 
						||
When <em>maxsplit</em> is nonzero, at most <em>maxsplit</em> splits will be made, and the
 | 
						||
remainder of the string is returned as the final element of the list.  In the
 | 
						||
following example, the delimiter is any sequence of non-alphanumeric characters.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\W+'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'This is a test, short and sweet, of split().'</span><span class="p">)</span>
 | 
						||
<span class="go">['This', 'is', 'a', 'test', 'short', 'and', 'sweet', 'of', 'split', '']</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'This is a test, short and sweet, of split().'</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
 | 
						||
<span class="go">['This', 'is', 'a', 'test, short and sweet, of split().']</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Sometimes you’re not only interested in what the text between delimiters is, but
 | 
						||
also need to know what the delimiter was.  If capturing parentheses are used in
 | 
						||
the RE, then their values are also returned as part of the list.  Compare the
 | 
						||
following calls:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\W+'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p2</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'(\W+)'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'This... is a test.'</span><span class="p">)</span>
 | 
						||
<span class="go">['This', 'is', 'a', 'test', '']</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p2</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'This... is a test.'</span><span class="p">)</span>
 | 
						||
<span class="go">['This', '... ', 'is', ' ', 'a', ' ', 'test', '.', '']</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>The module-level function <a class="reference internal" href="../library/re.html#re.split" title="re.split"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.split()</span></code></a> adds the RE to be used as the first
 | 
						||
argument, but is otherwise the same.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="sa">r</span><span class="s1">'[\W]+'</span><span class="p">,</span> <span class="s1">'Words, words, words.'</span><span class="p">)</span>
 | 
						||
<span class="go">['Words', 'words', 'words', '']</span>
 | 
						||
<span class="gp">>>> </span><span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="sa">r</span><span class="s1">'([\W]+)'</span><span class="p">,</span> <span class="s1">'Words, words, words.'</span><span class="p">)</span>
 | 
						||
<span class="go">['Words', ', ', 'words', ', ', 'words', '.', '']</span>
 | 
						||
<span class="gp">>>> </span><span class="n">re</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="sa">r</span><span class="s1">'[\W]+'</span><span class="p">,</span> <span class="s1">'Words, words, words.'</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
 | 
						||
<span class="go">['Words', 'words, words.']</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
</section>
 | 
						||
<section id="search-and-replace">
 | 
						||
<h3>Search and Replace<a class="headerlink" href="#search-and-replace" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Another common task is to find all the matches for a pattern, and replace them
 | 
						||
with a different string.  The <a class="reference internal" href="../library/re.html#re.Pattern.sub" title="re.Pattern.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">sub()</span></code></a> method takes a replacement value,
 | 
						||
which can be either a string or a function, and the string to be processed.</p>
 | 
						||
<dl class="py method">
 | 
						||
<dt class="sig sig-object py">
 | 
						||
<span class="sig-prename descclassname"><span class="pre">.</span></span><span class="sig-name descname"><span class="pre">sub</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">replacement</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">string</span></span></em><span class="optional">[</span>, <em class="sig-param"><span class="n"><span class="pre">count=0</span></span></em><span class="optional">]</span><span class="sig-paren">)</span></dt>
 | 
						||
<dd><p>Returns the string obtained by replacing the leftmost non-overlapping
 | 
						||
occurrences of the RE in <em>string</em> by the replacement <em>replacement</em>.  If the
 | 
						||
pattern isn’t found, <em>string</em> is returned unchanged.</p>
 | 
						||
<p>The optional argument <em>count</em> is the maximum number of pattern occurrences to be
 | 
						||
replaced; <em>count</em> must be a non-negative integer.  The default value of 0 means
 | 
						||
to replace all occurrences.</p>
 | 
						||
</dd></dl>
 | 
						||
 | 
						||
<p>Here’s a simple example of using the <a class="reference internal" href="../library/re.html#re.Pattern.sub" title="re.Pattern.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">sub()</span></code></a> method.  It replaces colour
 | 
						||
names with the word <code class="docutils literal notranslate"><span class="pre">colour</span></code>:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'(blue|white|red)'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s1">'colour'</span><span class="p">,</span> <span class="s1">'blue socks and red shoes'</span><span class="p">)</span>
 | 
						||
<span class="go">'colour socks and colour shoes'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s1">'colour'</span><span class="p">,</span> <span class="s1">'blue socks and red shoes'</span><span class="p">,</span> <span class="n">count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
 | 
						||
<span class="go">'colour socks and red shoes'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>The <a class="reference internal" href="../library/re.html#re.Pattern.subn" title="re.Pattern.subn"><code class="xref py py-meth docutils literal notranslate"><span class="pre">subn()</span></code></a> method does the same work, but returns a 2-tuple containing the
 | 
						||
new string value and the number of replacements  that were performed:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'(blue|white|red)'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">subn</span><span class="p">(</span><span class="s1">'colour'</span><span class="p">,</span> <span class="s1">'blue socks and red shoes'</span><span class="p">)</span>
 | 
						||
<span class="go">('colour socks and colour shoes', 2)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">subn</span><span class="p">(</span><span class="s1">'colour'</span><span class="p">,</span> <span class="s1">'no colours at all'</span><span class="p">)</span>
 | 
						||
<span class="go">('no colours at all', 0)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Empty matches are replaced only when they’re not adjacent to a previous empty match.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'x*'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s1">'-'</span><span class="p">,</span> <span class="s1">'abxd'</span><span class="p">)</span>
 | 
						||
<span class="go">'-a-b--d-'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>If <em>replacement</em> is a string, any backslash escapes in it are processed.  That
 | 
						||
is, <code class="docutils literal notranslate"><span class="pre">\n</span></code> is converted to a single newline character, <code class="docutils literal notranslate"><span class="pre">\r</span></code> is converted to a
 | 
						||
carriage return, and so forth. Unknown escapes such as <code class="docutils literal notranslate"><span class="pre">\&</span></code> are left alone.
 | 
						||
Backreferences, such as <code class="docutils literal notranslate"><span class="pre">\6</span></code>, are replaced with the substring matched by the
 | 
						||
corresponding group in the RE.  This lets you incorporate portions of the
 | 
						||
original text in the resulting replacement string.</p>
 | 
						||
<p>This example matches the word <code class="docutils literal notranslate"><span class="pre">section</span></code> followed by a string enclosed in
 | 
						||
<code class="docutils literal notranslate"><span class="pre">{</span></code>, <code class="docutils literal notranslate"><span class="pre">}</span></code>, and changes <code class="docutils literal notranslate"><span class="pre">section</span></code> to <code class="docutils literal notranslate"><span class="pre">subsection</span></code>:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'section{ ( [^}]* ) }'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">VERBOSE</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="sa">r</span><span class="s1">'subsection{\1}'</span><span class="p">,</span><span class="s1">'section</span><span class="si">{First}</span><span class="s1"> section</span><span class="si">{second}</span><span class="s1">'</span><span class="p">)</span>
 | 
						||
<span class="go">'subsection{First} subsection{second}'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>There’s also a syntax for referring to named groups as defined by the
 | 
						||
<code class="docutils literal notranslate"><span class="pre">(?P&lt;name&gt;...)</span></code> syntax.  <code class="docutils literal notranslate"><span class="pre">\g&lt;name&gt;</span></code> will use the substring matched by the
 | 
						||
group named <code class="docutils literal notranslate"><span class="pre">name</span></code>, and  <code class="docutils literal notranslate"><span class="pre">\g&lt;number&gt;</span></code>  uses the corresponding group number.
 | 
						||
<code class="docutils literal notranslate"><span class="pre">\g&lt;2&gt;</span></code> is therefore equivalent to <code class="docutils literal notranslate"><span class="pre">\2</span></code>,  but isn’t ambiguous in a
 | 
						||
replacement string such as <code class="docutils literal notranslate"><span class="pre">\g&lt;2&gt;0</span></code>.  (<code class="docutils literal notranslate"><span class="pre">\20</span></code> would be interpreted as a
 | 
						||
reference to group 20, not a reference to group 2 followed by the literal
 | 
						||
character <code class="docutils literal notranslate"><span class="pre">'0'</span></code>.)  The following substitutions are all equivalent, but use all
 | 
						||
three variations of the replacement string.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">'section{ (?P&lt;name&gt; [^}]* ) }'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">VERBOSE</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="sa">r</span><span class="s1">'subsection{\1}'</span><span class="p">,</span><span class="s1">'section</span><span class="si">{First}</span><span class="s1">'</span><span class="p">)</span>
 | 
						||
<span class="go">'subsection{First}'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="sa">r</span><span class="s1">'subsection{\g&lt;1&gt;}'</span><span class="p">,</span><span class="s1">'section</span><span class="si">{First}</span><span class="s1">'</span><span class="p">)</span>
 | 
						||
<span class="go">'subsection{First}'</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="sa">r</span><span class="s1">'subsection{\g&lt;name&gt;}'</span><span class="p">,</span><span class="s1">'section</span><span class="si">{First}</span><span class="s1">'</span><span class="p">)</span>
 | 
						||
<span class="go">'subsection{First}'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p><em>replacement</em> can also be a function, which gives you even more control.  If
 | 
						||
<em>replacement</em> is a function, the function is called for every non-overlapping
 | 
						||
occurrence of <em>pattern</em>.  On each call, the function is passed a
 | 
						||
<a class="reference internal" href="../library/re.html#match-objects"><span class="std std-ref">match object</span></a> argument for the match and can use this
 | 
						||
information to compute the desired replacement string and return it.</p>
 | 
						||
<p>In the following example, the replacement function translates decimals into
 | 
						||
hexadecimal:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="k">def</span><span class="w"> </span><span class="nf">hexrepl</span><span class="p">(</span><span class="n">match</span><span class="p">):</span>
 | 
						||
<span class="gp">... </span>    <span class="s2">"Return the hex string for a decimal number"</span>
 | 
						||
<span class="gp">... </span>    <span class="n">value</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">())</span>
 | 
						||
<span class="gp">... </span>    <span class="k">return</span> <span class="nb">hex</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
 | 
						||
<span class="gp">...</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\d+'</span><span class="p">)</span>
 | 
						||
<span class="gp">>>> </span><span class="n">p</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">hexrepl</span><span class="p">,</span> <span class="s1">'Call 65490 for printing, 49152 for user code.'</span><span class="p">)</span>
 | 
						||
<span class="go">'Call 0xffd2 for printing, 0xc000 for user code.'</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>When using the module-level <a class="reference internal" href="../library/re.html#re.sub" title="re.sub"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.sub()</span></code></a> function, the pattern is passed as
 | 
						||
the first argument.  The pattern may be provided as an object or as a string; if
 | 
						||
you need to specify regular expression flags, you must either use a
 | 
						||
pattern object as the first parameter, or use embedded modifiers in the
 | 
						||
pattern string, e.g. <code class="docutils literal notranslate"><span class="pre">sub("(?i)b+",</span> <span class="pre">"x",</span> <span class="pre">"bbbb</span> <span class="pre">BBBB")</span></code> returns <code class="docutils literal notranslate"><span class="pre">'x</span> <span class="pre">x'</span></code>.</p>
 | 
						||
</section>
 | 
						||
</section>
 | 
						||
<section id="common-problems">
 | 
						||
<h2>Common Problems<a class="headerlink" href="#common-problems" title="Link to this heading">¶</a></h2>
 | 
						||
<p>Regular expressions are a powerful tool for some applications, but in some ways
 | 
						||
their behaviour isn’t intuitive and at times they don’t behave the way you may
 | 
						||
expect them to.  This section will point out some of the most common pitfalls.</p>
 | 
						||
<section id="use-string-methods">
 | 
						||
<h3>Use String Methods<a class="headerlink" href="#use-string-methods" title="Link to this heading">¶</a></h3>
 | 
						||
<p>Sometimes using the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module is a mistake.  If you’re matching a fixed
 | 
						||
string, or a single character class, and you’re not using any <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> features
 | 
						||
such as the <a class="reference internal" href="../library/re.html#re.IGNORECASE" title="re.IGNORECASE"><code class="xref py py-const docutils literal notranslate"><span class="pre">IGNORECASE</span></code></a> flag, then the full power of regular expressions
 | 
						||
may not be required. Strings have several methods for performing operations with
 | 
						||
fixed strings and they’re usually much faster, because the implementation is a
 | 
						||
single small C loop that’s been optimized for the purpose, instead of the large,
 | 
						||
more generalized regular expression engine.</p>
 | 
						||
<p>One example might be replacing a single fixed string with another one; for
 | 
						||
example, you might replace <code class="docutils literal notranslate"><span class="pre">word</span></code> with <code class="docutils literal notranslate"><span class="pre">deed</span></code>.  <a class="reference internal" href="../library/re.html#re.sub" title="re.sub"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.sub()</span></code></a> seems like the
 | 
						||
function to use for this, but consider the <a class="reference internal" href="../library/stdtypes.html#str.replace" title="str.replace"><code class="xref py py-meth docutils literal notranslate"><span class="pre">replace()</span></code></a> method.  Note that
 | 
						||
<code class="xref py py-meth docutils literal notranslate"><span class="pre">replace()</span></code> will also replace <code class="docutils literal notranslate"><span class="pre">word</span></code> inside words, turning <code class="docutils literal notranslate"><span class="pre">swordfish</span></code>
 | 
						||
into <code class="docutils literal notranslate"><span class="pre">sdeedfish</span></code>, but the  naive RE <code class="docutils literal notranslate"><span class="pre">word</span></code> would have done that, too.  (To
 | 
						||
avoid performing the substitution on parts of words, the pattern would have to
 | 
						||
be <code class="docutils literal notranslate"><span class="pre">\bword\b</span></code>, in order to require that <code class="docutils literal notranslate"><span class="pre">word</span></code> have a word boundary on
 | 
						||
either side.  This takes the job beyond  <code class="xref py py-meth docutils literal notranslate"><span class="pre">replace()</span></code>’s abilities.)</p>
 | 
						||
<p>Another common task is deleting every occurrence of a single character from a
 | 
						||
string or replacing it with another single character.  You might do this with
 | 
						||
something like <code class="docutils literal notranslate"><span class="pre">re.sub('\n',</span> <span class="pre">'</span> <span class="pre">',</span> <span class="pre">S)</span></code>, but <a class="reference internal" href="../library/stdtypes.html#str.translate" title="str.translate"><code class="xref py py-meth docutils literal notranslate"><span class="pre">translate()</span></code></a> is capable of
 | 
						||
doing both tasks and will be faster than any regular expression operation can
 | 
						||
be.</p>
 | 
						||
<p>In short, before turning to the <a class="reference internal" href="../library/re.html#module-re" title="re: Regular expression operations."><code class="xref py py-mod docutils literal notranslate"><span class="pre">re</span></code></a> module, consider whether your problem
 | 
						||
can be solved with a faster and simpler string method.</p>
 | 
						||
</section>
 | 
						||
<section id="match-versus-search">
 | 
						||
<h3>match() versus search()<a class="headerlink" href="#match-versus-search" title="Link to this heading">¶</a></h3>
 | 
						||
<p>The <a class="reference internal" href="../library/re.html#re.match" title="re.match"><code class="xref py py-func docutils literal notranslate"><span class="pre">match()</span></code></a> function only checks if the RE matches at the beginning of the
 | 
						||
string while <a class="reference internal" href="../library/re.html#re.search" title="re.search"><code class="xref py py-func docutils literal notranslate"><span class="pre">search()</span></code></a> will scan forward through the string for a match.
 | 
						||
It’s important to keep this distinction in mind.  Remember,  <code class="xref py py-func docutils literal notranslate"><span class="pre">match()</span></code> will
 | 
						||
only report a successful match that starts at position 0; if the match wouldn’t
 | 
						||
start at zero,  <code class="xref py py-func docutils literal notranslate"><span class="pre">match()</span></code> will <em>not</em> report it.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'super'</span><span class="p">,</span> <span class="s1">'superstition'</span><span class="p">)</span><span class="o">.</span><span class="n">span</span><span class="p">())</span>
 | 
						||
<span class="go">(0, 5)</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'super'</span><span class="p">,</span> <span class="s1">'insuperable'</span><span class="p">))</span>
 | 
						||
<span class="go">None</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>On the other hand, <a class="reference internal" href="../library/re.html#re.search" title="re.search"><code class="xref py py-func docutils literal notranslate"><span class="pre">search()</span></code></a> will scan forward through the string,
 | 
						||
reporting the first match it finds.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'super'</span><span class="p">,</span> <span class="s1">'superstition'</span><span class="p">)</span><span class="o">.</span><span class="n">span</span><span class="p">())</span>
 | 
						||
<span class="go">(0, 5)</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">'super'</span><span class="p">,</span> <span class="s1">'insuperable'</span><span class="p">)</span><span class="o">.</span><span class="n">span</span><span class="p">())</span>
 | 
						||
<span class="go">(2, 7)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>Sometimes you’ll be tempted to keep using <a class="reference internal" href="../library/re.html#re.match" title="re.match"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.match()</span></code></a>, and just add <code class="docutils literal notranslate"><span class="pre">.*</span></code>
 | 
						||
to the front of your RE.  Resist this temptation and use <a class="reference internal" href="../library/re.html#re.search" title="re.search"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.search()</span></code></a>
 | 
						||
instead.  The regular expression compiler does some analysis of REs in order to
 | 
						||
speed up the process of looking for a match.  One such analysis figures out what
 | 
						||
the first character of a match must be; for example, a pattern starting with
 | 
						||
<code class="docutils literal notranslate"><span class="pre">Crow</span></code> must match starting with a <code class="docutils literal notranslate"><span class="pre">'C'</span></code>.  The analysis lets the engine
 | 
						||
quickly scan through the string looking for the starting character, only trying
 | 
						||
the full match if a <code class="docutils literal notranslate"><span class="pre">'C'</span></code> is found.</p>
 | 
						||
<p>Adding <code class="docutils literal notranslate"><span class="pre">.*</span></code> defeats this optimization, requiring scanning to the end of the
 | 
						||
string and then backtracking to find a match for the rest of the RE.  Use
 | 
						||
<a class="reference internal" href="../library/re.html#re.search" title="re.search"><code class="xref py py-func docutils literal notranslate"><span class="pre">re.search()</span></code></a> instead.</p>
 | 
						||
</section>
 | 
						||
<section id="greedy-versus-non-greedy">
 | 
						||
<h3>Greedy versus Non-Greedy<a class="headerlink" href="#greedy-versus-non-greedy" title="Link to this heading">¶</a></h3>
 | 
						||
<p>When repeating a regular expression, as in <code class="docutils literal notranslate"><span class="pre">a*</span></code>, the resulting action is to
 | 
						||
consume as much of the string as possible.  This fact often bites you when
 | 
						||
you’re trying to match a pair of balanced delimiters, such as the angle brackets
 | 
						||
surrounding an HTML tag.  The naive pattern for matching a single HTML tag
 | 
						||
doesn’t work because of the greedy nature of <code class="docutils literal notranslate"><span class="pre">.*</span></code>.</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="s1">'&lt;html&gt;&lt;head&gt;&lt;title&gt;Title&lt;/title&gt;'</span>
 | 
						||
<span class="gp">>>> </span><span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
 | 
						||
<span class="go">32</span>
 | 
						||
<span class="gp">&gt;&gt;&gt; </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'&lt;.*&gt;'</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span><span class="o">.</span><span class="n">span</span><span class="p">())</span>
 | 
						||
<span class="go">(0, 32)</span>
 | 
						||
<span class="gp">&gt;&gt;&gt; </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'&lt;.*&gt;'</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span><span class="o">.</span><span class="n">group</span><span class="p">())</span>
 | 
						||
<span class="go">&lt;html&gt;&lt;head&gt;&lt;title&gt;Title&lt;/title&gt;</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>The RE matches the <code class="docutils literal notranslate"><span class="pre">'&lt;'</span></code> in <code class="docutils literal notranslate"><span class="pre">'&lt;html&gt;'</span></code>, and the <code class="docutils literal notranslate"><span class="pre">.*</span></code> consumes the rest of
 | 
						||
the string.  There’s still more left in the RE, though, and the <code class="docutils literal notranslate"><span class="pre">></span></code> can’t
 | 
						||
match at the end of the string, so the regular expression engine has to
 | 
						||
backtrack character by character until it finds a match for the <code class="docutils literal notranslate"><span class="pre">></span></code>.   The
 | 
						||
final match extends from the <code class="docutils literal notranslate"><span class="pre">'&lt;'</span></code> in <code class="docutils literal notranslate"><span class="pre">'&lt;html&gt;'</span></code> to the <code class="docutils literal notranslate"><span class="pre">'&gt;'</span></code> in
 | 
						||
<code class="docutils literal notranslate"><span class="pre">'&lt;/title&gt;'</span></code>, which isn’t what you want.</p>
 | 
						||
<p>In this case, the solution is to use the non-greedy quantifiers <code class="docutils literal notranslate"><span class="pre">*?</span></code>, <code class="docutils literal notranslate"><span class="pre">+?</span></code>,
 | 
						||
<code class="docutils literal notranslate"><span class="pre">??</span></code>, or <code class="docutils literal notranslate"><span class="pre">{m,n}?</span></code>, which match as <em>little</em> text as possible.  In the above
 | 
						||
example, the <code class="docutils literal notranslate"><span class="pre">'&gt;'</span></code> is tried immediately after the first <code class="docutils literal notranslate"><span class="pre">'&lt;'</span></code> matches, and
 | 
						||
when it fails, the engine advances a character at a time, retrying the <code class="docutils literal notranslate"><span class="pre">'>'</span></code>
 | 
						||
at every step.  This produces just the right result:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="nb">print</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">'&lt;.*?&gt;'</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span><span class="o">.</span><span class="n">group</span><span class="p">())</span>
 | 
						||
<span class="go">&lt;html&gt;</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>(Note that parsing HTML or XML with regular expressions is painful.
 | 
						||
Quick-and-dirty patterns will handle common cases, but HTML and XML have special
 | 
						||
cases that will break the obvious regular expression; by the time you’ve written
 | 
						||
a regular expression that handles all of the possible cases, the patterns will
 | 
						||
be <em>very</em> complicated.  Use an HTML or XML parser module for such tasks.)</p>
 | 
						||
</section>
 | 
						||
<section id="using-re-verbose">
 | 
						||
<h3>Using re.VERBOSE<a class="headerlink" href="#using-re-verbose" title="Link to this heading">¶</a></h3>
 | 
						||
<p>By now you’ve probably noticed that regular expressions are a very compact
 | 
						||
notation, but they’re not terribly readable.  REs of moderate complexity can
 | 
						||
become lengthy collections of backslashes, parentheses, and metacharacters,
 | 
						||
making them difficult to read and understand.</p>
 | 
						||
<p>For such REs, specifying the <a class="reference internal" href="../library/re.html#re.VERBOSE" title="re.VERBOSE"><code class="xref py py-const docutils literal notranslate"><span class="pre">re.VERBOSE</span></code></a> flag when compiling the regular
 | 
						||
expression can be helpful, because it allows you to format the regular
 | 
						||
expression more clearly.</p>
 | 
						||
<p>The <code class="docutils literal notranslate"><span class="pre">re.VERBOSE</span></code> flag has several effects.  Whitespace in the regular
 | 
						||
expression that <em>isn’t</em> inside a character class is ignored.  This means that an
 | 
						||
expression such as <code class="docutils literal notranslate"><span class="pre">dog</span> <span class="pre">|</span> <span class="pre">cat</span></code> is equivalent to the less readable <code class="docutils literal notranslate"><span class="pre">dog|cat</span></code>,
 | 
						||
but <code class="docutils literal notranslate"><span class="pre">[a</span> <span class="pre">b]</span></code> will still match the characters <code class="docutils literal notranslate"><span class="pre">'a'</span></code>, <code class="docutils literal notranslate"><span class="pre">'b'</span></code>, or a space.  In
 | 
						||
addition, you can also put comments inside a RE; comments extend from a <code class="docutils literal notranslate"><span class="pre">#</span></code>
 | 
						||
character to the next newline.  When used with triple-quoted strings, this
 | 
						||
enables REs to be formatted more neatly:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">pat</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"""</span>
 | 
						||
<span class="s2"> \s*                 # Skip leading whitespace</span>
 | 
						||
<span class="s2"> (?P&lt;header&gt;[^:]+)   # Header name</span>
 | 
						||
<span class="s2"> \s* :               # Whitespace, and a colon</span>
 | 
						||
<span class="s2"> (?P&lt;value&gt;.*?)      # The header's value -- *? used to</span>
 | 
						||
<span class="s2">                     # lose the following trailing whitespace</span>
 | 
						||
<span class="s2"> \s*$                # Trailing whitespace to end-of-line</span>
 | 
						||
<span class="s2">"""</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">VERBOSE</span><span class="p">)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
<p>This is far more readable than:</p>
 | 
						||
<div class="highlight-python3 notranslate"><div class="highlight"><pre><span></span><span class="n">pat</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"\s*(?P&lt;header&gt;[^:]+)\s*:(?P&lt;value&gt;.*?)\s*$"</span><span class="p">)</span>
 | 
						||
</pre></div>
 | 
						||
</div>
 | 
						||
</section>
 | 
						||
</section>
 | 
						||
<section id="feedback">
 | 
						||
<h2>Feedback<a class="headerlink" href="#feedback" title="Link to this heading">¶</a></h2>
 | 
						||
<p>Regular expressions are a complicated topic.  Did this document help you
 | 
						||
understand them?  Were there parts that were unclear, or problems you
 | 
						||
encountered that weren’t covered here?  If so, please send suggestions for
 | 
						||
improvements to the author.</p>
 | 
						||
<p>The most complete book on regular expressions is almost certainly Jeffrey
 | 
						||
Friedl’s Mastering Regular Expressions, published by O’Reilly.  Unfortunately,
 | 
						||
it exclusively concentrates on Perl and Java’s flavours of regular expressions,
 | 
						||
and doesn’t contain any Python material at all, so it won’t be useful as a
 | 
						||
reference for programming in Python.  (The first edition covered Python’s
 | 
						||
now-removed <code class="xref py py-mod docutils literal notranslate"><span class="pre">regex</span></code> module, which won’t help you much.)  Consider checking
 | 
						||
it out from your library.</p>
 | 
						||
</section>
 | 
						||
</section>
 | 
						||
 | 
						||
 | 
						||
            <div class="clearer"></div>
 | 
						||
          </div>
 | 
						||
        </div>
 | 
						||
      </div>
 | 
						||
      <div class="sphinxsidebar" role="navigation" aria-label="Main">
 | 
						||
        <div class="sphinxsidebarwrapper">
 | 
						||
  <div>
 | 
						||
    <h3><a href="../contents.html">Table of Contents</a></h3>
 | 
						||
    <ul>
 | 
						||
<li><a class="reference internal" href="#">Regular Expression HOWTO</a><ul>
 | 
						||
<li><a class="reference internal" href="#introduction">Introduction</a></li>
 | 
						||
<li><a class="reference internal" href="#simple-patterns">Simple Patterns</a><ul>
 | 
						||
<li><a class="reference internal" href="#matching-characters">Matching Characters</a></li>
 | 
						||
<li><a class="reference internal" href="#repeating-things">Repeating Things</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#using-regular-expressions">Using Regular Expressions</a><ul>
 | 
						||
<li><a class="reference internal" href="#compiling-regular-expressions">Compiling Regular Expressions</a></li>
 | 
						||
<li><a class="reference internal" href="#the-backslash-plague">The Backslash Plague</a></li>
 | 
						||
<li><a class="reference internal" href="#performing-matches">Performing Matches</a></li>
 | 
						||
<li><a class="reference internal" href="#module-level-functions">Module-Level Functions</a></li>
 | 
						||
<li><a class="reference internal" href="#compilation-flags">Compilation Flags</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#more-pattern-power">More Pattern Power</a><ul>
 | 
						||
<li><a class="reference internal" href="#more-metacharacters">More Metacharacters</a></li>
 | 
						||
<li><a class="reference internal" href="#grouping">Grouping</a></li>
 | 
						||
<li><a class="reference internal" href="#non-capturing-and-named-groups">Non-capturing and Named Groups</a></li>
 | 
						||
<li><a class="reference internal" href="#lookahead-assertions">Lookahead Assertions</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#modifying-strings">Modifying Strings</a><ul>
 | 
						||
<li><a class="reference internal" href="#splitting-strings">Splitting Strings</a></li>
 | 
						||
<li><a class="reference internal" href="#search-and-replace">Search and Replace</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#common-problems">Common Problems</a><ul>
 | 
						||
<li><a class="reference internal" href="#use-string-methods">Use String Methods</a></li>
 | 
						||
<li><a class="reference internal" href="#match-versus-search">match() versus search()</a></li>
 | 
						||
<li><a class="reference internal" href="#greedy-versus-non-greedy">Greedy versus Non-Greedy</a></li>
 | 
						||
<li><a class="reference internal" href="#using-re-verbose">Using re.VERBOSE</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
<li><a class="reference internal" href="#feedback">Feedback</a></li>
 | 
						||
</ul>
 | 
						||
</li>
 | 
						||
</ul>
 | 
						||
 | 
						||
  </div>
 | 
						||
  <div>
 | 
						||
    <h4>Previous topic</h4>
 | 
						||
    <p class="topless"><a href="logging-cookbook.html"
 | 
						||
                          title="previous chapter">Logging Cookbook</a></p>
 | 
						||
  </div>
 | 
						||
  <div>
 | 
						||
    <h4>Next topic</h4>
 | 
						||
    <p class="topless"><a href="sockets.html"
 | 
						||
                          title="next chapter">Socket Programming HOWTO</a></p>
 | 
						||
  </div>
 | 
						||
  <div role="note" aria-label="source link">
 | 
						||
    <h3>This Page</h3>
 | 
						||
    <ul class="this-page-menu">
 | 
						||
      <li><a href="../bugs.html">Report a Bug</a></li>
 | 
						||
      <li>
 | 
						||
        <a href="https://github.com/python/cpython/blob/main/Doc/howto/regex.rst"
 | 
						||
            rel="nofollow">Show Source
 | 
						||
        </a>
 | 
						||
      </li>
 | 
						||
    </ul>
 | 
						||
  </div>
 | 
						||
        </div>
 | 
						||
<div id="sidebarbutton" title="Collapse sidebar">
 | 
						||
<span>«</span>
 | 
						||
</div>
 | 
						||
 | 
						||
      </div>
 | 
						||
      <div class="clearer"></div>
 | 
						||
    </div>  
 | 
						||
    <div class="related" role="navigation" aria-label="Related">
 | 
						||
      <h3>Navigation</h3>
 | 
						||
      <ul>
 | 
						||
        <li class="right" style="margin-right: 10px">
 | 
						||
          <a href="../genindex.html" title="General Index"
 | 
						||
             >index</a></li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="../py-modindex.html" title="Python Module Index"
 | 
						||
             >modules</a> |</li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="sockets.html" title="Socket Programming HOWTO"
 | 
						||
             >next</a> |</li>
 | 
						||
        <li class="right" >
 | 
						||
          <a href="logging-cookbook.html" title="Logging Cookbook"
 | 
						||
             >previous</a> |</li>
 | 
						||
 | 
						||
          <li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
 | 
						||
          <li><a href="https://www.python.org/">Python</a> »</li>
 | 
						||
          <li class="switchers">
 | 
						||
            <div class="language_switcher_placeholder"></div>
 | 
						||
            <div class="version_switcher_placeholder"></div>
 | 
						||
          </li>
 | 
						||
          <li>
 | 
						||
              
 | 
						||
          </li>
 | 
						||
    <li id="cpython-language-and-version">
 | 
						||
      <a href="../index.html">3.13.3 Documentation</a> »
 | 
						||
    </li>
 | 
						||
 | 
						||
          <li class="nav-item nav-item-1"><a href="index.html" >Python HOWTOs</a> »</li>
 | 
						||
        <li class="nav-item nav-item-this"><a href="">Regular Expression HOWTO</a></li>
 | 
						||
                <li class="right">
 | 
						||
                    
 | 
						||
 | 
						||
    <div class="inline-search" role="search">
 | 
						||
        <form class="inline-search" action="../search.html" method="get">
 | 
						||
          <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
 | 
						||
          <input type="submit" value="Go" />
 | 
						||
        </form>
 | 
						||
    </div>
 | 
						||
                     |
 | 
						||
                </li>
 | 
						||
            <li class="right">
 | 
						||
<label class="theme-selector-label">
 | 
						||
    Theme
 | 
						||
    <select class="theme-selector" oninput="activateTheme(this.value)">
 | 
						||
        <option value="auto" selected>Auto</option>
 | 
						||
        <option value="light">Light</option>
 | 
						||
        <option value="dark">Dark</option>
 | 
						||
    </select>
 | 
						||
</label> |</li>
 | 
						||
            
 | 
						||
      </ul>
 | 
						||
    </div>  
 | 
						||
    <div class="footer">
 | 
						||
    © 
 | 
						||
      <a href="../copyright.html">
 | 
						||
    
 | 
						||
    Copyright
 | 
						||
    
 | 
						||
      </a>
 | 
						||
     2001-2025, Python Software Foundation.
 | 
						||
    <br />
 | 
						||
    This page is licensed under the Python Software Foundation License Version 2.
 | 
						||
    <br />
 | 
						||
    Examples, recipes, and other code in the documentation are additionally licensed under the Zero Clause BSD License.
 | 
						||
    <br />
 | 
						||
    
 | 
						||
      See <a href="../license.html">History and License</a> for more information.<br />
 | 
						||
    
 | 
						||
    
 | 
						||
    <br />
 | 
						||
 | 
						||
    The Python Software Foundation is a non-profit corporation.
 | 
						||
<a href="https://www.python.org/psf/donations/">Please donate.</a>
 | 
						||
<br />
 | 
						||
    <br />
 | 
						||
      Last updated on Apr 08, 2025 (14:33 UTC).
 | 
						||
    
 | 
						||
      <a href="../bugs.html">Found a bug</a>?
 | 
						||
    
 | 
						||
    <br />
 | 
						||
 | 
						||
    Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.2.3.
 | 
						||
    </div>
 | 
						||
 | 
						||
  </body>
 | 
						||
</html> |