mirror of
				https://github.com/bunny-lab-io/Borealis.git
				synced 2025-10-28 08:21:57 -06:00 
			
		
		
		
	
		
			
				
	
	
		
			2118 lines
		
	
	
		
			161 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			2118 lines
		
	
	
		
			161 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
| <!DOCTYPE html>
 | ||
| 
 | ||
| <html lang="en" data-content_root="../">
 | ||
|   <head>
 | ||
|     <meta charset="utf-8" />
 | ||
|     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 | ||
| <meta property="og:title" content="codecs — Codec registry and base classes" />
 | ||
| <meta property="og:type" content="website" />
 | ||
| <meta property="og:url" content="https://docs.python.org/3/library/codecs.html" />
 | ||
| <meta property="og:site_name" content="Python documentation" />
 | ||
| <meta property="og:description" content="Source code: Lib/codecs.py This module defines base classes for standard Python codecs (encoders and decoders) and provides access to the internal Python codec registry, which manages the codec and..." />
 | ||
| <meta property="og:image" content="https://docs.python.org/3/_static/og-image.png" />
 | ||
| <meta property="og:image:alt" content="Python documentation" />
 | ||
| <meta name="description" content="Source code: Lib/codecs.py This module defines base classes for standard Python codecs (encoders and decoders) and provides access to the internal Python codec registry, which manages the codec and..." />
 | ||
| <meta property="og:image:width" content="200">
 | ||
| <meta property="og:image:height" content="200">
 | ||
| <meta name="theme-color" content="#3776ab">
 | ||
| 
 | ||
|     <title>codecs — Codec registry and base classes — Python 3.13.3 documentation</title>
 | ||
|     
 | ||
|     <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" />
 | ||
|     <link rel="stylesheet" type="text/css" href="../_static/pydoctheme.css?v=23252803" />
 | ||
|     <link id="pygments_dark_css" media="(prefers-color-scheme: dark)" rel="stylesheet" type="text/css" href="../_static/pygments_dark.css?v=5349f25f" />
 | ||
|     
 | ||
|     <script src="../_static/documentation_options.js?v=5d57ca2d"></script>
 | ||
|     <script src="../_static/doctools.js?v=9bcbadda"></script>
 | ||
|     <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
 | ||
|     
 | ||
|     <script src="../_static/sidebar.js"></script>
 | ||
|     
 | ||
|     <link rel="search" type="application/opensearchdescription+xml"
 | ||
|           title="Search within Python 3.13.3 documentation"
 | ||
|           href="../_static/opensearch.xml"/>
 | ||
|     <link rel="author" title="About these documents" href="../about.html" />
 | ||
|     <link rel="index" title="Index" href="../genindex.html" />
 | ||
|     <link rel="search" title="Search" href="../search.html" />
 | ||
|     <link rel="copyright" title="Copyright" href="../copyright.html" />
 | ||
|     <link rel="next" title="Data Types" href="datatypes.html" />
 | ||
|     <link rel="prev" title="struct — Interpret bytes as packed binary data" href="struct.html" />
 | ||
|     
 | ||
|     <link rel="canonical" href="https://docs.python.org/3/library/codecs.html">
 | ||
|     
 | ||
|       
 | ||
|     
 | ||
| 
 | ||
|     
 | ||
|     <style>
 | ||
|       @media only screen {
 | ||
|         table.full-width-table {
 | ||
|             width: 100%;
 | ||
|         }
 | ||
|       }
 | ||
|     </style>
 | ||
| <link rel="stylesheet" href="../_static/pydoctheme_dark.css" media="(prefers-color-scheme: dark)" id="pydoctheme_dark_css">
 | ||
|     <link rel="shortcut icon" type="image/svg+xml" href="../_static/py.svg" />
 | ||
|             <script type="text/javascript" src="../_static/copybutton.js"></script>
 | ||
|             <script type="text/javascript" src="../_static/menu.js"></script>
 | ||
|             <script type="text/javascript" src="../_static/search-focus.js"></script>
 | ||
|             <script type="text/javascript" src="../_static/themetoggle.js"></script> 
 | ||
|             <script type="text/javascript" src="../_static/rtd_switcher.js"></script>
 | ||
|             <meta name="readthedocs-addons-api-version" content="1">
 | ||
| 
 | ||
|   </head>
 | ||
| <body>
 | ||
| <div class="mobile-nav">
 | ||
|     <input type="checkbox" id="menuToggler" class="toggler__input" aria-controls="navigation"
 | ||
|            aria-pressed="false" aria-expanded="false" role="button" aria-label="Menu" />
 | ||
|     <nav class="nav-content" role="navigation">
 | ||
|         <label for="menuToggler" class="toggler__label">
 | ||
|             <span></span>
 | ||
|         </label>
 | ||
|         <span class="nav-items-wrapper">
 | ||
|             <a href="https://www.python.org/" class="nav-logo">
 | ||
|                 <img src="../_static/py.svg" alt="Python logo"/>
 | ||
|             </a>
 | ||
|             <span class="version_switcher_placeholder"></span>
 | ||
|             <form role="search" class="search" action="../search.html" method="get">
 | ||
|                 <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" class="search-icon">
 | ||
|                     <path fill-rule="nonzero" fill="currentColor" d="M15.5 14h-.79l-.28-.27a6.5 6.5 0 001.48-5.34c-.47-2.78-2.79-5-5.59-5.34a6.505 6.505 0 00-7.27 7.27c.34 2.8 2.56 5.12 5.34 5.59a6.5 6.5 0 005.34-1.48l.27.28v.79l4.25 4.25c.41.41 1.08.41 1.49 0 .41-.41.41-1.08 0-1.49L15.5 14zm-6 0C7.01 14 5 11.99 5 9.5S7.01 5 9.5 5 14 7.01 14 9.5 11.99 14 9.5 14z"></path>
 | ||
|                 </svg>
 | ||
|                 <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" />
 | ||
|                 <input type="submit" value="Go"/>
 | ||
|             </form>
 | ||
|         </span>
 | ||
|     </nav>
 | ||
|     <div class="menu-wrapper">
 | ||
|         <nav class="menu" role="navigation" aria-label="main navigation">
 | ||
|             <div class="language_switcher_placeholder"></div>
 | ||
|             
 | ||
| <label class="theme-selector-label">
 | ||
|     Theme
 | ||
|     <select class="theme-selector" oninput="activateTheme(this.value)">
 | ||
|         <option value="auto" selected>Auto</option>
 | ||
|         <option value="light">Light</option>
 | ||
|         <option value="dark">Dark</option>
 | ||
|     </select>
 | ||
| </label>
 | ||
|   <div>
 | ||
|     <h3><a href="../contents.html">Table of Contents</a></h3>
 | ||
|     <ul>
 | ||
| <li><a class="reference internal" href="#"><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code> — Codec registry and base classes</a><ul>
 | ||
| <li><a class="reference internal" href="#codec-base-classes">Codec Base Classes</a><ul>
 | ||
| <li><a class="reference internal" href="#error-handlers">Error Handlers</a></li>
 | ||
| <li><a class="reference internal" href="#stateless-encoding-and-decoding">Stateless Encoding and Decoding</a></li>
 | ||
| <li><a class="reference internal" href="#incremental-encoding-and-decoding">Incremental Encoding and Decoding</a><ul>
 | ||
| <li><a class="reference internal" href="#incrementalencoder-objects">IncrementalEncoder Objects</a></li>
 | ||
| <li><a class="reference internal" href="#incrementaldecoder-objects">IncrementalDecoder Objects</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| <li><a class="reference internal" href="#stream-encoding-and-decoding">Stream Encoding and Decoding</a><ul>
 | ||
| <li><a class="reference internal" href="#streamwriter-objects">StreamWriter Objects</a></li>
 | ||
| <li><a class="reference internal" href="#streamreader-objects">StreamReader Objects</a></li>
 | ||
| <li><a class="reference internal" href="#streamreaderwriter-objects">StreamReaderWriter Objects</a></li>
 | ||
| <li><a class="reference internal" href="#streamrecoder-objects">StreamRecoder Objects</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| <li><a class="reference internal" href="#encodings-and-unicode">Encodings and Unicode</a></li>
 | ||
| <li><a class="reference internal" href="#standard-encodings">Standard Encodings</a></li>
 | ||
| <li><a class="reference internal" href="#python-specific-encodings">Python Specific Encodings</a><ul>
 | ||
| <li><a class="reference internal" href="#text-encodings">Text Encodings</a></li>
 | ||
| <li><a class="reference internal" href="#binary-transforms">Binary Transforms</a></li>
 | ||
| <li><a class="reference internal" href="#text-transforms">Text Transforms</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| <li><a class="reference internal" href="#module-encodings.idna"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.idna</span></code> — Internationalized Domain Names in Applications</a></li>
 | ||
| <li><a class="reference internal" href="#module-encodings.mbcs"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.mbcs</span></code> — Windows ANSI codepage</a></li>
 | ||
| <li><a class="reference internal" href="#module-encodings.utf_8_sig"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.utf_8_sig</span></code> — UTF-8 codec with BOM signature</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| </ul>
 | ||
| 
 | ||
|   </div>
 | ||
|   <div>
 | ||
|     <h4>Previous topic</h4>
 | ||
|     <p class="topless"><a href="struct.html"
 | ||
|                           title="previous chapter"><code class="xref py py-mod docutils literal notranslate"><span class="pre">struct</span></code> — Interpret bytes as packed binary data</a></p>
 | ||
|   </div>
 | ||
|   <div>
 | ||
|     <h4>Next topic</h4>
 | ||
|     <p class="topless"><a href="datatypes.html"
 | ||
|                           title="next chapter">Data Types</a></p>
 | ||
|   </div>
 | ||
|   <div role="note" aria-label="source link">
 | ||
|     <h3>This Page</h3>
 | ||
|     <ul class="this-page-menu">
 | ||
|       <li><a href="../bugs.html">Report a Bug</a></li>
 | ||
|       <li>
 | ||
|         <a href="https://github.com/python/cpython/blob/main/Doc/library/codecs.rst"
 | ||
|             rel="nofollow">Show Source
 | ||
|         </a>
 | ||
|       </li>
 | ||
|     </ul>
 | ||
|   </div>
 | ||
|         </nav>
 | ||
|     </div>
 | ||
| </div>
 | ||
| 
 | ||
|   
 | ||
|     <div class="related" role="navigation" aria-label="Related">
 | ||
|       <h3>Navigation</h3>
 | ||
|       <ul>
 | ||
|         <li class="right" style="margin-right: 10px">
 | ||
|           <a href="../genindex.html" title="General Index"
 | ||
|              accesskey="I">index</a></li>
 | ||
|         <li class="right" >
 | ||
|           <a href="../py-modindex.html" title="Python Module Index"
 | ||
|              >modules</a> |</li>
 | ||
|         <li class="right" >
 | ||
|           <a href="datatypes.html" title="Data Types"
 | ||
|              accesskey="N">next</a> |</li>
 | ||
|         <li class="right" >
 | ||
|           <a href="struct.html" title="struct — Interpret bytes as packed binary data"
 | ||
|              accesskey="P">previous</a> |</li>
 | ||
| 
 | ||
|           <li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
 | ||
|           <li><a href="https://www.python.org/">Python</a> »</li>
 | ||
|           <li class="switchers">
 | ||
|             <div class="language_switcher_placeholder"></div>
 | ||
|             <div class="version_switcher_placeholder"></div>
 | ||
|           </li>
 | ||
|           <li>
 | ||
|               
 | ||
|           </li>
 | ||
|     <li id="cpython-language-and-version">
 | ||
|       <a href="../index.html">3.13.3 Documentation</a> »
 | ||
|     </li>
 | ||
| 
 | ||
|           <li class="nav-item nav-item-1"><a href="index.html" >The Python Standard Library</a> »</li>
 | ||
|           <li class="nav-item nav-item-2"><a href="binary.html" accesskey="U">Binary Data Services</a> »</li>
 | ||
|         <li class="nav-item nav-item-this"><a href=""><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code> — Codec registry and base classes</a></li>
 | ||
|                 <li class="right">
 | ||
|                     
 | ||
| 
 | ||
|     <div class="inline-search" role="search">
 | ||
|         <form class="inline-search" action="../search.html" method="get">
 | ||
|           <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
 | ||
|           <input type="submit" value="Go" />
 | ||
|         </form>
 | ||
|     </div>
 | ||
|                      |
 | ||
|                 </li>
 | ||
|             <li class="right">
 | ||
| <label class="theme-selector-label">
 | ||
|     Theme
 | ||
|     <select class="theme-selector" oninput="activateTheme(this.value)">
 | ||
|         <option value="auto" selected>Auto</option>
 | ||
|         <option value="light">Light</option>
 | ||
|         <option value="dark">Dark</option>
 | ||
|     </select>
 | ||
| </label> |</li>
 | ||
|             
 | ||
|       </ul>
 | ||
|     </div>    
 | ||
| 
 | ||
|     <div class="document">
 | ||
|       <div class="documentwrapper">
 | ||
|         <div class="bodywrapper">
 | ||
|           <div class="body" role="main">
 | ||
|             
 | ||
|   <section id="module-codecs">
 | ||
| <span id="codecs-codec-registry-and-base-classes"></span><h1><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code> — Codec registry and base classes<a class="headerlink" href="#module-codecs" title="Link to this heading">¶</a></h1>
 | ||
| <p><strong>Source code:</strong> <a class="extlink-source reference external" href="https://github.com/python/cpython/tree/3.13/Lib/codecs.py">Lib/codecs.py</a></p>
 | ||
| <hr class="docutils" id="index-0" />
 | ||
| <p>This module defines base classes for standard Python codecs (encoders and
 | ||
| decoders) and provides access to the internal Python codec registry, which
 | ||
| manages the codec and error handling lookup process. Most standard codecs
 | ||
| are <a class="reference internal" href="../glossary.html#term-text-encoding"><span class="xref std std-term">text encodings</span></a>, which encode text to bytes (and
 | ||
| decode bytes to text), but there are also codecs provided that encode text to
 | ||
| text, and bytes to bytes. Custom codecs may encode and decode between arbitrary
 | ||
| types, but some module features are restricted to be used specifically with
 | ||
| <a class="reference internal" href="../glossary.html#term-text-encoding"><span class="xref std std-term">text encodings</span></a> or with codecs that encode to
 | ||
| <a class="reference internal" href="stdtypes.html#bytes" title="bytes"><code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code></a>.</p>
 | ||
| <p>The module defines the following functions for encoding and decoding with
 | ||
| any codec:</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.encode">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">encode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'utf-8'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.encode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Encodes <em>obj</em> using the codec registered for <em>encoding</em>.</p>
 | ||
| <p><em>Errors</em> may be given to set the desired error handling scheme. The
 | ||
| default error handler is <code class="docutils literal notranslate"><span class="pre">'strict'</span></code> meaning that encoding errors raise
 | ||
| <a class="reference internal" href="exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> (or a more codec specific subclass, such as
 | ||
| <a class="reference internal" href="exceptions.html#UnicodeEncodeError" title="UnicodeEncodeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeEncodeError</span></code></a>). Refer to <a class="reference internal" href="#codec-base-classes"><span class="std std-ref">Codec Base Classes</span></a> for more
 | ||
| information on codec error handling.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.decode">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">decode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'utf-8'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.decode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Decodes <em>obj</em> using the codec registered for <em>encoding</em>.</p>
 | ||
| <p><em>Errors</em> may be given to set the desired error handling scheme. The
 | ||
| default error handler is <code class="docutils literal notranslate"><span class="pre">'strict'</span></code> meaning that decoding errors raise
 | ||
| <a class="reference internal" href="exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> (or a more codec specific subclass, such as
 | ||
| <a class="reference internal" href="exceptions.html#UnicodeDecodeError" title="UnicodeDecodeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeDecodeError</span></code></a>). Refer to <a class="reference internal" href="#codec-base-classes"><span class="std std-ref">Codec Base Classes</span></a> for more
 | ||
| information on codec error handling.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>The full details for each codec can also be looked up directly:</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.lookup">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">lookup</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.lookup" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Looks up the codec info in the Python codec registry and returns a
 | ||
| <a class="reference internal" href="#codecs.CodecInfo" title="codecs.CodecInfo"><code class="xref py py-class docutils literal notranslate"><span class="pre">CodecInfo</span></code></a> object as defined below.</p>
 | ||
| <p>Encodings are first looked up in the registry’s cache. If not found, the list of
 | ||
| registered search functions is scanned. If no <a class="reference internal" href="#codecs.CodecInfo" title="codecs.CodecInfo"><code class="xref py py-class docutils literal notranslate"><span class="pre">CodecInfo</span></code></a> object is
 | ||
| found, a <a class="reference internal" href="exceptions.html#LookupError" title="LookupError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">LookupError</span></code></a> is raised. Otherwise, the <a class="reference internal" href="#codecs.CodecInfo" title="codecs.CodecInfo"><code class="xref py py-class docutils literal notranslate"><span class="pre">CodecInfo</span></code></a> object
 | ||
| is stored in the cache and returned to the caller.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="codecs.CodecInfo">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">CodecInfo</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encode</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">decode</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">streamreader</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">streamwriter</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">incrementalencoder</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">incrementaldecoder</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.CodecInfo" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Codec details when looking up the codec registry. The constructor
 | ||
| arguments are stored in attributes of the same name:</p>
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="codecs.CodecInfo.name">
 | ||
| <span class="sig-name descname"><span class="pre">name</span></span><a class="headerlink" href="#codecs.CodecInfo.name" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>The name of the encoding.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="codecs.CodecInfo.encode">
 | ||
| <span class="sig-name descname"><span class="pre">encode</span></span><a class="headerlink" href="#codecs.CodecInfo.encode" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.CodecInfo.decode">
 | ||
| <span class="sig-name descname"><span class="pre">decode</span></span><a class="headerlink" href="#codecs.CodecInfo.decode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>The stateless encoding and decoding functions. These must be
 | ||
| functions or methods which have the same interface as
 | ||
| the <a class="reference internal" href="#codecs.Codec.encode" title="codecs.Codec.encode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">encode()</span></code></a> and <a class="reference internal" href="#codecs.Codec.decode" title="codecs.Codec.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">decode()</span></code></a> methods of Codec
 | ||
| instances (see <a class="reference internal" href="#codec-objects"><span class="std std-ref">Codec Interface</span></a>).
 | ||
| The functions or methods are expected to work in a stateless mode.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="codecs.CodecInfo.incrementalencoder">
 | ||
| <span class="sig-name descname"><span class="pre">incrementalencoder</span></span><a class="headerlink" href="#codecs.CodecInfo.incrementalencoder" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.CodecInfo.incrementaldecoder">
 | ||
| <span class="sig-name descname"><span class="pre">incrementaldecoder</span></span><a class="headerlink" href="#codecs.CodecInfo.incrementaldecoder" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Incremental encoder and decoder classes or factory functions.
 | ||
| These have to provide the interface defined by the base classes
 | ||
| <a class="reference internal" href="#codecs.IncrementalEncoder" title="codecs.IncrementalEncoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalEncoder</span></code></a> and <a class="reference internal" href="#codecs.IncrementalDecoder" title="codecs.IncrementalDecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalDecoder</span></code></a>,
 | ||
| respectively. Incremental codecs can maintain state.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py attribute">
 | ||
| <dt class="sig sig-object py" id="codecs.CodecInfo.streamwriter">
 | ||
| <span class="sig-name descname"><span class="pre">streamwriter</span></span><a class="headerlink" href="#codecs.CodecInfo.streamwriter" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.CodecInfo.streamreader">
 | ||
| <span class="sig-name descname"><span class="pre">streamreader</span></span><a class="headerlink" href="#codecs.CodecInfo.streamreader" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Stream writer and reader classes or factory functions. These have to
 | ||
| provide the interface defined by the base classes
 | ||
| <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> and <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a>, respectively.
 | ||
| Stream codecs can maintain state.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>To simplify access to the various codec components, the module provides
 | ||
| these additional functions which use <a class="reference internal" href="#codecs.lookup" title="codecs.lookup"><code class="xref py py-func docutils literal notranslate"><span class="pre">lookup()</span></code></a> for the codec lookup:</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.getencoder">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">getencoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.getencoder" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Look up the codec for the given encoding and return its encoder function.</p>
 | ||
| <p>Raises a <a class="reference internal" href="exceptions.html#LookupError" title="LookupError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">LookupError</span></code></a> in case the encoding cannot be found.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.getdecoder">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">getdecoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.getdecoder" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Look up the codec for the given encoding and return its decoder function.</p>
 | ||
| <p>Raises a <a class="reference internal" href="exceptions.html#LookupError" title="LookupError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">LookupError</span></code></a> in case the encoding cannot be found.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.getincrementalencoder">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">getincrementalencoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.getincrementalencoder" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Look up the codec for the given encoding and return its incremental encoder
 | ||
| class or factory function.</p>
 | ||
| <p>Raises a <a class="reference internal" href="exceptions.html#LookupError" title="LookupError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">LookupError</span></code></a> in case the encoding cannot be found or the codec
 | ||
| doesn’t support an incremental encoder.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.getincrementaldecoder">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">getincrementaldecoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.getincrementaldecoder" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Look up the codec for the given encoding and return its incremental decoder
 | ||
| class or factory function.</p>
 | ||
| <p>Raises a <a class="reference internal" href="exceptions.html#LookupError" title="LookupError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">LookupError</span></code></a> in case the encoding cannot be found or the codec
 | ||
| doesn’t support an incremental decoder.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.getreader">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">getreader</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.getreader" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Look up the codec for the given encoding and return its <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a>
 | ||
| class or factory function.</p>
 | ||
| <p>Raises a <a class="reference internal" href="exceptions.html#LookupError" title="LookupError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">LookupError</span></code></a> in case the encoding cannot be found.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.getwriter">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">getwriter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.getwriter" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Look up the codec for the given encoding and return its <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a>
 | ||
| class or factory function.</p>
 | ||
| <p>Raises a <a class="reference internal" href="exceptions.html#LookupError" title="LookupError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">LookupError</span></code></a> in case the encoding cannot be found.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>Custom codecs are made available by registering a suitable codec search
 | ||
| function:</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.register">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">register</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">search_function</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.register" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Register a codec search function. Search functions are expected to take one
 | ||
| argument, being the encoding name in all lower case letters with hyphens
 | ||
| and spaces converted to underscores, and return a <a class="reference internal" href="#codecs.CodecInfo" title="codecs.CodecInfo"><code class="xref py py-class docutils literal notranslate"><span class="pre">CodecInfo</span></code></a> object.
 | ||
| In case a search function cannot find a given encoding, it should return
 | ||
| <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.9: </span>Hyphens and spaces are converted to underscores.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.unregister">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">unregister</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">search_function</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.unregister" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Unregister a codec search function and clear the registry’s cache.
 | ||
| If the search function is not registered, do nothing.</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.10.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>While the builtin <a class="reference internal" href="functions.html#open" title="open"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> and the associated <a class="reference internal" href="io.html#module-io" title="io: Core tools for working with streams."><code class="xref py py-mod docutils literal notranslate"><span class="pre">io</span></code></a> module are the
 | ||
| recommended approach for working with encoded text files, this module
 | ||
| provides additional utility functions and classes that allow the use of a
 | ||
| wider range of codecs when working with binary files:</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.open">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">open</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filename</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'r'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">buffering</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">-1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.open" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Open an encoded file using the given <em>mode</em> and return an instance of
 | ||
| <a class="reference internal" href="#codecs.StreamReaderWriter" title="codecs.StreamReaderWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReaderWriter</span></code></a>, providing transparent encoding/decoding.
 | ||
| The default file mode is <code class="docutils literal notranslate"><span class="pre">'r'</span></code>, meaning to open the file in read mode.</p>
 | ||
| <div class="admonition note">
 | ||
| <p class="admonition-title">Note</p>
 | ||
| <p>If <em>encoding</em> is not <code class="docutils literal notranslate"><span class="pre">None</span></code>, then the
 | ||
| underlying encoded files are always opened in binary mode.
 | ||
| No automatic conversion of <code class="docutils literal notranslate"><span class="pre">'\n'</span></code> is done on reading and writing.
 | ||
| The <em>mode</em> argument may be any binary mode acceptable to the built-in
 | ||
| <a class="reference internal" href="functions.html#open" title="open"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> function; the <code class="docutils literal notranslate"><span class="pre">'b'</span></code> is automatically added.</p>
 | ||
| </div>
 | ||
| <p><em>encoding</em> specifies the encoding which is to be used for the file.
 | ||
| Any encoding that encodes to and decodes from bytes is allowed, and
 | ||
| the data types supported by the file methods depend on the codec used.</p>
 | ||
| <p><em>errors</em> may be given to define the error handling. It defaults to <code class="docutils literal notranslate"><span class="pre">'strict'</span></code>
 | ||
| which causes a <a class="reference internal" href="exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> to be raised in case an encoding error occurs.</p>
 | ||
| <p><em>buffering</em> has the same meaning as for the built-in <a class="reference internal" href="functions.html#open" title="open"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> function.
 | ||
| It defaults to -1 which means that the default buffer size will be used.</p>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.11: </span>The <code class="docutils literal notranslate"><span class="pre">'U'</span></code> mode has been removed.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.EncodedFile">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">EncodedFile</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_encoding</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">file_encoding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.EncodedFile" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return a <a class="reference internal" href="#codecs.StreamRecoder" title="codecs.StreamRecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamRecoder</span></code></a> instance, a wrapped version of <em>file</em>
 | ||
| which provides transparent transcoding. The original file is closed
 | ||
| when the wrapped version is closed.</p>
 | ||
| <p>Data written to the wrapped file is decoded according to the given
 | ||
| <em>data_encoding</em> and then written to the original file as bytes using
 | ||
| <em>file_encoding</em>. Bytes read from the original file are decoded
 | ||
| according to <em>file_encoding</em>, and the result is encoded
 | ||
| using <em>data_encoding</em>.</p>
 | ||
| <p>If <em>file_encoding</em> is not given, it defaults to <em>data_encoding</em>.</p>
 | ||
| <p><em>errors</em> may be given to define the error handling. It defaults to
 | ||
| <code class="docutils literal notranslate"><span class="pre">'strict'</span></code>, which causes <a class="reference internal" href="exceptions.html#ValueError" title="ValueError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> to be raised in case an encoding
 | ||
| error occurs.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.iterencode">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">iterencode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">iterator</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.iterencode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Uses an incremental encoder to iteratively encode the input provided by
 | ||
| <em>iterator</em>. This function is a <a class="reference internal" href="../glossary.html#term-generator"><span class="xref std std-term">generator</span></a>.
 | ||
| The <em>errors</em> argument (as well as any
 | ||
| other keyword argument) is passed through to the incremental encoder.</p>
 | ||
| <p>This function requires that the codec accept text <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> objects
 | ||
| to encode. Therefore it does not support bytes-to-bytes encoders such as
 | ||
| <code class="docutils literal notranslate"><span class="pre">base64_codec</span></code>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.iterdecode">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">iterdecode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">iterator</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encoding</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.iterdecode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Uses an incremental decoder to iteratively decode the input provided by
 | ||
| <em>iterator</em>. This function is a <a class="reference internal" href="../glossary.html#term-generator"><span class="xref std std-term">generator</span></a>.
 | ||
| The <em>errors</em> argument (as well as any
 | ||
| other keyword argument) is passed through to the incremental decoder.</p>
 | ||
| <p>This function requires that the codec accept <a class="reference internal" href="stdtypes.html#bytes" title="bytes"><code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code></a> objects
 | ||
| to decode. Therefore it does not support text-to-text encoders such as
 | ||
| <code class="docutils literal notranslate"><span class="pre">rot_13</span></code>, although <code class="docutils literal notranslate"><span class="pre">rot_13</span></code> may be used equivalently with
 | ||
| <a class="reference internal" href="#codecs.iterencode" title="codecs.iterencode"><code class="xref py py-func docutils literal notranslate"><span class="pre">iterencode()</span></code></a>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>The module also provides the following constants which are useful for reading
 | ||
| and writing to platform dependent files:</p>
 | ||
| <dl class="py data">
 | ||
| <dt class="sig sig-object py" id="codecs.BOM">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM</span></span><a class="headerlink" href="#codecs.BOM" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_BE">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_BE</span></span><a class="headerlink" href="#codecs.BOM_BE" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_LE">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_LE</span></span><a class="headerlink" href="#codecs.BOM_LE" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_UTF8">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_UTF8</span></span><a class="headerlink" href="#codecs.BOM_UTF8" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_UTF16">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_UTF16</span></span><a class="headerlink" href="#codecs.BOM_UTF16" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_UTF16_BE">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_UTF16_BE</span></span><a class="headerlink" href="#codecs.BOM_UTF16_BE" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_UTF16_LE">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_UTF16_LE</span></span><a class="headerlink" href="#codecs.BOM_UTF16_LE" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_UTF32">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_UTF32</span></span><a class="headerlink" href="#codecs.BOM_UTF32" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_UTF32_BE">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_UTF32_BE</span></span><a class="headerlink" href="#codecs.BOM_UTF32_BE" title="Link to this definition">¶</a></dt>
 | ||
| <dt class="sig sig-object py" id="codecs.BOM_UTF32_LE">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">BOM_UTF32_LE</span></span><a class="headerlink" href="#codecs.BOM_UTF32_LE" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>These constants define various byte sequences,
 | ||
| being Unicode byte order marks (BOMs) for several encodings. They are
 | ||
| used in UTF-16 and UTF-32 data streams to indicate the byte order used,
 | ||
| and in UTF-8 as a Unicode signature. <a class="reference internal" href="#codecs.BOM_UTF16" title="codecs.BOM_UTF16"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM_UTF16</span></code></a> is either
 | ||
| <a class="reference internal" href="#codecs.BOM_UTF16_BE" title="codecs.BOM_UTF16_BE"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM_UTF16_BE</span></code></a> or <a class="reference internal" href="#codecs.BOM_UTF16_LE" title="codecs.BOM_UTF16_LE"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM_UTF16_LE</span></code></a> depending on the platform’s
 | ||
| native byte order, <a class="reference internal" href="#codecs.BOM" title="codecs.BOM"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM</span></code></a> is an alias for <a class="reference internal" href="#codecs.BOM_UTF16" title="codecs.BOM_UTF16"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM_UTF16</span></code></a>,
 | ||
| <a class="reference internal" href="#codecs.BOM_LE" title="codecs.BOM_LE"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM_LE</span></code></a> for <a class="reference internal" href="#codecs.BOM_UTF16_LE" title="codecs.BOM_UTF16_LE"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM_UTF16_LE</span></code></a> and <a class="reference internal" href="#codecs.BOM_BE" title="codecs.BOM_BE"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM_BE</span></code></a> for
 | ||
| <a class="reference internal" href="#codecs.BOM_UTF16_BE" title="codecs.BOM_UTF16_BE"><code class="xref py py-const docutils literal notranslate"><span class="pre">BOM_UTF16_BE</span></code></a>. The others represent the BOM in UTF-8 and UTF-32
 | ||
| encodings.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <section id="codec-base-classes">
 | ||
| <span id="id1"></span><h2>Codec Base Classes<a class="headerlink" href="#codec-base-classes" title="Link to this heading">¶</a></h2>
 | ||
| <p>The <a class="reference internal" href="#module-codecs" title="codecs: Encode and decode data and streams."><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code></a> module defines a set of base classes which define the
 | ||
| interfaces for working with codec objects, and can also be used as the basis
 | ||
| for custom codec implementations.</p>
 | ||
| <p>Each codec has to define four interfaces to make it usable as a codec in Python:
 | ||
| stateless encoder, stateless decoder, stream reader and stream writer. The
 | ||
| stream readers and writers typically reuse the stateless encoder/decoder to
 | ||
| implement the file protocols. Codec authors also need to define how the
 | ||
| codec will handle encoding and decoding errors.</p>
 | ||
| <section id="error-handlers">
 | ||
| <span id="surrogateescape"></span><span id="id2"></span><h3>Error Handlers<a class="headerlink" href="#error-handlers" title="Link to this heading">¶</a></h3>
 | ||
| <p>To simplify and standardize error handling, codecs may implement different
 | ||
| error handling schemes by accepting the <em>errors</em> string argument:</p>
 | ||
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="s1">'German ß, ♬'</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">encoding</span><span class="o">=</span><span class="s1">'ascii'</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s1">'backslashreplace'</span><span class="p">)</span>
 | ||
| <span class="go">b'German \\xdf, \\u266c'</span>
 | ||
| <span class="gp">>>> </span><span class="s1">'German ß, ♬'</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">encoding</span><span class="o">=</span><span class="s1">'ascii'</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s1">'xmlcharrefreplace'</span><span class="p">)</span>
 | ||
| <span class="go">b'German &amp;#223;, &amp;#9836;'</span>
 | ||
| </pre></div>
 | ||
| </div>
 | ||
| <p id="index-1">The following error handlers can be used with all Python
 | ||
| <a class="reference internal" href="#standard-encodings"><span class="std std-ref">Standard Encodings</span></a> codecs:</p>
 | ||
| <table class="docutils align-default">
 | ||
| <thead>
 | ||
| <tr class="row-odd"><th class="head"><p>Value</p></th>
 | ||
| <th class="head"><p>Meaning</p></th>
 | ||
| </tr>
 | ||
| </thead>
 | ||
| <tbody>
 | ||
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">'strict'</span></code></p></td>
 | ||
| <td><p>Raise <a class="reference internal" href="exceptions.html#UnicodeError" title="UnicodeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeError</span></code></a> (or a subclass),
 | ||
| this is the default. Implemented in
 | ||
| <a class="reference internal" href="#codecs.strict_errors" title="codecs.strict_errors"><code class="xref py py-func docutils literal notranslate"><span class="pre">strict_errors()</span></code></a>.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">'ignore'</span></code></p></td>
 | ||
| <td><p>Ignore the malformed data and continue without
 | ||
| further notice. Implemented in
 | ||
| <a class="reference internal" href="#codecs.ignore_errors" title="codecs.ignore_errors"><code class="xref py py-func docutils literal notranslate"><span class="pre">ignore_errors()</span></code></a>.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">'replace'</span></code></p></td>
 | ||
| <td><p>Replace with a replacement marker. On
 | ||
| encoding, use <code class="docutils literal notranslate"><span class="pre">?</span></code> (ASCII character). On
 | ||
| decoding, use <code class="docutils literal notranslate"><span class="pre"><EFBFBD></span></code> (U+FFFD, the official
 | ||
| REPLACEMENT CHARACTER). Implemented in
 | ||
| <a class="reference internal" href="#codecs.replace_errors" title="codecs.replace_errors"><code class="xref py py-func docutils literal notranslate"><span class="pre">replace_errors()</span></code></a>.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">'backslashreplace'</span></code></p></td>
 | ||
| <td><p>Replace with backslashed escape sequences.
 | ||
| On encoding, use hexadecimal form of Unicode
 | ||
| code point with formats <code class="samp docutils literal notranslate"><span class="pre">\x</span><em><span class="pre">hh</span></em></code>
 | ||
| <code class="samp docutils literal notranslate"><span class="pre">\u</span><em><span class="pre">xxxx</span></em></code> <code class="samp docutils literal notranslate"><span class="pre">\U</span><em><span class="pre">xxxxxxxx</span></em></code>.
 | ||
| On decoding, use hexadecimal form of byte
 | ||
| value with format <code class="samp docutils literal notranslate"><span class="pre">\x</span><em><span class="pre">hh</span></em></code>.
 | ||
| Implemented in
 | ||
| <a class="reference internal" href="#codecs.backslashreplace_errors" title="codecs.backslashreplace_errors"><code class="xref py py-func docutils literal notranslate"><span class="pre">backslashreplace_errors()</span></code></a>.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">'surrogateescape'</span></code></p></td>
 | ||
| <td><p>On decoding, replace byte with individual
 | ||
| surrogate code ranging from <code class="docutils literal notranslate"><span class="pre">U+DC80</span></code> to
 | ||
| <code class="docutils literal notranslate"><span class="pre">U+DCFF</span></code>. This code will then be turned
 | ||
| back into the same byte when the
 | ||
| <code class="docutils literal notranslate"><span class="pre">'surrogateescape'</span></code> error handler is used
 | ||
| when encoding the data. (See <span class="target" id="index-2"></span><a class="pep reference external" href="https://peps.python.org/pep-0383/"><strong>PEP 383</strong></a> for
 | ||
| more.)</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| <p id="index-3">The following error handlers are only applicable to encoding (within
 | ||
| <a class="reference internal" href="../glossary.html#term-text-encoding"><span class="xref std std-term">text encodings</span></a>):</p>
 | ||
| <table class="docutils align-default">
 | ||
| <thead>
 | ||
| <tr class="row-odd"><th class="head"><p>Value</p></th>
 | ||
| <th class="head"><p>Meaning</p></th>
 | ||
| </tr>
 | ||
| </thead>
 | ||
| <tbody>
 | ||
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">'xmlcharrefreplace'</span></code></p></td>
 | ||
| <td><p>Replace with XML/HTML numeric character
 | ||
| reference, which is a decimal form of Unicode
 | ||
| code point with format <code class="samp docutils literal notranslate"><span class="pre">&amp;#</span><em><span class="pre">num</span></em><span class="pre">;</span></code>.
 | ||
| Implemented in
 | ||
| <a class="reference internal" href="#codecs.xmlcharrefreplace_errors" title="codecs.xmlcharrefreplace_errors"><code class="xref py py-func docutils literal notranslate"><span class="pre">xmlcharrefreplace_errors()</span></code></a>.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">'namereplace'</span></code></p></td>
 | ||
| <td><p>Replace with <code class="docutils literal notranslate"><span class="pre">\N{...}</span></code> escape sequences,
 | ||
| what appears in the braces is the Name
 | ||
| property from Unicode Character Database.
 | ||
| Implemented in <a class="reference internal" href="#codecs.namereplace_errors" title="codecs.namereplace_errors"><code class="xref py py-func docutils literal notranslate"><span class="pre">namereplace_errors()</span></code></a>.</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| <p id="index-4">In addition, the following error handler is specific to the given codecs:</p>
 | ||
| <table class="docutils align-default">
 | ||
| <thead>
 | ||
| <tr class="row-odd"><th class="head"><p>Value</p></th>
 | ||
| <th class="head"><p>Codecs</p></th>
 | ||
| <th class="head"><p>Meaning</p></th>
 | ||
| </tr>
 | ||
| </thead>
 | ||
| <tbody>
 | ||
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">'surrogatepass'</span></code></p></td>
 | ||
| <td><p>utf-8, utf-16, utf-32,
 | ||
| utf-16-be, utf-16-le,
 | ||
| utf-32-be, utf-32-le</p></td>
 | ||
| <td><p>Allow encoding and decoding surrogate code
 | ||
| point (<code class="docutils literal notranslate"><span class="pre">U+D800</span></code> - <code class="docutils literal notranslate"><span class="pre">U+DFFF</span></code>) as normal
 | ||
| code point. Otherwise these codecs treat
 | ||
| the presence of surrogate code point in
 | ||
| <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> as an error.</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.1: </span>The <code class="docutils literal notranslate"><span class="pre">'surrogateescape'</span></code> and <code class="docutils literal notranslate"><span class="pre">'surrogatepass'</span></code> error handlers.</p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.4: </span>The <code class="docutils literal notranslate"><span class="pre">'surrogatepass'</span></code> error handler now works with utf-16* and utf-32*
 | ||
| codecs.</p>
 | ||
| </div>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.5: </span>The <code class="docutils literal notranslate"><span class="pre">'namereplace'</span></code> error handler.</p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.5: </span>The <code class="docutils literal notranslate"><span class="pre">'backslashreplace'</span></code> error handler now works with decoding and
 | ||
| translating.</p>
 | ||
| </div>
 | ||
| <p>The set of allowed values can be extended by registering a new named error
 | ||
| handler:</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.register_error">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">register_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_handler</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.register_error" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Register the error handling function <em>error_handler</em> under the name <em>name</em>.
 | ||
| The <em>error_handler</em> argument will be called during encoding and decoding
 | ||
| in case of an error, when <em>name</em> is specified as the errors parameter.</p>
 | ||
| <p>For encoding, <em>error_handler</em> will be called with a <a class="reference internal" href="exceptions.html#UnicodeEncodeError" title="UnicodeEncodeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeEncodeError</span></code></a>
 | ||
| instance, which contains information about the location of the error. The
 | ||
| error handler must either raise this or a different exception, or return a
 | ||
| tuple with a replacement for the unencodable part of the input and a position
 | ||
| where encoding should continue. The replacement may be either <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> or
 | ||
| <a class="reference internal" href="stdtypes.html#bytes" title="bytes"><code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code></a>. If the replacement is bytes, the encoder will simply copy
 | ||
| them into the output buffer. If the replacement is a string, the encoder will
 | ||
| encode the replacement. Encoding continues on original input at the
 | ||
| specified position. Negative position values will be treated as being
 | ||
| relative to the end of the input string. If the resulting position is out of
 | ||
| bounds, an <a class="reference internal" href="exceptions.html#IndexError" title="IndexError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">IndexError</span></code></a> will be raised.</p>
 | ||
| <p>Decoding and translating work similarly, except that <a class="reference internal" href="exceptions.html#UnicodeDecodeError" title="UnicodeDecodeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeDecodeError</span></code></a> or
 | ||
| <a class="reference internal" href="exceptions.html#UnicodeTranslateError" title="UnicodeTranslateError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeTranslateError</span></code></a> will be passed to the handler and that the
 | ||
| replacement from the error handler will be put into the output directly.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>Previously registered error handlers (including the standard error handlers)
 | ||
| can be looked up by name:</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.lookup_error">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">lookup_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.lookup_error" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the error handler previously registered under the name <em>name</em>.</p>
 | ||
| <p>Raises a <a class="reference internal" href="exceptions.html#LookupError" title="LookupError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">LookupError</span></code></a> in case the handler cannot be found.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>The following standard error handlers are also made available as module level
 | ||
| functions:</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.strict_errors">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">strict_errors</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">exception</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.strict_errors" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Implements the <code class="docutils literal notranslate"><span class="pre">'strict'</span></code> error handling.</p>
 | ||
| <p>Each encoding or decoding error raises a <a class="reference internal" href="exceptions.html#UnicodeError" title="UnicodeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeError</span></code></a>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.ignore_errors">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">ignore_errors</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">exception</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.ignore_errors" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Implements the <code class="docutils literal notranslate"><span class="pre">'ignore'</span></code> error handling.</p>
 | ||
| <p>Malformed data is ignored; encoding or decoding is continued without
 | ||
| further notice.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.replace_errors">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">replace_errors</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">exception</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.replace_errors" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Implements the <code class="docutils literal notranslate"><span class="pre">'replace'</span></code> error handling.</p>
 | ||
| <p>Substitutes <code class="docutils literal notranslate"><span class="pre">?</span></code> (ASCII character) for encoding errors or <code class="docutils literal notranslate"><span class="pre">&#xFFFD;</span></code> (U+FFFD,
 | ||
| the official REPLACEMENT CHARACTER) for decoding errors.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.backslashreplace_errors">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">backslashreplace_errors</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">exception</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.backslashreplace_errors" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Implements the <code class="docutils literal notranslate"><span class="pre">'backslashreplace'</span></code> error handling.</p>
 | ||
| <p>Malformed data is replaced by a backslashed escape sequence.
 | ||
| On encoding, use the hexadecimal form of Unicode code point with formats
 | ||
| <code class="samp docutils literal notranslate"><span class="pre">\x</span><em><span class="pre">hh</span></em></code>, <code class="samp docutils literal notranslate"><span class="pre">\u</span><em><span class="pre">xxxx</span></em></code>, or <code class="samp docutils literal notranslate"><span class="pre">\U</span><em><span class="pre">xxxxxxxx</span></em></code>.
 | ||
| On decoding, use the hexadecimal form of
 | ||
| byte value with format <code class="samp docutils literal notranslate"><span class="pre">\x</span><em><span class="pre">hh</span></em></code>.</p>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.5: </span>Works with decoding and translating.</p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.xmlcharrefreplace_errors">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">xmlcharrefreplace_errors</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">exception</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.xmlcharrefreplace_errors" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Implements the <code class="docutils literal notranslate"><span class="pre">'xmlcharrefreplace'</span></code> error handling (for encoding within
 | ||
| <a class="reference internal" href="../glossary.html#term-text-encoding"><span class="xref std std-term">text encoding</span></a> only).</p>
 | ||
| <p>The unencodable character is replaced by an appropriate XML/HTML numeric
 | ||
| character reference, which is a decimal form of Unicode code point with
 | ||
| format <code class="samp docutils literal notranslate"><span class="pre">&amp;#</span><em><span class="pre">num</span></em><span class="pre">;</span></code>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="codecs.namereplace_errors">
 | ||
| <span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">namereplace_errors</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">exception</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.namereplace_errors" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Implements the <code class="docutils literal notranslate"><span class="pre">'namereplace'</span></code> error handling (for encoding within
 | ||
| <a class="reference internal" href="../glossary.html#term-text-encoding"><span class="xref std std-term">text encoding</span></a> only).</p>
 | ||
| <p>The unencodable character is replaced by a <code class="docutils literal notranslate"><span class="pre">\N{...}</span></code> escape sequence. The
 | ||
| set of characters that appear in the braces is the Name property from
 | ||
| Unicode Character Database. For example, the German lowercase letter <code class="docutils literal notranslate"><span class="pre">'ß'</span></code>
 | ||
| will be converted to the byte sequence <code class="docutils literal notranslate"><span class="pre">\N{LATIN</span> <span class="pre">SMALL</span> <span class="pre">LETTER</span> <span class="pre">SHARP</span> <span class="pre">S}</span></code>.</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.5.</span></p>
 | ||
| </div>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </section>
 | ||
| <section id="stateless-encoding-and-decoding">
 | ||
| <span id="codec-objects"></span><h3>Stateless Encoding and Decoding<a class="headerlink" href="#stateless-encoding-and-decoding" title="Link to this heading">¶</a></h3>
 | ||
| <p>The base <a class="reference internal" href="#codecs.Codec" title="codecs.Codec"><code class="xref py py-class docutils literal notranslate"><span class="pre">Codec</span></code></a> class defines these methods which also define the
 | ||
| function interfaces of the stateless encoder and decoder:</p>
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="codecs.Codec">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">Codec</span></span><a class="headerlink" href="#codecs.Codec" title="Link to this definition">¶</a></dt>
 | ||
| <dd><dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.Codec.encode">
 | ||
| <span class="sig-name descname"><span class="pre">encode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.Codec.encode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Encodes the object <em>input</em> and returns a tuple (output object, length consumed).
 | ||
| For instance, <a class="reference internal" href="../glossary.html#term-text-encoding"><span class="xref std std-term">text encoding</span></a> converts
 | ||
| a string object to a bytes object using a particular
 | ||
| character set encoding (e.g., <code class="docutils literal notranslate"><span class="pre">cp1252</span></code> or <code class="docutils literal notranslate"><span class="pre">iso-8859-1</span></code>).</p>
 | ||
| <p>The <em>errors</em> argument defines the error handling to apply.
 | ||
| It defaults to <code class="docutils literal notranslate"><span class="pre">'strict'</span></code> handling.</p>
 | ||
| <p>The method may not store state in the <a class="reference internal" href="#codecs.Codec" title="codecs.Codec"><code class="xref py py-class docutils literal notranslate"><span class="pre">Codec</span></code></a> instance. Use
 | ||
| <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> for codecs which have to keep state in order to make
 | ||
| encoding efficient.</p>
 | ||
| <p>The encoder must be able to handle zero length input and return an empty object
 | ||
| of the output object type in this situation.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.Codec.decode">
 | ||
| <span class="sig-name descname"><span class="pre">decode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.Codec.decode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Decodes the object <em>input</em> and returns a tuple (output object, length
 | ||
| consumed). For instance, for a <a class="reference internal" href="../glossary.html#term-text-encoding"><span class="xref std std-term">text encoding</span></a>, decoding converts
 | ||
| a bytes object encoded using a particular
 | ||
| character set encoding to a string object.</p>
 | ||
| <p>For text encodings and bytes-to-bytes codecs,
 | ||
| <em>input</em> must be a bytes object or one which provides the read-only
 | ||
| buffer interface – for example, buffer objects and memory mapped files.</p>
 | ||
| <p>The <em>errors</em> argument defines the error handling to apply.
 | ||
| It defaults to <code class="docutils literal notranslate"><span class="pre">'strict'</span></code> handling.</p>
 | ||
| <p>The method may not store state in the <a class="reference internal" href="#codecs.Codec" title="codecs.Codec"><code class="xref py py-class docutils literal notranslate"><span class="pre">Codec</span></code></a> instance. Use
 | ||
| <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> for codecs which have to keep state in order to make
 | ||
| decoding efficient.</p>
 | ||
| <p>The decoder must be able to handle zero length input and return an empty object
 | ||
| of the output object type in this situation.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </section>
 | ||
| <section id="incremental-encoding-and-decoding">
 | ||
| <h3>Incremental Encoding and Decoding<a class="headerlink" href="#incremental-encoding-and-decoding" title="Link to this heading">¶</a></h3>
 | ||
| <p>The <a class="reference internal" href="#codecs.IncrementalEncoder" title="codecs.IncrementalEncoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalEncoder</span></code></a> and <a class="reference internal" href="#codecs.IncrementalDecoder" title="codecs.IncrementalDecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalDecoder</span></code></a> classes provide
 | ||
| the basic interface for incremental encoding and decoding. Encoding/decoding the
 | ||
| input isn’t done with one call to the stateless encoder/decoder function, but
 | ||
| with multiple calls to the
 | ||
| <a class="reference internal" href="#codecs.IncrementalEncoder.encode" title="codecs.IncrementalEncoder.encode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">encode()</span></code></a>/<a class="reference internal" href="#codecs.IncrementalDecoder.decode" title="codecs.IncrementalDecoder.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">decode()</span></code></a> method of
 | ||
| the incremental encoder/decoder. The incremental encoder/decoder keeps track of
 | ||
| the encoding/decoding process during method calls.</p>
 | ||
| <p>The joined output of calls to the
 | ||
| <a class="reference internal" href="#codecs.IncrementalEncoder.encode" title="codecs.IncrementalEncoder.encode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">encode()</span></code></a>/<a class="reference internal" href="#codecs.IncrementalDecoder.decode" title="codecs.IncrementalDecoder.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">decode()</span></code></a> method is
 | ||
| the same as if all the single inputs were joined into one, and this input was
 | ||
| encoded/decoded with the stateless encoder/decoder.</p>
 | ||
| <section id="incrementalencoder-objects">
 | ||
| <span id="incremental-encoder-objects"></span><h4>IncrementalEncoder Objects<a class="headerlink" href="#incrementalencoder-objects" title="Link to this heading">¶</a></h4>
 | ||
| <p>The <a class="reference internal" href="#codecs.IncrementalEncoder" title="codecs.IncrementalEncoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalEncoder</span></code></a> class is used for encoding an input in multiple
 | ||
| steps. It defines the following methods which every incremental encoder must
 | ||
| define in order to be compatible with the Python codec registry.</p>
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalEncoder">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">IncrementalEncoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalEncoder" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Constructor for an <a class="reference internal" href="#codecs.IncrementalEncoder" title="codecs.IncrementalEncoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalEncoder</span></code></a> instance.</p>
 | ||
| <p>All incremental encoders must provide this constructor interface. They are free
 | ||
| to add additional keyword arguments, but only the ones defined here are used by
 | ||
| the Python codec registry.</p>
 | ||
| <p>The <a class="reference internal" href="#codecs.IncrementalEncoder" title="codecs.IncrementalEncoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalEncoder</span></code></a> may implement different error handling schemes
 | ||
| by providing the <em>errors</em> keyword argument. See <a class="reference internal" href="#error-handlers"><span class="std std-ref">Error Handlers</span></a> for
 | ||
| possible values.</p>
 | ||
| <p>The <em>errors</em> argument will be assigned to an attribute of the same name.
 | ||
| Assigning to this attribute makes it possible to switch between different error
 | ||
| handling strategies during the lifetime of the <a class="reference internal" href="#codecs.IncrementalEncoder" title="codecs.IncrementalEncoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalEncoder</span></code></a>
 | ||
| object.</p>
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalEncoder.encode">
 | ||
| <span class="sig-name descname"><span class="pre">encode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">object</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">final</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalEncoder.encode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Encodes <em>object</em> (taking the current state of the encoder into account)
 | ||
| and returns the resulting encoded object. If this is the last call to
 | ||
| <a class="reference internal" href="#codecs.IncrementalEncoder.encode" title="codecs.IncrementalEncoder.encode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">encode()</span></code></a>, <em>final</em> must be true (the default is false).</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalEncoder.reset">
 | ||
| <span class="sig-name descname"><span class="pre">reset</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalEncoder.reset" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Reset the encoder to the initial state. The output is discarded: call
 | ||
| <code class="docutils literal notranslate"><span class="pre">.encode(object,</span> <span class="pre">final=True)</span></code>, passing an empty byte or text string
 | ||
| if necessary, to reset the encoder and to get the output.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalEncoder.getstate">
 | ||
| <span class="sig-name descname"><span class="pre">getstate</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalEncoder.getstate" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the current state of the encoder which must be an integer. The
 | ||
| implementation should make sure that <code class="docutils literal notranslate"><span class="pre">0</span></code> is the most common
 | ||
| state. (States that are more complicated than integers can be converted
 | ||
| into an integer by marshaling/pickling the state and encoding the bytes
 | ||
| of the resulting string into an integer.)</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalEncoder.setstate">
 | ||
| <span class="sig-name descname"><span class="pre">setstate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">state</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalEncoder.setstate" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Set the state of the encoder to <em>state</em>. <em>state</em> must be an encoder state
 | ||
| returned by <a class="reference internal" href="#codecs.IncrementalEncoder.getstate" title="codecs.IncrementalEncoder.getstate"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getstate()</span></code></a>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </section>
 | ||
| <section id="incrementaldecoder-objects">
 | ||
| <span id="incremental-decoder-objects"></span><h4>IncrementalDecoder Objects<a class="headerlink" href="#incrementaldecoder-objects" title="Link to this heading">¶</a></h4>
 | ||
| <p>The <a class="reference internal" href="#codecs.IncrementalDecoder" title="codecs.IncrementalDecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalDecoder</span></code></a> class is used for decoding an input in multiple
 | ||
| steps. It defines the following methods which every incremental decoder must
 | ||
| define in order to be compatible with the Python codec registry.</p>
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalDecoder">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">IncrementalDecoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalDecoder" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Constructor for an <a class="reference internal" href="#codecs.IncrementalDecoder" title="codecs.IncrementalDecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalDecoder</span></code></a> instance.</p>
 | ||
| <p>All incremental decoders must provide this constructor interface. They are free
 | ||
| to add additional keyword arguments, but only the ones defined here are used by
 | ||
| the Python codec registry.</p>
 | ||
| <p>The <a class="reference internal" href="#codecs.IncrementalDecoder" title="codecs.IncrementalDecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalDecoder</span></code></a> may implement different error handling schemes
 | ||
| by providing the <em>errors</em> keyword argument. See <a class="reference internal" href="#error-handlers"><span class="std std-ref">Error Handlers</span></a> for
 | ||
| possible values.</p>
 | ||
| <p>The <em>errors</em> argument will be assigned to an attribute of the same name.
 | ||
| Assigning to this attribute makes it possible to switch between different error
 | ||
| handling strategies during the lifetime of the <a class="reference internal" href="#codecs.IncrementalDecoder" title="codecs.IncrementalDecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">IncrementalDecoder</span></code></a>
 | ||
| object.</p>
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalDecoder.decode">
 | ||
| <span class="sig-name descname"><span class="pre">decode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">object</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">final</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalDecoder.decode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Decodes <em>object</em> (taking the current state of the decoder into account)
 | ||
| and returns the resulting decoded object. If this is the last call to
 | ||
| <a class="reference internal" href="#codecs.IncrementalDecoder.decode" title="codecs.IncrementalDecoder.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">decode()</span></code></a>, <em>final</em> must be true (the default is false). If <em>final</em> is
 | ||
| true the decoder must decode the input completely and must flush all
 | ||
| buffers. If this isn’t possible (e.g. because of incomplete byte sequences
 | ||
| at the end of the input) it must initiate error handling just like in the
 | ||
| stateless case (which might raise an exception).</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalDecoder.reset">
 | ||
| <span class="sig-name descname"><span class="pre">reset</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalDecoder.reset" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Reset the decoder to the initial state.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalDecoder.getstate">
 | ||
| <span class="sig-name descname"><span class="pre">getstate</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalDecoder.getstate" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the current state of the decoder. This must be a tuple with two
 | ||
| items, the first must be the buffer containing the still undecoded
 | ||
| input. The second must be an integer and can be additional state
 | ||
| info. (The implementation should make sure that <code class="docutils literal notranslate"><span class="pre">0</span></code> is the most common
 | ||
| additional state info.) If this additional state info is <code class="docutils literal notranslate"><span class="pre">0</span></code> it must be
 | ||
| possible to set the decoder to the state which has no input buffered and
 | ||
| <code class="docutils literal notranslate"><span class="pre">0</span></code> as the additional state info, so that feeding the previously
 | ||
| buffered input to the decoder returns it to the previous state without
 | ||
| producing any output. (Additional state info that is more complicated than
 | ||
| integers can be converted into an integer by marshaling/pickling the info
 | ||
| and encoding the bytes of the resulting string into an integer.)</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.IncrementalDecoder.setstate">
 | ||
| <span class="sig-name descname"><span class="pre">setstate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">state</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.IncrementalDecoder.setstate" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Set the state of the decoder to <em>state</em>. <em>state</em> must be a decoder state
 | ||
| returned by <a class="reference internal" href="#codecs.IncrementalDecoder.getstate" title="codecs.IncrementalDecoder.getstate"><code class="xref py py-meth docutils literal notranslate"><span class="pre">getstate()</span></code></a>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </section>
 | ||
| </section>
 | ||
| <section id="stream-encoding-and-decoding">
 | ||
| <h3>Stream Encoding and Decoding<a class="headerlink" href="#stream-encoding-and-decoding" title="Link to this heading">¶</a></h3>
 | ||
| <p>The <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> and <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> classes provide generic
 | ||
| working interfaces which can be used to implement new encoding submodules very
 | ||
| easily. See <code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.utf_8</span></code> for an example of how this is done.</p>
 | ||
| <section id="streamwriter-objects">
 | ||
| <span id="stream-writer-objects"></span><h4>StreamWriter Objects<a class="headerlink" href="#streamwriter-objects" title="Link to this heading">¶</a></h4>
 | ||
| <p>The <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> class is a subclass of <a class="reference internal" href="#codecs.Codec" title="codecs.Codec"><code class="xref py py-class docutils literal notranslate"><span class="pre">Codec</span></code></a> and defines the
 | ||
| following methods which every stream writer must define in order to be
 | ||
| compatible with the Python codec registry.</p>
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamWriter">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">StreamWriter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">stream</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamWriter" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Constructor for a <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> instance.</p>
 | ||
| <p>All stream writers must provide this constructor interface. They are free to add
 | ||
| additional keyword arguments, but only the ones defined here are used by the
 | ||
| Python codec registry.</p>
 | ||
| <p>The <em>stream</em> argument must be a file-like object open for writing
 | ||
| text or binary data, as appropriate for the specific codec.</p>
 | ||
| <p>The <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> may implement different error handling schemes by
 | ||
| providing the <em>errors</em> keyword argument. See <a class="reference internal" href="#error-handlers"><span class="std std-ref">Error Handlers</span></a> for
 | ||
| the standard error handlers the underlying stream codec may support.</p>
 | ||
| <p>The <em>errors</em> argument will be assigned to an attribute of the same name.
 | ||
| Assigning to this attribute makes it possible to switch between different error
 | ||
| handling strategies during the lifetime of the <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> object.</p>
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamWriter.write">
 | ||
| <span class="sig-name descname"><span class="pre">write</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">object</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamWriter.write" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Writes the object’s contents encoded to the stream.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamWriter.writelines">
 | ||
| <span class="sig-name descname"><span class="pre">writelines</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">list</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamWriter.writelines" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Writes the concatenated iterable of strings to the stream (possibly by reusing
 | ||
| the <a class="reference internal" href="#codecs.StreamWriter.write" title="codecs.StreamWriter.write"><code class="xref py py-meth docutils literal notranslate"><span class="pre">write()</span></code></a> method). Infinite or
 | ||
| very large iterables are not supported. The standard bytes-to-bytes codecs
 | ||
| do not support this method.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamWriter.reset">
 | ||
| <span class="sig-name descname"><span class="pre">reset</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamWriter.reset" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Resets the codec buffers used for keeping internal state.</p>
 | ||
| <p>Calling this method should ensure that the data on the output is put into
 | ||
| a clean state that allows appending of new fresh data without having to
 | ||
| rescan the whole stream to recover state.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>In addition to the above methods, the <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> must also inherit
 | ||
| all other methods and attributes from the underlying stream.</p>
 | ||
| </section>
 | ||
| <section id="streamreader-objects">
 | ||
| <span id="stream-reader-objects"></span><h4>StreamReader Objects<a class="headerlink" href="#streamreader-objects" title="Link to this heading">¶</a></h4>
 | ||
| <p>The <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> class is a subclass of <a class="reference internal" href="#codecs.Codec" title="codecs.Codec"><code class="xref py py-class docutils literal notranslate"><span class="pre">Codec</span></code></a> and defines the
 | ||
| following methods which every stream reader must define in order to be
 | ||
| compatible with the Python codec registry.</p>
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamReader">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">StreamReader</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">stream</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamReader" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Constructor for a <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> instance.</p>
 | ||
| <p>All stream readers must provide this constructor interface. They are free to add
 | ||
| additional keyword arguments, but only the ones defined here are used by the
 | ||
| Python codec registry.</p>
 | ||
| <p>The <em>stream</em> argument must be a file-like object open for reading
 | ||
| text or binary data, as appropriate for the specific codec.</p>
 | ||
| <p>The <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> may implement different error handling schemes by
 | ||
| providing the <em>errors</em> keyword argument. See <a class="reference internal" href="#error-handlers"><span class="std std-ref">Error Handlers</span></a> for
 | ||
| the standard error handlers the underlying stream codec may support.</p>
 | ||
| <p>The <em>errors</em> argument will be assigned to an attribute of the same name.
 | ||
| Assigning to this attribute makes it possible to switch between different error
 | ||
| handling strategies during the lifetime of the <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> object.</p>
 | ||
| <p>The set of allowed values for the <em>errors</em> argument can be extended with
 | ||
| <a class="reference internal" href="#codecs.register_error" title="codecs.register_error"><code class="xref py py-func docutils literal notranslate"><span class="pre">register_error()</span></code></a>.</p>
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamReader.read">
 | ||
| <span class="sig-name descname"><span class="pre">read</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">-1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">chars</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">-1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">firstline</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamReader.read" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Decodes data from the stream and returns the resulting object.</p>
 | ||
| <p>The <em>chars</em> argument indicates the number of decoded
 | ||
| code points or bytes to return. The <a class="reference internal" href="#codecs.StreamReader.read" title="codecs.StreamReader.read"><code class="xref py py-func docutils literal notranslate"><span class="pre">read()</span></code></a> method will
 | ||
| never return more data than requested, but it might return less,
 | ||
| if there is not enough available.</p>
 | ||
| <p>The <em>size</em> argument indicates the approximate maximum
 | ||
| number of encoded bytes or code points to read
 | ||
| for decoding. The decoder can modify this setting as
 | ||
| appropriate. The default value -1 indicates to read and decode as much as
 | ||
| possible. This parameter is intended to
 | ||
| prevent having to decode huge files in one step.</p>
 | ||
| <p>The <em>firstline</em> flag indicates that
 | ||
| it would be sufficient to only return the first
 | ||
| line, if there are decoding errors on later lines.</p>
 | ||
| <p>The method should use a greedy read strategy meaning that it should read
 | ||
| as much data as is allowed within the definition of the encoding and the
 | ||
| given size, e.g.  if optional encoding endings or state markers are
 | ||
| available on the stream, these should be read too.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamReader.readline">
 | ||
| <span class="sig-name descname"><span class="pre">readline</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">keepends</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamReader.readline" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Read one line from the input stream and return the decoded data.</p>
 | ||
| <p><em>size</em>, if given, is passed as size argument to the stream’s
 | ||
| <a class="reference internal" href="#codecs.StreamReader.read" title="codecs.StreamReader.read"><code class="xref py py-meth docutils literal notranslate"><span class="pre">read()</span></code></a> method.</p>
 | ||
| <p>If <em>keepends</em> is false, line-endings will be stripped from the lines
 | ||
| returned.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamReader.readlines">
 | ||
| <span class="sig-name descname"><span class="pre">readlines</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">sizehint</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">keepends</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamReader.readlines" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Read all lines available on the input stream and return them as a list of
 | ||
| lines.</p>
 | ||
| <p>Line-endings are implemented using the codec’s <a class="reference internal" href="#codecs.decode" title="codecs.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">decode()</span></code></a> method and
 | ||
| are included in the list entries if <em>keepends</em> is true.</p>
 | ||
| <p><em>sizehint</em>, if given, is passed as the <em>size</em> argument to the stream’s
 | ||
| <a class="reference internal" href="#codecs.StreamReader.read" title="codecs.StreamReader.read"><code class="xref py py-meth docutils literal notranslate"><span class="pre">read()</span></code></a> method.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py method">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamReader.reset">
 | ||
| <span class="sig-name descname"><span class="pre">reset</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamReader.reset" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Resets the codec buffers used for keeping internal state.</p>
 | ||
| <p>Note that no stream repositioning should take place. This method is
 | ||
| primarily intended to be able to recover from decoding errors.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p>In addition to the above methods, the <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> must also inherit
 | ||
| all other methods and attributes from the underlying stream.</p>
 | ||
| </section>
 | ||
| <section id="streamreaderwriter-objects">
 | ||
| <span id="stream-reader-writer"></span><h4>StreamReaderWriter Objects<a class="headerlink" href="#streamreaderwriter-objects" title="Link to this heading">¶</a></h4>
 | ||
| <p>The <a class="reference internal" href="#codecs.StreamReaderWriter" title="codecs.StreamReaderWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReaderWriter</span></code></a> is a convenience class that allows wrapping
 | ||
| streams which work in both read and write modes.</p>
 | ||
| <p>The design is such that one can use the factory functions returned by the
 | ||
| <a class="reference internal" href="#codecs.lookup" title="codecs.lookup"><code class="xref py py-func docutils literal notranslate"><span class="pre">lookup()</span></code></a> function to construct the instance.</p>
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamReaderWriter">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">StreamReaderWriter</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">stream</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">Reader</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">Writer</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamReaderWriter" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Creates a <a class="reference internal" href="#codecs.StreamReaderWriter" title="codecs.StreamReaderWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReaderWriter</span></code></a> instance. <em>stream</em> must be a file-like
 | ||
| object. <em>Reader</em> and <em>Writer</em> must be factory functions or classes providing the
 | ||
| <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> and <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> interface respectively. Error handling
 | ||
| is done in the same way as defined for the stream readers and writers.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p><a class="reference internal" href="#codecs.StreamReaderWriter" title="codecs.StreamReaderWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReaderWriter</span></code></a> instances define the combined interfaces of
 | ||
| <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> and <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> classes. They inherit all other
 | ||
| methods and attributes from the underlying stream.</p>
 | ||
| </section>
 | ||
| <section id="streamrecoder-objects">
 | ||
| <span id="stream-recoder-objects"></span><h4>StreamRecoder Objects<a class="headerlink" href="#streamrecoder-objects" title="Link to this heading">¶</a></h4>
 | ||
| <p>The <a class="reference internal" href="#codecs.StreamRecoder" title="codecs.StreamRecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamRecoder</span></code></a> translates data from one encoding to another,
 | ||
| which is sometimes useful when dealing with different encoding environments.</p>
 | ||
| <p>The design is such that one can use the factory functions returned by the
 | ||
| <a class="reference internal" href="#codecs.lookup" title="codecs.lookup"><code class="xref py py-func docutils literal notranslate"><span class="pre">lookup()</span></code></a> function to construct the instance.</p>
 | ||
| <dl class="py class">
 | ||
| <dt class="sig sig-object py" id="codecs.StreamRecoder">
 | ||
| <em class="property"><span class="k"><span class="pre">class</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">codecs.</span></span><span class="sig-name descname"><span class="pre">StreamRecoder</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">stream</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">encode</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">decode</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">Reader</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">Writer</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">errors</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'strict'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#codecs.StreamRecoder" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Creates a <a class="reference internal" href="#codecs.StreamRecoder" title="codecs.StreamRecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamRecoder</span></code></a> instance which implements a two-way conversion:
 | ||
| <em>encode</em> and <em>decode</em> work on the frontend — the data visible to
 | ||
| code calling <a class="reference internal" href="#codecs.StreamReader.read" title="codecs.StreamReader.read"><code class="xref py py-meth docutils literal notranslate"><span class="pre">read()</span></code></a> and <a class="reference internal" href="#codecs.StreamWriter.write" title="codecs.StreamWriter.write"><code class="xref py py-meth docutils literal notranslate"><span class="pre">write()</span></code></a>,
 | ||
| while <em>Reader</em> and <em>Writer</em>
 | ||
| work on the backend — the data in <em>stream</em>.</p>
 | ||
| <p>You can use these objects to do transparent transcodings, e.g., from Latin-1
 | ||
| to UTF-8 and back.</p>
 | ||
| <p>The <em>stream</em> argument must be a file-like object.</p>
 | ||
| <p>The <em>encode</em> and <em>decode</em> arguments must
 | ||
| adhere to the <a class="reference internal" href="#codecs.Codec" title="codecs.Codec"><code class="xref py py-class docutils literal notranslate"><span class="pre">Codec</span></code></a> interface. <em>Reader</em> and
 | ||
| <em>Writer</em> must be factory functions or classes providing objects of the
 | ||
| <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> and <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> interface respectively.</p>
 | ||
| <p>Error handling is done in the same way as defined for the stream readers and
 | ||
| writers.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <p><a class="reference internal" href="#codecs.StreamRecoder" title="codecs.StreamRecoder"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamRecoder</span></code></a> instances define the combined interfaces of
 | ||
| <a class="reference internal" href="#codecs.StreamReader" title="codecs.StreamReader"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamReader</span></code></a> and <a class="reference internal" href="#codecs.StreamWriter" title="codecs.StreamWriter"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamWriter</span></code></a> classes. They inherit all other
 | ||
| methods and attributes from the underlying stream.</p>
 | ||
| </section>
 | ||
| </section>
 | ||
| </section>
 | ||
| <section id="encodings-and-unicode">
 | ||
| <span id="encodings-overview"></span><h2>Encodings and Unicode<a class="headerlink" href="#encodings-and-unicode" title="Link to this heading">¶</a></h2>
 | ||
| <p>Strings are stored internally as sequences of code points in
 | ||
| range <code class="docutils literal notranslate"><span class="pre">U+0000</span></code>–<code class="docutils literal notranslate"><span class="pre">U+10FFFF</span></code>. (See <span class="target" id="index-5"></span><a class="pep reference external" href="https://peps.python.org/pep-0393/"><strong>PEP 393</strong></a> for
 | ||
| more details about the implementation.)
 | ||
| Once a string object is used outside of CPU and memory, endianness
 | ||
| and how these arrays are stored as bytes become an issue. As with other
 | ||
| codecs, serialising a string into a sequence of bytes is known as <em>encoding</em>,
 | ||
| and recreating the string from the sequence of bytes is known as <em>decoding</em>.
 | ||
| There are a variety of different text serialisation codecs, which are
 | ||
| collectively referred to as <a class="reference internal" href="../glossary.html#term-text-encoding"><span class="xref std std-term">text encodings</span></a>.</p>
 | ||
| <p>The simplest text encoding (called <code class="docutils literal notranslate"><span class="pre">'latin-1'</span></code> or <code class="docutils literal notranslate"><span class="pre">'iso-8859-1'</span></code>) maps
 | ||
| the code points 0–255 to the bytes <code class="docutils literal notranslate"><span class="pre">0x0</span></code>–<code class="docutils literal notranslate"><span class="pre">0xff</span></code>, which means that a string
 | ||
| object that contains code points above <code class="docutils literal notranslate"><span class="pre">U+00FF</span></code> can’t be encoded with this
 | ||
| codec. Doing so will raise a <a class="reference internal" href="exceptions.html#UnicodeEncodeError" title="UnicodeEncodeError"><code class="xref py py-exc docutils literal notranslate"><span class="pre">UnicodeEncodeError</span></code></a> that looks
 | ||
| like the following (although the details of the error message may differ):
 | ||
| <code class="docutils literal notranslate"><span class="pre">UnicodeEncodeError:</span> <span class="pre">'latin-1'</span> <span class="pre">codec</span> <span class="pre">can't</span> <span class="pre">encode</span> <span class="pre">character</span> <span class="pre">'\u1234'</span> <span class="pre">in</span>
 | ||
| <span class="pre">position</span> <span class="pre">3:</span> <span class="pre">ordinal</span> <span class="pre">not</span> <span class="pre">in</span> <span class="pre">range(256)</span></code>.</p>
 | ||
| <p>There’s another group of encodings (the so-called charmap encodings) that choose
 | ||
| a different subset of all Unicode code points and how these code points are
 | ||
| mapped to the bytes <code class="docutils literal notranslate"><span class="pre">0x0</span></code>–<code class="docutils literal notranslate"><span class="pre">0xff</span></code>. To see how this is done simply open
 | ||
| e.g. <code class="file docutils literal notranslate"><span class="pre">encodings/cp1252.py</span></code> (which is an encoding that is used primarily on
 | ||
| Windows). There’s a string constant with 256 characters that shows you which
 | ||
| character is mapped to which byte value.</p>
 | ||
| <p>All of these encodings can only encode 256 of the 1114112 code points
 | ||
| defined in Unicode. A simple and straightforward way that can store each Unicode
 | ||
| code point is to store each code point as four consecutive bytes. There are two
 | ||
| possibilities: store the bytes in big endian or in little endian order. These
 | ||
| two encodings are called <code class="docutils literal notranslate"><span class="pre">UTF-32-BE</span></code> and <code class="docutils literal notranslate"><span class="pre">UTF-32-LE</span></code> respectively. Their
 | ||
| disadvantage is that if e.g. you use <code class="docutils literal notranslate"><span class="pre">UTF-32-BE</span></code> on a little endian machine you
 | ||
| will always have to swap bytes on encoding and decoding. <code class="docutils literal notranslate"><span class="pre">UTF-32</span></code> avoids this
 | ||
| problem: bytes will always be in natural endianness. When these bytes are read
 | ||
| by a CPU with a different endianness, then bytes have to be swapped though. To
 | ||
| be able to detect the endianness of a <code class="docutils literal notranslate"><span class="pre">UTF-16</span></code> or <code class="docutils literal notranslate"><span class="pre">UTF-32</span></code> byte sequence,
 | ||
| there’s the so-called BOM (“Byte Order Mark”). This is the Unicode character
 | ||
| <code class="docutils literal notranslate"><span class="pre">U+FEFF</span></code>. This character can be prepended to every <code class="docutils literal notranslate"><span class="pre">UTF-16</span></code> or <code class="docutils literal notranslate"><span class="pre">UTF-32</span></code>
 | ||
| byte sequence. The byte swapped version of this character (<code class="docutils literal notranslate"><span class="pre">0xFFFE</span></code>) is an
 | ||
| illegal character that may not appear in a Unicode text. So when the
 | ||
| first character in a <code class="docutils literal notranslate"><span class="pre">UTF-16</span></code> or <code class="docutils literal notranslate"><span class="pre">UTF-32</span></code> byte sequence
 | ||
| appears to be a <code class="docutils literal notranslate"><span class="pre">U+FFFE</span></code> the bytes have to be swapped on decoding.
 | ||
| Unfortunately the character <code class="docutils literal notranslate"><span class="pre">U+FEFF</span></code> had a second purpose as
 | ||
| a <code class="docutils literal notranslate"><span class="pre">ZERO</span> <span class="pre">WIDTH</span> <span class="pre">NO-BREAK</span> <span class="pre">SPACE</span></code>: a character that has no width and doesn’t allow
 | ||
| a word to be split. It can e.g. be used to give hints to a ligature algorithm.
 | ||
| With Unicode 4.0 using <code class="docutils literal notranslate"><span class="pre">U+FEFF</span></code> as a <code class="docutils literal notranslate"><span class="pre">ZERO</span> <span class="pre">WIDTH</span> <span class="pre">NO-BREAK</span> <span class="pre">SPACE</span></code> has been
 | ||
| deprecated (with <code class="docutils literal notranslate"><span class="pre">U+2060</span></code> (<code class="docutils literal notranslate"><span class="pre">WORD</span> <span class="pre">JOINER</span></code>) assuming this role). Nevertheless
 | ||
| Unicode software still must be able to handle <code class="docutils literal notranslate"><span class="pre">U+FEFF</span></code> in both roles: as a BOM
 | ||
| it’s a device to determine the storage layout of the encoded bytes, and vanishes
 | ||
| once the byte sequence has been decoded into a string; as a <code class="docutils literal notranslate"><span class="pre">ZERO</span> <span class="pre">WIDTH</span>
 | ||
| <span class="pre">NO-BREAK</span> <span class="pre">SPACE</span></code> it’s a normal character that will be decoded like any other.</p>
 | ||
| <p>There’s another encoding that is able to encode the full range of Unicode
 | ||
| characters: UTF-8. UTF-8 is an 8-bit encoding, which means there are no issues
 | ||
| with byte order in UTF-8. Each byte in a UTF-8 byte sequence consists of two
 | ||
| parts: marker bits (the most significant bits) and payload bits. The marker bits
 | ||
| are a sequence of zero to four <code class="docutils literal notranslate"><span class="pre">1</span></code> bits followed by a <code class="docutils literal notranslate"><span class="pre">0</span></code> bit. Unicode characters are
 | ||
| encoded like this (with x being payload bits, which when concatenated give the
 | ||
| Unicode character):</p>
 | ||
| <table class="docutils align-default">
 | ||
| <thead>
 | ||
| <tr class="row-odd"><th class="head"><p>Range</p></th>
 | ||
| <th class="head"><p>Encoding</p></th>
 | ||
| </tr>
 | ||
| </thead>
 | ||
| <tbody>
 | ||
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">U-00000000</span></code> … <code class="docutils literal notranslate"><span class="pre">U-0000007F</span></code></p></td>
 | ||
| <td><p>0xxxxxxx</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">U-00000080</span></code> … <code class="docutils literal notranslate"><span class="pre">U-000007FF</span></code></p></td>
 | ||
| <td><p>110xxxxx 10xxxxxx</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">U-00000800</span></code> … <code class="docutils literal notranslate"><span class="pre">U-0000FFFF</span></code></p></td>
 | ||
| <td><p>1110xxxx 10xxxxxx 10xxxxxx</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">U-00010000</span></code> … <code class="docutils literal notranslate"><span class="pre">U-0010FFFF</span></code></p></td>
 | ||
| <td><p>11110xxx 10xxxxxx 10xxxxxx 10xxxxxx</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| <p>The least significant bit of the Unicode character is the rightmost x bit.</p>
 | ||
| <p>As UTF-8 is an 8-bit encoding no BOM is required and any <code class="docutils literal notranslate"><span class="pre">U+FEFF</span></code> character in
 | ||
| the decoded string (even if it’s the first character) is treated as a <code class="docutils literal notranslate"><span class="pre">ZERO</span>
 | ||
| <span class="pre">WIDTH</span> <span class="pre">NO-BREAK</span> <span class="pre">SPACE</span></code>.</p>
 | ||
| <p>Without external information it’s impossible to reliably determine which
 | ||
| encoding was used for encoding a string. Each charmap encoding can
 | ||
| decode any random byte sequence. However, that’s not possible with UTF-8, as
 | ||
| UTF-8 byte sequences have a structure that doesn’t allow arbitrary byte
 | ||
| sequences. To increase the reliability with which a UTF-8 encoding can be
 | ||
| detected, Microsoft invented a variant of UTF-8 (that Python calls
 | ||
| <code class="docutils literal notranslate"><span class="pre">"utf-8-sig"</span></code>) for its Notepad program: Before any of the Unicode characters
 | ||
| is written to the file, a UTF-8 encoded BOM (which looks like this as a byte
 | ||
| sequence: <code class="docutils literal notranslate"><span class="pre">0xef</span></code>, <code class="docutils literal notranslate"><span class="pre">0xbb</span></code>, <code class="docutils literal notranslate"><span class="pre">0xbf</span></code>) is written. As it’s rather improbable
 | ||
| that any charmap encoded file starts with these byte values (which would e.g.
 | ||
| map to</p>
 | ||
| <blockquote>
 | ||
| <div><div class="line-block">
 | ||
| <div class="line">LATIN SMALL LETTER I WITH DIAERESIS</div>
 | ||
| <div class="line">RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK</div>
 | ||
| <div class="line">INVERTED QUESTION MARK</div>
 | ||
| </div>
 | ||
| </div></blockquote>
 | ||
| <p>in iso-8859-1), this increases the probability that a <code class="docutils literal notranslate"><span class="pre">utf-8-sig</span></code> encoding can be
 | ||
| correctly guessed from the byte sequence. So here the BOM is not used to be able
 | ||
| to determine the byte order used for generating the byte sequence, but as a
 | ||
| signature that helps in guessing the encoding. On encoding, the utf-8-sig codec
 | ||
| will write <code class="docutils literal notranslate"><span class="pre">0xef</span></code>, <code class="docutils literal notranslate"><span class="pre">0xbb</span></code>, <code class="docutils literal notranslate"><span class="pre">0xbf</span></code> as the first three bytes to the file. On
 | ||
| decoding, <code class="docutils literal notranslate"><span class="pre">utf-8-sig</span></code> will skip those three bytes if they appear as the first
 | ||
| three bytes in the file. In UTF-8, the use of the BOM is discouraged and
 | ||
| should generally be avoided.</p>
 | ||
| </section>
 | ||
| <section id="standard-encodings">
 | ||
| <span id="id3"></span><h2>Standard Encodings<a class="headerlink" href="#standard-encodings" title="Link to this heading">¶</a></h2>
 | ||
| <p>Python comes with a number of codecs built-in, either implemented as C functions
 | ||
| or with dictionaries as mapping tables. The following table lists the codecs by
 | ||
| name, together with a few common aliases, and the languages for which the
 | ||
| encoding is likely used. Neither the list of aliases nor the list of languages
 | ||
| is meant to be exhaustive. Notice that spelling alternatives that only differ in
 | ||
| case or use a hyphen instead of an underscore are also valid aliases; therefore,
 | ||
| e.g. <code class="docutils literal notranslate"><span class="pre">'utf-8'</span></code> is a valid alias for the <code class="docutils literal notranslate"><span class="pre">'utf_8'</span></code> codec.</p>
 | ||
| <div class="impl-detail compound">
 | ||
| <p><strong>CPython implementation detail:</strong> Some common encodings can bypass the codecs lookup machinery to
 | ||
| improve performance. These optimization opportunities are only
 | ||
| recognized by CPython for a limited set of (case insensitive)
 | ||
| aliases: utf-8, utf8, latin-1, latin1, iso-8859-1, iso8859-1, mbcs
 | ||
| (Windows only), ascii, us-ascii, utf-16, utf16, utf-32, utf32, and
 | ||
| the same using underscores instead of dashes. Using alternative
 | ||
| aliases for these encodings may result in slower execution.</p>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.6: </span>Optimization opportunity recognized for us-ascii.</p>
 | ||
| </div>
 | ||
| </div>
 | ||
| <p>Many of the character sets support the same languages. They vary in individual
 | ||
| characters (e.g. whether the EURO SIGN is supported or not), and in the
 | ||
| assignment of characters to code positions. For the European languages in
 | ||
| particular, the following variants typically exist:</p>
 | ||
| <ul class="simple">
 | ||
| <li><p>an ISO 8859 codeset</p></li>
 | ||
| <li><p>a Microsoft Windows code page, which is typically derived from an 8859 codeset,
 | ||
| but replaces control characters with additional graphic characters</p></li>
 | ||
| <li><p>an IBM EBCDIC code page</p></li>
 | ||
| <li><p>an IBM PC code page, which is ASCII compatible</p></li>
 | ||
| </ul>
 | ||
| <table class="docutils align-default">
 | ||
| <thead>
 | ||
| <tr class="row-odd"><th class="head"><p>Codec</p></th>
 | ||
| <th class="head"><p>Aliases</p></th>
 | ||
| <th class="head"><p>Languages</p></th>
 | ||
| </tr>
 | ||
| </thead>
 | ||
| <tbody>
 | ||
| <tr class="row-even"><td><p>ascii</p></td>
 | ||
| <td><p>646, us-ascii</p></td>
 | ||
| <td><p>English</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>big5</p></td>
 | ||
| <td><p>big5-tw, csbig5</p></td>
 | ||
| <td><p>Traditional Chinese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>big5hkscs</p></td>
 | ||
| <td><p>big5-hkscs, hkscs</p></td>
 | ||
| <td><p>Traditional Chinese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp037</p></td>
 | ||
| <td><p>IBM037, IBM039</p></td>
 | ||
| <td><p>English</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp273</p></td>
 | ||
| <td><p>273, IBM273, csIBM273</p></td>
 | ||
| <td><p>German</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.4.</span></p>
 | ||
| </div>
 | ||
| </td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp424</p></td>
 | ||
| <td><p>EBCDIC-CP-HE, IBM424</p></td>
 | ||
| <td><p>Hebrew</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp437</p></td>
 | ||
| <td><p>437, IBM437</p></td>
 | ||
| <td><p>English</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp500</p></td>
 | ||
| <td><p>EBCDIC-CP-BE, EBCDIC-CP-CH,
 | ||
| IBM500</p></td>
 | ||
| <td><p>Western Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp720</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Arabic</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp737</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Greek</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp775</p></td>
 | ||
| <td><p>IBM775</p></td>
 | ||
| <td><p>Baltic languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp850</p></td>
 | ||
| <td><p>850, IBM850</p></td>
 | ||
| <td><p>Western Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp852</p></td>
 | ||
| <td><p>852, IBM852</p></td>
 | ||
| <td><p>Central and Eastern Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp855</p></td>
 | ||
| <td><p>855, IBM855</p></td>
 | ||
| <td><p>Bulgarian, Byelorussian,
 | ||
| Macedonian, Russian, Serbian</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp856</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Hebrew</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp857</p></td>
 | ||
| <td><p>857, IBM857</p></td>
 | ||
| <td><p>Turkish</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp858</p></td>
 | ||
| <td><p>858, IBM858</p></td>
 | ||
| <td><p>Western Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp860</p></td>
 | ||
| <td><p>860, IBM860</p></td>
 | ||
| <td><p>Portuguese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp861</p></td>
 | ||
| <td><p>861, CP-IS, IBM861</p></td>
 | ||
| <td><p>Icelandic</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp862</p></td>
 | ||
| <td><p>862, IBM862</p></td>
 | ||
| <td><p>Hebrew</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp863</p></td>
 | ||
| <td><p>863, IBM863</p></td>
 | ||
| <td><p>Canadian French</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp864</p></td>
 | ||
| <td><p>IBM864</p></td>
 | ||
| <td><p>Arabic</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp865</p></td>
 | ||
| <td><p>865, IBM865</p></td>
 | ||
| <td><p>Danish, Norwegian</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp866</p></td>
 | ||
| <td><p>866, IBM866</p></td>
 | ||
| <td><p>Russian</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp869</p></td>
 | ||
| <td><p>869, CP-GR, IBM869</p></td>
 | ||
| <td><p>Greek</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp874</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Thai</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp875</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Greek</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp932</p></td>
 | ||
| <td><p>932, ms932, mskanji, ms-kanji,
 | ||
| windows-31j</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp949</p></td>
 | ||
| <td><p>949, ms949, uhc</p></td>
 | ||
| <td><p>Korean</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp950</p></td>
 | ||
| <td><p>950, ms950</p></td>
 | ||
| <td><p>Traditional Chinese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp1006</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Urdu</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp1026</p></td>
 | ||
| <td><p>ibm1026</p></td>
 | ||
| <td><p>Turkish</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp1125</p></td>
 | ||
| <td><p>1125, ibm1125, cp866u, ruscii</p></td>
 | ||
| <td><p>Ukrainian</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.4.</span></p>
 | ||
| </div>
 | ||
| </td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp1140</p></td>
 | ||
| <td><p>ibm1140</p></td>
 | ||
| <td><p>Western Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp1250</p></td>
 | ||
| <td><p>windows-1250</p></td>
 | ||
| <td><p>Central and Eastern Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp1251</p></td>
 | ||
| <td><p>windows-1251</p></td>
 | ||
| <td><p>Bulgarian, Byelorussian,
 | ||
| Macedonian, Russian, Serbian</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp1252</p></td>
 | ||
| <td><p>windows-1252</p></td>
 | ||
| <td><p>Western Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp1253</p></td>
 | ||
| <td><p>windows-1253</p></td>
 | ||
| <td><p>Greek</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp1254</p></td>
 | ||
| <td><p>windows-1254</p></td>
 | ||
| <td><p>Turkish</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp1255</p></td>
 | ||
| <td><p>windows-1255</p></td>
 | ||
| <td><p>Hebrew</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp1256</p></td>
 | ||
| <td><p>windows-1256</p></td>
 | ||
| <td><p>Arabic</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>cp1257</p></td>
 | ||
| <td><p>windows-1257</p></td>
 | ||
| <td><p>Baltic languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>cp1258</p></td>
 | ||
| <td><p>windows-1258</p></td>
 | ||
| <td><p>Vietnamese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>euc_jp</p></td>
 | ||
| <td><p>eucjp, ujis, u-jis</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>euc_jis_2004</p></td>
 | ||
| <td><p>jisx0213, eucjis2004</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>euc_jisx0213</p></td>
 | ||
| <td><p>eucjisx0213</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>euc_kr</p></td>
 | ||
| <td><p>euckr, korean, ksc5601,
 | ||
| ks_c-5601, ks_c-5601-1987,
 | ||
| ksx1001, ks_x-1001</p></td>
 | ||
| <td><p>Korean</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>gb2312</p></td>
 | ||
| <td><p>chinese, csiso58gb231280,
 | ||
| euc-cn, euccn, eucgb2312-cn,
 | ||
| gb2312-1980, gb2312-80,
 | ||
| iso-ir-58</p></td>
 | ||
| <td><p>Simplified Chinese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>gbk</p></td>
 | ||
| <td><p>936, cp936, ms936</p></td>
 | ||
| <td><p>Unified Chinese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>gb18030</p></td>
 | ||
| <td><p>gb18030-2000</p></td>
 | ||
| <td><p>Unified Chinese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>hz</p></td>
 | ||
| <td><p>hzgb, hz-gb, hz-gb-2312</p></td>
 | ||
| <td><p>Simplified Chinese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso2022_jp</p></td>
 | ||
| <td><p>csiso2022jp, iso2022jp,
 | ||
| iso-2022-jp</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso2022_jp_1</p></td>
 | ||
| <td><p>iso2022jp-1, iso-2022-jp-1</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso2022_jp_2</p></td>
 | ||
| <td><p>iso2022jp-2, iso-2022-jp-2</p></td>
 | ||
| <td><p>Japanese, Korean, Simplified
 | ||
| Chinese, Western Europe, Greek</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso2022_jp_2004</p></td>
 | ||
| <td><p>iso2022jp-2004,
 | ||
| iso-2022-jp-2004</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso2022_jp_3</p></td>
 | ||
| <td><p>iso2022jp-3, iso-2022-jp-3</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso2022_jp_ext</p></td>
 | ||
| <td><p>iso2022jp-ext, iso-2022-jp-ext</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso2022_kr</p></td>
 | ||
| <td><p>csiso2022kr, iso2022kr,
 | ||
| iso-2022-kr</p></td>
 | ||
| <td><p>Korean</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>latin_1</p></td>
 | ||
| <td><p>iso-8859-1, iso8859-1, 8859,
 | ||
| cp819, latin, latin1, L1</p></td>
 | ||
| <td><p>Western Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso8859_2</p></td>
 | ||
| <td><p>iso-8859-2, latin2, L2</p></td>
 | ||
| <td><p>Central and Eastern Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso8859_3</p></td>
 | ||
| <td><p>iso-8859-3, latin3, L3</p></td>
 | ||
| <td><p>Esperanto, Maltese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso8859_4</p></td>
 | ||
| <td><p>iso-8859-4, latin4, L4</p></td>
 | ||
| <td><p>Baltic languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso8859_5</p></td>
 | ||
| <td><p>iso-8859-5, cyrillic</p></td>
 | ||
| <td><p>Bulgarian, Byelorussian,
 | ||
| Macedonian, Russian, Serbian</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso8859_6</p></td>
 | ||
| <td><p>iso-8859-6, arabic</p></td>
 | ||
| <td><p>Arabic</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso8859_7</p></td>
 | ||
| <td><p>iso-8859-7, greek, greek8</p></td>
 | ||
| <td><p>Greek</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso8859_8</p></td>
 | ||
| <td><p>iso-8859-8, hebrew</p></td>
 | ||
| <td><p>Hebrew</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso8859_9</p></td>
 | ||
| <td><p>iso-8859-9, latin5, L5</p></td>
 | ||
| <td><p>Turkish</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso8859_10</p></td>
 | ||
| <td><p>iso-8859-10, latin6, L6</p></td>
 | ||
| <td><p>Nordic languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso8859_11</p></td>
 | ||
| <td><p>iso-8859-11, thai</p></td>
 | ||
| <td><p>Thai languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso8859_13</p></td>
 | ||
| <td><p>iso-8859-13, latin7, L7</p></td>
 | ||
| <td><p>Baltic languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso8859_14</p></td>
 | ||
| <td><p>iso-8859-14, latin8, L8</p></td>
 | ||
| <td><p>Celtic languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>iso8859_15</p></td>
 | ||
| <td><p>iso-8859-15, latin9, L9</p></td>
 | ||
| <td><p>Western Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>iso8859_16</p></td>
 | ||
| <td><p>iso-8859-16, latin10, L10</p></td>
 | ||
| <td><p>South-Eastern Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>johab</p></td>
 | ||
| <td><p>cp1361, ms1361</p></td>
 | ||
| <td><p>Korean</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>koi8_r</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Russian</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>koi8_t</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Tajik</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.5.</span></p>
 | ||
| </div>
 | ||
| </td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>koi8_u</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Ukrainian</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>kz1048</p></td>
 | ||
| <td><p>kz_1048, strk1048_2002, rk1048</p></td>
 | ||
| <td><p>Kazakh</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.5.</span></p>
 | ||
| </div>
 | ||
| </td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>mac_cyrillic</p></td>
 | ||
| <td><p>maccyrillic</p></td>
 | ||
| <td><p>Bulgarian, Byelorussian,
 | ||
| Macedonian, Russian, Serbian</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>mac_greek</p></td>
 | ||
| <td><p>macgreek</p></td>
 | ||
| <td><p>Greek</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>mac_iceland</p></td>
 | ||
| <td><p>maciceland</p></td>
 | ||
| <td><p>Icelandic</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>mac_latin2</p></td>
 | ||
| <td><p>maclatin2, maccentraleurope,
 | ||
| mac_centeuro</p></td>
 | ||
| <td><p>Central and Eastern Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>mac_roman</p></td>
 | ||
| <td><p>macroman, macintosh</p></td>
 | ||
| <td><p>Western Europe</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>mac_turkish</p></td>
 | ||
| <td><p>macturkish</p></td>
 | ||
| <td><p>Turkish</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>ptcp154</p></td>
 | ||
| <td><p>csptcp154, pt154, cp154,
 | ||
| cyrillic-asian</p></td>
 | ||
| <td><p>Kazakh</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>shift_jis</p></td>
 | ||
| <td><p>csshiftjis, shiftjis, sjis,
 | ||
| s_jis</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>shift_jis_2004</p></td>
 | ||
| <td><p>shiftjis2004, sjis_2004,
 | ||
| sjis2004</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>shift_jisx0213</p></td>
 | ||
| <td><p>shiftjisx0213, sjisx0213,
 | ||
| s_jisx0213</p></td>
 | ||
| <td><p>Japanese</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>utf_32</p></td>
 | ||
| <td><p>U32, utf32</p></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>utf_32_be</p></td>
 | ||
| <td><p>UTF-32BE</p></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>utf_32_le</p></td>
 | ||
| <td><p>UTF-32LE</p></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>utf_16</p></td>
 | ||
| <td><p>U16, utf16</p></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>utf_16_be</p></td>
 | ||
| <td><p>UTF-16BE</p></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>utf_16_le</p></td>
 | ||
| <td><p>UTF-16LE</p></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>utf_7</p></td>
 | ||
| <td><p>U7, unicode-1-1-utf-7</p></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>utf_8</p></td>
 | ||
| <td><p>U8, UTF, utf8, cp65001</p></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>utf_8_sig</p></td>
 | ||
| <td></td>
 | ||
| <td><p>all languages</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.4: </span>The utf-16* and utf-32* encoders no longer allow surrogate code points
 | ||
| (<code class="docutils literal notranslate"><span class="pre">U+D800</span></code>–<code class="docutils literal notranslate"><span class="pre">U+DFFF</span></code>) to be encoded.
 | ||
| The utf-32* decoders no longer decode
 | ||
| byte sequences that correspond to surrogate code points.</p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.8: </span><code class="docutils literal notranslate"><span class="pre">cp65001</span></code> is now an alias to <code class="docutils literal notranslate"><span class="pre">utf_8</span></code>.</p>
 | ||
| </div>
 | ||
| </section>
 | ||
| <section id="python-specific-encodings">
 | ||
| <h2>Python Specific Encodings<a class="headerlink" href="#python-specific-encodings" title="Link to this heading">¶</a></h2>
 | ||
| <p>A number of predefined codecs are specific to Python, so their codec names have
 | ||
| no meaning outside Python. These are listed in the tables below based on the
 | ||
| expected input and output types (note that while text encodings are the most
 | ||
| common use case for codecs, the underlying codec infrastructure supports
 | ||
| arbitrary data transforms rather than just text encodings). For asymmetric
 | ||
| codecs, the stated meaning describes the encoding direction.</p>
 | ||
| <section id="text-encodings">
 | ||
| <h3>Text Encodings<a class="headerlink" href="#text-encodings" title="Link to this heading">¶</a></h3>
 | ||
| <p>The following codecs provide <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> to <a class="reference internal" href="stdtypes.html#bytes" title="bytes"><code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code></a> encoding and
 | ||
| <a class="reference internal" href="../glossary.html#term-bytes-like-object"><span class="xref std std-term">bytes-like object</span></a> to <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> decoding, similar to the Unicode text
 | ||
| encodings.</p>
 | ||
| <table class="docutils align-default">
 | ||
| <thead>
 | ||
| <tr class="row-odd"><th class="head"><p>Codec</p></th>
 | ||
| <th class="head"><p>Aliases</p></th>
 | ||
| <th class="head"><p>Meaning</p></th>
 | ||
| </tr>
 | ||
| </thead>
 | ||
| <tbody>
 | ||
| <tr class="row-even"><td><p>idna</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Implement <span class="target" id="index-6"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc3490.html"><strong>RFC 3490</strong></a>,
 | ||
| see also
 | ||
| <a class="reference internal" href="#module-encodings.idna" title="encodings.idna: Internationalized Domain Names implementation"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.idna</span></code></a>.
 | ||
| Only <code class="docutils literal notranslate"><span class="pre">errors='strict'</span></code>
 | ||
| is supported.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>mbcs</p></td>
 | ||
| <td><p>ansi,
 | ||
| dbcs</p></td>
 | ||
| <td><p>Windows only: Encode the
 | ||
| operand according to the
 | ||
| ANSI codepage (CP_ACP).</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>oem</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Windows only: Encode the
 | ||
| operand according to the
 | ||
| OEM codepage (CP_OEMCP).</p>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.6.</span></p>
 | ||
| </div>
 | ||
| </td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>palmos</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Encoding of PalmOS 3.5.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>punycode</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Implement <span class="target" id="index-7"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc3492.html"><strong>RFC 3492</strong></a>.
 | ||
| Stateful codecs are not
 | ||
| supported.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>raw_unicode_escape</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Latin-1 encoding with
 | ||
| <code class="samp docutils literal notranslate"><span class="pre">\u</span><em><span class="pre">XXXX</span></em></code> and
 | ||
| <code class="samp docutils literal notranslate"><span class="pre">\U</span><em><span class="pre">XXXXXXXX</span></em></code>
 | ||
| for other code points.
 | ||
| Existing
 | ||
| backslashes are not
 | ||
| escaped in any way.
 | ||
| It is used in the Python
 | ||
| pickle protocol.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>undefined</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Raise an exception for
 | ||
| all conversions, even
 | ||
| empty strings. The error
 | ||
| handler is ignored.</p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>unicode_escape</p></td>
 | ||
| <td></td>
 | ||
| <td><p>Encoding suitable as the
 | ||
| contents of a Unicode
 | ||
| literal in ASCII-encoded
 | ||
| Python source code,
 | ||
| except that quotes are
 | ||
| not escaped. Decode
 | ||
| from Latin-1 source code.
 | ||
| Beware that Python source
 | ||
| code actually uses UTF-8
 | ||
| by default.</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.8: </span>“unicode_internal” codec is removed.</p>
 | ||
| </div>
 | ||
| </section>
 | ||
| <section id="binary-transforms">
 | ||
| <span id="id4"></span><h3>Binary Transforms<a class="headerlink" href="#binary-transforms" title="Link to this heading">¶</a></h3>
 | ||
| <p>The following codecs provide binary transforms: <a class="reference internal" href="../glossary.html#term-bytes-like-object"><span class="xref std std-term">bytes-like object</span></a>
 | ||
| to <a class="reference internal" href="stdtypes.html#bytes" title="bytes"><code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code></a> mappings. They are not supported by <a class="reference internal" href="stdtypes.html#bytes.decode" title="bytes.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">bytes.decode()</span></code></a>
 | ||
| (which only produces <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> output).</p>
 | ||
| <table class="docutils align-default">
 | ||
| <thead>
 | ||
| <tr class="row-odd"><th class="head"><p>Codec</p></th>
 | ||
| <th class="head"><p>Aliases</p></th>
 | ||
| <th class="head"><p>Meaning</p></th>
 | ||
| <th class="head"><p>Encoder / decoder</p></th>
 | ||
| </tr>
 | ||
| </thead>
 | ||
| <tbody>
 | ||
| <tr class="row-even"><td><p>base64_codec <a class="footnote-reference brackets" href="#b64" id="id5" role="doc-noteref"><span class="fn-bracket">[</span>1<span class="fn-bracket">]</span></a></p></td>
 | ||
| <td><p>base64, base_64</p></td>
 | ||
| <td><p>Convert the operand to
 | ||
| multiline MIME base64 (the
 | ||
| result always includes a
 | ||
| trailing <code class="docutils literal notranslate"><span class="pre">'\n'</span></code>).</p>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.4: </span>Accepts any
 | ||
| <a class="reference internal" href="../glossary.html#term-bytes-like-object"><span class="xref std std-term">bytes-like object</span></a>
 | ||
| as input for encoding and
 | ||
| decoding.</p>
 | ||
| </div>
 | ||
| </td>
 | ||
| <td><p><a class="reference internal" href="base64.html#base64.encodebytes" title="base64.encodebytes"><code class="xref py py-meth docutils literal notranslate"><span class="pre">base64.encodebytes()</span></code></a> /
 | ||
| <a class="reference internal" href="base64.html#base64.decodebytes" title="base64.decodebytes"><code class="xref py py-meth docutils literal notranslate"><span class="pre">base64.decodebytes()</span></code></a></p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>bz2_codec</p></td>
 | ||
| <td><p>bz2</p></td>
 | ||
| <td><p>Compress the operand using
 | ||
| bz2.</p></td>
 | ||
| <td><p><a class="reference internal" href="bz2.html#bz2.compress" title="bz2.compress"><code class="xref py py-meth docutils literal notranslate"><span class="pre">bz2.compress()</span></code></a> /
 | ||
| <a class="reference internal" href="bz2.html#bz2.decompress" title="bz2.decompress"><code class="xref py py-meth docutils literal notranslate"><span class="pre">bz2.decompress()</span></code></a></p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>hex_codec</p></td>
 | ||
| <td><p>hex</p></td>
 | ||
| <td><p>Convert the operand to
 | ||
| hexadecimal
 | ||
| representation, with two
 | ||
| digits per byte.</p></td>
 | ||
| <td><p><a class="reference internal" href="binascii.html#binascii.b2a_hex" title="binascii.b2a_hex"><code class="xref py py-meth docutils literal notranslate"><span class="pre">binascii.b2a_hex()</span></code></a> /
 | ||
| <a class="reference internal" href="binascii.html#binascii.a2b_hex" title="binascii.a2b_hex"><code class="xref py py-meth docutils literal notranslate"><span class="pre">binascii.a2b_hex()</span></code></a></p></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>quopri_codec</p></td>
 | ||
| <td><p>quopri,
 | ||
| quotedprintable,
 | ||
| quoted_printable</p></td>
 | ||
| <td><p>Convert the operand to MIME
 | ||
| quoted printable.</p></td>
 | ||
| <td><p><a class="reference internal" href="quopri.html#quopri.encode" title="quopri.encode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quopri.encode()</span></code></a> with
 | ||
| <code class="docutils literal notranslate"><span class="pre">quotetabs=True</span></code> /
 | ||
| <a class="reference internal" href="quopri.html#quopri.decode" title="quopri.decode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quopri.decode()</span></code></a></p></td>
 | ||
| </tr>
 | ||
| <tr class="row-even"><td><p>uu_codec</p></td>
 | ||
| <td><p>uu</p></td>
 | ||
| <td><p>Convert the operand using
 | ||
| uuencode.</p></td>
 | ||
| <td></td>
 | ||
| </tr>
 | ||
| <tr class="row-odd"><td><p>zlib_codec</p></td>
 | ||
| <td><p>zip, zlib</p></td>
 | ||
| <td><p>Compress the operand using
 | ||
| zlib.</p></td>
 | ||
| <td><p><a class="reference internal" href="zlib.html#zlib.compress" title="zlib.compress"><code class="xref py py-meth docutils literal notranslate"><span class="pre">zlib.compress()</span></code></a> /
 | ||
| <a class="reference internal" href="zlib.html#zlib.decompress" title="zlib.decompress"><code class="xref py py-meth docutils literal notranslate"><span class="pre">zlib.decompress()</span></code></a></p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| <aside class="footnote-list brackets">
 | ||
| <aside class="footnote brackets" id="b64" role="doc-footnote">
 | ||
| <span class="label"><span class="fn-bracket">[</span><a role="doc-backlink" href="#id5">1</a><span class="fn-bracket">]</span></span>
 | ||
| <p>In addition to <a class="reference internal" href="../glossary.html#term-bytes-like-object"><span class="xref std std-term">bytes-like objects</span></a>,
 | ||
| <code class="docutils literal notranslate"><span class="pre">'base64_codec'</span></code> also accepts ASCII-only instances of <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> for
 | ||
| decoding.</p>
 | ||
| </aside>
 | ||
| </aside>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.2: </span>Restoration of the binary transforms.</p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.4: </span>Restoration of the aliases for the binary transforms.</p>
 | ||
| </div>
 | ||
| </section>
 | ||
| <section id="text-transforms">
 | ||
| <span id="id6"></span><h3>Text Transforms<a class="headerlink" href="#text-transforms" title="Link to this heading">¶</a></h3>
 | ||
| <p>The following codec provides a text transform: a <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a> to <a class="reference internal" href="stdtypes.html#str" title="str"><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></a>
 | ||
| mapping. It is not supported by <a class="reference internal" href="stdtypes.html#str.encode" title="str.encode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">str.encode()</span></code></a> (which only produces
 | ||
| <a class="reference internal" href="stdtypes.html#bytes" title="bytes"><code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code></a> output).</p>
 | ||
| <table class="docutils align-default">
 | ||
| <thead>
 | ||
| <tr class="row-odd"><th class="head"><p>Codec</p></th>
 | ||
| <th class="head"><p>Aliases</p></th>
 | ||
| <th class="head"><p>Meaning</p></th>
 | ||
| </tr>
 | ||
| </thead>
 | ||
| <tbody>
 | ||
| <tr class="row-even"><td><p>rot_13</p></td>
 | ||
| <td><p>rot13</p></td>
 | ||
| <td><p>Return the Caesar-cypher
 | ||
| encryption of the
 | ||
| operand.</p></td>
 | ||
| </tr>
 | ||
| </tbody>
 | ||
| </table>
 | ||
| <div class="versionadded">
 | ||
| <p><span class="versionmodified added">Added in version 3.2: </span>Restoration of the <code class="docutils literal notranslate"><span class="pre">rot_13</span></code> text transform.</p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.4: </span>Restoration of the <code class="docutils literal notranslate"><span class="pre">rot13</span></code> alias.</p>
 | ||
| </div>
 | ||
| </section>
 | ||
| </section>
 | ||
| <section id="module-encodings.idna">
 | ||
| <span id="encodings-idna-internationalized-domain-names-in-applications"></span><h2><a class="reference internal" href="#module-encodings.idna" title="encodings.idna: Internationalized Domain Names implementation"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.idna</span></code></a> — Internationalized Domain Names in Applications<a class="headerlink" href="#module-encodings.idna" title="Link to this heading">¶</a></h2>
 | ||
| <p>This module implements <span class="target" id="index-8"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc3490.html"><strong>RFC 3490</strong></a> (Internationalized Domain Names in
 | ||
| Applications) and <span class="target" id="index-9"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc3491.html"><strong>RFC 3491</strong></a> (Nameprep: A Stringprep Profile for
 | ||
| Internationalized Domain Names (IDN)). It builds upon the <code class="docutils literal notranslate"><span class="pre">punycode</span></code> encoding
 | ||
| and <a class="reference internal" href="stringprep.html#module-stringprep" title="stringprep: String preparation, as per RFC 3454"><code class="xref py py-mod docutils literal notranslate"><span class="pre">stringprep</span></code></a>.</p>
 | ||
| <p>If you need the IDNA 2008 standard from <span class="target" id="index-10"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc5891.html"><strong>RFC 5891</strong></a> and <span class="target" id="index-11"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc5895.html"><strong>RFC 5895</strong></a>, use the
 | ||
| third-party <a class="extlink-pypi reference external" href="https://pypi.org/project/idna/">idna</a> module.</p>
 | ||
| <p>These RFCs together define a protocol to support non-ASCII characters in domain
 | ||
| names. A domain name containing non-ASCII characters (such as
 | ||
| <code class="docutils literal notranslate"><span class="pre">www.Alliancefrançaise.nu</span></code>) is converted into an ASCII-compatible encoding
 | ||
| (ACE, such as <code class="docutils literal notranslate"><span class="pre">www.xn--alliancefranaise-npb.nu</span></code>). The ACE form of the domain
 | ||
| name is then used in all places where arbitrary characters are not allowed by
 | ||
| the protocol, such as DNS queries, HTTP <em class="mailheader">Host</em> fields, and so
 | ||
| on. This conversion is carried out in the application and, if possible, invisibly to
 | ||
| the user: The application should transparently convert Unicode domain labels to
 | ||
| IDNA on the wire, and convert back ACE labels to Unicode before presenting them
 | ||
| to the user.</p>
 | ||
| <p>Python supports this conversion in several ways:  the <code class="docutils literal notranslate"><span class="pre">idna</span></code> codec performs
 | ||
| conversion between Unicode and ACE, separating an input string into labels
 | ||
| based on the separator characters defined in <span class="target" id="index-12"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc3490.html#section-3.1"><strong>section 3.1 of RFC 3490</strong></a>
 | ||
| and converting each label to ACE as required, and conversely separating an input
 | ||
| byte string into labels based on the <code class="docutils literal notranslate"><span class="pre">.</span></code> separator and converting any ACE
 | ||
| labels found into Unicode. Furthermore, the <a class="reference internal" href="socket.html#module-socket" title="socket: Low-level networking interface."><code class="xref py py-mod docutils literal notranslate"><span class="pre">socket</span></code></a> module
 | ||
| transparently converts Unicode host names to ACE, so that applications need not
 | ||
| be concerned about converting host names themselves when they pass them to the
 | ||
| socket module. On top of that, modules that have host names as function
 | ||
| parameters, such as <a class="reference internal" href="http.client.html#module-http.client" title="http.client: HTTP and HTTPS protocol client (requires sockets)."><code class="xref py py-mod docutils literal notranslate"><span class="pre">http.client</span></code></a> and <a class="reference internal" href="ftplib.html#module-ftplib" title="ftplib: FTP protocol client (requires sockets)."><code class="xref py py-mod docutils literal notranslate"><span class="pre">ftplib</span></code></a>, accept Unicode host
 | ||
| names (<a class="reference internal" href="http.client.html#module-http.client" title="http.client: HTTP and HTTPS protocol client (requires sockets)."><code class="xref py py-mod docutils literal notranslate"><span class="pre">http.client</span></code></a> then also transparently sends an IDNA hostname in the
 | ||
| <em class="mailheader">Host</em> field if it sends that field at all).</p>
 | ||
| <p>When receiving host names from the wire (such as in reverse name lookup), no
 | ||
| automatic conversion to Unicode is performed: applications wishing to present
 | ||
| such host names to the user should decode them to Unicode.</p>
 | ||
| <p>The module <a class="reference internal" href="#module-encodings.idna" title="encodings.idna: Internationalized Domain Names implementation"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.idna</span></code></a> also implements the nameprep procedure, which
 | ||
| performs certain normalizations on host names, to achieve case-insensitivity of
 | ||
| international domain names, and to unify similar characters. The nameprep
 | ||
| functions can be used directly if desired.</p>
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="encodings.idna.nameprep">
 | ||
| <span class="sig-prename descclassname"><span class="pre">encodings.idna.</span></span><span class="sig-name descname"><span class="pre">nameprep</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">label</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#encodings.idna.nameprep" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Return the nameprepped version of <em>label</em>. The implementation currently assumes
 | ||
| query strings, so <code class="docutils literal notranslate"><span class="pre">AllowUnassigned</span></code> is true.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="encodings.idna.ToASCII">
 | ||
| <span class="sig-prename descclassname"><span class="pre">encodings.idna.</span></span><span class="sig-name descname"><span class="pre">ToASCII</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">label</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#encodings.idna.ToASCII" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Convert a label to ASCII, as specified in <span class="target" id="index-13"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc3490.html"><strong>RFC 3490</strong></a>. <code class="docutils literal notranslate"><span class="pre">UseSTD3ASCIIRules</span></code> is
 | ||
| assumed to be false.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| <dl class="py function">
 | ||
| <dt class="sig sig-object py" id="encodings.idna.ToUnicode">
 | ||
| <span class="sig-prename descclassname"><span class="pre">encodings.idna.</span></span><span class="sig-name descname"><span class="pre">ToUnicode</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">label</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#encodings.idna.ToUnicode" title="Link to this definition">¶</a></dt>
 | ||
| <dd><p>Convert a label to Unicode, as specified in <span class="target" id="index-14"></span><a class="rfc reference external" href="https://datatracker.ietf.org/doc/html/rfc3490.html"><strong>RFC 3490</strong></a>.</p>
 | ||
| </dd></dl>
 | ||
| 
 | ||
| </section>
 | ||
| <section id="module-encodings.mbcs">
 | ||
| <span id="encodings-mbcs-windows-ansi-codepage"></span><h2><a class="reference internal" href="#module-encodings.mbcs" title="encodings.mbcs: Windows ANSI codepage"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.mbcs</span></code></a> — Windows ANSI codepage<a class="headerlink" href="#module-encodings.mbcs" title="Link to this heading">¶</a></h2>
 | ||
| <p>This module implements the ANSI codepage (CP_ACP).</p>
 | ||
| <div class="availability docutils container">
 | ||
| <p><a class="reference internal" href="intro.html#availability"><span class="std std-ref">Availability</span></a>: Windows.</p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.2: </span>Before 3.2, the <em>errors</em> argument was ignored; <code class="docutils literal notranslate"><span class="pre">'replace'</span></code> was always used
 | ||
| to encode, and <code class="docutils literal notranslate"><span class="pre">'ignore'</span></code> to decode.</p>
 | ||
| </div>
 | ||
| <div class="versionchanged">
 | ||
| <p><span class="versionmodified changed">Changed in version 3.3: </span>Support any error handler.</p>
 | ||
| </div>
 | ||
| </section>
 | ||
| <section id="module-encodings.utf_8_sig">
 | ||
| <span id="encodings-utf-8-sig-utf-8-codec-with-bom-signature"></span><h2><a class="reference internal" href="#module-encodings.utf_8_sig" title="encodings.utf_8_sig: UTF-8 codec with BOM signature"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.utf_8_sig</span></code></a> — UTF-8 codec with BOM signature<a class="headerlink" href="#module-encodings.utf_8_sig" title="Link to this heading">¶</a></h2>
 | ||
| <p>This module implements a variant of the UTF-8 codec. On encoding, a UTF-8 encoded
 | ||
| BOM will be prepended to the UTF-8 encoded bytes. For the stateful encoder this
 | ||
| is only done once (on the first write to the byte stream). On decoding, an
 | ||
| optional UTF-8 encoded BOM at the start of the data will be skipped.</p>
 | ||
| </section>
 | ||
| </section>
 | ||
| 
 | ||
| 
 | ||
|             <div class="clearer"></div>
 | ||
|           </div>
 | ||
|         </div>
 | ||
|       </div>
 | ||
|       <div class="sphinxsidebar" role="navigation" aria-label="Main">
 | ||
|         <div class="sphinxsidebarwrapper">
 | ||
|   <div>
 | ||
|     <h3><a href="../contents.html">Table of Contents</a></h3>
 | ||
|     <ul>
 | ||
| <li><a class="reference internal" href="#"><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code> — Codec registry and base classes</a><ul>
 | ||
| <li><a class="reference internal" href="#codec-base-classes">Codec Base Classes</a><ul>
 | ||
| <li><a class="reference internal" href="#error-handlers">Error Handlers</a></li>
 | ||
| <li><a class="reference internal" href="#stateless-encoding-and-decoding">Stateless Encoding and Decoding</a></li>
 | ||
| <li><a class="reference internal" href="#incremental-encoding-and-decoding">Incremental Encoding and Decoding</a><ul>
 | ||
| <li><a class="reference internal" href="#incrementalencoder-objects">IncrementalEncoder Objects</a></li>
 | ||
| <li><a class="reference internal" href="#incrementaldecoder-objects">IncrementalDecoder Objects</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| <li><a class="reference internal" href="#stream-encoding-and-decoding">Stream Encoding and Decoding</a><ul>
 | ||
| <li><a class="reference internal" href="#streamwriter-objects">StreamWriter Objects</a></li>
 | ||
| <li><a class="reference internal" href="#streamreader-objects">StreamReader Objects</a></li>
 | ||
| <li><a class="reference internal" href="#streamreaderwriter-objects">StreamReaderWriter Objects</a></li>
 | ||
| <li><a class="reference internal" href="#streamrecoder-objects">StreamRecoder Objects</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| <li><a class="reference internal" href="#encodings-and-unicode">Encodings and Unicode</a></li>
 | ||
| <li><a class="reference internal" href="#standard-encodings">Standard Encodings</a></li>
 | ||
| <li><a class="reference internal" href="#python-specific-encodings">Python Specific Encodings</a><ul>
 | ||
| <li><a class="reference internal" href="#text-encodings">Text Encodings</a></li>
 | ||
| <li><a class="reference internal" href="#binary-transforms">Binary Transforms</a></li>
 | ||
| <li><a class="reference internal" href="#text-transforms">Text Transforms</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| <li><a class="reference internal" href="#module-encodings.idna"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.idna</span></code> — Internationalized Domain Names in Applications</a></li>
 | ||
| <li><a class="reference internal" href="#module-encodings.mbcs"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.mbcs</span></code> — Windows ANSI codepage</a></li>
 | ||
| <li><a class="reference internal" href="#module-encodings.utf_8_sig"><code class="xref py py-mod docutils literal notranslate"><span class="pre">encodings.utf_8_sig</span></code> — UTF-8 codec with BOM signature</a></li>
 | ||
| </ul>
 | ||
| </li>
 | ||
| </ul>
 | ||
| 
 | ||
|   </div>
 | ||
|   <div>
 | ||
|     <h4>Previous topic</h4>
 | ||
|     <p class="topless"><a href="struct.html"
 | ||
|                           title="previous chapter"><code class="xref py py-mod docutils literal notranslate"><span class="pre">struct</span></code> — Interpret bytes as packed binary data</a></p>
 | ||
|   </div>
 | ||
|   <div>
 | ||
|     <h4>Next topic</h4>
 | ||
|     <p class="topless"><a href="datatypes.html"
 | ||
|                           title="next chapter">Data Types</a></p>
 | ||
|   </div>
 | ||
|   <div role="note" aria-label="source link">
 | ||
|     <h3>This Page</h3>
 | ||
|     <ul class="this-page-menu">
 | ||
|       <li><a href="../bugs.html">Report a Bug</a></li>
 | ||
|       <li>
 | ||
|         <a href="https://github.com/python/cpython/blob/main/Doc/library/codecs.rst"
 | ||
|             rel="nofollow">Show Source
 | ||
|         </a>
 | ||
|       </li>
 | ||
|     </ul>
 | ||
|   </div>
 | ||
|         </div>
 | ||
| <div id="sidebarbutton" title="Collapse sidebar">
 | ||
| <span>«</span>
 | ||
| </div>
 | ||
| 
 | ||
|       </div>
 | ||
|       <div class="clearer"></div>
 | ||
|     </div>  
 | ||
|     <div class="related" role="navigation" aria-label="Related">
 | ||
|       <h3>Navigation</h3>
 | ||
|       <ul>
 | ||
|         <li class="right" style="margin-right: 10px">
 | ||
|           <a href="../genindex.html" title="General Index"
 | ||
|              >index</a></li>
 | ||
|         <li class="right" >
 | ||
|           <a href="../py-modindex.html" title="Python Module Index"
 | ||
|              >modules</a> |</li>
 | ||
|         <li class="right" >
 | ||
|           <a href="datatypes.html" title="Data Types"
 | ||
|              >next</a> |</li>
 | ||
|         <li class="right" >
 | ||
|           <a href="struct.html" title="struct — Interpret bytes as packed binary data"
 | ||
|              >previous</a> |</li>
 | ||
| 
 | ||
|           <li><img src="../_static/py.svg" alt="Python logo" style="vertical-align: middle; margin-top: -1px"/></li>
 | ||
|           <li><a href="https://www.python.org/">Python</a> »</li>
 | ||
|           <li class="switchers">
 | ||
|             <div class="language_switcher_placeholder"></div>
 | ||
|             <div class="version_switcher_placeholder"></div>
 | ||
|           </li>
 | ||
|           <li>
 | ||
|               
 | ||
|           </li>
 | ||
|     <li id="cpython-language-and-version">
 | ||
|       <a href="../index.html">3.13.3 Documentation</a> »
 | ||
|     </li>
 | ||
| 
 | ||
|           <li class="nav-item nav-item-1"><a href="index.html" >The Python Standard Library</a> »</li>
 | ||
|           <li class="nav-item nav-item-2"><a href="binary.html" >Binary Data Services</a> »</li>
 | ||
|         <li class="nav-item nav-item-this"><a href=""><code class="xref py py-mod docutils literal notranslate"><span class="pre">codecs</span></code> — Codec registry and base classes</a></li>
 | ||
|                 <li class="right">
 | ||
|                     
 | ||
| 
 | ||
|     <div class="inline-search" role="search">
 | ||
|         <form class="inline-search" action="../search.html" method="get">
 | ||
|           <input placeholder="Quick search" aria-label="Quick search" type="search" name="q" id="search-box" />
 | ||
|           <input type="submit" value="Go" />
 | ||
|         </form>
 | ||
|     </div>
 | ||
|                      |
 | ||
|                 </li>
 | ||
|             <li class="right">
 | ||
| <label class="theme-selector-label">
 | ||
|     Theme
 | ||
|     <select class="theme-selector" oninput="activateTheme(this.value)">
 | ||
|         <option value="auto" selected>Auto</option>
 | ||
|         <option value="light">Light</option>
 | ||
|         <option value="dark">Dark</option>
 | ||
|     </select>
 | ||
| </label> |</li>
 | ||
|             
 | ||
|       </ul>
 | ||
|     </div>  
 | ||
|     <div class="footer">
 | ||
|     © 
 | ||
|       <a href="../copyright.html">
 | ||
|     
 | ||
|     Copyright
 | ||
|     
 | ||
|       </a>
 | ||
|      2001-2025, Python Software Foundation.
 | ||
|     <br />
 | ||
|     This page is licensed under the Python Software Foundation License Version 2.
 | ||
|     <br />
 | ||
|     Examples, recipes, and other code in the documentation are additionally licensed under the Zero Clause BSD License.
 | ||
|     <br />
 | ||
|     
 | ||
|       See <a href="../license.html">History and License</a> for more information.<br />
 | ||
|     
 | ||
|     
 | ||
|     <br />
 | ||
| 
 | ||
|     The Python Software Foundation is a non-profit corporation.
 | ||
| <a href="https://www.python.org/psf/donations/">Please donate.</a>
 | ||
| <br />
 | ||
|     <br />
 | ||
|       Last updated on Apr 08, 2025 (14:33 UTC).
 | ||
|     
 | ||
|       <a href="../bugs.html">Found a bug</a>?
 | ||
|     
 | ||
|     <br />
 | ||
| 
 | ||
|     Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 8.2.3.
 | ||
|     </div>
 | ||
| 
 | ||
|   </body>
 | ||
| </html> |