<?php
// Page bootstrap for the mb_detect_encoding manual entry: load the shared
// manual helpers, collect the table-of-contents data, describe this page's
// navigation metadata and hand it all to manual_setup().
include_once $_SERVER['DOCUMENT_ROOT'] . '/include/shared-manual.inc';

// Start from empty containers; the TOC include below presumably fills them
// (its contents are not visible from this file).
$TOC = [];
$TOC_DEPRECATED = [];
$PARENTS = [];
include_once __DIR__ . '/toc/ref.mbstring.inc';

// Navigation and source metadata for this page. Positional entries are
// [target file, title] and, for 'this', an additional short description.
$setup = [
    'home' => ['index.php', 'PHP Manual'],
    'head' => ['UTF-8', 'en'],
    'this' => [
        'function.mb-detect-encoding.php',
        'mb_detect_encoding',
        'Detect character encoding',
    ],
    'up' => ['ref.mbstring.php', 'Multibyte String Functions'],
    'prev' => ['function.mb-decode-numericentity.php', 'mb_decode_numericentity'],
    'next' => ['function.mb-detect-order.php', 'mb_detect_order'],
    'alternatives' => [],
    'source' => [
        'lang' => 'en',
        'path' => 'reference/mbstring/functions/mb-detect-encoding.xml',
    ],
    'history' => [],
    // TOC data is read only after the include above has run.
    'toc' => $TOC,
    'toc_deprecated' => $TOC_DEPRECATED,
    'parents' => $PARENTS,
];

manual_setup($setup);

contributors($setup);

?>
<div id="function.mb-detect-encoding" class="refentry">
 <div class="refnamediv">
  <h1 class="refname">mb_detect_encoding</h1>
  <p class="verinfo">(PHP 4 &gt;= 4.0.6, PHP 5, PHP 7, PHP 8)</p><p class="refpurpose"><span class="refname">mb_detect_encoding</span> &mdash; <span class="dc-title">Detect character encoding</span></p>

 </div>

 <div class="refsect1 description" id="refsect1-function.mb-detect-encoding-description">
  <h3 class="title">Description</h3>
  <div class="methodsynopsis dc-description">
   <span class="methodname"><strong>mb_detect_encoding</strong></span>(<span class="methodparam"><span class="type"><a href="language.types.string.php" class="type string">string</a></span> <code class="parameter">$string</code></span>, <span class="methodparam"><span class="type"><span class="type"><a href="language.types.array.php" class="type array">array</a></span>|<span class="type"><a href="language.types.string.php" class="type string">string</a></span>|<span class="type"><a href="language.types.null.php" class="type null">null</a></span></span> <code class="parameter">$encodings</code><span class="initializer"> = <strong><code><a href="reserved.constants.php#constant.null">null</a></code></strong></span></span>, <span class="methodparam"><span class="type"><a href="language.types.boolean.php" class="type bool">bool</a></span> <code class="parameter">$strict</code><span class="initializer"> = <strong><code><a href="reserved.constants.php#constant.false">false</a></code></strong></span></span>): <span class="type"><span class="type"><a href="language.types.string.php" class="type string">string</a></span>|<span class="type"><a href="language.types.singleton.php" class="type false">false</a></span></span></div>

  <p class="para rdfs-comment">
   Detects the most likely character encoding for <span class="type"><a href="language.types.string.php" class="type string">string</a></span> <code class="parameter">string</code>
   from a list of candidates.
  </p>
  <p class="para">
   As of PHP 8.1, this function uses heuristics to detect which of the valid text encodings in the specified
   list is most likely to be correct; the encoding returned is not necessarily the first valid one in the order of the <code class="parameter">encodings</code> provided.
  </p>
  <p class="para">
   Automatic detection of the intended character encoding can never be entirely reliable;
   without some additional information, it is similar to decoding an encrypted string
   without the key. It is always preferable to use an indication of character encoding
   stored or transmitted with the data, such as a &quot;Content-Type&quot; HTTP header.
  </p>
  <p class="para">
   This function is most useful with multibyte encodings, where not all sequences of
   bytes form a valid string. If the input string contains such a sequence, that
   encoding will be rejected.
  </p>

  <div class="warning"><strong class="warning">Warning</strong>
   <h1 class="title">The result is not accurate</h1>
   <p class="para">
    The name of this function is misleading; it performs &quot;guessing&quot; rather than &quot;detection&quot;.
   </p>
   <p class="para">
    The guesses are far from accurate, and therefore you cannot use this function to accurately
    detect the correct character encoding.
   </p>
  </div>
 </div>


 <div class="refsect1 parameters" id="refsect1-function.mb-detect-encoding-parameters">
  <h3 class="title">Parameters</h3>
  <p class="para">
   <dl>
    
     <dt><code class="parameter">string</code></dt>
     <dd>
      <p class="para">
       The <span class="type"><a href="language.types.string.php" class="type string">string</a></span> being inspected.
      </p>
     </dd>
    
    
     <dt><code class="parameter">encodings</code></dt>
     <dd>
      <p class="para">
       A list of character encodings to try. The list may be specified as
       an array of strings, or as a single comma-separated string.
      </p>
      <p class="para">
       If <code class="parameter">encodings</code> is omitted or <strong><code><a href="reserved.constants.php#constant.null">null</a></code></strong>,
       the current detect_order (set with the <a href="mbstring.configuration.php#ini.mbstring.detect-order" class="link">
       mbstring.detect_order</a> configuration option, or <span class="function"><a href="function.mb-detect-order.php" class="function">mb_detect_order()</a></span>
       function) will be used.
      </p>
     </dd>
    
    
     <dt><code class="parameter">strict</code></dt>
     <dd>
      <p class="para">
       Controls the behaviour when <code class="parameter">string</code>
       is not valid in any of the listed <code class="parameter">encodings</code>.
       If <code class="parameter">strict</code> is set to <strong><code><a href="reserved.constants.php#constant.false">false</a></code></strong>, the closest matching
       encoding will be returned; if <code class="parameter">strict</code> is set to <strong><code><a href="reserved.constants.php#constant.true">true</a></code></strong>,
       <strong><code><a href="reserved.constants.php#constant.false">false</a></code></strong> will be returned.
      </p>
      <p class="para">
       The default value for <code class="parameter">strict</code> can be set
       with the <a href="mbstring.configuration.php#ini.mbstring.strict-detection" class="link">
       mbstring.strict_detection</a> configuration option.
      </p>
     </dd>
    
   </dl>
  </p>
 </div>


 <div class="refsect1 returnvalues" id="refsect1-function.mb-detect-encoding-returnvalues">
  <h3 class="title">Return Values</h3>
  <p class="para">
   The detected character encoding, or <strong><code><a href="reserved.constants.php#constant.false">false</a></code></strong> if the string is not valid
   in any of the listed encodings.
  </p>
 </div>


 <div class="refsect1 changelog" id="refsect1-function.mb-detect-encoding-changelog">
  <h3 class="title">Changelog</h3>
  <table class="doctable informaltable">
   
    <thead>
     <tr>
      <th>Version</th>
      <th>Description</th>
     </tr>

    </thead>

    <tbody class="tbody">
     <tr>
      <td>8.2.0</td>
      <td>
       <span class="function"><strong>mb_detect_encoding()</strong></span> will no longer return
       the following non-text encodings:
       <code class="literal">&quot;Base64&quot;</code>, <code class="literal">&quot;QPrint&quot;</code>,
       <code class="literal">&quot;UUencode&quot;</code>, <code class="literal">&quot;HTML entities&quot;</code>,
       <code class="literal">&quot;7 bit&quot;</code> and <code class="literal">&quot;8 bit&quot;</code>.
      </td>
     </tr>

    </tbody>
   
  </table>

 </div>


 <div class="refsect1 examples" id="refsect1-function.mb-detect-encoding-examples">
  <h3 class="title">Examples</h3>
  <p class="para">
   <div class="example" id="example-1">
    <p><strong>Example #1 <span class="function"><strong>mb_detect_encoding()</strong></span> example</strong></p>
    <div class="example-contents">
<div class="annotation-interactive phpcode"><code><span style="color: #000000"><span style="color: #0000BB">&lt;?php<br /><br />$str </span><span style="color: #007700">= </span><span style="color: #DD0000">"\x95\xB6\x8E\x9A\x83\x52\x81\x5B\x83\x68"</span><span style="color: #007700">;<br /><br /></span><span style="color: #FF8000">// Detect character encoding with current detect_order<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">));<br /><br /></span><span style="color: #FF8000">// "auto" is expanded according to mbstring.language<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, </span><span style="color: #DD0000">"auto"</span><span style="color: #007700">));<br /><br /></span><span style="color: #FF8000">// Specify "encodings" parameter by list separated by comma<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, </span><span style="color: #DD0000">"JIS, eucjp-win, sjis-win"</span><span style="color: #007700">));<br /><br /></span><span style="color: #FF8000">// Use array to specify "encodings" parameter<br /></span><span style="color: #0000BB">$encodings </span><span style="color: #007700">= [<br />  </span><span style="color: #DD0000">"ASCII"</span><span style="color: #007700">,<br />  </span><span style="color: #DD0000">"JIS"</span><span style="color: #007700">,<br />  </span><span style="color: #DD0000">"EUC-JP"<br /></span><span style="color: 
#007700">];<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, </span><span style="color: #0000BB">$encodings</span><span style="color: #007700">));<br /></span><span style="color: #0000BB">?&gt;</span></span></code></div>
    </div>

    <div class="example-contents"><p>The above example will output:</p></div>
    <div class="example-contents screen">
<div class="annotation-interactive examplescode"><pre class="examplescode">string(5) &quot;ASCII&quot;
string(5) &quot;ASCII&quot;
string(8) &quot;SJIS-win&quot;
string(5) &quot;ASCII&quot;</pre>
</div>
    </div>
   </div>
  </p>
  <p class="para">
   <div class="example" id="example-2">
    <p><strong>Example #2 Effect of <code class="parameter">strict</code> parameter</strong></p>
    <div class="example-contents">
     <div class="annotation-interactive phpcode"><code><span style="color: #000000"><span style="color: #0000BB">&lt;?php<br /></span><span style="color: #FF8000">// 'áéóú' encoded in ISO-8859-1<br /></span><span style="color: #0000BB">$str </span><span style="color: #007700">= </span><span style="color: #DD0000">"\xE1\xE9\xF3\xFA"</span><span style="color: #007700">;<br /><br /></span><span style="color: #FF8000">// The string is not valid ASCII or UTF-8, but UTF-8 is considered a closer match<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, [</span><span style="color: #DD0000">'ASCII'</span><span style="color: #007700">, </span><span style="color: #DD0000">'UTF-8'</span><span style="color: #007700">], </span><span style="color: #0000BB">false</span><span style="color: #007700">));<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, [</span><span style="color: #DD0000">'ASCII'</span><span style="color: #007700">, </span><span style="color: #DD0000">'UTF-8'</span><span style="color: #007700">], </span><span style="color: #0000BB">true</span><span style="color: #007700">));<br /><br /></span><span style="color: #FF8000">// If a valid encoding is found, the strict parameter does not change the result<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, [</span><span style="color: #DD0000">'ASCII'</span><span style="color: #007700">, </span><span style="color: 
#DD0000">'UTF-8'</span><span style="color: #007700">, </span><span style="color: #DD0000">'ISO-8859-1'</span><span style="color: #007700">], </span><span style="color: #0000BB">false</span><span style="color: #007700">));<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, [</span><span style="color: #DD0000">'ASCII'</span><span style="color: #007700">, </span><span style="color: #DD0000">'UTF-8'</span><span style="color: #007700">, </span><span style="color: #DD0000">'ISO-8859-1'</span><span style="color: #007700">], </span><span style="color: #0000BB">true</span><span style="color: #007700">));<br /></span><span style="color: #0000BB">?&gt;</span></span></code></div>
    </div>

    <div class="example-contents"><p>The above example will output:</p></div>
    <div class="example-contents screen">
<div class="annotation-interactive examplescode"><pre class="examplescode">string(5) &quot;UTF-8&quot;
bool(false)
string(10) &quot;ISO-8859-1&quot;
string(10) &quot;ISO-8859-1&quot;</pre>
</div>
    </div>
   </div>
  </p>
  <p class="para">
   In some cases, the same sequence of bytes may form a valid string in multiple
   character encodings, and it is impossible to know which interpretation was
   intended. For instance, among many others, the byte sequence &quot;\xC4\xA2&quot; could be:
  </p>
  <p class="para">
   <ul class="simplelist">
    <li>
     "Ä¢" (U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS followed by U+00A2 CENT SIGN)
     encoded in any of ISO-8859-1, ISO-8859-15, or Windows-1252
    </li>
    <li>
     "ФЂ" (U+0424 CYRILLIC CAPITAL LETTER EF followed by U+0402 CYRILLIC CAPITAL LETTER
     DJE) encoded in ISO-8859-5
    </li>
    <li>
     "Ģ" (U+0122 LATIN CAPITAL LETTER G WITH CEDILLA) encoded in UTF-8
    </li>
   </ul>
  </p>
  <p class="para">
   <div class="example" id="example-3">
    <p><strong>Example #3 Effect of order when multiple encodings match</strong></p>
    <div class="example-contents">
     <div class="annotation-interactive phpcode"><code><span style="color: #000000"><span style="color: #0000BB">&lt;?php<br />$str </span><span style="color: #007700">= </span><span style="color: #DD0000">"\xC4\xA2"</span><span style="color: #007700">;<br /><br /></span><span style="color: #FF8000">// The string is valid in all three encodings, but the first one listed may not always be the one returned<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, [</span><span style="color: #DD0000">'UTF-8'</span><span style="color: #007700">]));<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, [</span><span style="color: #DD0000">'UTF-8'</span><span style="color: #007700">, </span><span style="color: #DD0000">'ISO-8859-1'</span><span style="color: #007700">, </span><span style="color: #DD0000">'ISO-8859-5'</span><span style="color: #007700">])); </span><span style="color: #FF8000">// as of php8.1 this returns ISO-8859-1 instead of UTF-8<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: #0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, [</span><span style="color: #DD0000">'ISO-8859-1'</span><span style="color: #007700">, </span><span style="color: #DD0000">'ISO-8859-5'</span><span style="color: #007700">, </span><span style="color: #DD0000">'UTF-8'</span><span style="color: #007700">]));<br /></span><span style="color: #0000BB">var_dump</span><span style="color: #007700">(</span><span style="color: 
#0000BB">mb_detect_encoding</span><span style="color: #007700">(</span><span style="color: #0000BB">$str</span><span style="color: #007700">, [</span><span style="color: #DD0000">'ISO-8859-5'</span><span style="color: #007700">, </span><span style="color: #DD0000">'UTF-8'</span><span style="color: #007700">, </span><span style="color: #DD0000">'ISO-8859-1'</span><span style="color: #007700">]));<br /></span><span style="color: #0000BB">?&gt;</span></span></code></div>
    </div>

    <div class="example-contents"><p>The above example will output:</p></div>
    <div class="example-contents screen">
<div class="annotation-interactive examplescode"><pre class="examplescode">string(5) &quot;UTF-8&quot;
string(10) &quot;ISO-8859-1&quot;
string(10) &quot;ISO-8859-1&quot;
string(10) &quot;ISO-8859-5&quot;</pre>
</div>
    </div>
   </div>
  </p>
 </div>


 <div class="refsect1 seealso" id="refsect1-function.mb-detect-encoding-seealso">
  <h3 class="title">See Also</h3>
  <p class="para">
   <ul class="simplelist">
    <li><span class="function"><a href="function.mb-detect-order.php" class="function" rel="rdfs-seeAlso">mb_detect_order()</a> - Set/Get character encoding detection order</span></li>
   </ul>
  </p>
 </div>


</div><?php /* Close the page by passing the same $setup metadata to manual_footer(), which is defined outside this file (presumably renders the shared page footer - confirm in the shared manual include). */ manual_footer($setup); ?>