
    c-                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZ d dlmZ dd	lmZmZmZmZmZmZ  ee
          dedefd            Z ee
          dedefd            Z ee
          dedee         fd            Z ee
          dedefd            Z  ee
          dedefd            Z! ee
          dedefd            Z" ee
          dedefd            Z# ee
          dedefd            Z$ ee
          dedefd            Z% ee
          dedefd            Z&dedefdZ' ee
          dedefd            Z( ee
          dedefd            Z) ee
          dedefd            Z* ee
          dedefd            Z+ ee
          dedefd            Z, e e-e          
          dedefd            Z. ee
          dedefd            Z/dCd!e0d"e1dee         fd#Z2 ed$
          d%edefd&            Z3d!e0deee         e0f         fd'Z4d(edefd)Z5dDd+ed,edefd-Z6d.edee         fd/Z7d0ed1ede8fd2Z9d0ed1edefd3Z:d4ej;        d5fd%ed6e1d7eddfd8Z<	 dEd9e0d:ed;e=d<e1d=ed>ed?e0d@edAee         deeddf         fdBZ>dS )F    N)IncrementalDecoder)aliases)	lru_cache)findall)	GeneratorListOptionalSetTupleUnion)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize	characterreturnc                     	 t          j        |           }n# t          $ r Y dS w xY wd|v pd|v pd|v pd|v pd|v pd|v S )NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEunicodedataname
ValueErrorr   descriptions     :/usr/lib/python3/dist-packages/charset_normalizer/utils.pyis_accentuatedr       s    &+I66   uu 	# 	';&	'[(	' {*	' +		'
 ;&    
%%c                     t          j        |           }|s| S |                    d          }t          t	          |d         d                    S )N r      )r   decompositionsplitchrint)r   
decomposedcodess      r   remove_accentr+   &   sO    !/	::J !'',,Es58R  !!!    c                 f    t          |           }t          j                    D ]\  }}||v r|c S dS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranges       r   unicode_ranger3   1   sO    
 YM!8!>!@!@  
II%% & 4r,   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFLATINr   r   s     r   is_latinr6   ?   sF    &+I66   uuk!!r!   c                 V    	 |                      d           n# t          $ r Y dS w xY wdS )NasciiFT)encodeUnicodeEncodeErrorr   s    r   is_asciir<   H   sE    !!!!   uu4s    
&&c                 d    t          j        |           }d|v rdS t          |           }|dS d|v S )NPTFPunctuationr   categoryr3   r   character_categorycharacter_ranges      r   is_punctuationrE   Q   sG    )29==
   t%29%=%=OuO++r,   c                 l    t          j        |           }d|v sd|v rdS t          |           }|dS d|v S )NSNTFFormsr@   rB   s      r   	is_symbolrJ   `   sR    )29==
   C+=$=$=t%29%=%=Ouo%%r,   c                 0    t          |           }|dS d|v S )NF	Emoticons)r3   )r   rD   s     r   is_emoticonrM   o   s%    %29%=%=Ou/))r,   c                 f    |                                  s| dv rdS t          j        |           }d|v S )N>      ｜+,;<>TZ)isspacer   rA   r   rC   s     r   is_separatorrX   y   sC     i+KKKt)29==$$$r,   c                 V    |                                  |                                 k    S N)islowerisupperr;   s    r   is_case_variabler]      s%    )"3"3"5"555r,   c                 6    t          j        |           }|dk    S )NCo)r   rA   rW   s     r   is_private_use_onlyr`      s    )29==%%r,   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFCJKr   r   character_names     r   is_cjkre      sH    $))44   uu N""r!   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFHIRAGANAr   rc   s     r   is_hiraganarh      H    $))44   uu ''r!   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFKATAKANAr   rc   s     r   is_katakanarl      ri   r!   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFHANGULr   rc   s     r   	is_hangulro      sH    $))44   uu ~%%r!   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFTHAIr   rc   s     r   is_thairr      sH    $))44   uu ^##r!   r1   c                 D     t           fdt          D                       S )Nc              3       K   | ]}|v V  	d S rZ    ).0keywordr1   s     r   	<genexpr>z-is_unicode_range_secondary.<locals>.<genexpr>   s(      TTw*$TTTTTTr,   )anyr   )r1   s   `r   is_unicode_range_secondaryrz      s'    TTTT4STTTTTTr,   c                 r    |                                  du o!|                                 du o| dk    o| dk    S )NFu   ﻿)rV   isprintabler;   s    r   is_unprintabler~      sR     	u$ 	"!!##u,	"	" !	r,      sequencesearch_zonec           	         t          | t                    st          t          |           }t	          t
          | dt          ||                                       dd                    }t          |          dk    rdS |D ][}|                                	                    dd          }t          j                    D ]\  }}||k    r|c c S ||k    r|c c S \dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr8   ignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   mindecodelowerreplacer   r/   )r   r   seq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r   any_specified_encodingr      s    h&& x==G ',3w,,,-44WX4NN G
 7||qt% 
% 
%/5577??SII
 .5]__ 	% 	%)NM!333$$$$$$ 222$$$$$$ 3	% 4r,      r   c                     | dv p>t          t          j        d                    |                     j        t
                    S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigencodings.{})
issubclass	importlibimport_moduleformatr   r   )r   s    r   is_multi_byte_encodingr      sL    
  
 
  
 5 5d ; ;<<O#
 
r,   c                     t           D ]I}t           |         }t          |t                    r|g}|D ]}|                     |          r||fc c S  JdS )z9
    Identify and extract SIG/BOM in given sequence.
    )Nr,   )r   r   r   
startswith)r   iana_encodingmarksmarks       r   identify_sig_or_bomr     s    
 ( + ++9-+HeU## 	GE 	+ 	+D""4(( +$d******+	+ 9r,   r   c                 
    | dvS )N>   r   r   ru   )r   s    r   should_strip_sig_or_bomr     s     444r,   Tcp_namestrictc                     |                                                      dd          } t          j                    D ]\  }}| ||fv r|c S |r"t	          d                    |                     | S )Nr   r   z Unable to retrieve IANA for '{}')r   r   r   r/   r   r   )r   r   r   r   s       r   	iana_namer     s    mmoo%%c3//G
 *1 ! !%~}555     6  M;BB7KKLLLNr,   decoded_sequencec                     t                      }| D ])}t          |          }||                    |           *t          |          S rZ   )setr3   addlist)r   rangesr   rD   s       r   
range_scanr   +  sQ    uuF% $ $	)6y)A)A"

?####<<r,   iana_name_aiana_name_bc                    t          |           st          |          rdS t          j        d                    |                     j        }t          j        d                    |                    j        } |d          } |d          }d}t          d          D ]C}t          |g          }|                    |          |                    |          k    r|dz  }D|dz  S )	Ng        r   r   r   r      r      )r   r   r   r   r   ranger   r   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r   cp_similarityr   9  s   k** .D[.Q.Q s'k**   'k**    )y999D(y999D!"3ZZ ' '$aSzz;;}%%])C)CCC!Q&! 3&&r,   c                 2    | t           v o|t           |          v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r   is_cp_similarr   R  s%     	-- 	?1+>>r,   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevelformat_stringc                     t          j        |           }|                    |           t          j                    }|                    t          j        |                     |                    |           d S rZ   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlers        r   set_logging_handlerr   ]  sm     t$$F
OOE#%%G*=99:::
gr,   	sequencesr   offsets
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadc	              #     K   |r!|du r|D ]}	||	|	|z            }
|
s d S |
V  d S |D ]}	|	|z   }|t          |           dz   k    r| |	|	|z            }|r	|du r||z   }|                    ||rdnd          }
|ru|	dk    rot          |d          }|r]|
d |         |vrQt          |	|	dz
  d	          D ]<}| ||         }|r	|du r||z   }|                    |d          }
|
d |         |v r n=|
V  d S )
NF   r   r   r   r   r$      )r   r   r   r   )r   r   r   r   r   r   r   r   r   r   chunk	chunk_endcut_sequencechunk_partial_size_chkjs                  r   cut_sequence_chunksr   k  s       +0E99 	 	A#AJ$67E KKKK		 	  $	 $	AJI3y>>A---$QZ%78L# :(8E(A(A*\9 ''#8Fxxh (  E % "Q.1*b.A.A& $"5556oMM"1a!eR00 	" 	"'09'=/ F4D4M4M+6+EL , 3 3M( 3 S S !8"8!89_LL!E M KKKKI$	 $	r,   )r   )TrZ   )?r   r   r   codecsr   encodings.aliasesr   	functoolsr   rer   typingr   r   r	   r
   r   r   _multibytecodecr   constantr   r   r   r   r   r   strboolr    r+   r3   r6   r<   rE   rJ   rM   rX   r]   r`   re   rh   rl   ro   rr   r   rz   r~   r   r(   r   r   r   r   r   r   floatr   r   INFOr   r   r   ru   r,   r   <module>r      s            % % % % % % % % % % % %             ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 7 7 7 7 7 7                *+++c d    ,+ *+++"S "S " " " ,+" *+++
S 
Xc] 
 
 
 ,+
 *+++" " " " " ,+" *+++     ,+ *+++,c ,d , , , ,+, *+++& & & & & ,+& *+++*3 *4 * * * ,+* *+++%C %D % % % ,+% *+++6 6 6 6 6 ,+6&3 &4 & & & & *+++#c #d # # # ,+# *+++(3 (4 ( ( ( ,+( *+++(3 (4 ( ( ( ,+( *+++& & & & & ,+& *+++$s $t $ $ $ ,+$ 33.//000U3 U4 U U U 10U *+++c d    ,+ U  QT    @ 3     (% E(3-2F,G    $53 54 5 5 5 5 s D C      c    's ' ' ' ' ' '2s       %D 
  
	   . &*7 777 7 	7
 7 7 7  7 c]7 sD$7 7 7 7 7 7r,   