
    b/                         d dl Z d dlZd dlZ	 	 d dlZn# e$ r d dlZY nw xY wd Zn# e$ r dZdZY nw xY wddlmZm	Z	m
Z
 dZdZdZdZd	Zd
Z ej        d          Z ej        d          Zd ZdS )    Nc                 :    t          j        |           d         pdS )Nencoding )chardetdetect)datas    6/usr/lib/python3/dist-packages/feedparser/encodings.pylazy_chardet_encodingr
   *   s    ~d##J/525       )CharacterEncodingOverrideCharacterEncodingUnknownNonXMLContentTypes   Los    < ?s   < ? s      <s   <   z  z^<\?xml[^>]*?>s#   ^<\?.*encoding=[\'"](.*?)[\'"].*\?>c                 (	   d}d}|dd         t           j        k    rd}|dd         }n(|dd         t           j        k    rd}|dd         }n|dd         t           j        k    r |dd         t          k    rd}|dd         }n|dd         t           j        k    r |dd         t          k    rd}|dd         }n|dd	         t           j        k    rd
}|d	d         }nm|dd         t          k    rd}nW|dd         t          k    rd}nA|dd         t          k    rd}n+|dd         t          k    rd}n|dd         t          k    rd}|}	 |r(|                    |                              d
          }t                              |          }n# t           t"          f$ r d}Y nw xY w|rG|                                d                             d
                                          }|r|dv r|}|                     d          pd}t+          j        |          \  }}|                    dd                              dd          }	t1          |	t2                    r|	                    d
d          }	d}
d}d}||v s*|                    d          r|                    d          r	d}
|	p|pd
}n^||v s*|                    d          r|                    d          rd}
|	pd}n)|                    d          r|	pd}n| r	d| vr|pd}n|pd
}|                                dk    rd}|                                dk    rd}d}| r#|
s!d| v rd| d         z  }nd}t9          |          }d}g }|||t:          d
ddfD ]}t=          |          r ||          }|s||v r$|                    |           	 |                    |          }d}d }t@          !                    |          rt@          "                    ||          }n|d!z   |z   }|                    d
          } n# t           t"          f$ r Y w xY w|stG          d"|d#|d$z             }d}n||k    rtI          d%|d&|          }|}||d'<   |r
d(|d)<   ||d*<   |S )+z|Detect and convert the character encoding to UTF-8.

    http_headers is a dictionary
    data is a raw string (not Unicode)r   N   zutf-32bezutf-32le   zutf-16bezutf-16le   zutf-8cp037r   )u16zutf-16utf16utf_16u32zutf-32utf32utf_32ziso-10646-ucs-2ziso-10646-ucs-4csucs4	csunicodezucs-2zucs-4zcontent-typecharset'ignore)zapplication/xmlzapplication/xml-dtdz&application/xml-external-parsed-entity)ztext/xmlztext/xml-external-parsed-entityzapplication/z+xmlr   ztext/zus-asciiz
iso-8859-1gb2312gb18030z%s is not an XML media typezno Content-type specifiedzwindows-1252z
iso-8859-2z&<?xml version='1.0' encoding='utf-8'?>
z#document encoding unknown, I tried z, z8, utf-8, windows-1252, and iso-8859-2 but nothing workedzdocument declared as z, but parsed as r   Tbozobozo_exception)%codecsBOM_UTF32_BEBOM_UTF32_LEBOM_UTF16_BE
ZERO_BYTESBOM_UTF16_LEBOM_UTF8EBCDIC_MARKERUTF16BE_MARKERUTF16LE_MARKERUTF32BE_MARKERUTF32LE_MARKERdecodeencodeRE_XML_PI_ENCODINGmatchUnicodeDecodeErrorLookupErrorgroupslowergetcgiparse_headerreplace
isinstancebytes
startswithendswithr   r
   callableappendRE_XML_DECLARATIONsearchsubr   r   )http_headersr   resultbom_encodingxml_encodingtempdataxml_encoding_matchhttp_content_typeparamshttp_encodingacceptable_content_typeapplication_content_typestext_content_typesrfc3023_encodingerrormsgknown_encodingtried_encodingsproposed_encodingnew_declarations                       r	   convert_to_utf8rY   G   s"   ^ LL BQBx6&&&!ABBx	bqbV(	(	(!ABBx	bqbV(	(	(T!A#Y*-D-D!ABBx	bqbV(	(	(T!A#Y*-D-D!ABBx	bqbV_	$	$ABBx	bqb]	"	"	bqb^	#	#!	bqb^	#	#!	bqb^	#	#!	bqb^	#	#!H	@ 	A{{<0077@@H 055h?? , " " " "	"  	()002215<<WEEKKMM 	(\ .
 
 
 (L %((88>B # 01B C CvJJy"--55c2>>M-'' @%,,Wh??!KH!:::%00@@ ; *226:: ; #$(CLCG!333%0099 4 *226:: 4 #$(6J		%	%g	.	. 3(6J	 3.<<'7<'27 8++$x''  E '4 '\))/,~2NNCC-C!#&& NO.l3Wnl\  %&& 	8 1 1$ 7 7  	//0111	;;011D NJO!((.. 5)--otDD&-4;;w''DE #K0 	 	 	D	  
-(1|||--. . 	.	.	.))0023 3 -)F: )v#( Ks$   *F F10F1P00QQ)r:   r%   recchardetr   ImportErrorr
   
exceptionsr   r   r   r,   r-   r.   r/   r0   r)   compilerC   r3   rY    r   r	   <module>r`      s?  : 


  				
6"""""   6 6 6 6	  ! ! !G !          $$$$$
  RZ 122   RZ GHH S S S S Ss     ' 	 '  ' 	33