o
     JAfðC  ã                   @   s¸   d Z ddlZddlZddlmZ dgZe d¡Ze d¡Ze d¡Z	e d¡Z
e d	¡Ze d
¡Ze d¡Ze d¡Ze d¡Ze dej¡Ze d
¡Ze d¡ZG dd„ dejƒZdS )zA parser for HTML and XHTML.é    N)ÚunescapeÚ
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]ú>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   sà   e Zd ZdZdZddœdd„Zdd„ Zd	d
„ Zdd„ ZdZ	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zd7dd„Zdd„ Zdd„ Zdd „ Zd!d"„ Zd#d$„ Zd%d&„ Zd'd(„ Zd)d*„ Zd+d,„ Zd-d.„ Zd/d0„ Zd1d2„ Zd3d4„ Zd5d6„ ZdS )8r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )ZscriptÚstyleT)Úconvert_charrefsc                C   s   || _ |  ¡  dS )zÆInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   Úreset)Úselfr   © r	   ú"/usr/lib/python3.10/html/parser.pyÚ__init__V   s   zHTMLParser.__init__c                 C   s(   d| _ d| _t| _d| _tj | ¡ dS )z1Reset this instance.  Loses all unprocessed data.Ú z???N)ÚrawdataÚlasttagÚinteresting_normalÚinterestingÚ
cdata_elemÚ_markupbaseÚ
ParserBaser   ©r   r	   r	   r
   r   _   s
   zHTMLParser.resetc                 C   s   | j | | _ |  d¡ dS )z‘Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   Úgoahead©r   Údatar	   r	   r
   Úfeedg   s   zHTMLParser.feedc                 C   s   |   d¡ dS )zHandle any buffered data.é   N)r   r   r	   r	   r
   Úclosep   s   zHTMLParser.closeNc                 C   s   | j S )z)Return full source of start tag: '<...>'.)Ú_HTMLParser__starttag_textr   r	   r	   r
   Úget_starttag_textv   s   zHTMLParser.get_starttag_textc                 C   s$   |  ¡ | _t d| j tj¡| _d S )Nz</\s*%s\s*>)Úlowerr   ÚreÚcompileÚIr   )r   Úelemr	   r	   r
   Úset_cdata_modez   s   
zHTMLParser.set_cdata_modec                 C   s   t | _d | _d S ©N)r   r   r   r   r	   r	   r
   Úclear_cdata_mode~   s   
zHTMLParser.clear_cdata_modec                 C   s2  | j }d}t|ƒ}||k rá| jr;| js;| d|¡}|dk r:| dt||d ƒ¡}|dkr8t d¡ 	||¡s8n©|}n| j
 	||¡}|rI| ¡ }n| jrNn“|}||k ro| jrf| jsf|  t|||… ƒ¡ n	|  |||… ¡ |  ||¡}||kr{nf|j}|d|ƒrt ||¡r|  |¡}	n>|d|ƒr›|  |¡}	n3|d|ƒr¦|  |¡}	n(|d|ƒr±|  |¡}	n|d	|ƒr¼|  |¡}	n|d
 |k rÌ|  d¡ |d
 }	nn|	dk r|s×n
| d|d
 ¡}	|	dk rô| d|d
 ¡}	|	dk ró|d
 }	n|	d
7 }	| jr| js|  t|||	… ƒ¡ n	|  |||	… ¡ |  ||	¡}nÁ|d|ƒrlt ||¡}|rO| ¡ dd… }
|  |
¡ | ¡ }	|d|	d
 ƒsH|	d
 }	|  ||	¡}q	d||d … v rk|  |||d … ¡ |  ||d ¡}nu|d|ƒrÝt ||¡}|rœ| d
¡}
|  |
¡ | ¡ }	|d|	d
 ƒs•|	d
 }	|  ||	¡}q	t ||¡}|rÇ|rÆ| ¡ ||d … krÆ| ¡ }	|	|kr¾|}	|  ||d
 ¡}n|d
 |k rÜ|  d¡ |  ||d
 ¡}nn||k s|r||k r| js| jr| js|  t|||… ƒ¡ n	|  |||… ¡ |  ||¡}||d … | _ d S )Nr   ú<ú&é"   z[\s;]z</ú<!--z<?z<!r   r   z&#é   éÿÿÿÿú;)r   Úlenr   r   ÚfindÚrfindÚmaxr   r   Úsearchr   ÚstartÚhandle_datar   Z	updateposÚ
startswithÚstarttagopenÚmatchÚparse_starttagÚparse_endtagÚparse_commentÚparse_piÚparse_html_declarationÚcharrefÚgroupÚhandle_charrefÚendÚ	entityrefÚhandle_entityrefÚ
incomplete)r   r>   r   ÚiÚnÚjZampposr5   r3   ÚkÚnamer	   r	   r
   r   …   sÆ   
ÿ€







€




™kzHTMLParser.goaheadc                 C   sž   | j }|||d … dkr|  |¡S |||d … dkr!|  |¡S |||d …  ¡ dkrJ| d|d ¡}|dkr;dS |  ||d	 |… ¡ |d
 S |  |¡S )Né   r(   é   z<![é	   z	<!doctyper   r*   r)   r   )r   r8   Zparse_marked_sectionr   r-   Úhandle_declÚparse_bogus_comment)r   rB   r   Úgtposr	   r	   r
   r:   ÿ   s   


z!HTMLParser.parse_html_declarationr   c                 C   sD   | j }| d|d ¡}|dkrdS |r|  ||d |… ¡ |d S )Nr   r)   r*   r   )r   r-   Úhandle_comment)r   rB   Úreportr   Úposr	   r	   r
   rK     s   zHTMLParser.parse_bogus_commentc                 C   sH   | j }t ||d ¡}|sdS | ¡ }|  ||d |… ¡ | ¡ }|S )Nr)   r*   )r   Úpicloser0   r1   Ú	handle_pir>   )r   rB   r   r5   rD   r	   r	   r
   r9      s   zHTMLParser.parse_pic                 C   sØ  d | _ |  |¡}|dk r|S | j}|||… | _ g }t ||d ¡}| ¡ }| d¡ ¡  | _}||k rt	 ||¡}|s=nS| ddd¡\}	}
}|
sLd }n-|d d… d  kr^|dd … kssn |d d… d  krq|dd … kryn n|dd… }|rt
|ƒ}| |	 ¡ |f¡ | ¡ }||k s4|||…  ¡ }|dvrÍ|  ¡ \}}d	| j v r»|| j  d	¡ }t| j ƒ| j  d	¡ }n|t| j ƒ }|  |||… ¡ |S | d
¡rÚ|  ||¡ |S |  ||¡ || jv rê|  |¡ |S )Nr   r   r)   rH   ú'r*   ú")r   ú/>Ú
rT   )r   Úcheck_for_whole_start_tagr   Útagfind_tolerantr5   r>   r<   r   r   Úattrfind_tolerantr   ÚappendÚstripZgetposÚcountr,   r.   r2   ÚendswithÚhandle_startendtagÚhandle_starttagÚCDATA_CONTENT_ELEMENTSr"   )r   rB   Úendposr   Úattrsr5   rE   ÚtagÚmÚattrnameÚrestZ	attrvaluer>   ÚlinenoÚoffsetr	   r	   r
   r6   ,  sV   
&(ó

ÿ
ý

zHTMLParser.parse_starttagc                 C   s²   | j }t ||¡}|rU| ¡ }|||d … }|dkr|d S |dkr?| d|¡r-|d S | d|¡r5dS ||kr;|S |d S |dkrEdS |dv rKdS ||krQ|S |d S td	ƒ‚)
Nr   r   ú/rT   r)   r*   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r   Úlocatestarttagend_tolerantr5   r>   r3   ÚAssertionError)r   rB   r   rc   rD   Únextr	   r	   r
   rV   _  s.   z$HTMLParser.check_for_whole_start_tagc                 C   s  | j }t ||d ¡}|sdS | ¡ }t ||¡}|s`| jd ur+|  |||… ¡ |S t ||d ¡}|sH|||d … dkrC|d S |  	|¡S | 
d¡ ¡ }| d| ¡ ¡}|  |¡ |d S | 
d¡ ¡ }| jd ur||| jkr||  |||… ¡ |S |  |¡ |  ¡  |S )Nr   r*   r)   rH   z</>r   )r   Ú	endendtagr0   r>   Ú
endtagfindr5   r   r2   rW   rK   r<   r   r-   Úhandle_endtagr$   )r   rB   r   r5   rL   Z	namematchZtagnamer!   r	   r	   r
   r7     s6   





zHTMLParser.parse_endtagc                 C   s   |   ||¡ |  |¡ d S r#   )r^   rn   ©r   rb   ra   r	   r	   r
   r]   ©  s   zHTMLParser.handle_startendtagc                 C   ó   d S r#   r	   ro   r	   r	   r
   r^   ®  ó   zHTMLParser.handle_starttagc                 C   rp   r#   r	   )r   rb   r	   r	   r
   rn   ²  rq   zHTMLParser.handle_endtagc                 C   rp   r#   r	   ©r   rF   r	   r	   r
   r=   ¶  rq   zHTMLParser.handle_charrefc                 C   rp   r#   r	   rr   r	   r	   r
   r@   º  rq   zHTMLParser.handle_entityrefc                 C   rp   r#   r	   r   r	   r	   r
   r2   ¾  rq   zHTMLParser.handle_datac                 C   rp   r#   r	   r   r	   r	   r
   rM   Â  rq   zHTMLParser.handle_commentc                 C   rp   r#   r	   )r   Zdeclr	   r	   r
   rJ   Æ  rq   zHTMLParser.handle_declc                 C   rp   r#   r	   r   r	   r	   r
   rQ   Ê  rq   zHTMLParser.handle_pic                 C   rp   r#   r	   r   r	   r	   r
   Úunknown_declÍ  rq   zHTMLParser.unknown_decl)r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r_   r   r   r   r   r   r   r"   r$   r   r:   rK   r9   r6   rV   r7   r]   r^   rn   r=   r@   r2   rM   rJ   rQ   rs   r	   r	   r	   r
   r   >   s8    		z
3"()rw   r   r   Zhtmlr   Ú__all__r   r   rA   r?   r;   r4   rP   ZcommentcloserW   rX   ÚVERBOSEri   rl   rm   r   r   r	   r	   r	   r
   Ú<module>   s,    








ÿò

