o
    ds                  	   @   s  d dl mZ d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
Z
d dlZd dlmZ d dlmZ d dlZd dlZd dlmZ d dlm  mZ d dlZd d	lmZmZmZmZmZm Z m!Z!m"Z" d dl#m$Z% d d
l&m'Z'm(Z( d dl)m*Z* d dl+Zd dl+m,Z, ej-g dddd Z.dd Z/e0de0ddd Z1dd Z2e0de0de0ddd Z3ej45dej6de0de0dgdej6de0ddgG dd dZ7dS )     )partial)reload)BytesIOStringION)Path)Iterator)URLError)is_platform_windows)NA	DataFrame
MultiIndexSeries	Timestamp
date_rangeread_csvto_datetime)ArrowStringArrayStringArray)file_path_to_url	read_html)zchinese_utf-16.htmlzchinese_utf-32.htmlzchinese_utf-8.htmlzletz_latin1.html)paramsc                 C   s   |ddd| j S )z6Parametrized fixture for HTML encoding test filenames.iodatahtml_encoding)param)requestdatapath r   ^/var/www/html/visualizacion-main/env/lib/python3.10/site-packages/pandas/tests/io/test_html.pyhtml_encoding_file*   s   
r    c                 O   s   t | t |ksJ dt |  dt | d}ttdd | |}|s(J |t| |D ]\}}tj||g|R i | |jrEJ dq-d S )Nz*lists are not of equal size len(list1) == z, len(list2) == z$not all list elements are DataFramesc                 S   s   t | to	t |tS N)
isinstancer   )xyr   r   r   <lambda>@   s    z(assert_framelist_equal.<locals>.<lambda>zframes are both empty)lenallmapziptmassert_frame_equalempty)list1list2argskwargsmsgboth_framesframe_iframe_jr   r   r   assert_framelist_equal7   s(   r5   bs4html5libc                 C   s`   dd l }| |dd tjtdd t|dddd	d
d W d    d S 1 s)w   Y  d S )Nr   __version__z4.2zPandas requires versionmatchr   r   html	spam.htmlr6   flavor)r6   setattrpytestraisesImportErrorr   )monkeypatchr   r6   r   r   r   test_bs4_version_failsK   s
   "rD   c                  C   sV   d} d}d| d }t jt|d t| d|d W d    d S 1 s$w   Y  d S )Nz
google.comzinvalid flavorz\{z \} is not a valid set of flavorsr9   googler:   r>   r@   rA   
ValueErrorr   )urlr>   r1   r   r   r   test_invalid_flavorU   s   "rJ   lxmlc                 C   s<   | dddd}t |ddgd}t |ddgd}t|| d S )	Nr   r   r;   valid_markup.htmlr   rK   )	index_colr>   r6   r   r5   )r   filenamedfs_lxmldfs_bs4r   r   r   test_same_ordering^   s   rR   r>   )marksc                   @   s   e Zd Zejdd Zejdd Zejdddd Zd	d
 Zdd Z	ej
jejddddd Zej
jejddddd Zej
jdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Z d6d7 Z!d8d9 Z"d:d; Z#d<d= Z$ej
jejd>d? Z%ej
jej
jejd@dA Z&ej
jdBdC Z'ej
jdDdE Z(dFdG Z)ej
jdHdI Z*ej
jdJdK Z+ej
jdLdM Z,ej
jdNdO Z-ej
jdPdQ Z.ej
jdRdS Z/ej
jdTdU Z0dVdW Z1ej
jejdXdddYdZ Z2ej
jejdXddd[d\ Z3d]d^ Z4d_d` Z5dadb Z6dcdd Z7dedf Z8dgdh Z9ej
jdidj Z:ej
jdkdl Z;dmdn Z<dodp Z=dqdr Z>dsdt Z?dudv Z@dwdx ZAdydz ZBd{d| ZCd}d~ ZDdd ZEdd ZFdd ZGdd ZHej
Idddgdd ZJdd ZKdd ZLdd ZMdd ZNdd ZOdd ZPdd ZQej
jdd ZRdd ZSdd ZTej
IddeUdgdfdeUdgeUdgfgdd ZVej
Wddd ZXdd ZYdd ZZej
jdd Z[dd Z\dd Z]ej
Idg ddd Z^dd Z_dd Z`dd ZadS )TestReadHtmlc                 C      |ddddS )Nr   r   r;   r<   r   selfr   r   r   r   	spam_datap      zTestReadHtml.spam_datac                 C   rU   )Nr   r   r;   banklist.htmlr   rV   r   r   r   banklist_datat   rY   zTestReadHtml.banklist_dataT)autousec                 c   s    t t|d| _d V  d S )Nr=   )r   r   )rW   r>   r   r   r   set_defaultsx   s   
zTestReadHtml.set_defaultsc                 C   sV   t jdddd ddddjt}| }| j|dd	id
dd
 }t || d S )N      c                  W   s
   t j S r!   )nprandomrand)r/   r   r   r   r%      s   
 z2TestReadHtml.test_to_html_compat.<locals>.<lambda>F)
data_gen_fc_idx_namesr_idx_namesz{:.3f}class	dataframer   )attrsrM   )	r*   makeCustomDataframeapplymapformatastypefloatto_htmlr   r+   )rW   dfoutresr   r   r   test_to_html_compat}   s   z TestReadHtml.test_to_html_compatc              
      s  t tdtjdgddtg dddtdtjdgddtg d	ddg d
g dg dg dd}|dkrOttjg dtjd}ttjddtgtjd}nt	dt
g d}t
g d}|jdd}td| | j||dd }W d    n1 sw   Y  t tdtjdgddtg dddtdtjdgddtg d	ddtddtgddtg ddd||d|dkrdd lddlm  t  fddjD t| d S )N   r_   Int64)dtype)rs      r_         ?      @Float64)rw   g       @rx   )TFN)TFTabc)r{   r|   N)r{   r|   r}   defghpythonr{   r|   pyarrowFindexzmode.string_storagedtype_backendr   Tboolean)ArrowExtensionArrayc                    s$   i | ]}| j | d dqS )T)from_pandas)array).0colr   expectedpar   r   
<dictcomp>   s    z3TestReadHtml.test_dtype_backend.<locals>.<dictcomp>)r   r   r`   nanr   r   object_r
   r@   importorskipr   rn   pdoption_contextr   r   pandas.arraysr   columnsr*   r+   )rW   string_storager   ro   string_arraystring_array_narp   resultr   r   r   test_dtype_backend   sR   
zTestReadHtml.test_dtype_backendThttps://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html)rI   check_before_testc                 C   .   d}| j |dd}| j |dd}t|| d S )Nr   First Federal Bank of Floridar9   Metcalf BankrN   rW   rI   df1df2r   r   r   test_banklist_url   s   	zTestReadHtml.test_banklist_url\https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/html/spam.htmlc                 C   r   )Nr   	.*Water.*r9   UnitrN   r   r   r   r   test_spam_url   s
   
zTestReadHtml.test_spam_urlc                 C   s6   | j |dddid}| j |dddid}t|| d S )Nz.*Florida.*idtabler:   rh   r   rN   )rW   r[   r   r   r   r   r   test_banklist   s   zTestReadHtml.test_banklistc                 C   sV   | j |dd}| j |dd}t|| |d jd dksJ |d jd dks)J d S )Nr   r9   r   r   r   r   
ProximatesNutrient)r   r5   ilocr   rW   rX   r   r   r   r   r   	test_spam   s
   
zTestReadHtml.test_spamc                 C   s&   |  |}|D ]	}t|tsJ qd S r!   r   r"   r   )rW   rX   dfsro   r   r   r   test_spam_no_match   s   
zTestReadHtml.test_spam_no_matchc                 C   s.   | j |ddid}|D ]	}t|tsJ qd S )Nr   r   )rh   r   )rW   r[   r   ro   r   r   r   test_banklist_no_match  s   z#TestReadHtml.test_banklist_no_matchc                 C   s4   | j |dddd }|jd dksJ |jrJ d S )Nr   rv   r:   headerr   r   )r   r   r,   )rW   rX   ro   r   r   r   test_spam_header  s   zTestReadHtml.test_spam_headerc                 C   .   | j |ddd}| j |ddd}t|| d S Nr   rs   r:   skiprowsr   rN   r   r   r   r   test_skiprows_int     zTestReadHtml.test_skiprows_intc                 C   6   | j |dtdd}| j |dtdd}t|| d S Nr   rv   r   r   )r   ranger5   r   r   r   r   test_skiprows_range     z TestReadHtml.test_skiprows_rangec                 C   s6   | j |dddgd}| j |dddgd}t|| d S Nr   rs   rv   r   r   rN   r   r   r   r   test_skiprows_list  r   zTestReadHtml.test_skiprows_listc                 C   s6   | j |dddhd}| j |dddhd}t|| d S r   rN   r   r   r   r   test_skiprows_set  r   zTestReadHtml.test_skiprows_setc                 C   r   r   rN   r   r   r   r   test_skiprows_slice#  r   z TestReadHtml.test_skiprows_slicec                 C   r   r   r   slicer5   r   r   r   r   test_skiprows_slice_short)  r   z&TestReadHtml.test_skiprows_slice_shortc                 C   s<   | j |dtddd}| j |dtdddd}t|| d S )	Nr   rv      r   r   r^   rs   r   r   r   r   r   test_skiprows_slice_long/  s   z%TestReadHtml.test_skiprows_slice_longc                 C   s:   | j |dtdd}| j |dtdd}t|| d S r   )r   r`   aranger5   r   r   r   r   test_skiprows_ndarray5  s   z"TestReadHtml.test_skiprows_ndarrayc                 C   sD   t jtdd | j|ddd W d    d S 1 sw   Y  d S )Nz%is not a valid type for skipping rowsr9   r   asdfr   )r@   rA   	TypeErrorr   )rW   rX   r   r   r   test_skiprows_invalid;  s   "z"TestReadHtml.test_skiprows_invalidc                 C   r   Nr   r   r:   rM   r   rN   r   r   r   r   
test_index?  s   zTestReadHtml.test_indexc                 C   2   | j |dddd}| j |dddd}t|| d S Nr   rs   r   )r:   r   rM   r   rN   r   r   r   r   test_header_and_index_no_typesD     z+TestReadHtml.test_header_and_index_no_typesc                 C   r   r   rN   r   r   r   r    test_header_and_index_with_typesI  r   z-TestReadHtml.test_header_and_index_with_typesc                 C   r   r   rN   r   r   r   r   test_infer_typesN  s   zTestReadHtml.test_infer_typesc                 C   s   t |dd}t| }W d    n1 sw   Y  t |dd}t| }W d    n1 s3w   Y  | j|dd}| j|dd}t|| d S NzUTF-8)encodingr   r9   r   )openr   readr   r5   )rW   rX   r   data1data2r   r   r   r   r   test_string_ioT  s   zTestReadHtml.test_string_ioc                 C   s^   t |dd}| }W d    n1 sw   Y  | j|dd}| j|dd}t|| d S r   )r   r   r   r5   )rW   rX   r   r   r   r   r   r   r   test_string_  s   
zTestReadHtml.test_stringc                 C   s   t |dd}| j|dd}W d    n1 sw   Y  t |dd}| j|dd}W d    n1 s5w   Y  t|| d S r   )r   r   r5   )rW   rX   r   r   r   r   r   r   test_file_likeh  s   zTestReadHtml.test_file_likec                 C   sB   t jtdd | jddd W d    d S 1 sw   Y  d S )Nz#urlopen error unknown url type: gitr9   zgit://github.comr   )r@   rA   r   r   rW   r   r   r   test_bad_url_protocolq  s   "z"TestReadHtml.test_bad_url_protocolc                 C   sJ   d}t jttf|d | jddd W d    d S 1 sw   Y  d S )NzNName or service not known|Temporary failure in name resolution|No tables foundr9   zhttp://www.a23950sdfa908sd.comr   )r@   rA   r   rH   r   rW   r1   r   r   r   test_invalid_urlw  s
   "zTestReadHtml.test_invalid_urlc                 C   sN   |}| j ttj|dddid}t|tsJ |D ]	}t|ts$J qd S )NFirstr   r   r   )r   r   ospathabspathr"   listr   rW   r[   rI   r   ro   r   r   r   test_file_url  s   zTestReadHtml.test_file_urlc                 C   sL   |}t jtdd | j|dddid W d    d S 1 sw   Y  d S )NzNo tables foundr9   r   r   	tasdfabler   rG   )rW   r[   rI   r   r   r   test_invalid_table_attrs  s   
"z%TestReadHtml.test_invalid_table_attrsc                 K   s   | j |fdddid|S )NMetcalfr   r   r   r   )rW   r   r0   r   r   r   
_bank_data  s   zTestReadHtml._bank_datac                 C   *   | j |ddgdd }t|jtsJ d S )Nr   rs   r   r   r"   r   r   rW   r[   ro   r   r   r   test_multiindex_header     z#TestReadHtml.test_multiindex_headerc                 C   r   )Nr   rs   rM   )r   r"   r   r   r   r   r   r   test_multiindex_index  r   z"TestReadHtml.test_multiindex_indexc                 C   s@   | j |ddgddgdd }t|jtsJ t|jtsJ d S )Nr   rs   )r   rM   )r   r"   r   r   r   r   r   r   r   test_multiindex_header_index  s   z)TestReadHtml.test_multiindex_header_indexc                 C   ,   | j |ddgddd }t|jtsJ d S Nr   rs   )r   r   r   r   r   r   r   &test_multiindex_header_skiprows_tuples     z3TestReadHtml.test_multiindex_header_skiprows_tuplesc                 C   r  r  r   r   r   r   r   test_multiindex_header_skiprows  r  z,TestReadHtml.test_multiindex_header_skiprowsc                 C   sB   | j |ddgddgddd }t|jtsJ t|jtsJ d S )Nr   rs   )r   rM   r   )r   r"   r   r   r   r   r   r   r   %test_multiindex_header_index_skiprows  s   z2TestReadHtml.test_multiindex_header_index_skiprowsc                 C   sZ   |}| j ttj|ttdddid}t|tsJ |D ]	}t|t	s*J q!d S )NFloridar   r   r   )
r   r   r   r   r   recompiler"   r   r   r   r   r   r   test_regex_idempotency  s   z#TestReadHtml.test_regex_idempotencyc                 C   sH   d}t jt|d | j|ddd W d    d S 1 sw   Y  d S )Nz\(you passed a negative value\)r9   Waterr   r   rG   rW   rX   r1   r   r   r   test_negative_skiprows  s   "z#TestReadHtml.test_negative_skiprowshttps://docs.python.org/2/c                 C   s&   d}| j |dd}t|dksJ d S )Nr  Pythonr9   rs   r   r&   )rW   rI   r   r   r   r   test_multiple_matches  s   z"TestReadHtml.test_multiple_matchesc                 C   s<   d}| j |dd}dd |D }t|tddgksJ d S )Nr  r  r9   c                 S   s   g | ]}|j d  dd qS )r   r   r^   )r   )r   ro   r   r   r   
<listcomp>  s    z7TestReadHtml.test_python_docs_table.<locals>.<listcomp>RepoWhat)r   sorted)rW   rI   r   zzr   r   r   test_python_docs_table  s   z#TestReadHtml.test_python_docs_tablec                 C   s"   d}|  |}t|dksJ dS )z@
        Make sure that read_html ignores empty tables.
        a  
            <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
            </table>
            <table>
                <tbody>
                </tbody>
            </table>
        rs   Nr  )rW   r;   r   r   r   r   test_empty_tables  s   
zTestReadHtml.test_empty_tablesc                 C   s:   |  dd }tddgddggddgd	}t|| d S )
Na  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </tbody>
            <tbody>
                <tr>
                    <td>3</td>
                    <td>4</td>
                </tr>
            </tbody>
        </table>r   rs   rv   r_   r^   ABr   r   r   r   r*   r+   rW   r   r   r   r   r   test_multiple_tbody  s   z TestReadHtml.test_multiple_tbodyc                 C   s0   |  dd }tddidgd}t|| dS )zt
        Don't fail with bs4 when there is a header and only one column
        as described in issue #9178
        a3  <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>first</td>
                    </tr>
                </tbody>
            </table>r   Headerfirstr   r   Nr  r  r   r   r   test_header_and_one_column  s   z'TestReadHtml.test_header_and_one_columnc                 C   s4   |  dd }tg dgg dd}t|| dS )zK
        Ensure parser adds <tr> within <thead> on malformed HTML.
        a  <table>
            <thead>
                <tr>
                    <th>Country</th>
                    <th>Municipality</th>
                    <th>Year</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Ukraine</td>
                    <th>Odessa</th>
                    <td>1944</td>
                </tr>
            </tbody>
        </table>r   )UkraineOdessa  )CountryMunicipalityYearr  Nr  r  r   r   r   test_thead_without_tr*  s   z"TestReadHtml.test_thead_without_trc                 C   s   d}t ddggddgd}t ddgddggddgd}|jd	d
}|jdd
}| |d }| |d }t|| t|| dS )zh
        Make sure that read_html reads tfoot, containing td or th.
        Ignores empty tfoot
        a  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>bodyA</td>
                    <td>bodyB</td>
                </tr>
            </tbody>
            <tfoot>
                {footer}
            </tfoot>
        </table>bodyAbodyBr  r  r  footAfootB )footerz%<tr><td>footA</td><th>footB</th></tr>r   N)r   rk   r   r*   r+   )rW   data_template	expected1	expected2r   r   result1result2r   r   r   test_tfoot_readH  s   zTestReadHtml.test_tfoot_readc                 C   s4   | j dddd }tddggdd}t|| d S )Na
  
            <table>
                <tr>
                    <td>S</td>
                    <td>I</td>
                </tr>
                <tr>
                    <td>text</td>
                    <td>1944</td>
                </tr>
            </table>
        r   r   textr%  )SIr   r  r  r   r   r   &test_parse_header_of_non_string_columnn  s   z3TestReadHtml.test_parse_header_of_non_string_columnc                    s   ddl m   fdd}| j|dddidd }t|d	d
ddttdd}|j|jks.J g d}g d}||||}||}	|}
ddg}|
| t	|
|< t
|
|	 d S )Nr   _remove_whitespacec                    s"   z | W S  t y   |  Y S w r!   )AttributeErrorr#   r;  r   r   try_remove_ws  s
   
z8TestReadHtml.test_banklist_header.<locals>.try_remove_wsr   r   r   r   r   r   csvzbanklist.csv)Updated DateClosing Date
converters)
z,First Vietnamese American Bank In Vietnamesez"Westernbank Puerto Rico En Espanolz*R-G Premier Bank of Puerto Rico En EspanolzEurobank En EspanolzSanderson State Bank En EspanolzLWashington Mutual Bank (Including its subsidiary Washington Mutual Bank FSB)zSilver State Bank En Espanolz%AmTrade International Bank En EspanolzHamilton Bank, NA En Espanolz6The Citizens Savings Bank Pioneer Community Bank, Inc.)
zFirst Vietnamese American BankzWesternbank Puerto RicozR-G Premier Bank of Puerto RicoEurobankzSanderson State BankzWashington Mutual BankzSilver State BankzAmTrade International BankzHamilton Bank, NAzThe Citizens Savings BankrB  rA  )pandas.io.htmlr<  r   r   r   shaperj   replaceapplyr   r*   r+   )rW   r[   r   r?  ro   ground_trutholdnewdfnewgtnew	converted	date_colsr   r;  r   test_banklist_header  s    
z!TestReadHtml.test_banklist_headerc                 C   sl   d}t |}| }W d    n1 sw   Y  ||v s J | j|dddidd }|| v s4J d S )NzGold Canyonr   r   r   r   )r   r   r   	to_string)rW   r[   gcr   raw_textro   r   r   r   test_gold_canyon  s   

zTestReadHtml.test_gold_canyonc                 C   s4   | j dddd }| j dddd }t|| d S )Na  <table>
                        <thead>
                            <tr style="text-align: right;">
                            <th></th>
                            <th>C_l0_g0</th>
                            <th>C_l0_g1</th>
                            <th>C_l0_g2</th>
                            <th>C_l0_g3</th>
                            <th>C_l0_g4</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <th>R_l0_g0</th>
                            <td> 0.763</td>
                            <td> 0.233</td>
                            <td> nan</td>
                            <td> nan</td>
                            <td> nan</td>
                            </tr>
                            <tr>
                            <th>R_l0_g1</th>
                            <td> 0.244</td>
                            <td> 0.285</td>
                            <td> 0.392</td>
                            <td> 0.137</td>
                            <td> 0.222</td>
                            </tr>
                        </tbody>
                    </table>r   r   a  <table>
                    <thead>
                        <tr style="text-align: right;">
                        <th></th>
                        <th>C_l0_g0</th>
                        <th>C_l0_g1</th>
                        <th>C_l0_g2</th>
                        <th>C_l0_g3</th>
                        <th>C_l0_g4</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                        <th>R_l0_g0</th>
                        <td> 0.763</td>
                        <td> 0.233</td>
                        </tr>
                        <tr>
                        <th>R_l0_g1</th>
                        <td> 0.244</td>
                        <td> 0.285</td>
                        <td> 0.392</td>
                        <td> 0.137</td>
                        <td> 0.222</td>
                        </tr>
                    </tbody>
                 </table>)r   r*   r+   )rW   r   r   r   r   r   test_different_number_of_cols  s    "z*TestReadHtml.test_different_number_of_colsc                 C   s4   |  dd }tg dgg dd}t|| d S )NaZ  
            <table>
                <tr>
                    <th>A</th>
                    <th colspan="1">B</th>
                    <th rowspan="1">C</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                    <td>c</td>
                </tr>
            </table>
        r   rz   )r  r  Cr9  r  r  r   r   r   test_colspan_rowspan_1  s   z#TestReadHtml.test_colspan_rowspan_1c                 C   8   | j dddd }tg dgg dd}t|| d S )Na  
            <table>
                <tr>
                    <td colspan="2">X</td>
                    <td>Y</td>
                    <td rowspan="2">Z</td>
                    <td>W</td>
                </tr>
                <tr>
                    <td>A</td>
                    <td colspan="2">B</td>
                    <td>C</td>
                </tr>
            </table>
        r   r   )r  r  r  ZrW  )XzX.1YrZ  Wr  r  r  r   r   r    test_colspan_rowspan_copy_values  s   z-TestReadHtml.test_colspan_rowspan_copy_valuesc                 C   rY  )Na(  
            <table>
                <tr>
                    <td rowspan="2">A</td>
                    <td rowspan="2" colspan="3">B</td>
                    <td>C</td>
                </tr>
                <tr>
                    <td>D</td>
                </tr>
            </table>
        r   r   )r  r  r  r  D)r  r  zB.1zB.2rW  r  r  r  r   r   r   test_colspan_rowspan_both_not_1=  s   z,TestReadHtml.test_colspan_rowspan_both_not_1c                 C   s8   | j dddd }tddggddgd}t|| d S )Nz
            <table>
                <tr>
                    <td>A</td>
                    <td rowspan="2">B</td>
                </tr>
                <tr>
                    <td>C</td>
                </tr>
            </table>
        r   r   rW  r  r  r  r  r  r   r   r   test_rowspan_at_end_of_row[  s   z'TestReadHtml.test_rowspan_at_end_of_rowc                 C   s>   | j dddd }tddgddggddgd}t|| d S )Nz
            <table>
                <tr>
                    <td rowspan="3">A</td>
                    <td rowspan="3">B</td>
                </tr>
            </table>
        r   r   r  r  r  r  r  r   r   r   test_rowspan_only_rowsv  s   
z#TestReadHtml.test_rowspan_only_rowsc                 C   T   |  dd }tddgddggddgddggd}tdd	gg|d
}t|| d S )Nam  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <th>a</th>
                    <th>b</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </table>
        r   r  r  r{   r|   rs   levelscodesrv   r  r   r   r   r*   r+   rW   r   r   r   r   r   r   +test_header_inferred_from_rows_with_only_th  s   $z8TestReadHtml.test_header_inferred_from_rows_with_only_thc                 C   sd   t dtdddi}| }| j|dgdd}t||d  | j|dgdd}t||d  d S )Ndate1/1/2001
   periodsrs   r   parse_datesrM   )r   r   rn   r   r*   r+   )rW   ro   r   rq   r   r   r   test_parse_dates_list  s   z"TestReadHtml.test_parse_dates_listc                 C   sn   t tddd}t|dd |dd d}| j| dd	d
gid	d}td|i}t||d  d S )Nrk  rl  rm  c                 S      t |  S r!   )strrj  r>  r   r   r   r%         z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>c                 S   rr  r!   )rs  timer>  r   r   r   r%     rt  )rj  ru  datetimers   rv   ro  r   )r   r   r   r(   r   rn   r*   r+   )rW   	raw_datesro   rq   newdfr   r   r   test_parse_dates_combine  s   z%TestReadHtml.test_parse_dates_combinec                 C   s   |dddd}t j|sJ t| dt j|s%J t| d| j|ddd	d
 }|jdks6J d|jd v s?J |d jt	dksKJ t	
|jd dsVJ d S )Nr   r   r;   wikipedia_states.htmlz is not a filez is an empty fileArizonars   r   r   )<      Unnamedr   sq mifloat64)r   r  HzPN$A)r   r   isfilereprgetsizer   rG  r   ru   r`   allcloselocrW   r   r   r   r   r   r   test_wikipedia_states_table  s   z(TestReadHtml.test_wikipedia_states_tablec                 C   sp   |dddd}| j |dddd }|jdksJ d	|jd
 d v s#J |jjdks+J t|jd ds6J d S )Nr   r   r;   rz  r{  r   r   )r|     r~  r   rs   rv   )Alaska)zTotal area[2]r  r  )r   rG  r   nlevelsr`   r  r  r  r   r   r    test_wikipedia_states_multiindex  s   z-TestReadHtml.test_wikipedia_states_multiindexc                 C   sB   | j dddgd}tddggtddgd	}t|d | d S )
NaK  
                <table>
                    <thead>
                        <tr><th></th><th></tr>
                        <tr><th>A</th><th>B</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>a</td><td>b</td></tr>
                    </tbody>
                </table>
            r   rs   r   r{   r|   )Unnamed: 0_level_0r  )zUnnamed: 1_level_0r  r9  )r   r   r   from_tuplesr*   r+   r  r   r   r   %test_parser_error_on_empty_header_row  s   z2TestReadHtml.test_parser_error_on_empty_header_rowc                 C   sL   | j dddd }tddidgd}|d jtdksJ t|| d S )	Na  <html>
            <body>
             <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1100#101</td>
                    </tr>
                </tbody>
            </table>
            </body>
        </html>#)decimalr   r  gClg0@r!  r  )r   r   ru   r`   r*   r+   r  r   r   r   test_decimal_rows  s   zTestReadHtml.test_decimal_rowsargFc                 C   sL   t d}tjt|d | j||d W d    d S 1 sw   Y  d S )NzPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column namesr9   r   )r  escaper@   rA   r   r   )rW   rX   r  r1   r   r   r   test_bool_header_arg  s   "z!TestReadHtml.test_bool_header_argc                 C   s6   | j ddtidd }tdddgi}t|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                    </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>r{   rC  r   z0.763z0.244)r   rs  r   r*   r+   r  r   r   r   test_converters  s   zTestReadHtml.test_convertersc                 C   s6   | j ddgdd }tddtjgi}t|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                   </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>gZd;?)	na_valuesr   r{   g"~j?r   r   r`   r   r*   r+   r  r   r   r   test_na_values!  s   zTestReadHtml.test_na_valuesc                 C   sh   d}t dddgi}| j|ddd }t|| t dtjtjgi}| j|ddd }t|| d S )	Na  <table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> N/A</td>
                            </tr>
                            <tr>
                            <td> NA</td>
                            </tr>
                        </tbody>
                    </table>r{   zN/Ar
   F)keep_default_nar   T)r   r   r*   r+   r`   r   )rW   	html_dataexpected_dfhtml_dfr   r   r   test_keep_default_na:  s   z!TestReadHtml.test_keep_default_nac                 C   s>   |  dd }tddgtjtjggddgd}t|| d S )Nak  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                </tr>
                <tr>
                    <td></td>
                    <td></td>
                </tr>
            </table>
        r   r{   r|   r  r  r  r  r  r   r   r   test_preserve_empty_rowsS  s    z%TestReadHtml.test_preserve_empty_rowsc                 C   rc  )NaU  
            <table>
                <thead>
                    <tr><th></th><th></tr>
                    <tr><th>A</th><th>B</th></tr>
                    <tr><th>a</th><th>b</th></tr>
                </thead>
                <tbody>
                    <tr><td>1</td><td>2</td></tr>
                </tbody>
            </table>
        r   r  r  r{   r|   rs   rd  rv   r  rg  rh  r   r   r   ,test_ignore_empty_rows_when_inferring_headerk  s   $z9TestReadHtml.test_ignore_empty_rows_when_inferring_headerc                 C   sJ   t g dd}g dg dg|_|jdd}| |d }t|| d S )N))HillaryD   r_  )BernieJ   r_  )DonaldE   R)r   )r  AgeParty)NamezUnnamed: 1_level_1zUnnamed: 2_level_1Fr   r   )r   r   rn   r   r*   r+   )rW   r  r;   r  r   r   r   test_multiple_header_rows  s   z&TestReadHtml.test_multiple_header_rowsc                 C   s@   |dddd}| j |dd}t|tsJ t|d tsJ d S )Nr   r   r;   rL   r   r   )r   r"   r   r   )rW   r   rO   r   r   r   r   test_works_on_valid_markup  s   z'TestReadHtml.test_works_on_valid_markupc                 C   s&   |dddd}| j |dddgd d S )	Nr   r   r;   rZ   r   rK   r7   rF   r   )rW   r   r[   r   r   r   test_fallback_success  s   z"TestReadHtml.test_fallback_successc                 C   s:   t ddd}ttjdd|d}| }d|v sJ d S )Nz
2000-01-01rl  rm  r^   r   )r   r   r`   ra   randnrn   )rW   rngro   r   r   r   r   test_to_html_timestamp  s   z#TestReadHtml.test_to_html_timestampc                 C   s   t dddg}| }|jdd}|jdd}|jdd}|jdd}|jdd}d|v s0J ||ks6J ||ks<J ||ksBJ d	|v sHJ d
|vsNJ d|vsTJ ||ksZJ d S )Nrs   rv   )r  r  T)borderr   Fz border="1"z border="2"z border="0"z border)r   rn   )rW   ro   out_border_defaultout_border_trueout_border_explicit_defaultout_border_nondefaultout_border_zeroout_border_falser   r   r   test_to_html_borderless  s   z$TestReadHtml.test_to_html_borderlesszdisplayed_only,exp0,exp1fooNzfoo  bar  baz  quxc                 C   sV   t d}| j||d}t|d | |d ur!t|d | d S t|dks)J d S )Na  <html>
          <body>
            <table>
              <tr>
                <td>
                  foo
                  <span style="display:none;text-align:center">bar</span>
                  <span style="display:none">baz</span>
                  <span style="display: none">qux</span>
                </td>
              </tr>
            </table>
            <table style="display: none">
              <tr>
                <td>foo</td>
              </tr>
            </table>
          </body>
        </html>)displayed_onlyr   rs   )r   r   r*   r+   r&   )rW   r  exp0exp1r   r   r   r   r   test_displayed_only  s   	z TestReadHtml.test_displayed_onlyz\ignore:You provided Unicode markup but also provided a value for from_encoding.*:UserWarningc           
      C   s  t j|}t j|d }|d\}}z]t|d}| j| |dd }W d    n1 s2w   Y  t|d}| jt	| |dd }W d    n1 sUw   Y  | j||dd }	t
|| t
||	 W d S  ty   t rd|v sd|v rt   w )Nr   _rb)r   rM   1632)r   r   basenamesplitextsplitr   r   r   popr   r*   r+   	Exceptionr	   r@   skip)
rW   r    	base_pathrootr  r   fobjfrom_stringfrom_file_likefrom_filenamer   r   r   test_encode  s>   
zTestReadHtml.test_encodec                 C   s   | j jddkrtd G dd dt}|d}|  |s!J tjtdd |  | W d    d S 1 s9w   Y  d S )	Nr>   rK   zNot applicable for lxmlc                   @   s   e Zd Zdd ZdS )zFTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIOc                 S      dS NFr   r   r   r   r   seekable     zOTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIO.seekableN)__name__
__module____qualname__r  r   r   r   r   UnseekableStringIO   s    r  z?
            <table><tr><td>spam<foobr />eggs</td></tr></table>z#passed a non-rewindable file objectr9   )r   keywordsgetr@   r  r   rA   rH   )rW   r  badr   r   r   test_parse_failure_unseekable  s   
"z*TestReadHtml.test_parse_failure_unseekablec                 C   s>   G dd d}|d}|d}|  |sJ |  |sJ d S )Nc                   @   sF   e Zd ZdddZdddZdd Zd	d
 Zdd ZdefddZ	dS )z9TestReadHtml.test_parse_failure_rewinds.<locals>.MockFilereturnNc                 S   s   || _ d| _d S r  )r   at_end)rW   r   r   r   r   __init__  s   
zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__init__c                 S   s   | j rdn| j}d| _ |S )Nr.  T)r  r   )rW   sizer   r   r   r   r     s   z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.readc                 S   s
   d| _ d S r  )r  )rW   offsetr   r   r   seek  s   
z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekc                 S   r  )NTr   r   r   r   r   r    r  zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekablec                 S   s   d S r!   r   r   r   r   r   __next__"  r  zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__next__c                 S   s   | S r!   r   r   r   r   r   __iter__%  s   zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__iter__)r  Nr!   )
r  r  r  r  r   r  r  r  r   r  r   r   r   r   MockFile  s    

r  z/<table><tr><td>spam<br />eggs</td></tr></table>z2<table><tr><td>spam<foobr />eggs</td></tr></table>r   )rW   r  goodr  r   r   r   test_parse_failure_rewinds  s
   z'TestReadHtml.test_parse_failure_rewindsc                 C   s   G dd dt j}ttjj |dddd}|| j|fd}|| j|fd}|  |  | s6| r?	 | s6| s6d |j	  u rL|j	u sOJ  J d S )Nc                       s   e Zd Z fddZ  ZS )z@TestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThreadc              
      sF   zt    W n ty } z
|| _W Y d }~d S d }~ww d | _d S r!   )superrunr  err)rW   r  	__class__r   r   r  5  s   
zDTestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThread.run)r  r  r  r  __classcell__r   r   r  r   ErrorThread4  s    r  r   r   r;   rL   )targetr/   )
	threadingThreadr   pandasr   r;   r   startis_aliver  )rW   r   r  rO   helper_thread1helper_thread2r   r   r   test_importcheck_thread_safety0  s   
$z+TestReadHtml.test_importcheck_thread_safetyc                 C   sB   |dddd}t |}| |d }| |d }t|| d S )Nr   r   r;   r<   r   )r   r   r*   r+   )rW   r   file_path_string	file_pathr   r   r   r   r   test_parse_path_objectK  s
   z#TestReadHtml.test_parse_path_objectc                 C   s0   |  dd }tdggdgd}t|| d S )Nz
            <table>
                <tr>
                    <th>A</th>
                </tr>
                <tr>
                    <td>word1<br>word2</td>
                </tr>
            </table>
        r   zword1 word2r  r  r  r  r   r   r   test_parse_br_as_spaceS  s   z#TestReadHtml.test_parse_br_as_space)r'   bodyr   r/  c           	      C   s   d}g dg dg dg dg dg dd}|d	 }|d
 }|d }|dkr4|d }|d }|d }n|dkr=|d }n|dkrF|d }n|dkrN|d }| j ||dd }t||g|d}t|| d S )Na  
          <table>
            <tr>
              <th>HTTP</th>
              <th>FTP</th>
              <th><a href="https://en.wiktionary.org/wiki/linkless">Linkless</a></th>
            </tr>
            <tr>
              <td><a href="https://en.wikipedia.org/">Wikipedia</a></td>
              <td>SURROUNDING <a href="ftp://ftp.us.debian.org/">Debian</a> TEXT</td>
              <td>Linkless</td>
            </tr>
            <tfoot>
              <tr>
                <td><a href="https://en.wikipedia.org/wiki/Page_footer">Footer</a></td>
                <td>
                  Multiple <a href="1">links:</a> <a href="2">Only first captured.</a>
                </td>
              </tr>
            </tfoot>
          </table>
          )HTTPFTPLinkless))r  N)r  N)r  z'https://en.wiktionary.org/wiki/linkless)	WikipediaSURROUNDING Debian TEXTr  ))r  zhttps://en.wikipedia.org/)r  zftp://ftp.us.debian.org/)r  N)Footer$Multiple links: Only first captured.N))r   z)https://en.wikipedia.org/wiki/Page_footer)r  1N)head_ignorehead_extractbody_ignorebody_extractfooter_ignorefooter_extractr  r  r  r'   r  r  r  r  r/  r   extract_linksr   r9  r  )	rW   r  gh_13141_datagh_13141_expecteddata_expfoot_exphead_expr   r   r   r   r   test_extract_linksf  s0   


zTestReadHtml.test_extract_linksc                 C   sD   d}t jt|d t|dd W d    d S 1 sw   Y  d S )NzY`extract_links` must be one of {None, "header", "footer", "body", "all"}, got "incorrect"r9   	incorrectr	  rG   r  r   r   r   test_extract_links_bad  
   "z#TestReadHtml.test_extract_links_badc                 C   s2   d}| j |ddd }tdgg}t|| d S )Nz
        <table>
          <tr>
            <td>
              <a href='https://google.com'>Google.com</a>
            </td>
          </tr>
        </table>
        r'   r	  r   )z
Google.comzhttps://google.comr  )rW   r   r   r   r   r   r    test_extract_links_all_no_header  s   	z-TestReadHtml.test_extract_links_all_no_headerc                 C   sD   d}t jt|d tddd W d    d S 1 sw   Y  d S )NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.r9   testnumpyr   rG   r   r   r   r   test_invalid_dtype_backend  r  z'TestReadHtml.test_invalid_dtype_backend)br  r  r  r@   fixturerX   r[   r]   rr   r   marknetworkr*   r   r   slowr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r
  r  r  r  r  r  r"  r)  r5  r:  rQ  rU  rV  rX  r^  r`  ra  rb  ri  rq  ry  r  r  r  r  parametrizer  r  r  r  r  r  r  r  r  r  r  r   r  filterwarningsr  r  r  r  r  r  r  r  r  r  r   r   r   r   rT   h   s   



7

		
	







&
/
D!




 
"

CrT   )8	functoolsr   	importlibr   r   r   r   r   pathlibr   r  r  typingr   urllib.errorr   r  r`   r@   pandas.compatr	   pandas.util._test_decoratorsutil_test_decoratorstdr  r   r
   r   r   r   r   r   r   r   pandas._testing_testingr*   pandas.core.arraysr   r   pandas.io.commonr   rF  r   r  r    r5   
skip_if_norD   rJ   rR   r  r  r   rT   r   r   r   r   <module>   sP    (

	