o
    d:                  	   @  s(  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZ	ddl
mZmZmZmZ ddlmZ ddlmZ ddlZdd	lmZ dd
lmZ dZdZdZdZg dZdZdZdZ dZ!de de  de de! d	Z"de de dZ#dZ$d)ddZ%d*d!d"Z&d#d$ Z'd%d& Z(G d'd( d(eej)Z*dS )+a-  
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
    )annotations)abc)datetimeN)CompressionOptionsDatetimeNaTTypeFilePath
ReadBuffer)Appender)find_stack_level)
get_handle)
ReaderBasezPHEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  zKHEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000zPHEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  zPHEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  )ntypenhfunfield_lengthnvar0namelabelnformnflnum_decimalsnfjnfillniformniflnifdnpos_zParameters
----------
filepath_or_buffer : str or file-like object
    Path to SAS file or object implementing binary read method.zindex : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : str
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.zBformat : str
    File format, only `xport` is currently supported.z\iterator : bool, default False
    Return XportReader object for reading file incrementally.z#Read a SAS file into a DataFrame.


a  

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

z$Class for reading SAS Xport files.

z

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
zRead observations from SAS Xport file, returning as data frame.

Parameters
----------
nrows : int
    Number of rows to read from data file; if None, read whole
    file.

Returns
-------
A DataFrame.
datestrstrreturnr   c                 C  s(   zt | dW S  ty   tj Y S w )z1Given a date in xport format, return Python date.z%d%b%y:%H:%M:%S)r   strptime
ValueErrorpdNaT)r    r%   \/var/www/html/visualizacion-main/env/lib/python3.10/site-packages/pandas/io/sas/sas_xport.py_parse_date   s
   
r'   sc                 C  s@   i }d}|D ]\}}| |||    ||< ||7 }q|d= |S )a  
    Parameters
    ----------
    s: str
        Fixed-length string to split
    parts: list of (name, length) pairs
        Used to break up string, name '_' will be filtered from output.

    Returns
    -------
    Dict of name:contents of string at given location.
    r   r   )strip)r(   partsoutstartr   lengthr%   r%   r&   _split_line   s   
r.   c                 C  sT   |dkr(t t| t d}t d| dd|  }|j|d}| |d< |S | S )N   S8Sz,Sdtypef0)npzeroslenr3   view)vecnbytesvec1r3   vec2r%   r%   r&   _handle_truncated_float_vec   s   	r=   c           	      C  s  t d}| j|d}|d }|d }|d@ }t jt| t jd}d|t |d@ < d|t |d	@ < d
|t |d@ < ||L }||? |d@ dd
|  > B }|dM }||d? d@ d d> | d d> |d@ B O }t jt|fdd}||d< ||d< |jdd}|d}|S )zf
    Parse a vector of float values representing IBM 8 byte floats into
    native 8 byte floats.
    z>u4,>u4r2   r4   f1i    i       i  @    i         l          A   i     l        z>f8f8)	r5   r3   r8   r6   r7   uint8whereemptyastype)	r9   r3   r;   xport1xport2ieee1shiftieee2ieeer%   r%   r&   _parse_float_vec   s*   
	 	
rS   c                   @  sz   e Zd ZeZ				d"d#ddZd$ddZdd Zdd Zd%ddZ	d&ddZ
d'd%ddZdd Zeed'd(d d!ZdS ))XportReaderN
ISO-8859-1inferfilepath_or_bufferFilePath | ReadBuffer[bytes]encoding
str | Nonecompressionr   r    Nonec                 C  s^   || _ d| _|| _|| _t|d|d|d| _| jj| _z|   W d S  t	y.   | 
   w )Nr   rbF)rY   is_textr[   )	_encoding_lines_read_index
_chunksizer   handleshandlerW   _read_header	Exceptionclose)selfrW   indexrY   	chunksizer[   r%   r%   r&   __init__   s$   
zXportReader.__init__c                 C  s   | j   d S N)rc   rg   rh   r%   r%   r&   rg     s   zXportReader.closec                 C  s   | j d S )NP   )rW   readdecoderm   r%   r%   r&   _get_row  s   zXportReader._get_rowc              	   C  sF  | j d |  }|tkrd|v rtdtd|  }ddgddgd	dgd
dgddgg}t||}|d dkr>tdt|d |d< || _|  }t|d d |d< |  }|  }|t	}|t
k}	|rl|	sptdt|dd }
ddgddgddgddgd	dgd
dgddgg}t|  |}ddgd
dgddgddgg}|t|  | t|d |d< t|d |d< || _ddd}t|  dd }|
| }|d r|d|d  7 }| j |}g }d}t||
krh|d |
 ||
d  }}|d}td|}ttt|}|d
= ||d   |d < |d! }|d  dkr:|d"k s0|dkr:d#| d$}t|| D ]\}}z	| ||< W q> tyV   Y q>w ||d! 7 }||g7 }t||
ks|  }|tksutd%|| _|| _| j  | _|  | _ d&d' | jD | _!d(d' t"| jD }t#$|}|| _%d S ))Nr   z**COMPRESSED**z<Header record indicates a CPORT file, which is not readable.z#Header record is not an XPORT file.prefixrD   versionr/   OSr   created   zSAS     SAS     SASLIBz!Header record has invalid prefix.modifiedzMember header not foundset_namesasdatar   (   typenumericchar)r?   r@   6   :   rn      z>hhhh8s40s8shhh2s8shhl52sr   r   r@   zFloating field width z is not between 2 and 8.zObservation header not found.c                 S  s   g | ]}|d    qS )r   )rp   ).0xr%   r%   r&   
<listcomp>      z,XportReader._read_header.<locals>.<listcomp>c                 S  s,   g | ]\}}d t | dt |d  fqS )r(   r1   r   )r   )r   ifieldr%   r%   r&   r     s    )&rW   seekrq   _correct_line1r"   r.   r'   	file_info
startswith_correct_header1_correct_header2intupdatemember_inforo   r7   ljuststructunpackdictzip
_fieldkeys	TypeErroritemsr)   AttributeError_correct_obs_headerfieldsrecord_lengthtellrecord_start_record_countnobscolumns	enumerater5   r3   _dtype)rh   line1line2fifr   line3header1header2	headflag1	headflag2fieldnamelengthmemr   types
fieldcount
datalength	fielddatar   
obs_length
fieldbytesfieldstructr   flmsgkvheaderdtypelr3   r%   r%   r&   re   !  s   "

	



"




zXportReader._read_headerpd.DataFramec                 C  s   | j | jpddS )Nr?   nrows)ro   rb   rm   r%   r%   r&   __next__  s   zXportReader.__next__r   c                 C  s   | j dd | j  | j }|d dkrtjdt d | jdkr.| j | j || j S | j dd | j d}t	j
|t	jd}t	|dk}t|dkrSd}nd	t| }| j | j || | j S )
z
        Get number of records in file.

        This is maybe suboptimal because we have to seek to the end of
        the file.

        Side effect: returns file position to record_start.
        r   r@   rn   zxport file may be corrupted.)
stacklevelir2   l     @@  r/   )rW   r   r   r   warningswarnr
   r   ro   r5   
frombufferuint64flatnonzeror7   )rh   total_records_lengthlast_card_bytes	last_cardixtail_padr%   r%   r&   r     s&   	

zXportReader._record_countc                 C  s   |du r| j }| j|dS )a  
        Reads lines from Xport file and returns as dataframe

        Parameters
        ----------
        size : int, defaults to None
            Number of lines to read.  If None, reads whole file.

        Returns
        -------
        DataFrame
        Nr   )rb   ro   )rh   sizer%   r%   r&   	get_chunk  s   zXportReader.get_chunkc                 C  sl   |j dd}|d dk|d dk@ |d dk@ }|d dk|d d	k@ |d d
kB |d dkB }||M }|S )Nzu1,u1,u2,u4r2   r>   r   f2f3r4   rF   Z   _   .   )r8   )rh   r9   r   missmiss1r%   r%   r&   _missing_double  s   $

zXportReader._missing_doubler   
int | Nonec                   sd  |d u r j }t| j  j }| j }|dkr   t j|}tj	| j
|d}i }t jD ]U\}}|dt|  }	 j| d }
|
dkret|	 j| d }	 |	}t|	}tj||< n j| d dkrdd	 |	D } jd ur fd
d	|D }|||i q5t|} jd u rtt j j| |_n| j}  j|7  _|S )Nr   )r3   countr(   r   r~   r   r   c                 S  s   g | ]}|  qS r%   )rstripr   yr%   r%   r&   r     s    z$XportReader.read.<locals>.<listcomp>c                   s   g | ]}|  jqS r%   )rp   r_   r   rm   r%   r&   r     r   )r   minr`   r   rg   StopIterationrW   ro   r5   r   r   r   r   r   r   r=   r   rS   nanr_   r   r#   	DataFramera   Indexrangeri   	set_index)rh   r   
read_linesread_lenrawdatadf_datajr   r9   r   r   r   dfr%   rm   r&   ro     s:   




zXportReader.read)NrU   NrV   )rW   rX   rY   rZ   r[   r   r    r\   )r    r\   )r    r   )r    r   rl   )r   r   r    r   )__name__
__module____qualname___xport_reader_doc__doc__rk   rg   rq   re   r   r   r   r   r	   _read_method_docro   r%   r%   r%   r&   rT      s     

n
&rT   )r   r   r    r   )r(   r   )+r   
__future__r   collectionsr   r   r   r   numpyr5   pandas._typingr   r   r   r   pandas.util._decoratorsr	   pandas.util._exceptionsr
   pandasr#   pandas.io.commonr   pandas.io.sas.sasreaderr   r   r   r   r   r   _base_params_doc_params2_doc_format_params_doc_iterator_doc_read_sas_docr   r   r'   r.   r=   rS   IteratorrT   r%   r%   r%   r&   <module>   s\    	

	9