
    6j                         d Z ddlZddlZddlmZ ddlmZmZ ddlmZ ddl	m
Z
 ddlmZ  ej        d	          Zd
ededefdZdedefdZdedefdZdeez  defdZdedz  dedz  fdZdS )z
Utilities.    N)suppress)datetimetimezone)unescape)unquote   )DDGSExceptionz<.*?>
html_bytesqueryreturnc                 .   dD ]~\  }}}t          t                    5  |                     |          |z   }|                     ||          }| ||                                         cddd           c S # 1 swxY w Y   d|d}t	          |          )zExtract vqd from html bytes.))s   vqd="      ")s   vqd=      &)s   vqd='r      'Nz_extract_vqd() query=z Could not extract vqd.)r   
ValueErrorindexdecoder	   )r
   r   c1c1_lenc2startendmsgs           L/root/.hermes/hermes-agent/venv/lib64/python3.11/site-packages/ddgs/utils.py_extract_vqdr      s     2 2FB
 j!! 	2 	2$$R((61E""2u--CeCi(//11	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2
 <E
;
;
;C


s   A
A66A:	=A:	urlc                 P    | r#t          |                               dd          ndS )z(Unquote URL and replace spaces with '+'. + )r   replace)r   s    r   _normalize_urlr$      s(    -0873<<S)))b8    rawc                 6   | sdS t                               d|           }t          |          }t          j        d|          }d t          |          D             }|r|                    |          }d                    |                                          S )zNormalize text.

    Strip HTML tags, unescape HTML entities, normalize Unicode,
    remove "c" category characters, and collapse whitespace.
    r"   NFCc                 j    i | ]0}t          j        |          d          dk     t          |          d1S )r   CN)unicodedatacategoryord).0chs     r   
<dictcomp>z#_normalize_text.<locals>.<dictcomp>7   s;    YYY2k6J26N6Nq6QUX6X6XR$6X6X6Xr%   r    )	_REGEX_STRIP_TAGSsubr   r+   	normalizeset	translatejoinsplit)r&   text	c_to_nones      r   _normalize_textr:   $   s      r   S))D D>>D  --D ZYTYYYI )~~i(( 88DJJLL!!!r%   datec                     t          | t                    r1t          j        | t          j                                                  n| S )z8Normalize date from integer to ISO format if applicable.)
isinstanceintr   fromtimestampr   utc	isoformat)r;   s    r   _normalize_daterB   ?   s:    EOPTVYEZEZd8!$55??AAA`ddr%   proxyc                     | dk    rdn| S )z.Expand "tb" to a full proxy URL if applicable.tbzsocks5h://127.0.0.1:9150 )rC   s    r   _expand_proxy_tb_aliasrG   D   s    ).$%%EAr%   )__doc__rer+   
contextlibr   r   r   htmlr   urllib.parser   
exceptionsr	   compiler1   bytesstrr   r$   r:   r>   rB   rG   rF   r%   r   <module>rQ      sg     				           ' ' ' ' ' ' ' '                   % % % % % %BJw'' U 3 3     9 9 9 9 9 9
" " " " " "6e#) e e e e e
B#* Bt B B B B B Br%   