
    6j"	                     8    d Z ddlZddlmZ  G d d          ZdS )zSimple filter ranker.    N)Finalc            	           e Zd ZU dZ ej        d          Zeed<   dde	ddfdZ
d	edee         fd
Zdedee         defdZdeeeef                  d	edeeeef                  fdZdS )SimpleFilterRankeraA  Simple filter ranker.

    1) Pull any doc with 'wikipedia.org' in its href to the top.
    2) Bucket the rest according to where query tokens appear:
       - both title & body/description
       - title only
       - body only
       - neither
    3) Return wikipedia-top + both + title-only + body-only + neither.
    z\W+	_splitter   min_token_lengthreturnNc                     || _         d S N)r   )selfr   s     Q/root/.hermes/hermes-agent/venv/lib64/python3.11/site-packages/ddgs/similarity.py__init__zSimpleFilterRanker.__init__   s     0    queryc                 t      fd j                             |                                          D             S )z7Split on non-word characters & filter out short tokens.c                 D    h | ]}t          |          j        k    |S  )lenr   ).0tokenr   s     r   	<setcomp>z5SimpleFilterRanker._extract_tokens.<locals>.<setcomp>   s-    nnn%#e**X\XmJmJmJmJmJmr   )r   splitlower)r   r   s   ` r   _extract_tokensz"SimpleFilterRanker._extract_tokens   s5    nnnn4>#7#7#F#Fnnnnr   texttokensc                 b    |                                 t          fd|D                       S )z:Check if any token is a substring of the lower-cased text.c              3       K   | ]}|v V  	d S r   r   )r   tok
lower_texts     r   	<genexpr>z4SimpleFilterRanker._has_any_token.<locals>.<genexpr>   s(      773*$777777r   )r   any)r   r   r   r    s      @r   _has_any_tokenz!SimpleFilterRanker._has_any_token   s3    ZZ\\
7777777777r   docsc                    |                      |          }g }g }g }g }g }|D ]}	|	                    dd          }
|	                    dd          |	                    d|	                    dd                    }t          fddD                       rud|
v r|                    |	           |                     |          }|                     ||          }|r|r|                    |	           |r|                    |	           |r|                    |	           |                    |	           ||z   |z   |z   |z   S )	z,Rank a list of docs based on a query string.href titlebodydescriptionc              3       K   | ]}|v V  	d S r   r   )r   xr(   s     r   r!   z*SimpleFilterRanker.rank.<locals>.<genexpr>2   s'      BB!1:BBBBBBr   )z	Category:	Wikimediazwikipedia.org)r   getallappendr#   )r   r$   r   r   	wiki_hitsboth
title_only	body_onlyneitherdocr&   r)   	hit_titlehit_bodyr(   s                 @r   rankzSimpleFilterRanker.rank!   s   %%e,,	
	 	$ 	$C7762&&DGGGR((E776377="#=#=>>D BBBB'ABBBBB  $&&  %%% ++E6::I**488H $X $C     $!!#&&&& $  %%%%s#### 4*,y87BBr   )r   )__name__
__module____qualname____doc__recompiler   r   __annotations__intr   strsetr   boolr#   listdictr9   r   r   r   r   r      s
        	 	 "rz&))Iu)))1 1 1T 1 1 1 1oS oSX o o o o83 8C 8T 8 8 8 8
'Cd38n- 'Cc 'Cd4S>>R 'C 'C 'C 'C 'C 'Cr   r   )r=   r>   typingr   r   r   r   r   <module>rH      sl      				      AC AC AC AC AC AC AC AC AC ACr   