
    /i                    &   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.Z.d dl/Z/d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7  ejp                  e9      Z:g dZ;g dZ<ddddddddd d!d"d#d$Z=d%e>d&e>fd'Z?d^d(e@d)e>d*e>fd+ZAd,eeB   fd-ZCd.eeB   d,eeB   fd/ZDd0eBd,eeB   fd1ZEd0eBd,eeB   fd2ZFd_d3eBd4eBd5e>d,ee   fd6ZGd7 ZH ed89      ZId: ZJd;eBd,eBfd<ZKd;eBd,eBfd=ZLd>eBd,ee>   fd?ZMd5e>d@e>d,ee>e>f   fdAZNd5e>d@ee>   d,eBfdBZO	 d_dCe(d;eBd@ee>   d4eBd3eBd5e>dDeeB   d,ePfdEZQd`dFeBdGe@d,ePfdHZRdI ZSdadJePdKePd,ej                  fdLZUdM ZJdN ZVdbdOeBdDeBd,ePfdPZWdQeBd,ee>   fdRZXdcd0eBdSe@d,ePfdTZYd4eBd3eBd5e>dUeZd,ee   f
dVZ[d4eBd3eBd5e>dUeZd,ee   f
dWZ\dddXe1dYe3dZe@d,ee>   fd[Z] G d\ d]      Z^y)e    N)datetime	timedelta)OptionalListDictTuple)urlparse	urlencode)UUID)ThreadPoolExecutor)BeautifulSoup)	webdriver)Service)Options)ChromeDriverManager)WebDriverWait)By)Keys)AsyncSession)select)HTTPExceptionstatus)Product)Vendor)	Violation)ScrapingResult)zoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36zuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36zeMozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36zPMozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0zrMozilla/5.0 (Macintosh; Intel Mac OS X 14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3 Safari/605.1.15zNMozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0z}Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0)zsocks4://103.146.170.233:5678zsocks4://103.165.64.86:4153zhttp://108.165.152.59:80zsocks4://103.204.54.50:1080z|text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7zgzip, deflate, brzes-ES,es;q=0.9,en;q=0.8z	max-age=0z@"Not_A Brand";v="8", "Chromium";v="122", "Google Chrome";v="122"z?0z	"Windows"documentnavigatenonez?11)AcceptzAccept-EncodingzAccept-LanguagezCache-Controlz	Sec-Ch-UazSec-Ch-Ua-MobilezSec-Ch-Ua-PlatformzSec-Fetch-DestzSec-Fetch-ModezSec-Fetch-SitezSec-Fetch-UserzUpgrade-Insecure-Requestsmin_secondsmax_secondsc                 Z    t        j                  | |      }t        j                  |       y)z1Introduce a random delay to mimic human behavior.N)randomuniformtimesleep)r"   r#   delays      B/var/www/html/marco-python-backend/app/services/scraper_service.pyhuman_delayr+   H   s    NN;4EJJu    attempt
base_delay	max_delayc                     |d| z  z  }t        j                  d|dz        }t        ||z   |      }t        j                  |       y)zc
    Calculate exponential backoff delay.
    Useful for retry logic when hitting rate limits.
       r   g?N)r%   r&   minr'   r(   )r-   r.   r/   r)   jitterfinal_delays         r*   exponential_backoff_delayr5   N   sB    
 !w,'E^^Aus{+Fefni0KJJ{r,   returnc                      t         j                  j                  d      xs t         j                  j                  d      } | r| S t        rt	        j
                  t              }|S y)zx
    Get a free proxy from environment or predefined list.
    For production, consider using a paid proxy service.
    
HTTP_PROXY
http_proxyN)osenvirongetFREE_PROXIESr%   choice)	env_proxyproxys     r*   get_free_proxyrA   Z   sJ     

|,L

|0LIl+r,   r@   c                 Z    | sy| j                  d      s| j                  d      r| S d|  S )zFormat proxy URL for Selenium.Nzhttp://socks)
startswith)r@   s    r*   get_selenium_proxy_urlrE   k   s5    	"e&6&6w&?UGr,   queryc                 D   g }	 t         j                  d       t        j                  j	                  d      }|st         j                  d       |S d}|| dddd}t        j                  ||d	      }|j                          |j                         }|j	                  d
g       dd D ]  }d|v s|j                  |d           t         j                  dt        |       d       |S # t        $ r,}t         j                  dt        |              Y d}~|S d}~ww xY w)z
    Search using Tavily API (best for AI/automation).
    Tavily is designed for AI agents and is much more reliable than browser automation.
    z'[Discovery] Using Tavily API for searchTAVILY_API_KEYzB[Discovery] TAVILY_API_KEY not configured - skipping Tavily searchzhttps://api.tavily.com/searchF   )api_keyrF   include_answermax_resultsinclude_raw_content   )jsontimeoutresultsNurlu+   [Discovery] ✓ Tavily API search returned z resultsz&[Discovery] Tavily API search failed: )loggerinfor:   r;   r<   warningrequestspostraise_for_statusrO   appendlen	Exceptionstr)	rF   rQ   rJ   rR   payloadresponsedataresultes	            r*   search_tavily_apirb   t   s   
 GJ=>**..!12NN_`N- ##(
 ==7B?!!#}} hhy"-bq1 	.Fve}-	. 	A#g,xXY N  J?AxHIINJs%   AC* AC* 08C* *	D3!DDc                     t        |       S )z
    Search using Tavily API instead of Selenium (avoids detection completely).
    Falls back to API-based search when Google blocks you.
    )rb   )rF   s    r*   search_via_apird      s    
 U##r,   product_namebarcodemspc                 B
   g }	 t         j                  d       t        j                  j	                  d      }|s,t         j                  d       t         j                  d       |S |dk(  r,t         j                  d       t         j                  d       |S |  }t         j                  d| d	       d
}d|d|d}	 t        j                  ||d      }|j                          |j                         }	|	s|S d|	v r)t         j                  d|	j	                  d              |S |	j	                  dg       }|st         j                  d       dD ]  }||	v s|	|   s t         j                  d       |rd|v r| j                         }	 d||d}t        j                  ||d      }|j                          |j                         }|j	                  dg       }|r%t         j                  dt        |       d       |}nt         j                  d       t#        |dd       D ]e  \  }}	 |j	                  d d!      }|j	                  d"d#      xs |j	                  d$d#      }|j	                  d%d#      }|j	                  d&      }|y|j	                  d'd#      }|re	 t        |      j%                  d(d#      j%                  d)d#      j%                  d*d#      j%                  d+d,      j                         }t'        |      }|s||||d-d.}|r|r||k  rd/|d0<   d/|d1<   nd2|d0<   d2|d1<   n
d3|d0<   d3|d1<   |j-                  |       t         j                  d4|d5z    d6| d7|dd8  d9|rd*|d:nd; d<|d0   j/                          
       h t         j                  d>t1        dt        |             d?t        |       d@t        |       dA       |S # t        j                  j                  $ r-}
t         j                  dt        |
              |cY d}
~
S d}
~
wt        $ r-}t         j                  dt        |              |cY d}~S d}~ww xY w# t         $ r,}t         j                  dt        |              Y d}~nd}~ww xY w# t        t(        t*        f$ r d}Y w xY w# t         $ r/}t         j                  d=| d6t        |              Y d}~d}~ww xY w# t         $ r.}t         j                  dBt        |       dCD       Y d}~|S d}~ww xY w)Ea  
    Search using SERP API Google Shopping Light Engine.
    Uses direct HTTP requests to SERP API (more reliable than library).
    
    Args:
        product_name: Product name for search
        barcode: Product barcode
        msp: Minimum Selling Price (optional, used for violation detection)
    
    Returns:
        List of dictionaries with product and vendor information
    zG[Discovery SERP] Using SERP API Google Shopping Light Engine for searchSERP_API_KEYuD   [Discovery SERP] ❌ SERP_API_KEY not found in environment variablesz@[Discovery SERP] Please add SERP_API_KEY=<your_key> to .env fileyour_serp_api_key_hereu=   [Discovery SERP] ❌ SERP_API_KEY is set to placeholder valuezV[Discovery SERP] Please replace 'your_serp_api_key_here' with your actual SERP API keyz [Discovery SERP] Search query: ''zhttps://serpapi.com/search.jsongoogle_shopping_lightes)engineqglrJ      )paramsrP   z&[Discovery SERP] HTTP request failed: Nz0[Discovery SERP] Failed to parse JSON response: errorz[Discovery SERP] API Error: shopping_resultsu=   [Discovery SERP] ⚠️ No 'shopping_results' key in response)rQ   productsitemssearch_resultsorganic_resultszW[Discovery SERP] No results found. Trying fallback: searching with product name only... )rn   ro   rJ   u7   [Discovery SERP] ✓ Fallback search successful! Found z results with product name onlyuI   [Discovery SERP] ⚠️ Fallback search also returned no shopping_resultsz)[Discovery SERP] Fallback search failed: rN   sourceUnknownproduct_link linktitleextracted_priceprice$u   €u   ₹,.serp_api_shopping_light)vendor_name
vendor_urlscraped_priceproduct_titlerz   	violationr   compliance_status	compliantunknown   [Discovery SERP] ✓ Result    :  - <   
 - Price: .2fN/A - Status: z&[Discovery SERP] Error parsing result u:   [Discovery SERP] ✓ SERP API search completed. Processed z out of z available results, returned z for analysisz)[Discovery SERP] SERP API search failed: Texc_info)rS   rT   r:   r;   r<   rs   rV   rX   rO   
exceptionsRequestExceptionr\   
ValueErrorrU   striprZ   r[   	enumeratereplacefloatAttributeError	TypeErrorrY   upperr2   )re   rf   rg   rQ   rJ   search_queryserp_urlrr   r^   r_   	req_error
json_errorrt   alt_keyname_only_queryfallback_paramsfallback_responsefallback_datafallback_resultsfallback_erroridxr`   r   r|   r   r   	price_strprice_cleanresult_dictra   s                                 r*   search_serp_apir      s    GSZ]^**..0 LL_`LL[\N..LLXYLLqrN '6|nAFG 5 .	
		||HVRHH%%'==?D N d?LL78I7JKLN  88$6;  NNZ\ a d?tG}
 NNtv 3,."."4"4"6f"9,#*'O
 )1Xo_a(b%%668$5$:$:$<M'4'8'89KR'P$'&]^abr^s]t  uT  %U  V+;()rt
 %%5cr%:; 4	KC3 %jj9=%zz."=WFTVAW

7B/ 

#45= &

7B 7I )*-i.*@*@b*I*Q*QRWY[*\*d*dejln*o*w*wx{  ~A  +B  +H  +H  +JK$)+$6E $ $/".%*%*7 Ss{0;H-;F$780;H-;F$78,5K)7@K 34{+:37)2k]RUVbcfdfVgUhhr  INuxy~  @C  yD  tE  TY  sZ  Ze  fq  rz  f{  fA  fA  fC  eD  E  Fa4	l 	PQTUWY\]mYnQoPppxy|  ~N  zO  yP  Pm  nq  ry  nz  m{  {H  I  	J
 Ne ""33 	LLA#i.AQRSN 	LLKCPZOK\]^N	` ! fNN%NsSaObNc#deef* !+NIF )$(E)B  !GuBsSTvhWX  Z@QITXYYNZs  A!S' &0S' %S' =8O 5S' 9,S' &2S' S' -S' BQ S' /A1R,!A$RR,S' BR,AS' Q3"PQS' Q'"Q	Q
S' QS' 	R !RS' RS' R)%R,(R))R,,	S$5$SS' S$$S' '	T0#TTc                     t        j                         } | dk7  ryt        j                  d       g d}g }|D ]=  }	 t	        j
                  d|gdd      }|j                  dk7  r|j                  |       ? |r=t        j                  d	d
j                  |              t        j                  d       yd}	 t	        j
                  ddgdd      }|j                  dk(  rB|j                  j                         j                         }t        j                  d|        d}|sFg d}|D ]=  }	t         j"                  j%                  |	      s#t        j                  d|	        d} n |st        j                  d       y	 t	        j
                  ddgddd      }|j                  dk(  rK|j                  j                         r1t        j                  d|j                  j                                 yt        j                  d|j&                          t        j                  d       y# t        $ r |j                  |       Y w xY w# t        $ r,}t        j                  dt        |              Y d}~_d}~ww xY w# t        j(                  $ r t        j                  d       Y yt        $ r+}t        j                  dt        |              Y d}~yd}~ww xY w)zFEnsure required system dependencies are installed for Chrome on Linux.LinuxTzF[Chrome Dependencies] Checking Chrome and system utilities on Linux...)readlinkdirnamecatbasenamegrepsedawkwhichr1   capture_outputrP   r   u7   [Chrome Dependencies] ✗ Missing core shell commands: z, zZ[Chrome Dependencies] DevOps must install: sudo apt-get install -y coreutils grep sed gawkFgoogle-chromerI   u0   [Chrome Dependencies] ✓ Chrome found in PATH: z,[Chrome Dependencies] Could not check PATH: N)/usr/bin/google-chrome/usr/bin/google-chrome-stable/usr/bin/chromiumu+   [Chrome Dependencies] ✓ Chrome found at: uG   [Chrome Dependencies] ✗ Chrome NOT found - DevOps must install Chrome	--version
   r   textrP   u-   [Chrome Dependencies] ✓ Chrome executable: u3   [Chrome Dependencies] ✗ Chrome --version failed: zT[Chrome Dependencies] This indicates missing shell commands (readlink, dirname, cat)uT   [Chrome Dependencies] ✗ Chrome --version timed out (likely missing shell commands)u3   [Chrome Dependencies] ✗ Could not verify Chrome: )platformsystemrS   rT   
subprocessrun
returncoderY   r[   rs   joinstdoutdecoder   debugr\   r:   pathexistsstderrTimeoutExpired)
r   required_commandsmissing_commandscmdr`   chrome_foundchrome_pathra   pathsr   s
             r*   ensure_chrome_dependenciesr   L  s   __F
KKXY Y  )	)^^WcN4QRSF  A% '',	) NtyyYiOjNklmqr LN/ :4YZ[! --..0668KKKJ;-XYL
 ` 	Dww~~d#I$PQ#		 ^_+ >tZ^hjk!fmm&9&9&;KKGH[H[H]G^_`LLNv}}o^_LLopS  	)##C(	)   NCCF8LMMN4 $$ kl J3q6(STsO   :H3:A+I A4J ;7J 3II	J	!JJ	(K)6K)>!K$$K)r   )max_workersc                 ^   	 t        | d      ri| j                  r\| j                  }t        j                  j	                  |      r0t        j                  |d       t        j                  d|        yyyy# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY wz(Clean up Chrome session temporary files._user_data_dirTignore_errorsz,[Chrome Cleanup] Removed session directory: z$[Chrome Cleanup] Failed to cleanup: Nhasattrr   r:   r   r   shutilrmtreerS   rT   r[   rU   r\   driveruser_data_dirra   s      r*   cleanup_chrome_sessionr         H6+,1F1F"11Mww~~m,m4@J=/Z[ - 2G,
  H=c!fXFGGH   A2A8 8	B,!B''B,rR   c                 P    t        |       }|j                  j                  dd      S )zExtract domain name from URL.www.r}   )r	   netlocr   )rR   parseds     r*   get_domain_from_urlr     s"    c]F==  ,,r,   c                 :   	 t        | j                         j                               }|j                  j	                  dd      j                  d      d   }|j                  j                  d      }| | S #  | j                         j                         cY S xY w)z
    Normalize a URL for comparison purposes.
    Removes www, trailing slashes, query parameters to compare similar URLs.
    r   r}   :r   /)r	   lowerr   r   r   splitr   rstrip)rR   r   domainr   s       r*   normalize_url_for_comparisonr     s    
	##))+++-.&&vr288=qA{{!!#&$  #yy{  ""s   A5A8 8 Br   c                     	 t        j                  ddt        |       j                               }t	        |j                  dd            S # t        t        f$ r Y yw xY w)z Normalize price string to float.z[^\d.,]r}   r   r   N)resubr\   r   r   r   r   r   )r   cleans     r*   normalize_pricer     sS    z2s9~';';'=>U]]3,--' s   AA AAr   c                 Z    t        | |z
  d      }t        | dkD  r|| z  dz  ndd      }||fS )z5Calculate price difference and percentage difference.r1   r   d   )round)rg   r   
difference
percentages       r*   calculate_price_differencer    s<    s]*A.J37
S(3.1EJz!!r,   c                 "    |y|| k  ry|| kD  ryy)z6Determine compliance status based on price comparison.r   r   complainr    )rg   r   s     r*   determine_compliance_statusr    s$    s		r,   dbr   c                   K   ddl m} 	 ||r|sy|rt        t              j	                   |t        j
                  |k(  t        j                  |k(  t        j                  |k(  t        j                  |k(  t        j                  |k(              }| j                  |       d{   }	|	j                         j                         }
|
r&t        j                  d| d| d| d| d	| d
       y|rt        t              j	                   |t        j
                  |k(  t        j                  |k(  t        j                  |k(  t        j                  |k(  t        j                  |k(              }| j                  |       d{   }	|	j                         j                         }
|
r%t        j                  d| d| d|dd  d|        yy7 )7 M# t         $ r+}t        j#                  dt%        |              Y d}~yd}~ww xY ww)aV  
    Check if a violation record with MATCHING vendor already exists for this product/price.
    Prevents duplicates created by finding the same vendor from different sources
    (e.g., registered vendor scraping vs SERP API discovery).
    
    Considers it a duplicate if BOTH of these match:
    - product_name
    - barcode_number
    - msp
    - scraped_price
    - vendor_name (NOT url, since same vendor can be found via different URLs)
    
    This prevents duplicate violations for the same vendor selling at the same price.
    
    Returns True if duplicate found, False otherwise.
    r   )and_NFz [Duplicate] Vendor match found: z	 selling z (barcode: z) @ z (MSP: )Tz#[Duplicate] Exact URL match found: z), URL: r   	, Price: z[Duplicate Check] Error: )
sqlalchemyr	  r   r   wherebarcode_numberre   rg   r   r   executescalarsfirstrS   rT   rR   r[   rU   r\   )r  rR   r   rf   re   rg   r   r	  stmtr`   existingra   s               r*   check_duplicate_violationr    s    2  , | )$**,,7**l:MMS(++}<))[8D ::d++F~~'--/H>{m9UaTbbmnumvvz  |I  {J  JQ  RU  QV  VW  X  Y )$**,,7MMS(++}<**l:MMS(D ::d++F~~'--/HA,{[bZccklopsqsltkuu~  @M  N  O  P3 ,$ ,  23q6(;<sl   H	G H	BG $G%A	G .H	/BG GAG H	G G 	H!H<H	HH	r   min_gbc                 >   	 ddl } |j                  |       }|j                  dz  }||k  rt        j	                  d|dd| d       yt        j                  d	|dd
| d       y# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY w)z.Check if there's enough disk space for Chrome.r   Ni   @u%   [Disk Space] ✗ Insufficient space: r   zGB available, need GBFu   [Disk Space] ✓ Available: zGB (threshold: zGB)Tz)[Disk Space] Could not check disk space: )	r   
disk_usagefreerS   rs   rT   r[   rU   r\   )r   r  r   statavailable_gbra   s         r*   check_disk_spacer    s     v  &yyI.& LL@c@RReflemmopq2<2DOTZS[[^_` B3q6(KLs   AA( 
A( (	B1!BBc                     t        j                         } | dk7  ry	 t        j                  g ddd       t        j                  d       y# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY w)	z0Kill any orphaned Chrome/ChromeDriver processes.r   N)pkillz-9z-fchromeTrI   r   z2[Process Cleanup] Killed orphaned Chrome processesz,[Process Cleanup] Could not kill processes: )	r   r   r   r   rS   rT   r[   r   r\   )r   ra   s     r*   !cleanup_orphaned_chrome_processesr   /  sk    __FN6tUVWHI NCCF8LMMNs   /A 	B !A;;B headless	use_proxyc                    t               st        d      t        dd      st        d      t                t	               }| rw|j                  d       |j                  d       |j                  d       |j                  d	       |j                  d
       |j                  d       |j                  d       |rFt               }|r:t        |      }|j                  d|        t        j                  d|dd         |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d        |j                  d!       |j                  d"       |j                  d#       |j                  d$       |j                  d%       |j                  d&       |j                  d'       |j                  d(       |j                  d)       |j                  d*       |j                  d+       |j                  d,       |j                  d-       |j                  d.       |j                  d/       |j                  d0       |j                  d1       |j                  d2       |j                  d3       |j                  d4       |j                  d5       |j                  d6       |j                  d7       |j                  d8       |j                  d9       |j                  d:       |j                  d;       |j                  d<       |j                  d=       |j                  d>       |j                  d?       |j                  d@       |j                  dA       |j                  dB       |j                  dC       |j                  dD       |j                  dE       |j                  dF       |j                  dG       |j                  dH       |j                  dI       |j                  dJ       |j                  dK       |j                  dL       |j                  dM       |j                  dN       |j                  dO       |j                  dP       |j                  dQ       |j                  dR       |j                  dS       |j                  dT       |j                  dU       t        j                  t              }t        j                  dV|ddW  dX       |j                  dY|        |j                  dZd[g       |j                  d\d]       d^t        j                  d_<   d`t        j                  da<   dbdl}t#        |j%                               ddc }dd| }	 t        j&                  |def       t        j&                  dgdef       t        j&                  dhdef       |j                  dj|        t        j                  j-                  dk      xs t        j                  j-                  dl      }	|	rt        j                  dm|	        |	s=dnD ]8  }
t/        j0                  |
      }|s|}	t        j                  do|
 dp|	         n |	st3        j4                         }t        j                  dq| dX       |drk(  rdsdtg}n|duk(  rg dv}n|dwk(  rdxdyg}ng }|D ]=  }t        j6                  j9                  |      s#|}	t        j                  dz|	         n |	st        j+                  d{       |	rt        j                  d||	        	 t        j6                  j9                  |	      rt        j                  d}|	        t;        j<                  |	d~gdeded      }|j>                  dbk(  r1t        j                  d|j@                  jC                                 n=t        j+                  d|jD                          nt        jG                  d|	        d}	|	r |	|_%        t        j                  d|	        nt        j+                  d       	 t        j                  d       tM               jO                         }t        j                  d|        tQ        |      }t        j                  d       tS        jT                  ||      }t        j                  d       ||_+        |S # t(        $ r,}t        j+                  dit#        |              Y d}~Ad}~ww xY w# t:        jH                  $ r t        j+                  d       Y 5t(        $ r,}t        j+                  dt#        |              Y d}~dd}~ww xY w# t(        $ r}t        jG                  dt#        |       de       	 dtY               v rNt        j6                  j9                  |      r/t/        jZ                  |de       t        j                  d|         # t(        $ r Y  w xY wd}~ww xY w)zGInitialize and configure Chrome WebDriver with anti-detection measures.zOChrome dependencies check failed. DevOps must install Chrome via setup scripts./tmpr   )r  z;Insufficient disk space in /tmp. DevOps must free up space.z--headless=newz--window-size=1920,1080z--start-maximizedz--disable-gpuz--no-sandboxz--disable-setuid-sandboxz--disable-dev-shm-usagez--proxy-server=z[Chrome Init] Using proxy: Nrq   z---disable-blink-features=AutomationControlledz--disable-software-rasterizerz--incognitoz--disable-gpu-sandboxz--disable-extensionsz--disable-pluginsz--disable-plugins-discoveryz--disable-print-previewz4--disable-component-extensions-with-background-pagesz--no-default-browser-checkz--disable-background-networkingz--disable-syncz--disable-translatez--hide-scrollbarsz--metrics-recording-onlyz--mute-audioz--no-first-runz"--safebrowsing-disable-auto-updatez--disable-accelerated-2d-canvasz--no-zygotez%--disable-background-timer-throttlingz(--disable-backgrounding-occluded-windowsz --disable-renderer-backgroundingz>--disable-features=TranslateUI,IsolateOrigins,site-per-processz!--disable-ipc-flooding-protectionz--disable-default-appsz--password-store=basicz--use-mock-keychainz--disable-web-securityz --allow-running-insecure-contentz--disable-webglz--disable-threaded-animationz--disable-threaded-scrollingz!--disable-in-process-stack-tracesz--disable-histogram-customizerz--disable-gl-extensionsz!--disable-composited-antialiasingz--disable-canvas-aaz--disable-3d-apisz"--disable-accelerated-video-decodez#--disable-background-media-downloadz--disable-domain-reliabilityz(--disable-client-side-phishing-detectionz--disable-component-updatez--disable-hang-monitorz--disable-prompt-on-repostz3--force-fieldtrials=SiteIsolationExtensions/Controlz--disable-back-forward-cachez--disable-popup-blockingz --disable-session-crashed-bubblez--disable-infobarsz--disable-breakpadz--disable-crash-reporterz--disable-default-tracingz--disable-media-sessionz--no-service-autorunz--disable-audioz@--disable-features=VizDisplayCompositor,AudioServiceOutOfProcessz--disable-preconnectz--disable-client-hintsz--remote-debugging-port=9222z"--remote-debugging-address=0.0.0.0z--crash-dumps-dir=/tmpz--data-path=/tmp/chrome-dataz"--disk-cache-dir=/tmp/chrome-cachez--disable-loggingz--disable-logging-redirectz--log-level=3z--single-processz [Chrome Init] Using user agent: 2   ...z--user-agent=excludeSwitcheszenable-automationuseAutomationExtensionFz:99DISPLAYz	/dev/nullDBUS_SESSION_BUS_ADDRESSr      z/tmp/chrome-session-T)exist_okz/tmp/chrome-cachez/tmp/chrome-dataz*Could not create Chrome temp directories: z--user-data-dir=
CHROME_BINGOOGLE_CHROME_BINz3[Chrome Detection] Found via environment variable: )r   zchromium-browserchromiumr  z.[Chrome Detection] Found in PATH using 'which z': z-[Chrome Detection] Checking common paths for Windowsz5C:\Program Files\Google\Chrome\Application\chrome.exez;C:\Program Files (x86)\Google\Chrome\Application\chrome.exer   )r   r   z/usr/bin/chromium-browserr   z/opt/google/chrome/chromez/snap/bin/chromiumDarwinz</Applications/Google Chrome.app/Contents/MacOS/Google Chromez2/Applications/Chromium.app/Contents/MacOS/Chromiumz"[Chrome Detection] Found at path: z3[Chrome Detection] Chrome not found in common pathsz+[Chrome Verification] Verifying Chrome at: u)   [Chrome Verification] ✓ File exists at r   rI   r   u0   [Chrome Verification] ✓ Chrome is executable: u1   [Chrome Verification] ⚠ Chrome returned error: u*   [Chrome Verification] ✗ File not found: u4   [Chrome Verification] ⚠ Chrome --version timed outu3   [Chrome Verification] ⚠ Could not verify Chrome: z#[Chrome Init] Using Chrome binary: zL[Chrome Init] Chrome binary not found. Selenium will attempt auto-detection.zK[Chrome Init] Installing/verifying ChromeDriver with ChromeDriverManager...z%[Chrome Init] ChromeDriver ready at: z,[Chrome Init] Creating Selenium WebDriver...)serviceoptionsu4   [Chrome Init] ✓ WebDriver initialized successfullyu2   [Chrome Init] ✗ Failed to initialize WebDriver: r   r   r   z)[Chrome Cleanup] Removed temp directory: ).r   RuntimeErrorr  r   r   add_argumentrA   rE   rS   rT   r%   r>   GOOGLE_USER_AGENTSadd_experimental_optionr:   r;   uuidr\   uuid4makedirsr[   rU   r<   r   r   r   r   r   r   r   r   r   r   r   r   rs   r   binary_locationr   installr   r   Chromer   localsr   )r!  r"  r3  	proxy_url
user_agentr8  unique_session_idr   ra   r   r   foundr   possible_pathsr   r`   chromedriver_pathr2  r   s                      r*   initialize_selenium_driverrE  =  s	    &'lmmF1-XYY &'iG-.6701_-^,7867 "$	.y9I  ?9+!>?KK5in5EFG HI89'01/0,-6723OP56:;)*./,-34()*=>:;'@ACD;<YZ<=1212./12;<*+7878<=9:23<=./,-=>>?78CD561256NO7834;<-. -.344523/0*+[\/012 78=>1278=>,-56) +, 12J
KK2:cr?2C3GH=56##$58K7LM##$<eD "BJJy-8BJJ)* DJJL)"1-*+<*=>MN
MD1
'$7
&6 +M?;< **...U"**..AT2UKI+WX N 	CLL%E#LSEQTU`Tabc	 "CF83OPYHNN wN xNDN
  N" 	Dww~~d#"@NO		 NNPQ A+OP	[ww~~k*G}UV#k(BSW^blmn$$)KK"RSYS`S`SfSfShRi jkNN%VW]WdWdVe#fgI+WX" "-9+GHefab/199;;<M;NOP+,BC!!'7CJK -}  NCCF8LMMNN (( 	SNNQR 	[NNPQTUVQWPXYZZ	[.  	I#a&R]ab	&(*rww~~m/Lm4@GWX 	  			sv   Af( Cg  Bi (	g1!gg (h?h?!h::h?	k#k/Ak
	k
	kkkkkc                 ^   	 t        | d      ri| j                  r\| j                  }t        j                  j	                  |      r0t        j                  |d       t        j                  d|        yyyy# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY wr   r   r   s      r*   r   r   ,  r   r   c                 z   g d}|D ]`  }	 | j                  t        j                  |      }|D ]8  }|j                         s|j	                          t        j                  d       : b 	 | j                  j                  j                  t        j                         y # t        $ r Y w xY w# t        $ r Y y w xY w)N)zbutton.cookie-acceptzbutton#cookie-acceptzbutton.acceptzbutton.btn-acceptzbutton[aria-label='Close']zbutton.closez.modal button.closez.popup button.closez.cookie-consent buttonzdiv#cookie-consent buttonzbutton[title='Close']333333?)find_elementsr   CSS_SELECTORis_displayedclickr'   r(   r[   	switch_toactive_element	send_keysr   ESCAPE)r   	selectorsselelemsels        r*   dismiss_overlaysrU  9  s    I  	((#>E $??$HHJJJsO$''11$++>  		  s)   5B'B+3B. 	B+*B+.	B:9B:r   c                    g d}|D ]x  \  }}}	 |dk(  r!| j                  t        j                  |      }n | j                  t        j                  |      }|j	                         r|j                         sp	 |j                          t        dd       	 |j                          |j                  |       t        dd       d}	 |j                  t        j                         d}|s	 |j                          d}|ssg d
}
|
D ]j  \  }}	 | j                  t        j                  |      }|rB|d   j	                         r/|d   j                          t        j!                  d|        d} nl |rt        dd        y{ y#  Y xY w#  Y xY w# t        $ r
}	Y d	}	~	d	}	~	ww xY w# t        $ r
}	Y d	}	~	d	}	~	ww xY w# t        $ r Y w xY w# t        $ r}	Y d	}	~	d	}	~	ww xY w)z7Find and interact with search input on vendor websites.)	)xpathz//input[@type='search']zsearch input type)rW  zs//input[@placeholder[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'search')]]zsearch placeholder XPath)rW  zN//input[@name='search' or @name='q' or @name='keyword' or @name='searchInput']znamed search input)csszinput[type='search']zCSS search input)rX  z0[placeholder*='search'], [placeholder*='Search']zCSS search placeholder)rX  z:input[placeholder*='buscar'], input[placeholder*='Buscar']zSpanish buscar)rW  z//form//input[1]zfirst form input)rW  z&//input[@class[contains(., 'search')]]zsearch class input)rX  z/input.search, input.search-box, input.searchboxzcommon search classesrW  rH  g333333?皙?g333333?FTN))z//button[contains(., 'Search')]zSearch button text)z//button[contains(., 'Buscar')]zBuscar button text)//button[@type='submit']zsubmit button)//input[@type='submit']zsubmit input)$//button[contains(@class, 'search')]zsearch class buttonr   z[Search] Submitted with r1      )find_elementr   XPATHrJ  rK  
is_enabledrL  r+   clearrO  r   RETURNr[   submitrI  rS   rT   )r   r   r   search_selectorsselector_typeselectordescriptionelementsubmit_successra   button_selectorsbtn_selectorbtn_descbuttonss                 r*   find_and_searchrn  V  s   
 1A I,xH	' --bhhA --booxH '')1C1C1EC%

 l+S! #N!!$++.!%
 "NN$%)N
 "$  /? 	!*L(!"("6"6rxx"N"wqz'>'>'@#AJ,,."KK*B8*(MN-1N!	! Aq!MIV y
   ! ( % ! !  		s   A&G8FF%G!F&G)F(;G
A"F>,GFGFG	F%G F%%G(	F;1G6F;;G>	G
G	G

G	G!G!page_sourcec                    t        | d      }d}|j                  dd      }|D ]P  }	 |j                  st        j                  |j                        }fd |      }|rd|cxk  rdk  rn n|c S R |sR|j                  d	d
i      }|r<|j                  d      xs |j                  }t        |      }|rd|cxk  rdk  r|}|S  |s|j                  d      }	t        j                  d|	      }
g }|
D ]0  }t        |      }|sd|cxk  rdk  sn  |j                  |       2 |r0t        t        t!        |                  }|t#        |      dz     }|S |S # t
        $ r Y Gw xY w)z2Extract price from HTML using multiple strategies.html.parserNscriptzapplication/ld+json)typec                    t        | t              rw| j                  d      dk(  rd| v r	 t        | d         S d| v r(| d   }t        |t              rd|v r	 t        |d         S | j                         D ]  } |      }|s|c S  y t        | t              r| D ]  } |      }|s|c S  y # t        t
        f$ r Y w xY w# t        t
        f$ r Y pw xY w)Nz@typeOfferr   offers)
isinstancedictr<   r   r   r   valueslist)objrv  valuer`   item
find_prices        r*   r~  z+extract_price_from_html.<locals>.find_price  s    c4(www'72w#~!#(W#66  3!$X%fd368I%',VG_'= = "% *!+E!2!#)M*   T* # *!+D!1!#)M* % !+I6 ! ! %/	#: % $%s#   B/ C /C CCC      ?g     @itempropr   )attrscontentry   \d+[.,]\d{2}r1   )r   find_allstringrO   loadsr[   findr<   r   r   get_textr   findallrY   sortedrz  setrZ   )ro  soupr   scriptsrr  r_   	price_tagr  
normalizedr   matchespricesmr~  s                @r*   extract_price_from_htmlr    s   m4DM mmH+@mAG $#	==::fmm,D2 't,M!?!?$$C$N IIZ$9I:	mmI.@)..G(1JcZ969 *$$ :
 }}S!**_d3 	*A(+JcZ969j)	*
 DV-.F"3v;!#34M  ;  		s   E =E  	E-,E-max_retriesc                 z   t        |      D ]^  }	 t        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  d	ft        j                  d
ft        j                  dfg}d}|D ]S  \  }}	 | j                  ||      }|D ]2  }		 |	j                         r|	}t        j                  d| d|         n4 |r nU |s9t        j                  d       t        j                  dt        |       d        y	 |j                          t        dd       	 |j                          |j                  |       t        j                  d|        t        dd       d}
	 |j                  t         j"                         t        j                  d       d}
|
sF	 |j                  t         j$                  t         j"                         t        j                  d       d}
|
spg d}|D ]g  }	 | j                  t        j                  |      }|rB|d   j                         r/|d   j                          t        j                  d|        d}
 ni |
s(	 |j)                          t        j                  d       d}
|
sL	 |j+                  t        j                  d       }|r)| j-                  d!|       t        j                  d"       d}
|
s,t        j                  d#       ||d$z
  k  rt/        |       Q yt        d%d&        y y#  Y xY w#  Y xY w#  Y xY w#  Y xY w#  t        j                  d       Y xY w#  Y jxY w# t&        $ r Y qw xY w# t&        $ r Y w xY w# t&        $ r Y w xY w# t&        $ rO}t        j1                  d'|d$z    d(| d)t3        |              ||d$z
  k  rt/        |       nY d}~ yY d}~)d}~ww xY w)*z:Search on Google with retry logic and exponential backoff.ro   ztextarea[name='q']zinput[name='q']z//textarea[@name='q']z//input[@name='q']z[aria-label='Search']zinput[aria-label='Search']z//*[@aria-label='Search']z[role='searchbox']z$//*[@role='combobox' and @aria-owns]zbody input[type='text']Nz#[Discovery] Found search box using r   z>[Discovery] Could not find Google search box with any selectorz[Discovery] Attempted z selector combinationsFrH  gffffff?z[Discovery] Typed query: rY        ?z[Discovery] Pressed EnterTz6[Discovery] Enter key failed, trying alternate methodsz[Discovery] Pressed Ctrl+Enter)z%//button[@aria-label='Google Search']z%//button[@aria-label='google search']z)//button[contains(@aria-label, 'Search')]z$//button[contains(text(), 'Search')]z2//input[@type='submit' and @value='Google Search']r[  rZ  r\  z@//button[contains(@class, 'btn') and contains(@class, 'search')]//button[@jsaction]z'//button[contains(@data-ved, '0ahUKE')]z///div[@role='button' and contains(., 'Search')]z1//span[contains(text(), 'Search')]/parent::buttonz1//span[contains(text(), 'Buscar')]/parent::buttonr   z2[Discovery] Clicked search button using selector: z[Discovery] Submitted formzancestor::formzarguments[0].submit();z)[Discovery] Submitted form via JavaScriptz4[Discovery] Could not submit search using any methodr   r]  rI   z0[Discovery] Error during Google search (attempt r   z): )ranger   NAMErJ  r_  rI  rK  rS   rT   rU   r   rZ   rL  r+   ra  rO  r   rb  CONTROLr[   rc  r^  execute_scriptr5   rs   r\   )r   rF   r  r-   rd  
search_boxre  selector_valueelementsrh  ri  rj  rk  rm  formra   s                   r*   search_googler    s5   % QP	 #"67"3423/0"9:">?67"67AB";< " J1A -~%33M>RH#+ %%&335-4
 &.QR_Q``bcqbr,s t %  6% " "  _`5c:J6K5LLbcd  "C%
  "
   'KK3E7;<S! #NW$$T[[178!%
 "((t{{CKK @A%)N
 "$   %5 	!L!"("6"6rxx"N"wqz'>'>'@#AJ,,."KK*\]i\j(kl-1N!	! "%%'KK <=%)N
 "%22288=MND--.FM$OP)- "UV[1_,-g61UQf k%$
WUV8 % ! ! !  !   	LLKGVWK=XYZeYffijmnojpiqrsq()'2 3	s  CO"M71-M/M7$=O"$M?N7O"	6N?O"AN,O"A"N45O"<'O#O"&AO1*O"O"/M4	1M77M<9O"?NO"N	O"N)&O",N1.O"4	O=O" OO"	OO"OO"	OO"OO""	P:+>P55P:registered_domainsc                 L   g }|  d|dd  d}t         j                  d|        t         j                  d|        t         j                  d       d}	 t        d	      }d
dg}|D ]  }	 t         j                  d| d       |j                  |       t	        dd       d|j
                  v st        |j                        dkD  rUt         j                  d|        	 |j                  d       t	        dd       |j                  d       t	        dd        n 	 g d}
|
D ]~  }	 |j                  t        j                  |      }|D ]V  }	 |j                         rC|j!                         r3|j#                          t         j%                  d       t	        dd        nX  	 t         j                  d|        t'        ||d      }|j(                  j+                         |j                  j+                         g d}t-        fd|D              }|rt         j                  d        d}|rI|sF	 |j                  }t/        |d!      }g }|j1                  d"d#$      }t         j                  d%t        |       d&       |D ]  }|j                  d'd(      j3                  d)      s'd*j+                         vs:t-        fd+d,D              rO	 t5              j6                  j9                  d-d(      }|r-||vr)|j;                         t         j%                  d.|         t=        t?        |            dd0 }t         j                  d1t        |       d2       tA        |d3      D ]  \  }}	 t         j                  d4| d5t        |       d6|dd7         |j                  |       	 tC        |d8      jE                  d9        t	        d:d;       |j                  tG              }|r\tI        |      }tK        ||      \  }}tM        ||      }|j;                  d|||||||d<       t         j                  d=| d>|        nt         j%                  d?|        t	        ddA        	 |r	 tQ        |       |jS                          |rr{t         j                  dE       tU        |      }|rCd}	 t        d	      }tA        |d3      D ]  \  }}	 t         j                  dF| d5t        |       d6|dd7         |j                  |       	 tC        |d8      jE                  dG        t	        d:d;       |j                  tG              }|r\tI        |      }tK        ||      \  }}tM        ||      }|j;                  d|||||||d<       t         j                  d=| dH|        nt         j%                  d?|        t	        ddA        	 |r2	 tQ        |       |jS                          nt         j                  dK       t         j                  dLt        |       dM       |S # t        $ r Y 3w xY w# t        $ r/}	t         j                  d| dt        |	              Y d}	~	d}	~	ww xY w#  Y FxY w#  Y txY w# t        $ r,}	t         j%                  dt        |	              Y d}	~	%d}	~	ww xY w# t        $ r,}	t         j%                  d/t        |	              Y d}	~	yd}	~	ww xY w#  Y ;xY w# t        $ r/}	t         j                  d@| dt        |	              Y d}	~	d}	~	ww xY w# t        $ r,}	t         jO                  dBt        |	              Y d}	~	 d}	~	ww xY w# t        $ r.}	t         jO                  dCt        |	       d#D       Y d}	~	:d}	~	ww xY w#  Y )xY w# |r$	 tQ        |       |jS                          w #  Y w xY ww xY w#  Y xY w# t        $ r/}	t         j                  dI| dt        |	              Y d}	~	Vd}	~	ww xY w# t        $ r,}	t         j                  dJt        |	              Y d}	~	~d}	~	ww xY w#  Y WxY w# |r$	 tQ        |       |jS                          w #  Y w xY ww xY w)Nz
    Discover alternative vendors via search (Google/Bing) and scrape their prices.
    Uses multiple fallback strategies when Google blocks the request.
    ry   Nr   z precio comprar onlinez([Discovery] Starting search with query: z+[Discovery] Registered domains to exclude: z3[Discovery] Strategy 1: Attempting Google Search...F)r"  zhttps://www.google.eszhttps://www.google.comz[Discovery] Loading r&  r1   g      @Googlei  u   [Discovery] ✓ Loaded z+window.scrollTo(0, window.innerHeight / 4);      ?r  zwindow.scrollTo(0, 0);z[Discovery] Could not load r   )z //button[contains(., 'Aceptar')]z//button[contains(., 'Accept')]z//button[contains(., 'all')]r  z$[Discovery] Dismissed cookie consentz&[Discovery] Cookie dismissal skipped: z*[Discovery] Attempting Google search for: )r  )
sorry	recaptcharobot	automatedblockedzunusual trafficverifycaptchapuzzle	challengec              3   2   K   | ]  }|v xs |v   y wNr  ).0	indicatorcurrent_urlro  s     r*   	<genexpr>z/discover_alternative_vendors.<locals>.<genexpr>  s$     vV_Y+5Qk9QQvs   u3   [Discovery] ⚠ CAPTCHA or block detected on Googlerq  aT)hrefz[Discovery] Found z total links on pager  r}   httpgooglec              3   &   K   | ]  }|v  
 y wr  r  )r  xr  s     r*   r  z/discover_alternative_vendors.<locals>.<genexpr>  s!       \Ojk\]ae\e  \Os   )z/ads/z/aclkwebcacher   z[Discovery] Found link: z [Discovery] Error parsing link: rI   z[Discovery] Will scrape z  unique vendor links from Googler   z[Discovery r   z] Scraping: r   rN   c                 *    | j                  d      dk(  S Nzreturn document.readyStatecompleter  ds    r*   <lambda>z.discover_alternative_vendors.<locals>.<lambda>      !*:*:;W*X\f*f r,   r        @	vendor_idr   r   r   rg   r   price_differencepercentage_differenceu   [Discovery] ✓ Price z
 found at z[Discovery] No price found at z[Discovery] Error scraping    z*[Discovery] Error parsing Google results: z#[Discovery] Google strategy error: r   zN[Discovery] Strategy 2: Google failed/blocked, attempting Tavily API Search...z[Discovery-Tavily c                 *    | j                  d      dk(  S r  r  r  s    r*   r  z.discover_alternative_vendors.<locals>.<lambda>Q  r  r,   z found via Tavily at z"[Discovery] Tavily error scraping z#[Discovery] Tavily scraping error: z*[Discovery] Tavily API returned no resultsu0   [Discovery] ✓ Completed all strategies. Found  alternative vendors)+rS   rT   rE  r<   r+   r   rZ   ro  r  r[   rU   r\   rI  r   r_  rK  r`  rL  r   r  r  r   anyr   r  rD   r	   r   r   rY   rz  r  r   r   untilr  r   r  r  rs   r   quitrb   ) rf   re   rg   r  discovered_vendorsr   r   google_urls
google_urlra   dismiss_buttonsrf  rm  btngoogle_successcaptcha_indicatorsgoogle_blockedhtmlr  result_links	all_linksr~   r   r   
disc_pricediff	perc_diffr   	api_linksr  r  ro  s                                 @@@r*   discover_alternative_vendorsr    s   
 YaSb 122HIL
KK:<.IJ
KK=>P=QRS KKEFF[+e< $$

 & 	J2:,cBC

:&As# v||+s63E3E/F/MKK"9* FG--.[\#C---.FG#C-  0N	.	LO , $22288XFG& !!"//1cnn6F #		 &-S T +C 5 %!$ 	@OP&v|K ((..0((..0

 vcuvvNNPQ"N .DT))$T=9  " MM#DM9	0Y0@@TUV% 
VD88FB/Dv.84::<3OX[  \O  pN  \O  YOV%-d^%:%:%B%B62%NF%&8J*J , 3 3D 9 &/Gx-P Q
V  $C$56r:6s<7H6IIijk "+<!; '&IC$Wk#aL8I7J,W[\`]`WaVb$cd

4(!)&"5;; f $C-&,&8&8%<[%I
%%8%>F.Hj.YOD)%@j%QF.55-1/5.21;'**0489B	7 	 #KK*@JW]V^(_`"LL+I$)PQ
  1%O'&^ &v.
 de%l3	F33eD!*9a!8 '&IC$^&8Qs9~>Nl[_`dad[eZf$gh

4(!)&"5;; f $C-&,&8&8%<[%I
%%8%>F.Hj.YOD)%@j%QF.55-1/5.21;'**0489B	7 	 #KK*@Labhai(jk"LL+I$)PQ
  1%O'&X .v6 NNGH
KKB3GYCZB[[opqI %   !<ZL3q6(ST,!  	LLLA#a&JKK	LT  ) V"LL+KCPQF8)TUUV"! 4 % W)DTF"SQRVH'UVVW
  TI#a&RSST  T:3q6(CdSST &v. 2! 4 % ^)KD6QSTWXYTZS['\]]^
  O!DSVHMNNO .v6 s0  ^ /A4Y$:Y^ #	Z  -%ZAZ$Z(Z  *B^ >A7] 6] 	] A[3A] ;\>\B\3] ^ _ `; ,;` (_8B` `; ,a; 0a3 	YYYY	Z$Z^ Z^ ZZZZ   	[)![
^ [^ 	\!!\] \] \\	]!$]] ]] 	^!^=^ ^^ 	_#^=7_ =__ _
_5_-,_5-_1/_58_=:`  	`8	$`3-`; 3`88`; ;	a0!a+%a; +a00a; 3a8;b#?bb#bb#c                 
   g }t         j                  dd        t         j                  d       t         j                  d|  d|        t         j                  d|        t         j                  dt        |              t         j                  d d       	 t        || |      }t         j                  d	t        |       d
       |rnt         j                  d       t	        |dd d      D ]G  \  }}|j                  dd      }|j                  dd      }	t         j                  d| d| d|	        I |st         j                  d       |S t         j                  dt        |       d       t	        |d      D ]  \  }}	 |j                  dd      }
|j                  dd      }|j                  dd      }|j                  d      }|j                  dd      }|j                  dd      }|
st         j                  d | d!| d"       ||v rt         j                  d#| d$| d%       d}d&}|D ]H  }|j                         |j                         v s!|j                         |j                         v sD|}d'} n |rt         j                  d#| d(| d)       t         j                  d*| d+       t         j                  d,|j                          d-|j                          d.|j                          d-|j                          d/	       t         j                  d0| d(| d1       t         j                  d2t        |       d3|        ||
||||||t        j                         d4	}|rT||z
  }||d5<   |d6kD  r||z  d7z  nd6|d8<   t         j                  d9| d$| d:|
dd;  d<|d=d> |j                          
       n3t         j                  d9| d$| d:|
dd;  d? |j                                 |j                  |        t         j                  dAt        |       dB       t        |      t        |      z
  }t         j                  dCd        t         j                  dD       t         j                  dEt        |              t         j                  dFt        |              t         j                  dG|        t         j                  dHd d       |S # t        $ r/}t         j                  d@| d$t        |              Y d}~d}~ww xY w# t        $ r,}t         j                  dIt        |              Y d}~|S d}~ww xY w)Ja  
    Discover alternative vendors using SERP API Google Shopping Light Engine.
    Queries SERP API for shopping results and extracts vendor information with prices.
    Sequence: First vendors scraped, then SERP API used for discovery.
    
    Args:
        barcode: Product barcode
        product_name: Product name
        msp: Minimum Selling Price
        registered_domains: Set of already registered vendor domains to exclude
    
    Returns:
        List of discovered vendor dictionaries with price information
    
P================================================================================u-   [Discovery SERP] 🔍 STARTING SERP DISCOVERYz[Discovery SERP] Product: r   z0[Discovery SERP] Registered domains to exclude: z/[Discovery SERP] Number of registered domains: )re   rf   rg   u0   [Discovery SERP] ✓ search_serp_api() returned z total results to processz/[Discovery SERP] Sample vendor names from SERP:Nr]  r   r   r{   r   r   z  [z] r   z-[Discovery SERP] SERP API returned no resultsz[Discovery SERP] Processing z SERP API results
r   r}   r   rz   r   r   r   z![Discovery SERP] Skipping result  (z	): no URLu   [Discovery SERP] ⊘ Result r   z' - FILTERED (vendor already registered)FTz: 'z' - FILTEREDz;           Reason: Substring match with registered domain 'rk   z           Logic: 'z' in 'z' OR 'z' = TRUEr   z' PASSESz$           Reason: No match against z registered domains: )	r   r   r   r   rg   source_typer   r   violation_dater  r   r   r  z[Discovery SERP] Result z - URL: r   u    - Price: ₹r   r   z - Price: N/A - Status: z)[Discovery SERP] Error processing result u    [Discovery SERP] ✓ Discovered z! alternative vendors via SERP APIz
[Discovery SERP] z#[Discovery SERP] FILTERING SUMMARY:z0[Discovery SERP]   Total SERP results received: u5   [Discovery SERP]   ✓ Passed filters (new vendors): u6   [Discovery SERP]   ⊘ Filtered (registered vendors): z[Discovery SERP] z3[Discovery SERP] Critical error in SERP discovery: )rS   rT   rZ   r   r   r<   rU   r   r   r   nowr   rY   r[   r\   rs   )rf   re   rg   r  r  rQ   r   r`   vendorr   r   r   r   r   r  r   matching_domainis_registeredr   discovery_dict
price_diffra   filtered_counts                          r*   !discover_alternative_vendors_serpr    s     KK"VH
KK?A
KK,WISGH
KKBCUBVWX
KKA#FXBYAZ[\
KK6("mU "|WRUVFs7|nTmnoKKIK(!a8 DVM9=

?E:c#b
5'BCD
 NNJK%%23w<.@STU %Wa0 K	KCJ#ZZb9
$jj	B &

?B ? &

? ;$jj3LMHi8!LL#DSEK=Xa!bc "44KK">se2k]Ry z{ #' %0 F"((*flln<R]RcRcRe@e*0(,	 !KK">se3{mS_ `aKK"]^m]nno pqKK"5k6G6G6I5J&Q`QfQfQhPiiop  qF  qF  qH  pI  IO  P[  Pa  Pa  Pc  Od  dl  !m  nKK">se3{mS[ \]KK"FsK]G^F__t  vH  uI  !J  K $/",%2%2#.$)/&.lln
" !!$}!4J9CN#56Z]`aZazC?ORU?UghN#:;KK23%r+ G"",Sb/!2 3''4S&9 :%%1V\\^$46 KK23%r+ G"",Sb/!2 3%%1V\\^$46 #)).9OK	Z 	6s;M7N6OOpqr W,>(??)&239;Fs7|nUVKCPbLcKdefL^L\]^'xr23
 %  !J3%rRUVWRXQYZ[  UJ3q6(STTUsx   B8T, 5T, B	S1T,  S16T, 7A	S1BS1T, C8S1CT, 1	T):$T$T, $T))T, ,	U!5!UU!productr  timeout_secondsc           	      \   	 t         j                  d|j                   d|j                   d       t         j                  d       |j	                  |j                         	 t        ||      j                  d        t        |       t        dd	       t         j                  d
|j                          t         j                  d| j                   d       t        || j                  |j                        }|r"t         j                  d       t        dd       nt         j                  d       t         j                  d| j                   d       t        || j                  |j                        }|r"t         j                  d       t        dd       n!t         j                  d       t        dd       	 t        |       |r2	 t        |d      j                  d        t         j                  d       |j                  }t!        |      }|r't         j                  d| d|j                          |S t         j                  d|j                          t         j                  d|j                          t         j                  d|j"                          y#  t         j                  d| d       Y DxY w#  Y xY w#  t         j                  d       Y xY w# t$        $ r8}t         j'                  d|j                   dt)        |              Y d}~yd}~ww xY w) z?Scrape price from vendor website with improved search strategy.z[Vendor] ===== Scraping r  z) =====z[Vendor] Loading website...c                 *    | j                  d      dk(  S r  r  r  s    r*   r  z'scrape_vendor_website.<locals>.<lambda>  s    !**+GHJV r,   z![Vendor] Page load timeout after zs, continuing anyway...r  r  z[Vendor] Page loaded. Title: z)[Vendor] SEARCH ATTEMPT 1: Product name 'rk   u*   [Vendor] ✓ Product name search succeededr   r1   z6[Vendor] Product name search failed. Trying barcode...z$[Vendor] SEARCH ATTEMPT 2: Barcode 'u%   [Vendor] ✓ Barcode search succeededzE[Vendor] Barcode search also failed. Trying to scrape current page...rI   c                 B    t        j                  d| j                        S )Nr  )r   searchro  r  s    r*   r  z'scrape_vendor_website.<locals>.<lambda>C  s    biiG r,   z'[Vendor] Price pattern detected on pagez)[Vendor] No price pattern found within 5su    [Vendor] ✓✓✓ PRICE FOUND: z from u%   [Vendor] ✗ No price extracted from z[Vendor] Page title: z[Vendor] Current URL: Nu    [Vendor] ❌ Scraping error for r   )rS   rT   namewebsite_urlr<   r   r  rU   rU  r+   r   re   rn  rf   r   ro  r  r  r[   rs   r\   )r  r  r   r  search_successro  r   ra   s           r*   scrape_vendor_websiter    s   H.v{{m2f>P>P=QQXYZ 	13

6%%&	i&/288V 	 C3FLL>BC 	?@T@T?UUVWX(1E1Ev{{SKKDF1 NNSUKK>w>OqQR,VW__fkkRNCEAq!!fhAq!	V$
 Kfa(..G FH
 ((/<KK:=/PVP[P[}]^  NNB6;;-PQLL0?@LL1&2D2D1EFGq	iNN>>OOfgh>	KHJ  7}Bs1vhOPsh   A K* #J% ?D?K* ?K 
K* 1K >?K* >A&K* %K K* KK* K'%K* *	L+3.L&&L+c                       e Zd ZdZe	 	 	 ddedededededefd       Z	ed	e
d
ee   dededeee   ee   f   f
d       Ze	 	 	 ddedededededefd       Zy)ScraperServicez1Service for scraping product prices from vendors.r  
product_idenable_discoveryheadless_moder  r6   c                   K   | j                  t        t              j                  t        j                  |k(               d{   }|j                         j                         }|st        t        j                  d| d      | j                  t        t              j                  t        j                  dk(               d{   }|j                         j                         }|st        t        j                  d      g }	g }
t               }t               }|D ](  }t        |j                         }|j#                  |       * 	 t%        j&                  t(        j*                  ||||      }| d{   \  }	}
|rt,        j/                  d|        	 t               }|	D ]0  }|j1                  dd	      }t3        |      }|j#                  |       2 t,        j/                  d
t5        |       d       d}g }d}||k  r|dz  }t,        j/                  d| d|        t%        j&                  t6        |j8                  |j:                  t=        |j>                        |       d{   }t,        j/                  d| dt5        |       d       |jA                  |       t,        j/                  dt5        |       d       ||k\  rt,        j/                  d| d       n||k  rt,        j/                  dt5        |       d       |D ]  }d|vsd|d<    |	jA                  |       nt,        j/                  d|        t               }g }g }|	D ]  }|j1                  dd	      }|j1                  dd      }|j1                  d       }|j1                  d!      }||j:                  ||rtI        |d"      ndf}||vrU|j#                  |       |jK                  |       t,        j/                  d#| d$|dd%  d&|j:                   d'| d(| d)       |jK                  |       t,        j/                  d*| d$|dd%  d&|j:                   d'| d(| d+        |}	t,        j/                  d,       t,        j/                  d-t5        |	      t5        |      z           t,        j/                  d.t5        |	              t,        j/                  d/t5        |              t,        j/                  d0tM        d1 |	D                      t,        j/                  d2tM        d3 |	D               d4       t               } |	D ]`  }|j1                  d5      }!|d   }|d   }"|d    }|d!   }|d6   }#|j1                  d7      }$|j1                  d8      }%|!rd9}&n|j1                  d      d:k(  rd:}&nd}&t        |"      }d}'|!r	 tO        |!tF              rtQ        |!      n|!}'tY        |j                  |'|j:                  |j8                  tG        |jZ                        ||"||||$|%|#|&<      }(| j#                  |(       |#d=k(  s||j:                  |tI        |d"      f})|)| v r-t,        j/                  d>| d$|j:                   d?|"dd@         Gt]        | |"||j8                  |j:                  ||A       d{   }*|*s|'rt        |"      }+n|}+t_        |'||j:                  |||$|%|+ta        jb                         |"|j8                  tG        |jZ                        |&B      },| j#                  |,       | j#                  |)       |j#                  |)       t,        jW                  dC| d$|j:                   dD| dE| d)	       9t,        j/                  dF| d$|j:                   dD|        c ta        jb                         |_2        ta        jb                         jg                         }-|-|_4        | jk                          d{    | jm                  |       d{    g }.|
D ]  }/|/j1                  dd	      }"|/j1                  dd      }t=        |/j1                  d d            }|/j1                  d!|j>                        }||j:                  |tI        |d"      f})|)|v r?|.jK                  |/       t,        j/                  dG|/j1                  d       d$|"ddH         t,        j/                  dI|/j1                  d       d$|"ddH          t,        j/                  dJt5        |	       dKt5        |.       dL       ||j:                  t=        |j>                        t5        |	      t5        |      |	|.|-dM}0|0S 7 	67 7 7 # tB        $ r.}t,        jE                  dtG        |       d       Y d}~gd}~ww xY w# tR        tT        f$ r t,        jW                  d;|!        d}'Y w xY w7 57 7 # tB        $ rf}1t,        jE                  dNtG        |1              | jo                          d{  7   t        t        jp                  dOtG        |1             d}1~1ww xY ww)Pa  
        Scrape a product against all registered vendors.
        
        Args:
            db: Database session
            product_id: Product ID to scrape
            enable_discovery: If True, also discover vendors via Google (disabled by default)
            headless_mode: Run browser in headless mode
            timeout_seconds: Timeout for page load
            
        Returns:
            Dictionary with scraping results
        NProduct with ID 
 not foundstatus_codedetailTNo active vendors foundz/[Scraper] Google discovery ENABLED for product r   r}   z[Discovery] Starting with z vendor URLs to check againstr]  r   r   z[Discovery] Attempt r    completed. Found r  z[Discovery] Added  vendor URLs to storagez[Discovery] Completed 	 attemptsz[Discovery] COMPLETE: Found z total unique discovery URLssource_type_hint
discoveredz([Discovery] Exception during discovery: r   z0[Scraper] Google discovery DISABLED for product r   r{   r   rg   r1   u   [Dedup] ✅ KEPT: r   F    (Product: , MSP: r  r
  u   [Dedup] ❌ REMOVED DUPLICATE: z1) - Same vendor already found from another sourcez
[Deduplication Summary]z  Input URLs: z  Unique results kept: z  Duplicates removed: z  Vendor scraped: c              3   D   K   | ]  }|j                  d       sd  ywr  r   Nr<   r  rs     r*   r  z0ScraperService.scrape_product.<locals>.<genexpr>  s     0Zqquu[GY0Z     z  Discovery found: c              3   D   K   | ]  }|j                  d       rd  ywr  r  r  s     r*   r  z0ScraperService.scrape_product.<locals>.<genexpr>  s     1_AEER]L^!1_r  r  r  r   r  r  
registeredserp_api_shoppingz%Could not convert vendor_id to UUID: )r  r  re   rf   reference_idr   r   domain_namerg   r   r  r  r   r  r   u1   ⚠️ IN-SESSION DUPLICATE VIOLATION PREVENTED: z at P   r   r  r   re   rg   r   r  r  marketplacer  rR   r  r  r     🚨 VIOLATION SAVED: u    @ ₹u
    (MSP: ₹u/   ⚠️ DATABASE DUPLICATE VIOLATION PREVENTED: u$   ✅ [Response] Including violation: r   u=   ⏭️  [Response] Skipping violation (duplicate/not saved): u   📊 [Response] Final Results: z unique URLs, z violations created)r  re   rg   scraped_countviolation_countrQ   
violationsexecution_time   ❌ Fatal scraping error: Scraping failed: )9r  r   r   r  idr  r  r   r   HTTP_404_NOT_FOUNDr   	is_activeallHTTP_400_BAD_REQUESTr  r   r  addasyncio	to_threadr  _run_scrapingrS   rT   r<   r   rZ   r  rf   re   r   rg   extendr[   rs   r\   r   rY   sumrw  r   r   r   rU   r   r  r  r   r   utcnowlast_scraped_date	isoformatlast_execution_timecommitrefreshrollbackHTTP_500_INTERNAL_SERVER_ERROR)2r  r  r  r  r  r`   r  vendor_resultvendorsrQ   violation_recordscreated_violations_setr  r  r   scraping_taskvendor_normalized_urlsrR   r  max_discovery_attemptsall_discoveredr-   r  r}  excseen_result_keysdeduplicated_resultsremoved_duplicatesr   r   rg   	dedup_keycreated_violationsr  r   vendor_statusr  r  rz   vendor_id_uuidscraping_resultviolation_keyis_duplicater  r   execution_time_strfiltered_violationsvrespra   s2                                                     r*   scrape_productzScraperService.scrape_product^  s2    , zz&/"7"7

j8P"QRR.."((*"55)*Z@  !jj)=)=f>N>NRV>V)WXX'')--/"770 
 !$ !U 	+F(););<F""6*	+~	#--,,M 0=)<&G&  Mj\Z[1g-0U*") ?$jjr:%A#%F
.22:>?
 KK"<SAW=X<YYv wx ./*%'NG!$::1&:7)1E[D\$]^ ,3+<+<8#OO#00!'++.., &
 &:7)CUVYZdVeUffz${| '--j9&8Z8II`$ab #&<<"KK*@AW@XXa(bc!+ "$::0 KK">s>?R>SSo pq . D-T97CD!34D NN>2 Nzl[\  #u#% !#! Tjjr2$jj	B &

? ;jj'
 )'*>*>`mU=Z[E\swx	$44$((3(//7KK"4[MS"XJkZaZnZnYoovwzv{  |E  FS  ET  TU  !V  W&--f5KK"A+cRUVYWYRZQ[[fgng{g{f|  }D  EH  DI  IR  S`  Ra  aR  !S  T#T& +GKK35KK.W<N8O)O(PQRKK1#g,@AKK05G1H0IJKKK,S0ZG0Z-Z,[\]KK-c1_W1_._-``bcd
 "%! ]W"JJ{3	$]3#L1
 & 7Um &x 0#ZZ(:;
"JJ'>?	 )FZZ 237JJ0F)F,Z8 "&.<FyRU<Vi\e #1&zz,!(!5!5#OO!$W%9%9!: +) &"/%/*3&3 &#" ' !K/ &1'2F2FUS`bcMd$eM %(::&WXcWddghoh|h|g}  ~B  CM  NQ  OQ  CR  BS  %T  U .G
M7??GL`L`bes~. (  ,-.A*.M.9(1*8,7-4-A-A$'.;1;6?,7/7/@$./6-01E1E-F,2)I FF9-.22=A266}E"NN-CK=PST[ThThSiiop}o~  I  JM  IN  NO  ,P  Q"KK*YZeYffijqj~j~i  @F  GT  FU  )V  W{]W@ )1(9G%!)!2!<!<!>*<G' ))+ **W%%% #%& LUU<4
eeM9= %aeeOQ&? @eeE7;;/ "-g.B.BC}^_I` a !$::'..q1KK"Fquu]G[F\\_`jknln`o_p qrKK"_`a`e`efs`t_uux  zD  EH  FH  zI  yJ  !K  LL& KK9#g,~VYZmVnUo  pC  D  E ) ' 4 4W[[)!$W#&'=#>"1"4	D Kg S Y8 *=2&8 ! gLL#KCPSH:!VaeLffgJ '	2 .)Nyk'Z[)-.H(P  &L  	LL5c!fX>?++-"AA*3q6(3 	s  Ai+e:Bi+	e=
Bi+,g9 ;f <"g9 Cf 2f3B f 4+f  f 8Jg9 	g &A g9 A4g9 ;g0<Eg9 g3g9 g6Eg9 9i+=i+ g9 f 	f=#f82g9 8f==g9  )g-)g9 ,g--g9 3g9 6g9 9	i(4i#6h97,i##i((i+r  r4  c                 `   g }g }d}	 t        |      }t        j                  dd        t        j                  d| j                   d| j                   d       t        j                  d| j
                          t        j                  d	t        |              t        j                  d d       t        |d
      D ]D  \  }}|j                  s3t        j                  d| dt        |       d|j                   d       Ft        j                  d| dt        |       d|j                          t        | |||      }	t        | j
                        }
t        |
|	      }d}d}|	rt        |
|	      \  }}|j                  t!        |j"                        |j                  |j                  |	|
|||d       |	r|dk(  r<t        j                  d| dt        |       d|j                   d|	 d|
 d| d       n|dk(  r<t        j                  d| dt        |       d|j                   d|	 d|
 d| d       njt        j                  d| dt        |       d|j                   d|	 d|
 
       n2t        j                  d| dt        |       d|j                   d       |dk(  rJ|j                  t!        |j"                        |j                  |j                  | j                  |	|
|d       t%        d d!       G t        j                  dd        t        j                  d"       t        j                  d#t        |       d$t        |       d%       t        j                  d d       ||f|r2	 t'        |       |j)                          t        j                  d&       S S #  Y S xY w# t*        $ r'}t        j-                  d't!        |               d}~ww xY w# |r9	 t'        |       |j)                          t        j                  d&       w #  Y w xY ww xY w)(z-Run the actual scraping in a separate thread.N)r!  r  r  z'[Scraper] Starting scrape for PRODUCT: z (Barcode: r
  z'[Scraper] MSP (Minimum Selling Price): z"[Scraper] Total vendors to check: r   z[Scraper] [r   z
] Vendor 'z!' has no website URL, skipping...z
[Scraper] [z] Processing vendor: r  r   u   ] 🚨 VIOLATION: z	 - Price z < MSP z (Diff: r  u   ] ⚠️  COMPLAIN: z > MSP z	 (Diff: +u   ] ✅ COMPLIANT: z = MSP u   ] ❓ UNKNOWN: z - No price found)r  r   r   re   r   rg   r  r  r  u2   [Scraper] ✓ Registered vendors scraping completez[Scraper] Results: z vendors checked, z violations foundu   [Scraper] ✓ WebDriver closedr  )rE  rS   rT   re   rf   rg   rZ   r   r  rU   r  r  r   r  r  rY   r\   r   r+   r   r  r[   rs   )r  r4  r  r  rQ   r5  r   r   r  r   rg   rA  r  r  ra   s                  r*   r(  zScraperService._run_scraping  si    Q	/GFKK"VH&KKA'BVBVAWWbcjcrcrbsstuvKKA'++OPKK<S\NKLKK6("&(!4 5&V))NN[Qs7|nJv{{m[|#}~mC5#g,?TU[U`U`Tabc !6gvv _GKK( ;C O!
 	 ,FsM,Z)J	!$VYY#);;"("4"4%2+(2-6	  	 !$3SE3w<.HZ[a[f[fZggpq~p  @G  HK  GL  LT  U_  T`  `a  (b  c&*4k#aG~EYZ`ZeZeYffop}o~  F  GJ  FK  KT  U_  T`  `a  %b  ck#aG~EVW]WbWbVcclmzl{  |C  DG  CH  %I  JNN[Qs7|nOTZT_T_S``q#rs !K/%,,%(^'-{{&,&8&8(/(<(<)6",6.  C%k5&n KK"VH&KKLNKK-c'l^;McRcNdMeevwxKK6("&-- *62KKMKK @A	 
  	LL5c!fX>?	 *62KKMKK @A sH   M9N= 0N66N:=	O-"O((O--O0 0P-40P%$P-%P)'P-c                 :  K   | j                  t        t              j                  t        j                  |k(               d{   }|j                         j                         }|st        t        j                  d| d      | j                  t        t              j                  t        j                  dk(               d{   }|j                         j                         }|st        t        j                  d      g }	g }
t               }t               }|D ](  }t        |j                         }|j#                  |       * 	 t%        j&                  t(        j*                  ||||      }| d{   \  }	}
|	D ]`  }|j-                  d      dk(  s|j-                  d	      }|d
   }|d   }|d   }t/        |d         }|j-                  d      }|j-                  d      }|r d}t1        |t2              rt5        |      n|}nd}d}t7        | |||j8                  |j:                  ||       d{   }|r|rt        |      }n|}t=        |||j:                  |||||t?        j@                         ||j8                  t3        |jB                        |      }| j#                  |       tD        jG                  d| d|j:                   d|dd  d| d	       c |rtD        jI                  d|        	 d}g }d}||k  r|dz  }tD        jI                  d| d|        t%        j&                  tJ        |j8                  |j:                  t/        |jL                        |       d{   }tD        jI                  d| d tO        |       d!       |jQ                  |       tD        jI                  d"tO        |       d#       ||k\  rtD        jI                  d$| d%       n||k  rt               } g }!g }"|D ]  }#|#j-                  dd&      }$|#j-                  d
d'      }|#j-                  d      }|#j-                  d      }||j:                  ||rtS        |d(      ndf}%|%| vrU| j#                  |%       |!jU                  |#       tD        jI                  d)| d|$dd*  d+|j:                   d,| d-| d       |"jU                  |#       tD        jI                  d.| d|$dd*  d+|j:                   d,| d-| d/        |!}tD        jI                  d0tO        |      tO        |"      z    d1tO        |       d2tO        |"       d3       |D ]  }#|#j-                  d      dk(  s|#j-                  d
d'      }&|#j-                  dd&      }'|#j-                  d      }(t/        |#j-                  d|jL                              })t7        | |'|(|j8                  |j:                  |)|&       d{   }*|*st=        d|&|j:                  |)|#d   |#j-                  d      |#j-                  d      |&|#j-                  dd&      |j8                  t3        |jB                        d4t?        j@                         5      }| j#                  |       |j#                  |&|j:                  |)tS        |(d(      f       |
jU                  |&|'|j:                  |(|)|#j-                  d      d6       tD        jG                  d7|& d|j:                   d|(        tD        jI                  d8|& d|j:                   d|(         tD        jI                  d9tO        |       d:       |	jQ                  |       tD        jI                  d;tO        |       d<       t?        j@                         |_-        t?        j@                         j]                         },|,|_/        | ja                          d{    | jc                  |       d{    tD        jI                  d>d?        tD        jI                  d@|        tD        jI                  dAtO        |	              tD        jI                  dBtO        |
              tD        jI                  d? d>       ||j:                  |j8                  t/        |jL                        |	|
tO        |	      |,tO        |
      |dCdDS 7 7 7 d7 7 n7 # tV        $ r,}+tD        jY                  d=t3        |+              Y d}+~+d}+~+ww xY w7 97 ## tV        $ rf}+tD        jY                  dEt3        |+              | je                          d{  7   t        t        jf                  dFt3        |+             d}+~+ww xY ww)Ga  
        Scrape a product against all registered vendors using SERP API for discovery.
        Same functionality as scrape_product, but uses SERP API Google Shopping Light 
        Engine instead of Tavily for discovering alternative vendors.
        
        Args:
            db: Database session
            product_id: Product ID to scrape
            enable_discovery: If True, also discover vendors via SERP API Shopping (disabled by default)
            headless_mode: Run browser in headless mode
            timeout_seconds: Timeout for page load
            
        Returns:
            Dictionary with scraping results
        Nr  r  r  Tr  r   r   r  r   r   r   rg   r  r  r  r  r  r  r  r   z @ r   z
 (Source: r
  z6[Scraper SERP] SERP API discovery ENABLED for product r]  r   r   z[Discovery SERP] Attempt r   r  r  z[Discovery SERP] Added r  z[Discovery SERP] Completed r  r}   r{   r1   u   [Discovery Dedup] ✅ KEPT: r  r  r	  r  u)   [Discovery Dedup] ❌ REMOVED DUPLICATE: z) - Same vendor already foundz[Discovery Dedup] Before: z results, After: z unique results (z duplicates removed)r  )r  r   re   rg   r   r  r  r  rR   r  r  r  r  )r   r   re   r   rg   r  u   🔍 SERP VIOLATION SAVED: u!   ⚠️ SERP DUPLICATE PREVENTED: z![Discovery SERP] COMPLETE: Found z* unique discovery URLs after deduplicationz'[Discovery SERP] Results extended with z discovery vendorsz.[Discovery SERP] Error during SERP discovery: r  r  u1   [Scraper SERP] ✓ Scraping complete for product z[Scraper SERP] Total Results: z![Scraper SERP] Total Violations: browser_with_serp_api_discovery)r  re   rf   rg   rQ   r  total_resultsr.  total_violationsdiscovery_enabledr  r  r  )4r  r   r   r  r   r  r  r   r   r!  r   r"  r#  r$  r  r   r  r%  r&  r'  r  r(  r<   r   rw  r\   r   r  rf   re   r   r   r+  r  rS   rU   rT   r  rg   rZ   r)  r   rY   r[   rs   r,  r-  r.  r/  r0  r1  r2  )-r  r  r  r  r  r`   r  r3  r4  rQ   r5  r6  r  r  r   r7  r  r   r   r   rg   r  r  rz   rB  rE  r  r   r9  r:  r-   r  seen_discovery_keysdeduplicated_discoveredremoved_discovery_duplicatesr}  rR   r?  vendor_name_serpvendor_url_serpscraped_price_serpmsp_serpis_duplicate_serpra   rF  s-                                                r*   scrape_product_serpz"ScraperService.scrape_product_serp  sy
    0 zz&/"7"7

j8P"QRR.."((*"55)*Z@  !jj)=)=f>N>NRV>V)WXX'')--/"770 
 !$ !U 	+F(););<F""6*	+T	#--,,M 0=)<&G& " .O::h';6 &

; 7I"("7K!'!5J$*?$;Mu.C!',>!?J &

+B CI !!-<FyRU<Vi\e!-)- *CJwH\H\^aoz* $L ()*=j*IK*5K$-&4(3)0)=)= #*7-72;(3+3??+< *+2??),W-A-A)B(.%	 y))?}CPWPdPdOeehistwuwixhy  zD  EK  DL  LM  (N  O].Ob  TU_T`abm\-.*%'NG!$::1&?yJ`Ia$bc ,3+<+<=#OO#00!'++.., &
 &?yHZ[^_i[jZkk  %A  B '--j9&=c*o=NNe$fg #&<<"KK*EF\E]]f(gh!+ "$::4 +.%'.0+350 . R"hh|R8&*hh}i&H(,(A"hhuo &1'2F2FhuUS`bcMd{  %A	$,??/33I>3::4@"KK*F{mSVWZ[^\^W_V``kls  mA  mA  lB  BI  JM  IN  NW  Xe  Wf  fg  )h  i8??E"KK*ST_S``cdghkikdlcmmx  zA  zN  zN  yO  OV  WZ  V[  [d  er  ds  sP  )Q  R!R$ &=NKK"<S=PSVWsSt=t<u  vG  HK  LZ  H[  G\  \m  nq  rN  nO  mP  Pd  !e  f !/ +T88H-</3xxy/Q,.2hh|R.HO15/1J.',TXXeW[[-I'JH 7P "O5GZaZnZnpx  GW7 1- $5,5.20@181E1E(0262G59XX>P5Q:>((CZ:[0@(,r(B3:??14W5I5I1J0C3;??3D-"	 !#y 1 6 : :<LgNbNbdlns  uG  IJ  oK  <L  !M !2 8 83C2A4;4H4H5G+38<AS8T:" !# !'1LM]L^^abibvbvawwz  |N  {O  0P  !Q &.OP`Oaadeleyeydzz}  Q  ~R  -S  !TW+T\ KK"CCDWCX  YC  !D  ENN>2KK"I#nJ]I^^p qr )1(9G%!)!2!<!<!>*<G' ))+ **W%%%KK"VH&KKKJ<XYKK8WGHKK;C@Q<R;STUKK6("& ) ' 4 4"??W[[)"/!$W'9$'(9$:%5@ { S Y8 *=.$V&n1T ! \LL#QRUVWRXQY!Z[[\  &,  	LL5c!fX>?++-"AA*3q6(3 	s  AfcBf	c
Bf,d) ;c<!d) Bd) =c">d) B?d) A2c+ 7c%8B c+ 9E9c+ 3Bc+ 6c(7Fc+ Ad) *d#+d) d&Cd) ffd) "d) %c+ (c+ +	d 4!dd) d  d) &d) )	f24f&e)',fffN)FTrN   )__name__
__module____qualname____doc__staticmethodr   intboolr   rJ  r   r   r   r   r(  rY  r  r,   r*   r  r  [  s3   ; "'"!ppp p 	p
 p 
p pd	 Ww Wf Wd Weh Wmrswx|s}  @D  EI  @J  tJ  nK W Wr  "'"!HHH H 	H
 H 
H Hr,   r  )g       @g      N@r  )r$  r   )TF)r}   )r]  )rN   )_r'   r   rO   r%   loggingr&  r:   r   r   r   typingr   r   r   r   urllib.parser	   r
   r8  r   concurrent.futuresr   rV   bs4r   seleniumr   !selenium.webdriver.chrome.servicer   !selenium.webdriver.chrome.optionsr   webdriver_manager.chromer   selenium.webdriver.support.uir   selenium.webdriver.common.byr   selenium.webdriver.common.keysr   sqlalchemy.ext.asyncior   r  r   fastapir   r   r   r   app.models.productr   app.models.vendorr   app.models.violationr   app.models.scraping_resultr   	getLoggerrZ  rS   r6  r=   HTTP_HEADERSr   r+   r_  r5   r\   rA   rE   rb   rd   r   r   	_executorr   r   r   r   r  r  r`  r  r  r   r=  rE  rU  rn  r  r  r  r  r  r  r  r  r,   r*   <module>rv     s    	     	  ( . . ,  1    5 5 8 7 + / /  )   & $ * 5			8	$  M*0 S%  !$ U  	s 	 	PU 	 "(3- HSM &S &T#Y &R$# $$s) $d# d d% d4PT: dN@H 1-		H-S -S -#c #c #"s x "E "% "E%QV,DW "	U 	8E? 	s 	& "&GG	G E?G 	G
 G 
G #G 
GT3  T $Nk k kR[RbRb k^	H:Y# YC Y YxH H% HZU U3 Ut Upi# iS iu ibe ijnosjt iXHs H# HE Hgj Hostxoy HVJ7 JF JUX Jbjkpbq JZY
 Y
r,   