
    /iX                     t   d dl mZmZ d dlZd dlmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dlmZ  e       Z ej>                  e       Z!de"de#de"de#fdZ$ejK                  de       eejL                         edd       eddd       edd      d eejN                        fdede"de"de(dee(   f
d        Z)ejU                  de+e	jX                  !       ed"d#$       ed%d&d'd()       ed"d*$       eejL                         eejN                        fd+ed,e
d-e#d.e"de#defd/       Z-ej]                  d0e+       eejL                         eejN                        fd1e"d+edefd2       Z/eja                  d0e+       eejL                         eejN                        fd1e"defd3       Z1ejK                  d0e       eejL                         eejN                        fd1e"ded4efd5       Z2ejU                  d6e       ed"d7$       ed%d&d'd8)       ed"d9$       eejL                         eejN                        fd1e"d-e#d.e"de#def
d:       Z3ejU                  d;e+e	jh                  !       ed"d7$       ed%d&d'd8)       ed"d9$       eejL                         eejN                        fd-e#d.e"de#defd<       Z5ejU                  d=e+       ed"d7$       ed%d&d'd8)       ed"d>$       eejL                         eejN                        fd1e"d-e#d.e"de#def
d?       Z6ejU                  d@e+e	jh                  !       ed"d7$       ed%d&d'd8)       ed"d>$       eejL                         eejN                        fd-e#d.e"de#defdA       Z7y)B    )ListOptionalN)	APIRouterDependsHTTPExceptionQuerystatusBackgroundTasks)AsyncSession)select)deps)Product)ProductCreateProductUpdateProductResponseProductListResponse)ScrapingResponse)ProductService)ScraperService
product_idheadless_modetimeout_secondsenable_discoveryc           	      v  K   	 ddl m} t        j                  d|          |       4 d{   }t	        j
                  || |||       d{   }t        j                  d|         t        j                  d       t        j                  d|j                  d	d              t        j                  d
|j                  dd              ddd      d{    y7 7 7 	# 1 d{  7  sw Y   yxY w# t        $ r0}t        j                  d|  dt        |       d       Y d}~yd}~ww xY ww)z
    Background task to perform scraping after product creation.
    This runs asynchronously without blocking the API response.
    r   AsyncSessionLocaluI   🚀 [Background] Starting automatic first-time scraping for product ID: Nr   r   r   r   u?   ✅ [Background] First-time scraping completed for product ID: z[Background] Scraping Summary:      • Total Results: total_resultsu      • Total Violations: total_violationsu3   ⚠️ [Background] Scraping failed for product ID : Texc_info)
app.db.sessionr   loggerinfor   scrape_product_serpget	Exceptionerrorstr)r   r   r   r   r   dbscraping_resultes           =/var/www/html/marco-python-backend/app/api/routes/products.py_perform_background_scrapingr0      s9    r4_`j_klm$& 	b 	b"$2$F$F%!1+ /% O KKYZdYefgKK8:KK01D1D_VW1X0YZ[KK3O4G4GHZ\]4^3_`a	b 	b 	b	b 	b 	b 	b  rJ:,VXY\]^Y_X`alpqqrs   D9)C= C"C= C(C$BC(C= C&C= !D9"C= $C(&C= (C:.C1/C:6C= 9D9:C= =	D6&D1,D91D66D9/)response_model   )ge
   d   )r4   leproduct_namez&^(product_name|msp|last_scraped_date)$)regexr,   pagelimitsortBysearchc                 v   K   t        j                  | ||||       d{   \  }}t        ||||      S 7 w)zN
    Retrieve a list of products with pagination, sorting, and searching.
    )r:   r;   sort_byr=   N)itemstotalr:   r;   )r   get_productsr   )r,   r:   r;   r<   r=   current_userproductsrA   s           r/   rB   rB   8   sD      +77
UF6 OHe XUUSSs   979)r2   status_codeTz1Run browser in headless mode for initial scraping)description      <   z8Timeout in seconds for page load during initial scraping)r4   r7   rF   zADiscover alternative vendors via SERP API during initial scraping
product_inbackground_tasksheadlesstimeoutc                 X  K   t        j                  ||        d{   }t        j                  d|j                   d|j
                   d       |j                  t        |j
                  |||       t        j                  d|j
                          d|j
                  dd	d
S 7 w)a  
    Add a new product and automatically trigger scraping in the background.
    
    **Returns immediately** (HTTP 201) without waiting for scraping to complete.
    Scraping happens asynchronously in the background using SERP API.
    
    Parameters:
    - product_in: Product creation data
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: Discover alternative vendors via SERP API (default: True)
    
    **Response (Immediate):**
    - message: Product creation success
    - product_id: ID of created product
    - scraping_status: "in_progress" - indicates scraping is running in background
    
    **Note:** Scraping continues in background. You can check results via GET /products/{id} 
    or monitor logs for completion message: "First-time scraping completed for product ID: {id}"
    Nu   ✅ Product created:  (ID: ))r   r   r   r   u3   📌 Background scraping scheduled for product ID: zCProduct added successfully! Scraping is starting in the background.in_progresszuYou can start working with the product while scraping happens. Check logs or GET /products/{id} for scraping results.)messager   scraping_statusnote)r   create_productr%   r&   r8   idadd_taskr0   )rJ   rK   rL   rM   r   r,   rC   products           r/   add_productrY   J   s     > #11"jAAG
KK'(<(<'=VGJJ<qQR $::)   KKEgjj\RS Yjj( H	  Bs   B*B(BB*z/{id}rV   c                 T   K   t        j                  || |       d{    ddiS 7 w)z0
    Edit an existing product. (Admin only)
    NrR   zProduct updated successfully)r   update_product)rV   rJ   r,   rC   s       r/   r[   r[      s1      
'
'B

;;;566 <s   (&	(c                 R   K   t        j                  ||        d{    ddiS 7 w)z(
    Delete a product. (Admin only)
    NrR   zProduct deleted successfully)r   delete_product)rV   r,   rC   s      r/   r]   r]      s/      
'
'B
///566 0s   '%	'returnc                 N   K   t        j                  ||        d{   }|S 7 w)z.
    Retrieve a single product by its ID.
    N)r   get_product_by_id)rV   r,   rC   rX   s       r/   get_productra      s'      #44R<<GN =s   %#%z/{id}/scrapezRun browser in headless modez Timeout in seconds for page loadz3Also discover alternative vendors via Google searchc                    K   t        j                  ||        d{   }|j                  st        t        j                  d      t        j                  || |||       d{   }|S 7 O7 w)a  
    Manually trigger scraping for a specific product.
    Scrapes the product against all registered active vendors.
    When discovery is enabled, also searches Google for alternative sellers and scrapes their prices.
    
    Parameters:
    - id: Product ID to scrape
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: if true, discover and scrape alternative vendors (default: true)
    N2Scraping can only be triggered for active productsrE   detailr   )r   r`   r	   r   HTTP_400_BAD_REQUESTr   scrape_productrV   rL   rM   r   r,   rC   rX   results           r/   trigger_scrapingrj      su     ( #44R<<G>>33G
 	
 "00
) F M! ="   A0A,AA0%A.&A0.A0z/scrape-allc                 @  K   t        t              j                  t        j                  dk(        }|j	                  |       d{   }|j                         j                         }|st        t        j                  d      g }d}	d}
g }t        j                  dd        t        j                  dt        |       d	       t        j                  d d
       	 t        |d      D ]"  \  }}d}	 ddlm}  |       }t        j                  d
d        t        j                  d| dt        |       d|j                   d|j                    d	       t        j                  d        t        j                  d|j"                          t        j                  d|j$                          t        j                  d|        t'        j(                  ||j                   || |       d{   }|j+                  dd      }|j+                  dd      }t        j                  d|j                    d       t        j                  d|        t        j                  d|        |j+                  dg       }|rTt        j-                  d|j                   d       |D ],  }t        j-                  d|d     d!|d"   d#d$|d%   d#d       . |	|z  }	|
|z  }
|j/                  |       |s
	 |j9                          d{    % t        j                  dd        t        j                  d,       t        j                  d        t        j                  d-t        |              t        j                  d.t        |              t        j                  d/t        |              t        j                  d0|	        t        j                  d1|
        t        j                  d d
       d2d3t        |       d	t        |      t        |      t        |      |	|
||r|nd|d4
S 7 7 I# t        $ ro}t        j1                  d&|j                    d'|j2                   d()       |j/                  |j                   |j                  |j2                  d*       Y d}~d}~wt4        $ rm}t        j1                  d&|j                    d't7        |       d)       |j/                  |j                   |j                  t7        |      d*       Y d}~4d}~ww xY w7 $# t4        $ r,}t        j-                  d+t7        |              Y d}~ud}~ww xY w# |rR	 |j9                          d{  7   w # t4        $ r+}t        j-                  d+t7        |              Y d}~w d}~ww xY ww xY w# t4        $ rO}t        j1                  d5t7        |       d)       t        t        j:                  d6t7        |             d}~ww xY ww)7a<  
    Manually trigger scraping for ALL active products.
    Scrapes EACH product against ALL registered active vendors.
    For each vendor's website URL: Extract price using search and price detection.
    Then if discovery enabled: Google search for alternative vendors and scrape them too.
    
    This is a SYNCHRONOUS operation - waits for all products to complete before returning.
    Products are scraped sequentially to avoid browser and connection issues.
    
    Parameters:
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: Also discover and scrape alternative vendors via Google search (default: True)
    
    Returns:
    - Complete results including all scraped vendors and violations for each product
    TN%No active products found for scrapingrd   r   

P================================================================================u(   🚀 [SCRAPE ALL] Starting scraping for z	 products
r3   r      ────────────────────────────────────────────────────────────────────────────────[r1      ] 📦 PRODUCT: rO   rP           Barcode:            MSP: ₹        Discovery Enabled: r   scraped_countviolation_count   
✅ [RESULT] Product :u      • Vendors Scraped: u      • Violations Found: 
violations   
   🚨 VIOLATIONS FOR       - vendor_name   : ₹scraped_pricez.3f
    (MSP: ₹msp   
❌ [ERROR] Product r!   Fr"   r   r8   r*   Warning closing session: u   📊 [SCRAPE ALL] FINAL SUMMARYTotal Products: Successfully Scraped: Failed: zTotal Vendor URLs Scraped: Total Violations Found: 	completedScraping completed for )
r	   rR   total_productssuccessful_productsfailed_productstotal_vendors_scrapedr    rD   failed_listr   u'   

❌ [FATAL ERROR] Scrape All failed: Scraping failed: )r   r   wherer	   executescalarsallr   rf   r%   r&   len	enumerater$   r   r8   rV   barcoder   r   rg   r(   warningappendr*   re   r)   r+   closeHTTP_500_INTERNAL_SERVER_ERROR)rL   rM   r   r,   rC   stmtri   rD   all_products_resultstotal_scrapedr    r   idxrX   fresh_dbr   product_resultrw   rx   r{   vher.   s                          r/   scrape_all_productsr      s    4 '?  4!78D::d##F~~##%H33:
 	

 MO
KK$vh 
KK:3x=/ST
KK6("]
%h2 =	MLCH;M<,.b
O,auAc(m_4DWEYEYDZZ`ahakak`llmnoxj*//@AB.w{{m<=9:J9KLM (6'D'D&zz%5"*$+( " !/ 2 2?A F"0"4"45F"J5gjj\CD6}oFG77HIJ ,//bA
NN%>w?S?S>TTU#VW' |!M2B1C5?I[\_H``jklmrkstwjxxy'z{| . O3 $++N;$ M&nn...w=	M@ 	d6(O$57vh &s8}o67,S1E-F,GHIhs?34561-AB./?.@ABvhbM" "05I1J0K9U!(m#&';#<"?3%2 0,.=?4 0
 	
{ $D"4 ! 5gjj\BII;OZ_`&&")**$+$8$8YY(  
  5gjj\CF8LW[\&&")**$+$8$8 V(   /$ M)B3q6('KLLM M&nn...$ M)B3q6('KLLM <  
@QITXY==&s1vh/
 	

s	  AXP:BX!W 7C=Q 4P=5C>Q 3W 8T2T/T2D)W 9X=Q  	T,	A$R3-U*3T,?A"T'!U*'T,,U*/T22	U';!U"W "U''W *W .VV
VW 	V<	!V7	2W 7V<	<W  W 	XA
XXXz/{id}/scrape-serpz>Also discover alternative vendors via SERP API Google Shoppingc                    K   t        j                  ||        d{   }|j                  st        t        j                  d      t        j                  || |||       d{   }|S 7 O7 w)a  
    Manually trigger scraping for a specific product using SERP API for discovery.
    Same functionality as /{id}/scrape, but uses SERP API Google Shopping Light Engine
    instead of Tavily API for discovering alternative vendors.
    
    Scrapes the product against all registered active vendors.
    When discovery is enabled, also searches SERP API Google Shopping for alternative sellers 
    and scrapes their prices.
    
    Parameters:
    - id: Product ID to scrape
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: if true, discover and scrape alternative vendors via SERP API (default: true)
    Nrc   rd   r   )r   r`   r	   r   rf   r   r'   rh   s           r/   trigger_scraping_serpr   [  su     0 #44R<<G>>33G
 	
 "55
) F M! =rk   z/scrape-all-serpc                   K   t        t              j                  t        j                  dk(        }|j	                  |       d{   }|j                         j                         }|st        t        j                  d      g }d}	d}
g }t        j                  dd        t        j                  dt        |       d	       t        j                  d d
       	 t        |d      D ]\  \  }}d}	 ddlm}  |       }t        j                  d
d        t        j                  d| dt        |       d|j                   d|j                    d	       t        j                  d        t        j                  d|j"                          t        j                  d|j$                          t        j                  d       t        j                  d|        t'        j(                  ||j                   || |       d{   }|j+                  dd      }|j+                  dd      }t        j                  d|j                    d       t        j                  d|        t        j                  d|        |j+                  dg       }|ryt        j-                  d|j                   d       |D ]Q  }t        j-                  d |j+                  d!d"       d#|j+                  d$d%       d&|j+                  d'd%       d       S |	|z  }	|
|z  }
|j/                  |       |sD	 |j9                          d{    _ t        j                  dd        t        j                  d.       t        j                  d        t        j                  d/t        |              t        j                  d0t        |              t        j                  d1t        |              t        j                  d2|	        t        j                  d3|
        t        j                  d4       t        j                  d d
       d5d6t        |       d	t        |      t        |      t        |      |	|
||r|nd|d7d8d9S 7 E7 # t        $ ro}t        j1                  d(|j                    d)|j2                   d*+       |j/                  |j                   |j                  |j2                  d,       Y d}~d}~wt4        $ rm}t        j1                  d(|j                    d)t7        |       d+       |j/                  |j                   |j                  t7        |      d,       Y d}~Kd}~ww xY w7 ;# t4        $ r,}t        j-                  d-t7        |              Y d}~d}~ww xY w# |rR	 |j9                          d{  7   w # t4        $ r+}t        j-                  d-t7        |              Y d}~w d}~ww xY ww xY w# t4        $ rO}t        j1                  d:t7        |       d+       t        t        j:                  d;t7        |             d}~ww xY ww)<a  
    Manually trigger scraping for ALL active products using SERP API for discovery.
    Same functionality as /scrape-all, but uses SERP API Google Shopping Light Engine
    instead of Tavily API for discovering alternative vendors.
    
    Scrapes EACH product against ALL registered active vendors.
    For each vendor's website URL: Extract price using search and price detection.
    Then if discovery enabled: SERP API search for alternative vendors and scrape them too.
    
    This is a SYNCHRONOUS operation - waits for all products to complete before returning.
    Products are scraped sequentially to avoid browser and connection issues.
    
    Parameters:
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: Also discover and scrape alternative vendors via SERP API (default: True)
    
    Returns:
    - Complete results including all scraped vendors and violations for each product
    TNrm   rd   r   rn   ro   u-   🚀 [SCRAPE ALL SERP] Starting scraping for z products using SERP APIrp   r3   r   rq   rr   r1   rs   rO   rP   rt   ru   z2        Discovery Method: SERP API Google Shoppingrv   r   r   r    ry   rz   r   u      • Total Violations Found: r{   r|   r}   r~   Unknownr   r   zN/Ar   r   r   r!   Fr"   r   r   u$   📊 [SCRAPE ALL SERP] FINAL SUMMARYr   r   r   zTotal Results: r   z7Discovery Method: SERP API Google Shopping Light Enginer   r   serp_api_google_shoppingbrowser_with_serp_api_discovery)r	   rR   r   r   r   r   r    rD   r   r   discovery_methodsource_typeu,   

❌ [FATAL ERROR] Scrape All SERP failed: r   )r   r   r   r	   r   r   r   r   rf   r%   r&   r   r   r$   r   r8   rV   r   r   r   r'   r(   r   r   r*   re   r)   r+   r   r   )rL   rM   r   r,   rC   r   ri   rD   r   r   r    r   r   rX   r   r   r   r   total_violation_countr{   r   r   r.   s                          r/   scrape_all_products_serpr     s    : '?  4!78D::d##F~~##%H33:
 	

 MO
KK$vh 
KK?HNfgh
KK6("a
%h2 >	MLCH<M<,.b
O,auAc(m_4DWEYEYDZZ`ahakak`llmnoxj*//@AB.w{{m<=PR9:J9KLM (6'I'I&zz%5"*$+( " !/ 2 2?A F(6(:(:;Mq(Q%5gjj\CD4]ODE=>S=TUV ,//bA
NN%>w?S?S>TTU#VW' Y!%%y2Q1RRWXYX]X]^motXuWv  wA  BC  BG  BG  HM  OT  BU  AV  VW  (X  YY . $99 $++N;$ M&nn...y>	MB 	d6(O$:<vh &s8}o67,S1E-F,GHIhs?3456om_56./?.@ABMOvhbM" "05I1J0KKcd!(m#&';#<"?3* 0,.=?4 0 :<
 	
 $F"4 ! 5gjj\BII;OZ_`&&")**$+$8$8YY(  
  5gjj\CF8LW[\&&")**$+$8$8 V(   /$ M)B3q6('KLLM M&nn...$ M)B3q6('KLLM B  
Ec!fXNY]^==&s1vh/
 	

s	  AY/RBY/!X 7DR	R
D#R-X 2VV V
E X 
Y/R	U=A$T>V;U=A"U82V;8U==V; V	V8!V3-X 3V88X ;X?WW
WX	X	"!X	XX	XX 	Y,A
Y''Y,,Y/)8typingr   r   loggingfastapir   r   r   r   r	   r
   sqlalchemy.ext.asyncior   
sqlalchemyr   app.apir   app.models.productr   app.schemas.productr   r   r   r   app.schemas.scrapingr   app.services.product_servicer   app.services.scraper_servicer   router	getLogger__name__r%   intboolr0   r(   get_dbget_current_userr+   rB   postdictHTTP_201_CREATEDrY   putr[   deleter]   ra   rj   HTTP_200_OKr   r   r        r/   <module>r      s   !  U U /   &  2 7 7				8	$rrr r 	rB C 34t{{+aAraC(.VW ../TT
T T 	T
 SMT 5T" S63J3JK 4-`ab6pq"45xyt{{+../11%1 1 	1
 1 	1 L1h GD) t{{+../	
7
7
7 	
7 *
7 wt, t{{+../	7	7	7 -	7 GO4 t{{+../			 		 5	 ^,<= 4-KLb6XY"45jkt{{+../### # 	#
 	# >#L ]4V=O=OP4-KLb6XY"45jkt{{+../I
I
I
 I
 		I
 QI
X  6 4-KLb6XY"45uvt{{+../''' ' 	'
 	' 7'T &BTBTU4-KLb6XY"45uvt{{+../P
P
P
 P
 		P
 VP
r   