
    2iH                       U d dl mZ d dlZd dlmZ d dlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZmZmZ  ej<                  d      Z ej@                         Z!e!jE                   ejF                  d             g Z$de%d<   g Z&de%d<   eD ].  Z'	  ee'      re$jQ                  e'       ne&jQ                  e'       0 e$e&z   Z*de%d<   	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ+	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ,	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ-	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ.y# e)$ r e&jQ                  e'       Y w xY w)    )annotationsN)PathLike)BinaryIO   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDIANA_SUPPORTED_SIMILARTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)sz	list[str]_mb_supported_sb_supportedIANA_SUPPORTED_MB_FIRSTc
                   t        | t        t        f      s#t        dj	                  t        |                   |rBt        j                  }
t        j                  t               t        j                  t               t        |       }|dk(  r_t        j                  d       |r.t        j                  t               t        j                  
       t        t!        | dddg d      g      S |Dt        j#                  t        d	d
j%                  |             |D cg c]  }t'        |d       }}ng }|Dt        j#                  t        dd
j%                  |             |D cg c]  }t'        |d       }}ng }|||z  k  r!t        j#                  t        d|||       d}|}|dkD  r||z  |k  rt)        ||z        }t        |       t*        k  }t        |       t,        k\  }|r*t        j#                  t        dj	                  |             n+|r)t        j#                  t        dj	                  |             g }|rt/        |       nd}|,|j1                  |       t        j#                  t        d|       t3               }g }g }t3               }t3               }i }d}t3               }d}d}d}d}d}d}t               }t               } t5        |       \  }!}"|!6|j1                  |!       t        j#                  t        dt        |"      |!       |j1                  d       d|vr|j1                  d       |t6        z   D 	]A  }#|r|#|vr|r|#|v r|#|v r|j9                  |#       d}$|!|#k(  }%|%xr t;        |#      }&|#dv r|%st        j#                  t        d|#       `|#dv r|%st        j#                  t        d|#       |#|v rt        j#                  t        d|#       |#|v rt        j#                  t        d|#       	 t=        |#      }'|r[|'st3        tC        |#            }(nt3        tE        |#            }(|(jG                  |      st        j#                  t        d|#|(|       +|r&|'s$||k\  rt        j#                  t        d|#||       S|r|'st        j#                  t        d|#       t	 |r9|'du r5tI        |&du r| dt)        d       n| t        |"      t)        d       |#       ntI        |&du r| n| t        |"      d |#      }$tO        |%sdn
t        |"      |t)        ||z              }*|'xr |$duxr t        |$      |k  }+|+rt        j#                  t        d!|#       |$|'stQ        |$      },|jS                  |,      }-|-|-\  }.}/}0|0rtt!        | |#|.|%|/|du s|#|ddfv r|$nd|"      }1|j1                  |1       |j9                  |#       t        j#                  t        d#|#tU        |.d$z  d%&             |#|ddfv ry|.d'k  rt|.dk(  r^t        j                  d(|1jV                         |r.t        j                  t               t        j                  
       t        |1g      c S | j1                  |1       t        |       r||||v rvd|v rrd|v rn| jY                         }2t        j                  d(|2jV                         |r.t        j                  t               t        j                  
       t        |2g      c S |j1                  |#       t        j#                  t        d)|#       |	r-|#dd|d*d+fv r$t!        | |#||%g |$|"      }3|#|k(  r|3}n
|#dk(  r|3}n|3})t)        t        |*      d,z        }4t[        |4d-      }4d}5d}6g }7g }8	 t]        | |#|*||%|&|"|'|$	      D ]g  }9|7j1                  |9       |8j1                  t_        |9||d.u xr dt        |      cxk  xr d-k  nc              |8d/   |k\  r|5dz  }5|5|4k\  s|%sb|&du sg n |6s$|r"|'s 	 | t)        d1      d ja                  |#d23       |8rtc        |8      t        |8      z  nd}:|:|k\  s|5|4k\  r|j1                  |#       |#td        v r|jg                  td        |#          |$ |'s|ji                  tQ        |$      |:g df       t        j#                  t        d5|#|5tU        |:d$z  d%&             |	r/|#dd|d*d+fv r&|6s$t!        | |#||%g |$|"      }3|#|k(  r|3}n
|#dk(  r|3}n|3}t        j#                  t        d6|#tU        |:d$z  d%&             |'stC        |#      };ntE        |#      };|;r3t        j#                  t        d7j	                  |#tI        |;                   g }<|#dk7  rD|7D ]3  }9tk        |9||;rd8j%                  |;      nd      }=|<j1                  |=       5 tm        |<      }>ntm        |<      }>|>r*t        j#                  t        d9j	                  |>|#             t!        | |#|:|%|>|du s|#|ddfv r|$nd|"      }?|j1                  |?       |$ |'s|ji                  tQ        |$      |:|>d.f       |r|'s
|:d:k  r|dz  }|#|ddfv ry|:d'k  rt|:dk(  r^t        j                  d(|?jV                         |r.t        j                  t               t        j                  
       t        |?g      c S | j1                  |?       t        |       r||||v rvd|v rrd|v rn| jY                         }2t        j                  d(|2jV                         |r.t        j                  t               t        j                  
       t        |2g      c S |sW|'sU|>rt[        d; |>D        d<      nd}@|@d=k\  r8d|v r4d|v r0d.}|jg                  |;       t        j#                  t        d>|#|:@       |s\|'rZ|+rX|$Vt        |$      |d?z  k  rE|#d@vrAd|v r=d|v r9d.}t        j#                  t        dA|#|:t        |$      |t        |$      |z  d$z         |#|!k(  st        j                  dB|#       |r.t        j                  t               t        j                  
       t        ||#   g      c S  t        |      dk(  r|s|s|rt        j#                  t        dC       |r2t        j                  dD|jV                         |j1                  |       nr|r||r|r|jn                  |jn                  k7  s|'t        j                  dE       |j1                  |       n(|r&t        j                  dF       |j1                  |       |r<t        j                  dG|jY                         jV                  t        |      dz
         nt        j                  dH       |r.t        j                  t               t        j                  
       |S c c}w c c}w # t>        t@        f$ r t        j#                  t        d|#       Y 
w xY w# tJ        tL        f$ rQ})t        |)tL              s%t        j#                  t        d |#tI        |)             |j1                  |#       Y d})~)4d})~)ww xY w# tJ        $ r4})t        j#                  t        d0|#tI        |)             |4}5d.}6Y d})~)d})~)ww xY w# tJ        $ rA})t        j#                  t        d4|#tI        |)             |j1                  |#       Y d})~)d})~)ww xY w)Iaf  
    Given a raw bytes sequence, return the best possibles charset usable to render str objects.
    If there is no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
    And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
    but never take it for granted. Can improve the performance.

    You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
    purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
    By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    Custom logging format and handler can be set manually.
    z3Expected object of type bytes or bytearray, got: {}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F Nz`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.   zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_16utf_32z\Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.>   utf_7zREncoding %s won't be tested as-is because detection is unreliable without BOM/SIG.zY%s is deemed too similar to a code page that was already considered unsuited. Continuing!zESkipping %s: already fast-tracked from a similar successful encoding.z2Encoding %s does not provide an IncrementalDecoderzbSkipping %s: definitive match already found, this encoding targets different languages (%s vs %s).zXSkipping %s: already accumulated %d same-family results after definitive match (cap=%d).zCSkipping single-byte %s: multi-byte definitive match already found.g    A)encodingz9Code page %s does not fit given bytes sequence at ALL. %szpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.)preemptive_declarationzM%s fast-tracked (identical decoded payload to a prior encoding, chaos=%f %%).d      )ndigits皙?z.Encoding detection: %s is most likely the one.zZ%s fast-skipped (identical decoded payload to a prior encoding that failed chaos probing).r"   r#         TzaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.z=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {},z We detected language {} using {}g{Gz?c              3  &   K   | ]	  \  }}|  y w)N ).0_vs      ^/var/www/html/marco-python-backend/venv/lib/python3.12/site-packages/charset_normalizer/api.py	<genexpr>zfrom_bytes.<locals>.<genexpr>  s     441aQ4s   )defaultg      ?zyDefinitive match found: %s (chaos=%.3f, coherence=%.2f). Encodings targeting different language families will be skipped.g\(\?>	   r$   r   r"   r#   	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigzjMulti-byte definitive match: %s (chaos=%.3f, decoded=%d/%d=%.1f%%). Single-byte encodings will be skipped.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)8
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerr   r   logjoinr   intr   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorr   r	   intersectionstrUnicodeDecodeErrorLookupErrorrangehashgetroundr%   bestmaxr   r   decodesumr   update
setdefaultr   r
   fingerprint)A	sequencessteps
chunk_size	thresholdcp_isolationcp_exclusionpreemptive_behaviourexplainlanguage_thresholdenable_fallbackprevious_logger_levellengthcpis_too_small_sequenceis_too_large_sequenceprioritized_encodingsspecified_encodingtestedtested_but_hard_failuretested_but_soft_failuresoft_failure_skipsuccess_fast_trackedpayload_result_cachedefinitive_match_founddefinitive_target_languages post_definitive_sb_success_countPOST_DEFINITIVE_SB_CAPmb_definitive_match_foundfallback_asciifallback_u8fallback_specifiedresultsearly_stop_resultssig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decoderenc_languageser_multi_byte_bonuspayload_hashcachedcached_mess	cached_cdcached_passed
fast_matchprobable_resultfallback_entrymax_chunk_gave_upearly_stop_countlazy_str_hard_failure	md_chunks	md_ratioschunkmean_mess_ratiotarget_languages	cd_ratioschunk_languagescd_ratios_mergedcurrent_matchbest_coherencesA                                                                    r6   
from_bytesr   9   s   < i)U!34AHHY
 	
 %+\\/*i.F{ST  1OO12|IwUBPRSTUU

5IIl#		
 8DD	"e,DD

6IIl#		
 8DD	"e,DD*u$%

l	
 
qyVe^j0%(
"%i.3E"E"%i.4D"D

LSS	
 


W^^	
 (* .By)t  %$$%78

N	
 uF)+)+"%%%(U
 TV $),/E -.$"# ',*.N'+K.2,.G)7)9 3I >L+$$\2

W		
   )++$$W-.1HH v<M=M\9F"

=!&*%1]%B!5 "
:Q;
 009MJJn
 I%.BJJd
  --JJk
  00JJW
 	*@*O! "( #$6}$E F #$9-$H I --.IJ

x!!/  #)04JJJJj0& 
 %-BJJU
 	$)>%)G ,u4 "+CI.&s;'7#d)D* #& ,u4 "&s;'7'9:*#& )As;/?
 " .t+.O$v- 	 JJ-	 &/D $_ 5L)--l;F!8>5Y !-!%#,! !6 >#0$6#I$J , "&/A"J" NN:.(,,];JJg%kC/;	 &*<gw)OO'#-&#-"LL P * 3 3  ' & 4 4_ E &0E F#1:,#??*11*= .//7;MQW;W#v-#v-8J8O8O8QL+44 #"00A"OO,AB-.?@@ ,22=AJJt% '=*  = , *6%)%0+3E* ),>>1?.*g5-;N*8K!$SWq[!1 115 ! %!		'	),$ %
    '  !4GA\1B,Ga,G R=I-$)$$(99(-=-F7V &%)
#d)+&--mH-M ENY#i.!@SVi'+;?P+P#**=9 66!(()?)NO *3H$//)OR+G JJ0 o+Q7  !W&8(HMN-!-!(#+=" !$66)7&"g-%3N"0K

K/C'3		
 %*<]*K4]CJJ8??!3'7#8 	 G# # 2"1&2BCHH-.#   12  6i@5i@JJ299$m %  *U2$);Wg(NN  
 #5
" 	}% &/D ++_% "2D9 #)$&,1, 0'7CC#% #%D!** ((9OO$9:%}o66%%m4 "##+/AV/K6!6!0557OLL@(( $$_5 56!?"344 &.C $ 4#34cB 
 $F):w&?P)-&+223CD

 P!#" *% +O$v}4
 6!6!(,%JJ|O$O$v-3 L(LL1
 $$_5 56!7=#9":;;mv<p 7|q.,>JJa
 LLI"++ NN-.^3"++~/I/II'LLUVNN;'LLUVNN>*kLLN##L1	
 	TU_--.Ny E EL $[1 	JJD
 	X #K0 		a-

O!F	 $**=9		X 
	) JJsA	  1$(!
	)* & 

t!F	 (..}=su   w%w#w(5AxA4y<
y<y<z<(*xxy9(Ay44y9<	z9)z44z9<	|6||c
                F    t        | j                         |||||||||	
      S )z
    Same thing than the function from_bytes but using a file pointer that is already ready.
    Will not close the file pointer.
    )r   read)
fprd   re   rf   rg   rh   ri   rj   rk   rl   s
             r6   from_fpr   R  s5      
	     c
                n    t        | d      5 }
t        |
|||||||||	
      cddd       S # 1 sw Y   yxY w)z
    Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
    Can raise IOError.
    rbN)openr   )pathrd   re   rf   rg   rh   ri   rj   rk   rl   r   s              r6   	from_pathr   p  sK      
dD	 
R 

 
 
s   +4c
                    t        | t        t        f      rt        | |||||||||	
      }
|
 S t        | t        t
        f      rt        | |||||||||	
      }
|
 S t        | |||||||||	
      }
|
 S )a)  
    Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
    Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
    are disabled to be stricter around ASCII-compatible but unlikely to be a string.
    )	rd   re   rf   rg   rh   ri   rj   rk   rl   )r>   rU   r   r   r@   r?   r   r   )fp_or_path_or_payloadrd   re   rf   rg   rh   ri   rj   rk   rl   guessess              r6   	is_binaryr     s    " '#x9!!%%!51+
Z ;C 
	

 !!%%!51+
4 ; !!%%!51+
 ;r   )	      皙?NNTFr*   T)rc   zbytes | bytearrayrd   rN   re   rN   rf   floatrg   list[str] | Nonerh   r   ri   boolrj   r   rk   r   rl   r   returnr   )r   r   rd   rN   re   rN   rf   r   rg   r   rh   r   ri   r   rj   r   rk   r   rl   r   r   r   )r   zstr | bytes | PathLikerd   rN   re   rN   rf   r   rg   r   rh   r   ri   r   rj   r   rk   r   rl   r   r   r   )	r   r   r   NNTFr*   F)r   z!PathLike | str | BinaryIO | bytesrd   rN   re   rN   rf   r   rg   r   rh   r   ri   r   rj   r   rk   r   rl   r   r   r   )/
__future__r   loggingosr   typingr   cdr   r   r	   r
   constantr   r   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   	getLoggerrD   StreamHandlerrG   setFormatter	Formatterr   __annotations__r   _supported_encrO   rS   r   r   r   r   r   r2   r   r6   <module>r      s;   "       0  
		/	0'''')   GAB y y $ -N-!.1  0  0- &3]%B  B
 %)%)!% # V VV V 	V
 #V #V V V V V Vv %)%)!% #   	
 # #     @ %)%)!% # 

 

 
 	

 #
 #
 
 
 
 
 
B %)%)!% #!?<?? ? 	?
 #? #? ? ? ? ? 
?y  -^,-s   (+E,,FF