
    2iv                       U d dl mZ d dlZd dlmZ d dlmZ ej                  dk\  rd dlm	Z	 n	 d dl
m	Z	 dd	lmZmZmZmZmZmZmZmZmZmZmZmZmZ dd
lmZmZmZmZmZm Z m!Z! eez  ez  ez  ez  Z"de#d<   e	 G d d             Z$ G d d      Z%e	 G d de%             Z&e	 G d de%             Z'e	 G d de%             Z(e	 G d de%             Z)e	 G d de%             Z*e	 G d de%             Z+e	 G d de%             Z,e	 G d d e%             Z-e	 G d! d"e%             Z. ed#$      	 	 	 	 	 	 d(d%       Z/ ed&$      	 d)	 	 	 	 	 	 	 d*d'       Z0y# e$ r d Z	Y 'w xY w)+    )annotationsN)	lru_cache)	getLogger)      )finalc                    | S )N )clss    ]/var/www/html/marco-python-backend/venv/lib/python3.12/site-packages/charset_normalizer/md.pyr   r      s    J       )COMMON_CJK_CHARACTERSCOMMON_SAFE_ASCII_CHARACTERSTRACEUNICODE_SECONDARY_RANGE_KEYWORD_ACCENTUATED_ARABIC_ARABIC_ISOLATED_FORM_CJK_HANGUL	_HIRAGANA	_KATAKANA_LATIN_THAI)_character_flagsis_emoticonis_punctuationis_separator	is_symbolremove_accentunicode_rangeint_GLYPH_MASKc                  $    e Zd ZdZdZddZddZy)CharInfou{  Pre-computed character properties shared across all detectors.

    Instantiated once and reused via :meth:`update` on every character
    in the hot loop so that redundant calls to str methods
    (``isalpha``, ``isupper``, …) and cached utility functions
    (``_character_flags``, ``is_punctuation``, …) are avoided when
    several plugins need the same information.
    	character	printablealphaupperlowerspacedigitis_asciicase_variableflagsaccentuatedlatinis_cjk	is_arabicis_glyphpunctsymc                    d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        d| _        d| _        y )N Fr   r'   selfs    r   __init__zCharInfo.__init__N   s{     $ 
 
 
 
 
##(
!& 
!$# 
r   c                t   || _         t        |      }|dk  r-d| _        d| _        d| _        d| _        d| _        d|cxk  rdk  rUn nRd| _        d| _        d| _	        d| _
        d| _        d| _        d| _        t        | _        d| _        d| _        d| _        yd|cxk  rdk  rUn nRd| _        d| _        d| _	        d| _
        d| _        d| _        d| _        t        | _        d| _        d| _        d| _        yd|cxk  rd	k  rQn nNd| _        d| _        d| _	        d| _
        d| _        d| _        d| _        d
| _        d| _        d| _        d| _        y|dk(  sd|cxk  rdk  rTn nQd| _        d| _        d| _	        d| _
        d| _        |dk(  | _        d| _        d
| _        d| _        d| _        d| _        y|j'                         | _        d| _        d| _        d| _	        d| _
        d| _        d| _        d
| _        d| _        | j                  rt)        |      nd| _        | j                  rt+        |      nd| _        yd| _        |j'                         | _        |j-                         | _        |j/                         | _        |j1                         | _	        |j3                         | _
        |j5                         | _        | j                  | j                  k7  | _        | j                  rt7        |      }nd
}|| _        t9        |t:        z        | _        t9        |t        z        | _        t9        |t<        z        | _        t9        |t>        z        | _        t9        |t@        z        | _        | j                  rt)        |      nd| _        | j                  rt+        |      nd| _        y)zBUpdate all properties for *character* (called once per character).   TFA   Z   a   z   0   9   r       	      N)!r(   ordr/   r2   r4   r5   r6   r*   r+   r,   r-   r.   r)   r0   r   r1   r3   r7   r8   isprintabler   r    isalphaisupperislowerisspaceisdigitr   boolr   r   r   r$   )r<   r(   or1   s       r   updatezCharInfo.updatea   s:   " Ys7 DM$DDK"DN!DMQ}"}!
!
"
"
"
!%%)"#
!
"
 qC!
"
!
"
"
!%%)"#
!
"
 qB"
"
"
"
!
!%%*"
"
"
 bQ!\r\"
"
"
!
"
!"b%*"
"
"
  "+!6!6!8"
"
"
"
"
%*"
"
:>..^I6e
37>>9Y/u "DM&224DN"**,DJ"**,DJ"**,DJ"**,DJ"**,DJ!%tzz!9D zz(3DJ#EL$89Defn-DJut|,DK!%'/2DN !45DM 7;nn	2%DJ/3~~y+5DHr   NreturnNone)r(   strrT   rU   )__name__
__module____qualname____doc__	__slots__r=   rR   r
   r   r   r&   r&   /   s    I(&jIr   r&   c                  6    e Zd ZdZdZddZddZed	d       Zy)
MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    r
   c                    t         )z
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        NotImplementedErrorr<   r(   infos      r   	feed_infozMessDetectorPlugin.feed_info   s
    
 "!r   c                    t         )zB
        Permit to reset the plugin to the initial state.
        r_   r;   s    r   resetzMessDetectorPlugin.reset   s
     "!r   c                    t         )z
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        r_   r;   s    r   ratiozMessDetectorPlugin.ratio   s
     "!r   Nr(   rV   rb   r&   rT   rU   rS   rT   float)	rW   rX   rY   rZ   r[   rc   re   propertyrg   r
   r   r   r]   r]      s,    
 I"" " "r   r]   c                  :    e Zd ZdZddZddZddZed	d       Zy)
 TooManySymbolOrPunctuationPlugin_punctuation_count_symbol_count_character_count_last_printable_char_frenzy_symbol_in_wordc                J    d| _         d| _        d| _        d | _        d| _        y Nr   Frn   r;   s    r   r=   z)TooManySymbolOrPunctuationPlugin.__init__   s*    '("#%&04!,1#r   c                ,   | xj                   dz  c_         || j                  k7  ri|t        vra|j                  r| xj                  dz  c_        || _        y|j
                  s,|j                  r t        |      s| xj                  dz  c_        || _        y)1Optimized feed using pre-computed character info.r      N)	rq   rr   r   r7   ro   r.   r8   r   rp   ra   s      r   rc   z*TooManySymbolOrPunctuationPlugin.feed_info   s}    " 222!==zz''1,' %.! ZZDHH[5K""a'"$-!r   c                .    d| _         d| _        d| _        y Nr   )ro   rq   rp   r;   s    r   re   z&TooManySymbolOrPunctuationPlugin.reset  s    "# !r   c                    | j                   dk(  ry| j                  | j                  z   | j                   z  }|dk\  r|S dS )Nr           333333?)rq   ro   rp   )r<   ratio_of_punctuations     r   rg   z&TooManySymbolOrPunctuationPlugin.ratio  sO      A% ##d&8&88!!'" (<s'B#KKr   NrS   rh   ri   	rW   rX   rY   r[   r=   rc   re   rk   rg   r
   r   r   rm   rm      s.    I2.
 L Lr   rm   c                  :    e Zd ZdZddZddZddZed	d       Zy)
TooManyAccentuatedPluginrq   _accentuated_countc                     d| _         d| _        y rz   r   r;   s    r   r=   z!TooManyAccentuatedPlugin.__init__"  s    %&'(r   c                r    | xj                   dz  c_         |j                  r| xj                  dz  c_        yyrw   r   N)rq   r2   r   ra   s      r   rc   z"TooManyAccentuatedPlugin.feed_info&  s2    "##q(# r   c                     d| _         d| _        y rz   r   r;   s    r   re   zTooManyAccentuatedPlugin.reset-  s     !"#r   c                f    | j                   dk  ry| j                  | j                   z  }|dk\  r|S dS )Nr   r|   gffffff?r   )r<   ratio_of_accentuations     r   rg   zTooManyAccentuatedPlugin.ratio1  s=      1$'+'>'>AVAV'V(=(E$N3Nr   NrS   rh   ri   r   r
   r   r   r   r     s,    :I))$ O Or   r   c                  :    e Zd ZdZddZddZddZed	d       Zy)
UnprintablePlugin_unprintable_countrq   c                     d| _         d| _        y rz   r   r;   s    r   r=   zUnprintablePlugin.__init__>  s    '(%&r   c                    |j                   s+|j                  s|dk7  r|dk7  r| xj                  dz  c_        | xj                  dz  c_        y)rw   u   ﻿r   N)r-   r)   r   rq   ra   s      r   rc   zUnprintablePlugin.feed_infoB  sD     

NNV#X%##q(#"r   c                    d| _         y rz   )r   r;   s    r   re   zUnprintablePlugin.resetM  s
    "#r   c                Z    | j                   dk(  ry| j                  dz  | j                   z  S )Nr   r|   r   )rq   r   r;   s    r   rg   zUnprintablePlugin.ratioP  s/      A%''!+t/D/DDDr   NrS   rh   ri   r   r
   r   r   r   r   :  s,    :I'	#$ E Er   r   c                  :    e Zd ZdZddZddZddZed	d       Zy)
SuspiciousDuplicateAccentPlugin_successive_countrq   _last_latin_character_last_was_accentuatedc                <    d| _         d| _        d | _        d| _        y ru   r   r;   s    r   r=   z(SuspiciousDuplicateAccentPlugin.__init__a  s"    &'%&15"+0"r   c                   | xj                   dz  c_         | j                  |j                  r}| j                  rq|j                  r/| j                  j                         r| xj                  dz  c_        t        |      t        | j                        k(  r| xj                  dz  c_        || _        |j                  | _        yr   )rq   r   r2   r   r+   rL   r   r!   ra   s      r   rc   z)SuspiciousDuplicateAccentPlugin.feed_infoh  s    "&&2  **zzd88@@B&&!+&Y'=9S9S+TT&&!+&%."%)%5%5"r   c                <    d| _         d| _        d | _        d| _        y ru   r   r;   s    r   re   z%SuspiciousDuplicateAccentPlugin.resetw  s"    !" !%)"%*"r   c                Z    | j                   dk(  ry| j                  dz  | j                   z  S )Nr   r|   rx   )rq   r   r;   s    r   rg   z%SuspiciousDuplicateAccentPlugin.ratio}  s/      A%&&*d.C.CCCr   NrS   rh   ri   r   r
   r   r   r   r   X  s.    I16+ D Dr   r   c                  :    e Zd ZdZddZddZddZed	d       Zy)
SuspiciousRange"_suspicious_successive_range_countrq   _last_printable_seen_last_printable_rangec                <    d| _         d| _        d | _        d | _        y rz   r   r;   s    r   r=   zSuspiciousRange.__init__  s"    78/%&04!15"r   c                `   | xj                   dz  c_         |j                  s|j                  s|t        v rd| _        d| _        y| j                  || _        t        |      | _        y| j
                  }t        |      }t        ||      r| xj                  dz  c_        || _        || _        yr   )	rq   r-   r7   r   r   r   r"    is_suspiciously_successive_ranger   )r<   r(   rb   unicode_range_aunicode_range_bs        r   rc   zSuspiciousRange.feed_info  s    "::y4P'P(,D%)-D&$$,(1D%)6y)AD&&*&@&@&3I&>+O_M33q83$-!%4"r   c                <    d| _         d| _        d | _        d | _        y rz   )rq   r   r   r   r;   s    r   re   zSuspiciousRange.reset  s"     !23/$(!%)"r   c                ^    | j                   dk  ry| j                  dz  | j                   z  }|S )NrH   r|   rx   )rq   r   )r<   ratio_of_suspicious_range_usages     r   rg   zSuspiciousRange.ratio  s<      B& 33a7!!2"' /.r   NrS   rh   ri   r   r
   r   r   r   r     s,    I65.* / /r   r   c                  :    e Zd ZdZddZddZddZed	d       Zy)
SuperWeirdWordPlugin_word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchrq   _bad_character_count_buffer_length_buffer_last_char_buffer_last_char_accentuated_buffer_accent_count_buffer_glyph_count_buffer_upper_countc                    d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d | _        d| _	        d| _
        d| _        d| _        y ru   r   r;   s    r   r=   zSuperWeirdWordPlugin.__init__  sj     !$%() */!). %&)*!#$-138*)*!() () r   c                   |j                   r| xj                  dz  c_        || _        |j                  r| xj                  dz  c_        |j
                  | _        |j
                  r| xj                  dz  c_        | j                  s+|j                  r|j
                  r|j                  sd| _        |j                  r| xj                  dz  c_        y| j                  sy|j                  s|j                  st        |      r| xj                  dz  c_        | j                  }| xj                   |z  c_        |dk\  r| j                  |z  dk\  rd| _        n}| j                  rF| j                  j%                         r,| j                  |k7  r| xj&                  dz  c_        d| _        n+| j                  dk(  rd| _        | xj&                  dz  c_        |dk\  rM| j                  rA| j                  dkD  xr | j                  |z  dk  }|s| xj&                  dz  c_        d| _        | j"                  r1| xj(                  dz  c_        | xj*                  |z  c_        d	| _        d	| _        d| _        d| _        d	| _        d| _        d| _        d| _        y|d
vrE|j,                  s8|j.                  r+d| _        | xj                  dz  c_        || _        d	| _        yyyy)rw   r   TN         ?   r   r}   F>   _-<=>|~)r*   r   r   r+   r   r2   r   r   r   r3   r6   r   r-   r7   r   r   rq   r   rL   r   r   r   r.   r8   )r<   r(   rb   buffer_lengthprobable_camel_caseds        r   rc   zSuperWeirdWordPlugin.feed_info  s   ::1$%.D"zz((A-(151A1AD.))Q.),,t'7'7+/(}}((A-(""::|I'>!!%!4!4M!!]2!!,,}<C04D-66..66800MA,,1,04D---204D-,,1,"t'?'?,,q0 H00=@CG %
 ,,,1,04D-(($$)$))]:),1)',D$"#D%)D"16D.()D%'(D$'(D$@@JJ(,D%1$%.D"16D.   Ar   c                    d| _         d | _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _	        d| _
        d| _        d| _        y ru   )r   r   r   r   r   r   r   rq   r   r   r   r   r   r;   s    r   re   zSuperWeirdWordPlugin.reset,  sj    !%-2*$)!#(   !$%!#$ $%!#$ #$ r   c                r    | j                   dk  r| j                  dk(  ry| j                  | j                  z  S )N
   r   r|   )r   r   r   rq   r;   s    r   rg   zSuperWeirdWordPlugin.ratio;  s7    r!d&>&>!&C((4+@+@@@r   NrS   rh   ri   r   r
   r   r   r   r     s0    I *$I7V% A Ar   r   c                  >    e Zd ZdZdZddZd	dZddZed
d       Z	y)CjkUncommonPluginz<
    Detect messy CJK text that probably means nothing.
    rq   _uncommon_countc                     d| _         d| _        y rz   r   r;   s    r   r=   zCjkUncommonPlugin.__init__K  s    %&$%r   c                j    | xj                   dz  c_         |t        vr| xj                  dz  c_        yyr   )rq   r   r   ra   s      r   rc   zCjkUncommonPlugin.feed_infoO  s2    "11  A%  2r   c                     d| _         d| _        y rz   r   r;   s    r   re   zCjkUncommonPlugin.resetV  s     ! r   c                l    | j                   dk  ry| j                  | j                   z  }|dkD  r|dz  S dS )Nr   r|   r   r   r   )r<   uncommon_form_usages     r   rg   zCjkUncommonPlugin.ratioZ  sD      1$%)%9%9D<Q<Q%Q ,?+D"R'M#Mr   NrS   rh   ri   )
rW   rX   rY   rZ   r[   r=   rc   re   rk   rg   r
   r   r   r   r   C  s3     8I&&! N Nr   r   c                  :    e Zd ZdZddZddZddZed	d       Zy)
ArchaicUpperLowerPlugin	_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalrq   _last_alpha_seen_last_alpha_seen_upper_last_alpha_seen_lower_current_ascii_onlyc                    d| _         d| _        d| _        d| _        d| _        d | _        d| _        d| _        d| _        y )NFr   Tr   r;   s    r   r=   z ArchaicUpperLowerPlugin.__init__t  sI    	45,23*890%&,0,1#,1#)- r   c                *   |j                   xr |j                  }| }|r| j                  dkD  r| j                  dk  r7|j                  s+| j                  s| xj
                  | j                  z  c_        d| _        d| _        d| _        d| _        | xj                  dz  c_	        d| _        y| j                  r|j                  sd| _        | j                  h|j                  r| j                  s|j                  r=| j                  r1| j                  r| xj                  dz  c_        d| _        nd| _        nd| _        | xj                  dz  c_	        | xj                  dz  c_        || _        |j                  | _        |j                  | _        y)rw   r   @   NFr   Trx   )r*   r0   r   r.   r   r   r   r   r   rq   r/   r+   r   r,   r   )r<   r(   rb   is_concerned	chunk_seps        r   rc   z!ArchaicUpperLowerPlugin.feed_info  sS   !ZZ>D,>,>**	==A44:

0088668 23D.34D0$(D!DI!!Q&!'+D$##DMM',D$  ,

t::

t::9966!;6 %DI $DI!	",,1, )&*jj#&*jj#r   c                    d| _         d| _        d| _        d| _        d | _        d| _        d| _        d| _        d| _        y )Nr   FT)	rq   r   r   r   r   r   r   r   r   r;   s    r   re   zArchaicUpperLowerPlugin.reset  sI     !/0,-.*340 $&+#&+#	#' r   c                T    | j                   dk(  ry| j                  | j                   z  S )Nr   r|   )rq   r   r;   s    r   rg   zArchaicUpperLowerPlugin.ratio  s*      A%77$:O:OOOr   NrS   rh   ri   r   r
   r   r   r   r   f  s/    
I.+1Z	( P Pr   r   c                  :    e Zd ZdZddZddZddZed	d       Zy)
ArabicIsolatedFormPluginrq   _isolated_form_countc                     d| _         d| _        y rz   r   r;   s    r   r=   z!ArabicIsolatedFormPlugin.__init__  s    %&)*!r   c                     d| _         d| _        y rz   r   r;   s    r   re   zArabicIsolatedFormPlugin.reset  s     !$%!r   c                    | xj                   dz  c_         |j                  t        z  r| xj                  dz  c_        yyr   )rq   r1   r   r   ra   s      r   rc   z"ArabicIsolatedFormPlugin.feed_info  s6    "::--%%*% .r   c                X    | j                   dk  ry| j                  | j                   z  }|S )Nr   r|   r   )r<   isolated_form_usages     r   rg   zArabicIsolatedFormPlugin.ratio  s0      1$%)%>%>AVAV%V""r   NrS   rh   ri   )	rW   rX   rY   r[   r=   re   rc   rk   rg   r
   r   r   r   r     s*    <I+&+ # #r   r      )maxsizec                   | |y| |k(  ryd| v rd|v ryd| v sd|v ryd| v sd|v r	d| v sd|v ry| j                  d      |j                  d      }}|D ]  }|t        v r||v s y | dv |dv }}|s|r	d| v sd|v ry|r|ryd	| v sd	|v rd| v sd|v ry| d
k(  s|d
k(  ryd| v sd|v s| dv r!|dv rd| v sd|v ryd| v sd|v ry| d
k(  s|d
k(  ryy)za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    TFLatin	Emoticons	Combining )HiraganaKatakanaCJKHangulzBasic Latin)r   r   PunctuationForms)splitr   )r   r   keywords_range_akeywords_range_belrange_a_jp_charsrange_b_jp_charss          r   r   r     s    /"9/)/!g&@o%)G 	?"g&@&+*H 	c"c" '
  00!!	 	
	

 	33 ' 	, E_$<,?"h/&AO#u'?m+-/O 	 E_$<3377O+}/Oo%O)Cm+-/Or   i   c           	        t        |       }|dk  rd}n
|dk  rd}nd}t               }t               }t               }t	               }t               }	t               }
t               }t               }t               }|j                  }|j                  }|j                  }|j                  }|	j                  }|
j                  }|j                  }|j                  }|j                  }t               }|j                  }t        d||      D ]"  }| |||z    D ]  } ||        |||        |||        |||       |j                  r |||        |||       |j                  sQ |||       |j                   r	 |||       |j"                  r	 |||       |j$                  s |||        |j&                  |j&                  z   |j&                  z   |j&                  z   |	j&                  z   |
j&                  z   |j&                  z   |j&                  z   |j&                  z   }||k\  s# n  |d        |d|        |d|        |d|       |j&                  |j&                  z   |j&                  z   |j&                  z   |	j&                  z   |
j&                  z   |j&                  z   |j&                  z   |j&                  z   }|rt)        d      }|j+                  t,        d	| d
| d|        |dkD  r8|j+                  t,        d| dd         |j+                  t,        d| dd         |||||	|
|||f	D ]1  }|j+                  t,        |j.                   d|j&                          3 t1        |d      S )zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    i  rF   r   r   r?   r   
charset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=   zStarting with: NzEnding with: iz: r   )lenrm   r   r   r   r   r   r   r   r   rc   r&   rR   ranger)   r*   r3   r4   r5   rg   r   logr   	__class__round)decoded_sequencemaximum_thresholddebugseq_lenstepd_spd_tad_upd_sdad_srd_swd_cud_aud_ai	d_sp_feed	d_ta_feed	d_up_feed
d_sda_feed	d_sr_feed	d_sw_feed	d_cu_feed	d_au_feed	d_ai_feedrb   info_updateblock_startr(   mean_mess_ratiologgerdts                                 r   
mess_ratior'  *  s    '(G}	4 .N-OD%=%?D/1D-L-NE+-D!5!7D/1D$;$=D%=%?D IIIJIIIII ZD++K Q. ;
)+d8JK 	/I	" i&i&i& ~~)T*)T* zz)T*::y$/;;i.>>i.3	/8 JJjjjj kk jj	
 jj jj jj jj 	 //S;
X 	D$$$ JJjjjj kk jj	
 jj jj jj jj 	 /0

1156GGX Y!!2 35	
 R<JJu0@"0E/FGHJJu.>su.E-FGHtUD$dDI 	=BJJub
;<	= !$$r   )r   
str | Noner   r(  rT   rP   )g?F)r  rV   r  rj   r  rP   rT   rj   )1
__future__r   sys	functoolsr   loggingr   version_infotypingr   typing_extensionsImportErrorconstantr   r   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   r    r!   r"   r$   __annotations__r&   r]   rm   r   r   r   r   r   r   r   r   r   r'  r
   r   r   <module>r4     sC   " 
  v+      '>I-	9EAS A [I [I [I|" "< .L'9 .L .Lb O1 O O6 E* E E: )D&8 )D )DX 4/( 4/ 4/n BA- BA BAJ N* N ND YP0 YP YPx #1 # #8 4FF2<F	F FR 4IN}%}%.3}%BF}%
}% }%  	s   E EE