o
    I&iL                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlmZ	 d dl
mZ d dlZd dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d dl
mZ dd	lmZmZ eeZej j!Z!ej j"Z"ej j#Z#ee!j$e!j%e!j&e!j'e!j(e!j)e!j*e!j+e!j,e!j-e!j.e!j/e!j0e!j1e!j2e!j3e!j4e!j5e!j6e!j7e!j8e!j9e!j:e!j;e!j<ee!j=e!j>e!j?e!j@jAgZBi e eBZCe!jDe!jEjFe!jGe!jHe!jIe!jJjKe!jLe!jMe!jNg	ZOeeCeO d
d ZPePe!jQjRgdd ZSePe!jTjRgdd ZUePe!jVjFgdddddZVePe!jWgedddZWePe!jXgdd ZXePe!jYjFgdd ZYePe!jZgdd ZZePe!j[gdd Z[ePe!j\j]gdddZ^ePe!j_gedd  Z_ePe!j`gedd!d"Z`ePe!jaged#d$ ZaePe!jbjFgdd%d&ZbePe!jcgd'd( ZcePe!jdgdd)d*d+ZdePe!jegd,d- ZeePe!jfe!jggd.d/ ZfePe!jhjFgdd0d1d2ZhePe!jie"jigd3d4 ZiePe!jje"jjgd5d6 ZjePe!jkdd8d9ZkePe!jldd:d;ZlePe!jmgd<d= ZmePe!jngd7d>d?d@ZnePe!jojFgdAdB ZpePe!jojqgdCdD ZrdEejKdFeejs dGejsfdHdIZtePe!juddddJdKdLZuePe!jvddddJdMdNZvePe!jwdddd7d7ejxdOdPdQZwePe!jyjFddddJdRdSZyePe!jyjzddddJdTdUZ{ePe!j|jFdVdW Z|ePe#j}jFdXejKdYe~dZed[ed\ed]ejqdGejKfd^d_ZePe#jjFdXejKdYe~dZed[ed\ed]ejqdGejKfd`daZePe#j}jdXejKdYejKdZejKd[ed\ed]ejqdGejKfdbdcZePe#jjdXejKdYejKdZejKd[ed\ed]ejqdGejKfdddeZePej jjdfdg ZePe!jge	 	 	7ddhejKdiejKdjedkedledGejKfdmdnZePe!jjddodpZePe!jjddqdrZePe!jjdsdt Ze!jjFejjjePe!jdXejKduejKdvejejK dwejejK dxejejK dyedze~d{e~fd|d}Ze dd~d Zdd ZePe!jdd ZdS )    N)Optional)core_aten_decompositionsget_decompositionsremove_decompositions)_grid_sampler_2dpw_cast_for_opmath)extra_random_decomps)	out_dtype)type_to_dtype   )configinductor_primsc                 C   s8   t | r| gn| D ]}|tv rtd|  q	t| tS )Nzduplicate decomp: %s)callabledecompositionslogwarningdecompregister_decomposition)opsop r   HC:\wamp64\www\opt\env\Lib\site-packages\torch/_inductor/decomposition.pyr   U   s
   r   c                 C      d S Nr   tensormsgr   r   r   assert_async_msg_decomp^      r   c                 C   r   r   r   r   r   r   r   "functional_assert_async_msg_decompd   r   r   )minmaxc                C   r   r   r   )symbolr    r!   r   r   r   sym_constrain_range_for_sizei   r   r#   c                 C   s(   |d ur	|  |} |d ur| |} | S r   )	clamp_min	clamp_max)xr    r!   r   r   r   clampn   s
   

r'   c                 K   s:   | d}|d u rtt||d< tj| |fi |S tS )Ndtype)getr
   typeatenfullNotImplemented)size
fill_valuekwargsr(   r   r   r   r,   x   s
   
r,   c                    sN   dgt   }t|D ]\}}|||< qtj fdd|D fi ||S )Nr   c                    s   g | ]} | qS r   r   ).0lr.   r   r   
<listcomp>   s    z"empty_permuted.<locals>.<listcomp>)len	enumeratetorchemptyZpermute)r.   Zphysical_layoutr0   permpr2   r   r3   r   empty_permuted   s   
&r;   c                 C   st   |
d r
| j jdkrtS t| dgttd|   }t| |||||||||	|
d |
d dg\}}}|||fS )N   cudar   r   F)	devicer*   r-   r+   sumlistrangedimconvolution_backward)Zgrad_outputinputweightZ
bias_sizesZstridepaddingZdilationZ
transposedZoutput_paddinggroupsZoutput_maskZ	grad_biasZgrad_inpZgrad_weight_r   r   r   rC      s"    

rC   c                 C   s   t | dtd  S )N      ?g       @)r7   r   mathr&   r   r   r   log2      rL   c                 C   s   d| }t | | d|  S )Ng      $@rI   )r+   round)r&   decimalsZten_pow_decimalsr   r   r   	round_dec   s   rP   c                 C   s   t jr| jd dkr| d|d jdd}|S | jjdkrA| ddkrA|ddkrAtj| 	d|	d ddddS t
S )Nr   r<   rB   cpuTrB   keepdim)r   coordinate_descent_tuningshape	unsqueezer?   r>   r*   r.   r7   squeezer-   )selfZbatch2outr   r   r   bmm   s   r\   c                 C   s   | j jdkrV|ddkr/|ddkr/tj|d|d dddd}|| ||   S |ddkrV|ddkrV|ddkrV|j| jddd}|| ||   S tS )NrS   r   r   rQ   TrT      )	r>   r*   r.   r7   r?   rY   rX   Tr-   )rZ   Zmat1Zmat2betaalphar[   r   r   r   addmm   s   *ra   c                    s  t jrjd dks jd dkrd d jddS jjdkrddkr^ddkr^ ddkr^j jkr^t	
t	
  dkr^t	 fdd	tdD S ddkr ddkrt	jd d dd
ddS tS )Nr   r   r<   rR   rS   rQ       c                    s    g | ]}|d d f   qS r   r   )r1   iinput2rZ   r   r   r4      s     zmm.<locals>.<listcomp>TrT   )r   rV   rW   rX   r?   r>   r*   r.   r(   r7   ZnumelcatrA   rY   r-   )rZ   re   r   rd   r   mm   s"   $rg   c                 C   sb   dd }t t|| }t|dkr|d  S dt|  k r%t| k r/n tS tj||S tS )Nc                 S   s   t | jdkp| jd dkS )Nr   r   )r5   rW   rK   r   r   r   non_empty_tensor   s   zcat.<locals>.non_empty_tensorr   r   )r@   filterr5   cloner+   rf   defaultr-   )ZtensorsrB   rh   Zfiltered_tensorsr   r   r   rf      s   rf   c                 C   sb   |   rtt| jtdt| j| jS t| dk tj	d}tt| tdd}|| S )Nnanr   g        )

is_complexr7   whereisnanrealfloatatan2imagrJ   pi)r&   retrl   r   r   r   angle  s   rv   r`   c                C   sv   t | o|  }t |o| }|r|stS |}|d ur"|| }t | j|j}| | jj||jj |S r   )r7   Z	is_tensorrm   r-   Zpromote_typesr(   viewrp   )r&   yr`   Zx_is_complex_tensorZy_is_complex_tensorzZcomplex_typer   r   r   add  s   "r{   c                 C   s   |   rJ d| S )NzTODO: implement this)rm   rZ   r   r   r   conj_physical  s   r}   c                 C   s   | S r   r   r|   r   r   r   lift$  r   r~   )	generatorc                C   s    |d u sJ t j| t jd| k S )N)r(   )r7   	rand_likefloat32)rZ   r   r   r   r   	bernoulli)  s   r   c                 C   s   t t ||| kB | |S r   r7   rn   ro   rZ   otherr   r   r   fmin/     r   c                 C   s   t t ||| k B | |S r   r   r   r   r   r   fmax4  r   r   Fc                 C       | j tjkrtj| ||dS tS NrT   )r(   r7   boolanyr-   rZ   rB   rU   r   r   r   amax9     r   c                 C   r   r   )r(   r7   r   allr-   r   r   r   r   amin@  r   r   c                 C   s   t | ||| S r   )r7   Znarrowrj   )rZ   rB   startlengthr   r   r   narrow_copyG     r   implicitc                C   s   t j| ||d S )Nr   )r+   expandrj   )rZ   r.   r   r   r   r   expand_copyL  r   r   c                 C   s   t | | S r   )r+   rx   rj   )rZ   r.   r   r   r   view_copy_defaultQ  s   r   c                 C   s   |  | S r   )torj   )rZ   r(   r   r   r   view_copy_dtypeV  s   r   r   memory_formatreturnc                 C   s    |t ju s	|d u rt| S |S r   )r7   preserve_formatutilsZsuggest_memory_format)r   r   r   r   r   get_like_layout[  s   
r   )r(   r>   r   c                K   :   t jg |  f|p| j|p| jd|jt| |dS Nr(   r>   r   )r7   Zrandr.   r(   r>   r   r   rZ   r(   r>   r   r0   r   r   r   r   e     

r   c                K   r   r   )r7   Zrandnr.   r(   r>   r   r   r   r   r   r   
randn_likeo  r   r   )r(   layoutr>   
pin_memoryrequires_gradr   c                C   s>   t jg |  ||p| j|p| j|p| j|djt| |dS )N)r(   r   r>   r   r   )r7   r,   r.   r(   r   r>   r   r   )rZ   r/   r(   r   r>   r   r   r   r   r   r   	full_likey  s   

r   c                K   s@   t jjd|g |  f|p| j|p| jd|jt| |dS )Nr   r   r   r+   randintlowr.   r(   r>   r   r   )rZ   highr(   r>   r   r0   r   r   r   randint_like  s   

r   c                K   s@   t jj||g |  f|p| j|p| jd|jt| |dS r   r   )rZ   r   r   r(   r>   r   r0   r   r   r   randint_like_low  s   

r   c                 K   s   t jjd| |fi |S )Nr   )r+   r   r   )r   r.   r0   r   r   r   r     rM   r   rD   scale
zero_point	quant_min	quant_maxr(   c                 C   B   | j tjkr| tj} d| }tt| | | |||S NrI   r(   r7   Zbfloat16r   r   r'   rN   rD   r   r   r   r   r(   Z	inv_scaler   r   r   'quantize_per_tensor_default_decomp_impl     	r   c                 C      |  tj| | S r   r   r7   r   rD   r   r   r   r   r(   r   r   r   )dequantize_per_tensor_default_decomp_impl     	r   c                 C   r   r   r   r   r   r   r   &quantize_per_tensor_tensor_decomp_impl  r   r   c                 C   r   r   r   r   r   r   r   (dequantize_per_tensor_tensor_decomp_impl  r   r   c                 C   sP   dd }|| dddf }|| ddd f }| dd df  tj| | S )Nc                    st    fdddD \}}}}t jdkr&||d>  |d>  |d>  tjd S |d> |d>  |d>  | tjd S )	Nc                 3   s$    | ]} d |f  tjV  qdS .N)r   r7   Zint32)r1   nu8r   r   	<genexpr>  s   " zPq_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.<locals>.<genexpr>)r   r   r<      little   r]      r   )sys	byteorderrx   r7   r   )r   r&   ry   rz   wr   r   r   bitcast_u8_to_f32  s   
((z=q_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.ir   )packedr   scalesoffsetsr   r   r   "q_embedding_bag_byte_unpack_decomp  s    r   agridinterpolation_modepadding_modealign_cornersc                 C   s@   | j t dko|dko| jtjd }t| |||||d}|S )NrS   r   r   )r   r   r   r   _expand_grid)r>   r7   Zis_contiguousZcontiguous_formatdecomp_grid_sampler_2d)r   r   r   r   r   r   outputr   r   r   grid_sampler_2d  s   r   c                 C      t jj| t j|||dS Nrw   )r+   _foreach_addList_foreach_mulrZ   Zleft_tensorsZright_tensorsZscalarr   r   r   _foreach_addcmul_scalar     r   c                 C   r   r   )r+   r   r   Z_foreach_divr   r   r   r   _foreach_addcdiv_scalar   r   r   c              	   C   s"   t j| t jt j|| |S r   )r+   r   r   r   ScalarZ_foreach_sub)Zstart_tensorsZend_tensorsrE   r   r   r   _foreach_lerp_scalar'  s   r   rE   biasrunning_meanrunning_vartrainingexponential_average_factorepsilonc              
   C   sB   t | |||||||\}}	}
|r||	|
fS ||d|dfS )Nr   )r+   native_batch_normZ	new_zeros)rD   rE   r   r   r   r   r   r   r   bcr   r   r   miopen_batch_norm1  s    

r   c                   C   s   i t tS r   )r   r   r   r   r   r   fast_random_decompsQ  s   r   c                   C   s   t jrtS t S )z"decomps can change based on config)r   Zfallback_randomr   r   r   r   r   r   select_decomp_tableV  s   r   c                 C   sF   | j jdkr!t| |g\} }|ddd }t| |||S tS )Nr=   rQ   r   r   )	r>   r*   r+   Zbroadcast_tensorsZreshapeZcumsumr   Zmasked_scatter_with_indexr-   )rZ   masksourceZ
source_idxr   r   r   masked_scatter]  s
   r   )NNr   )r   r   )NF)r   r   F)r   )	functoolsloggingrJ   r   typingr   r7   Ztorch._decomp_decompr   Ztorch._prims_commonZ_prims_commonr   Z$torch.ao.quantization.fx._decomposedr   r   r   Ztorch._decomp.decompositionsr   r   r   Z$torch._decomp.decompositions_for_rngr   Z!torch._higher_order_ops.out_dtyper	   r
    r   r   	getLogger__name__r   r   r+   ZprimsZquantized_decomposedZ_adaptive_avg_pool2d_backwardZarangeZbitwise_and_Zbitwise_or_Z
clamp_min_distZ
empty_likeflipZgeluZhardtanhZindex_selectlcmZ
leaky_reluZlinalg_vector_normZ_log_softmaxZ max_pool2d_with_indices_backwardZ_native_batch_norm_legitZ#_native_batch_norm_legit_functionalZ$_native_batch_norm_legit_no_trainingr   Znative_group_normZnative_layer_normZ_softmaxZsin_Zsqrt_Z_to_copyZtril_indicesZtriu_indicesZupsample_bilinear2dZvecZinductor_decompositionsr   Z_unsafe_indexZ#_scaled_dot_product_flash_attentionrk   r%   r$   ZglusplitZTensorrY   r?   ZunbindZdecomps_to_excluder   Z_assert_asyncr   r   Z_functional_assert_asyncr   r#   r'   r,   r;   rC   rL   rN   rO   rP   r\   ra   rg   rf   rv   r{   r}   r~   Zdetach_r   r   r   r   r   r   r   Z	view_copyr   r(   r   r   r   r   r   r   r   r   Z	low_dtyper   r   Zquantize_per_tensorrq   intr   Zdequantize_per_tensorr   r   r   r   Z	quantizedZembedding_bag_byte_unpackr   r   r   Z_foreach_addcmulr   r   Z_foreach_addcdivr   Z_foreach_lerpr   r   Zpy_implZ_CZDispatchKeyZAutograd	lru_cacher   r   r   r   r   r   r   <module>   s   
"
	








 


















		













	



