o
    I&iU]                 !   @   s)  U d dl Z d dlZd dlZd dlZd dlmZ d dl mZmZ d dlm	Z	m
Z
 d dlmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlmZmZm Z  d dl!m"Z" d dl#m$Z$ d d	lm%Z%m&Z&m'Z'm(Z( d d
l)m*Z*m+Z+m,Z,m-Z- d dl.m/Z0 d dl1m2Z2 ej3j4Z4g Z5ee6 e7d< ej8j9j:Z:G dd deZ;	d1dedej<de=fddZ>ee>ej<j?ddZ@ee>ej<j?dZAee>ej<jBdZCde deDde fddZEe"e:jFe-deAde d e fd!d"ZFe"e:jGe-deAde d e fd#d$ZGe"e:jHe-deAde de d%eId&eIfd'd(ZHe"e:jJe-deAd)e d*eId+eId,eId-e=d.e fd/d0ZJe"e:jKjLgd1d2 ZMe"e:jKj gd3e fd4d5ZNe"e:jOe- eAd6e de fd7d8ZOe"e:jPe-deAd)e d6e fd9d:ZPe"e:jQe-dd)e d6e d;eId<eIfd=d>ZQe"e:jRe- eAd6e de fd?d@ZRe"e:jSe- eAd)e d6e de fdAdBZSe"e:jTe-dd)e d6e d&eIfdCdDZTe"e:jUe-deAd)e d6e dEeIdFe=fdGdHZUe"e:jVe-deAd2dJe d6e dKe6fdLdMZVe"e:jWeAd)e dNe fdOdPZWe"e:jXe- eAd6e de fdQdRZXe"e:jYe-deAd)e d6e de fdSdTZYe"e:jZd6e dUe de fdVdWZZe"e:j[d)e d6e dUe dee e f fdXdYZ[e"e:j\e:j\j]^e4j_e- eA		d3d6e dZe d[eId\eId]e=d^eej` de fd_d`Z\e"e:jae:jaj]^e4j_eA		d3d6e dZe d[eId\eId]e=d^eej` de fdadbZae"e:jbe- eAd)e d6e dZe d[eId\eId]e=dFe=de fdcddZbe"e:jce-deAd)e d6e dee de fdfdgZcdhe dieDfdjdkZddlejefdmdnZfe"e:jge- eAe;jhjifd6e doe dieDde fdpdqZge"e:jje-deAd)e dNe doe dieDfdrdsZje"e:jke- eAe;jhjidtfd6e doe dieDd%eIfdudvZke"e:jlj]eAd)e d6e doe dieDd%eIf
dwdxZle"e:jljmeAd)e d6e doe dieDd%eIde fdydzZne"e:joj]eAd)e d6e doe dieDd{eIf
d|d}Zoe"e:jojpeAd)e d6e doe dieDd{eIde fd~dZqd)e d6e doe dUee  dieDdeDde de fddZre"e:jse-deAd)e d6e deDde fddZse"e:jte-dd)e d6e doe dUee  dieDdeDde de fddZte"e:jue-dd)e d6e doe dUee  dieDdeDde de fddZue"e:jve- eAde;jhjifd6e doe dUee  dieDde f
ddZve"e:jwe-deAde;jhjifd)e d6e doe dUee  dieDde fddZwe"e:jxe- eAe;jhjifdNe doe dieDde fddZxe"e:jye-deAe;jhjifd)e d6e doe dieDde f
ddZye"e:jze- d4dNe de deIfddZze"e:j{e- de de de fddZ{e"e:j|e- d)e deeD deDdeDdeDdeDfddZ|e"e:j}j 	 			d5d6e deDdeeD deeD deDf
ddZ~e"e:je- d)e deeD deDdeDfddZe"e:je- d)e deeD deDdeDdeDf
ddZd)e de dejefddZe"e:je-de@d)e de deDdejefddZe"e:je- e@d)e de deDdejefddZdd Ze"e:je- eAdNe deeD deeD deeD deeD de fddZe"e:je- eAdNe deeD deeD deeD deeD deeD de fddZe"e:je- d)e de d+eIfddZe"e:je- dJe deeD deDdeDdeDde fddƄZe"e:jj]eA	d6d)e d6e deeI de fddɄZe"e:je:jj]^e4je:jj]^e4jdNe deIdee= fdd̄Ze"e:je-dd΃dNe deIdee= fddЄZe"e:je- de deDde=fddӄZe"e:je- de deDde=fddՄZe"e:jj d7d6e de d*eIde fddׄZe"e:jjLd7d6e deId*eIde fddلZe"e:je- 			d8dUe de deDde=de=de fddZe"e:je- d)e de deDdeDde=f
ddZdeeD fddZe"e:j	 d9d6e deeD deDdee  fddZe"e:jj d9dNe deDdeDdee df fddZe"e:jj]	 d9dNe deeD deDdee df fddZe"e:jj d9d6e deDdeDdee df fddZe:jj^e4j	 d9d6e de deDdee  fddZe"e:je- eAd:d6e de de d%eDd*eDf
ddZe"e:je- eA			d;d6e de de d%eDd*eDde=fddZe"e:je- eAd:d6e de de d%eDd*eDf
ddZe"e:jj]eAd)e dNe de de d ee  deDdeDdeDdeDdee= deee  ee  ee  f fddZe"e:jjpd)e dNe de de d ee  deDdeDdeDdeDdee= dej dej dej deee  ee  ee  f fd	d
Zdee  dee  fddZe"e:jj]de dNe deeD de de dUee  dee  dee= deee  ee  ee  f fddZe"e:jjpde dNe deeD de de dUee  dee  dee= dej dej dej deee  ee  ee  f fddZdNe dUee  dee  dee  dee  d]e=deIdeIde=dee e e ee  ee  f fddZe"e:je-ddddNe dUee  dee  dee  dee  d]e=deIdeIdee e e f fddZe:jj]^e4je:jj]^e4jdNe dUee  dee  dee  dee  d]e=deIdeIdee e e f fdd Ze:jj]^e4jd9dee  fd!d"Ze"e:jj]dNe dUee  dee  de de deIdeIdee e e f fd#d$Ze"e:jj]dNe dUee  dee  de de d]e=deIdeIdee e e f fd%d&Ze"e:jjdNe dUee  dee  d]e=deIdeIdee e e f fd'd(Ze"e:jj]dNe dUee  dee  de de d]e=deIdeIdee e e e e f fd)d*Ze"e:je-dd΃eAd6d+d,Zd-d. Zd/d0 Ze"e:je- ddddddd1de dleeje d2eej d3e=d4e=d5eej fd6d7Ze"e:je:je:jge- d8d9 Ze:jj]^e4je"e:je-ddΐdd:dNe dUe dee  dee  dee  d]e=d;eId<eIfd=d>Zd?d@ Ze"e:jj]de dNe dUee  dee  dee  dee  dee  de=deIdee= dee ee  ee  f fdAdBZe"e:jjpde dNe dUee  dee  dee  dee  dee  de=deIdee= dej dej dej dee ee  ee  f fdCdDZe"e:je-ddΐddNe d)e dUe dee  dee  dee  dEee  d<eIdFe fdGdHZe"e:je- eAdNe deeDeDf fdIdJZe"e:jddKde'deDde'dLe'd*e&f
dMdNZe"e:je- ddKde'deDde'dLe'd*e&f
dOdPZddKde'deDde'dLe'dQe=d*e&fdRdSZe"e:jăde'deDde'dLe'fdTdUZe"e:jŃe- de'deDde'dLe'fdVdWZde'deDde'dLe'dQe=f
dXdYZe"e:jǃe-ddeeAd6e dee e f fdZd[Ze"e:jȃe- 	\	t	d<de d]ee=eDeIf d^ee=eDeIf d^eej` fd_d`Ze"e:jɃd=dadbZɐdcdd Zʐdedf Ze"e:jj̓e:jj͠^e4je:jj͠^e4jdgdh Ze"e:jj̓e:jj͠^e4je:jj͠^e4jdidj Ze"e:jj̓e:jj͠^e4je:jj͠^e4jdkdl Ze"e:jj̓e:jj͠^e4je:jj͠^e4jdmdn Ze"e:jj̓e:jj͠^e4je:jj͠^e4jdodp Ze"e:jj̓e:jj͠^e4je:jj͠^e4jdqdr Zؐd1dsdtZe"e:jj]e:jj]^e4jeA	d6dNe deeD dueeI de fdvdwZe"e:jj]e:jj]^e4jeA	d6dNe deeD dueeI de fdxdyZϐdzd{ Ze"e:jj]e:jj]^e4jeA		d>dNe deeD d|eeI d}eeI de f
d~dZe"e:jj]e:jj]^e4jeA		d>dNe deeD d|eeI d}eeI de f
ddZe"e:jj]e:jj]^e4jeA			d?dNe deeD deeI d|eeI d}eeI de fddZe"e:jj]e:jj]^e4jeA			d?dNe deeD deeI d|eeI d}eeI de fddZאdd Zېdd Zܐdd Zݐdd Z	d1ddZߐdd Zdd Zd1ddZd1ddZdd Ze"e:jje:jj^e4je:jj^e4jdd Ze"e:jje:jj^e4je:jj^e4jdd Ze"e:jje:jj^e4je:jj^e4jdd Ze"e:jje:jj^e4je:jj^e4jdd Zdd Zd1ddZd1ddZdd Ze"e:jje:jj^e4je:jj^e4jdd Ze"e:jje:jj^e4je:jj^e4jdd Zdd Zdd Ze"e:jje:jj^e4je:jj^e4jdd Ze"e:jje:jj^e4je:jj^e4jdd Ze"e:jj̓e:jj͠^e4je:jj͠^e4jdd Ze"e:jj̓e:jj͠^e4je:jj͠^e4jdd Ze"e:jj]e:jj]^e4jeA		d>dNe deeD de=d|eeI d}eeI de fddZe"e:jj]eAdNe deeD de fddZe"e:jj]de de de=fddÄZe"e:je:j ge- dĐdń Ze"e:jgdƐdǄ Zd6e doe dUee  dieDdeDdee e f fdȐdɄZe"e:je-ddd6e doe dUee  dieDdeDdee e f fdʐd˄Ze"e:je-ddd6e doe dUee  dieDdeDdee e f fd̐d̈́Zde deIde fdϐdЄZde deIde fdѐd҄Zde de(fdԐdՄZde(de de fdؐdلZ	dee  de fdڐdۄZ
deDde=dlejed2ejfdݐdބZde deDdeDde=fddZde deDdeDdeDde=f
ddZde deeD de=fddZde deeD de=fddZe"e:je- eAde deeD de=fddZ	 	 		d@de de deDdeDde=de=de fddZe"e:je- eA	 	 	dAde de deDdeDde=de fddZe"e:je- eAdd Ze"e:je- dde;jhjifddZdej dej de=fddZe:jj]^e4je- dd Ze"e:jj]eA		d>de deeDeDf de=deeI d eeI de fddZe"e:jj̓e:jj͠^e4je:jj͠^e4je- eA	d6de deeeDeDf  de=deeeIeIf  de f
ddZe"e:je-dddddd	d
Ze"e:je- dBddddZe"e:jj]e:jjpge- dejdddde&dleeje dejd2eej d3e=f
ddZe"e:jj gdejdddde&de&dleeje dejd2eej d3e=fddZ!e"e$dd Z"e"e:j#e:j#j]^e4je- ddde;jhjifdNe doe de&de&dUee  dieDde fddZ#e"e:j$e:j$j]^e4je-dddNe doe dieDdee e f fddZ$e"e:j%j]	\		dCddde de d3e deId e=d!e=d+eeI dee e e e eDeDe e e f	 fd"d#Z&d$d% Z'e"e:j(ge- eAd:d&d'Z(e"e:j)e- d(d) Z)e"e:j*j]e:j*jpgddd*d6e dleeje dee  de fd+d,Z+e"e:j,j]e:j,j-gd6d6e deeD fd-d.Z.e"ej9j:j/d/d0 Z/e'e:j0e:j1 e'e:j2e:j e'e:j3e:j e'e:j4e:j( e'e:j5e:jK e'e:j6e:j7 e'e:j8e:jR e'e:j9e:j: e'e:j;e:jO e'e:j<e:j= e'e:j>e:j? e'e:j@e:jA e'e:jBe:jC e'e:jDe:jE e'e:jFe:jG e'e:jHe:jI e'e:jJe:jK e'e:jLe:jM e'e:jNe:jO e'e:jPe:jQ e'e:jRe:jS e'e:jTe:jU e'e:jVe:jW e'e:jXe:jY e'e:jZe:jX dS (D      N)Enum)partialreduce)chainproduct)CallablecastIterableListOptionalTupleUnion)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberType
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r(   r(   GC:\wamp64\www\opt\env\Lib\site-packages\torch/_decomp/decompositions.pyr   $   s    r   Fftype_promotioncompute_dtype_onlyc                    s   t  fdd}|S )Nc                     sr   dd t j| i |D }tj|di\  fdd}fdd}t|| i t||}r4|S t||S )Nc                 S   s   g | ]	}t |tr|qS r(   )
isinstancer   .0xr(   r(   r)   
<listcomp>4   s
    
z-type_casts.<locals>.inner.<locals>.<listcomp>type_promotion_kindc                       t | tr
|  S | S Nr-   r   tor0   computation_dtyper(   r)   increase_prec<      

z0type_casts.<locals>.inner.<locals>.increase_precc                    r3   r4   r5   r7   )result_dtyper(   r)   decrease_precB   r;   z0type_casts.<locals>.inner.<locals>.decrease_prec)pytreeZarg_tree_leavesutilselementwise_dtypesr   )argskwargsZ	flat_argsr:   r=   rr,   r*   r+   )r9   r<   r)   inner2   s   

ztype_casts.<locals>.inner)	functoolswraps)r*   r+   r,   rE   r(   rD   r)   
type_casts-   s   rH   T)r+   r,   )r+   r0   dimreturnc                 C   s$   t ||   D ]}| d} q| S )N)rangerI   	unsqueeze)r0   rI   _r(   r(   r)   _unsqueeze_to_dim_   s   rO   
grad_inputout_gradyc                 C   s   | d||     S Nr    Zconj_physicalrQ   rR   r(   r(   r)   tanh_backwarde      rV   c                 C   s   | |d|     S rS   rT   rU   r(   r(   r)   sigmoid_backwardl   rW   rX   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)exptorchwhere)rQ   r0   rY   rZ   zr(   r(   r)   softplus_backwards   s   "ra   grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sb   || }|}|}|rt |dk| | ||  | | S t |dk| | | t ||  | | S Nr   )r^   r_   r]   )	rb   rc   rd   re   rf   rg   ZnegcoefZposcoefZ
negiptcoefr(   r(   r)   elu_backward{   s   ri   c                 C      t | |S r4   )r^   Z	full_likeselfvaluer(   r(   r)   fill_scalar      rn   rm   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrI   r(   rm   r(   r)   <lambda>       zfill_tensor.<locals>.<lambda>)r^   _checkrI   atencopyrk   r(   rq   r)   fill_tensor   s
   

rw   rl   c                 C   s    t jt j| d ddddd S N   r   min   maxr^   clamprl   r(   r(   r)   hardsigmoid   s    r   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        r^   r_   rb   rl   r(   r(   r)   hardsigmoid_backward   s
   r   min_valmax_valc                 C   s   t ||k||kB d| S )Nr   r   )rb   rl   r   r   r(   r(   r)   hardtanh_backward   s   r   c                 C   s$   | t jt j| d dddd d S rx   r   r   r(   r(   r)   	hardswish   s   $r   c              
   C   s,   t |dk dt |dk| |d d  | S )Nr   ry         ?r   r   r(   r(   r)   hardswish_backward   s
   r   c                 C   s   t ||kd| S rh   r   )rb   rl   rZ   r(   r(   r)   threshold_backward      r   negative_slopeself_is_resultc                 C   s   t |dk| | | S rh   r   )rb   rl   r   r   r(   r(   r)   leaky_relu_backward   s   r   nonegradapproximatec                 C   s   d}d}d}|dkrO|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S )
Ng;f?g;f?gmBP?tanhr   gHm?r    ry   g      )r^   r   erfr]   )r   rl   r   ZM_SQRT2Z	M_SQRT1_2Z
M_2_SQRTPIZkBetaZkKappaZx_sqZx_cuberE   Z
tanh_innerleftrightZleft_derivativeZtanh_derivativeZinner_derivativeZright_derivativeZkAlphaZcdfZpdfr(   r(   r)   gelu_backward   s,   
r   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rS   )r^   r   FZsoftplussigmoid)rb   r   Zinput_tanh_softplusZinput_sigmoidoutr(   r(   r)   mish_backward  s   
r   c                 C   s   | t |  S r4   )r^   r   r   r(   r(   r)   silu
  s   r   c                 C   s,   ddt |   }| | d|d|    S rS   )r^   r]   )rb   rl   r   r(   r(   r)   silu_backward  s   r   weightc                 C   s   t | dk| ||  S rh   r   )rl   r   r(   r(   r)   _prelu_kernel  s   r   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )rb   rl   r   Z
input_gradZweight_gradr(   r(   r)   _prelu_kernel_backward  s   r   noiseloweruppertraining	generatorc           
      C   sh   |d u sJ |r(| dk}t | ||}t|| | | }|t||d |S || d }	t | |	S Nr   r    r!   )ru   uniformr^   r_   copy_
leaky_relu)
rl   r   r   r   r   r   Znot_positiverC   outputr   r(   r(   r)   rrelu_with_noise)  s   r   c              	   C   s   |  t| |||||S r4   )r   r   )rl   r   r   r   r   r   r(   r(   r)   rrelu_with_noise_A  s   r   c                 C   s6   |r|| dkr|  |S || d }t| |||S )Ngư>r!   )mulru   r   )rb   rl   r   r   r   r   r   r   r(   r(   r)   rrelu_with_noise_backwardO  s   
r   bufferc                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r    rK   )r^   r_   r]   abs)rb   rl   r   Zin_negativeZ	max_derivsignr`   r(   r(   r)   log_sigmoid_backwardd  s
   r   loss	reductionc                 C   s0   |t jjkrt| S |t jjkrt| S | S r4   )r   r&   rm   r^   meanr'   sum)r   r   r(   r(   r)   apply_loss_reductionq  s
   

r   dtypec                 C   s4   | t jkrt jS | t jkrt jS | t jkrt jS d S r4   )r^   Z	complex32Zfloat16Z	complex64float32Z
complex128Zfloat64r   r(   r(   r)   to_real_dtypez  s   


r   targetc                 C   s   | | d }t ||S )Nr!   )r   )rl   r   r   r   r(   r(   r)   mse_loss  s   
r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r   r&   rm   numel)rb   r   r   r   normr(   r(   r)   mse_loss_backward  s   r   r\   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r!   )r   r^   r_   r   )rl   r   r   rY   r   r(   r(   r)   smooth_l1_loss  s   	&
r   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S r[   )r   r&   rm   r   r^   r   r_   r   )	rb   rl   r   r   rY   r   r0   Zabs_xZ	norm_gradr(   r(   r)   smooth_l1_loss_backward  s   

r   c                 C   *   t | ||||}t||j t||ddS NT)Z	copy_fromZcopy_toZexact_dtype)r   r   shaper   )rb   rl   r   r   rY   rP   resultr(   r(   r)   smooth_l1_loss_backward_out     
r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S r[   )r   r&   rm   r   r^   r_   )rb   rl   r   r   r   r   r0   r(   r(   r)   huber_loss_backward  s    r   c                 C   r   r   )r   r   r   r   )rb   rl   r   r   r   rP   r   r(   r(   r)   huber_loss_backward_out  r   r   ignore_indextotal_weightc                 C   s   |  dk rdnd}|tjjkr| | } ||}t||k|d}t|}	t|	||d}	|	  |     kr=dkrDn n| |} |d urcdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr!   r   r    g      c                 S   s   g | ]}d qS r    r(   r/   rN   r(   r(   r)   r1     rs   z&_nll_loss_backward.<locals>.<listcomp>)rI   r   r&   rm   rM   r^   r_   
zeros_likescatterrL   r   reshape)rb   rl   r   r   r   r   r   channel_dimsafe_targetrP   Z	new_shaper(   r(   r)   _nll_loss_backward  s    	

 

r   c           
      C   s   |  dks
J dt|  |}||}|d dks'J d| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr!   z.Halving dimension must be even, but dimension z	 is size r\   rp   )rI   r?   canonicalize_dimsizenarrowr^   r   cat)
rb   rl   rI   Zwrap_dimZnInZ	inputSizeZ	firstHalfZ
secondHalfZgradInputFirstHalfZgradInputSecondHalfr(   r(   r)   glu_backward  s   

r   c                 C   sr  d|    krdksJ d J d|  dksJ d|  dko)|  dk}|sC|jd |jd ksCJ d|j d|j d| dksXJ d	|j d
|  df|d u si| |jd ksiJ d|tjjkr|  dkr|   dkr| jd |jd ksJ d|jd  d|    d| jd  n|   dkr|  dksJ d| j t| ||||||S )Nr   r!   input tensor should be 1D or 2Dr    ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rK   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rI   r   r   r   r%   rm   r   )rb   rl   r   r   r   r   r   no_batch_dimr(   r(   r)   nll_loss_backward  s<   ("
r   c                 C   s   |  dksJ d|   |  dksJ d|   |jd |jd kr<|jd |jd kr<|jd |jd ksHJ d|j d	|j | dks\J d
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: ry   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r!   r    r   r   r   z ( z, elements))rI   r   r   r   )rb   rl   r   r   r   r   r   r(   r(   r)   nll_loss2d_backwardI  s*   r   c              	   C   s\   |d t t |  | dd |t t | | dd  }|d ur)|| }t||S )Nr    r(   i)r^   maximumlog1pnew_fulllogr   )rl   r   r   r   r   r(   r(   r)   binary_cross_entropyl  s   

r   c                 C   sR   d}| ||  t j|d|  |d }|d ur|| }|tjjkr'||  }|S )Ng-q=r    rz   )r^   r   r   r&   rm   r   )rb   rl   r   r   r   ZEPSILONr   r(   r(   r)   binary_cross_entropy_backward  s   
"r   c                 C   s    t t |  | }t||S r4   )r^   r   r]   r   )r   r   r   r   r(   r(   r)   soft_margin_loss  s   
r   c                 C   s6   ||  t || d  }|tjjkr||  }|S rS   )r^   r   r   r&   rm   r   )rb   rl   r   r   rP   r(   r(   r)   soft_margin_loss_backward  s   	r   r!   otherpc                 C   s   t j| | |dS )N)r   )ru   r   )r   r   r   r(   r(   r)   dist  r   r   x1x2c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr!   rK   Tmemory_formatr   )powr   r^   	ones_likecontiguous_formatr   r   matmulmT	clamp_minsqrt)	r   r   Zx1_normZx1_padZx2_normZx2_padZx1_Zx2_r   r(   r(   r)   _euclidean_dist  s   r  input_sizesstartendstepc                 C   s   |  |}t|| ||||S r4   )	new_zerosr^   Zslice_scatter)rb   r  rI   r  r  r  rP   r(   r(   r)   slice_backward  s   

r  r    c                 C   s:  |   }|dkrtdt|   |}t|  }t|  }|dkr(td|d ur.|nd}|d ur6|ntj}	|dk rC||| 7 }|	dk rM|	|| 7 }	|dk rTd}n
||| kr^|| }|	|k re|}	n
|	|| kro|| }	| 	 |||   }
|	| }|| d | ||< ||  |9  < | j
rtd| |||
S )Nr   z,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver    z<Slice decomposition for quantized tensors aren't implemented)rI   RuntimeErrorr?   r   listr   stridesysmaxsizestorage_offsetZis_quantizedNotImplementedErrorZ
as_strided)rl   rI   r  r  r  ndimsizesstridesZ	start_valZend_valr  lenr(   r(   r)   slice_forward  s>   	r  indexc                 C   s   |  |}t|| ||S r4   )r  r^   Zselect_scatter)rb   r  rI   r  rP   r(   r(   r)   select_backward  s   
r  offsetdim1dim2c                 C   s   |  |}t|| |||S r4   )r  r^   Zdiagonal_scatter)rb   r  r  r  r  rP   r(   r(   r)   diagonal_backward  s   
r  input_dtypec                 C   s   | j |kr
||}|S r4   )r   r6   )rb   rP   r  r(   r(   r)   _cast_grad_to_input_dtype  s   

r  r   c                 C   s0   | | }||t j||dd  }t| || S NTrI   keepdim)r^   r   r  
contiguous)rb   r   rI   r  Znew_grad_outputrP   r(   r(   r)   _softmax_backward_data  s
   
r   c                 C   s*   | t |t j| |dd  }t| ||S r  )r^   r]   r   r  )rb   r   rI   r  rP   r(   r(   r)   _log_softmax_backward_data.  s   
r!  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr!   r    r   devicer   rK   )r   r^   arangeint64rM   )
Zinput_dZkernel_dZ
dilation_dZ	padding_dZstride_dr#  Zblocks_dZ	arange_kwZblocks_d_indicesZkernel_gridr(   r(   r)    _im2col_col2im_indices_along_dim:  s
   r&  kernel_sizedilationpaddingr
  c              	      s&  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dv odtdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s|d}|S ) Nr!   c                   S      dS )Nz"im2col(): only 2D kernel supportedr(   r(   r(   r(   r)   rr   W      zim2col.<locals>.<lambda>c                   S   r*  )Nz$im2col(): only 2D dilation supportedr(   r(   r(   r(   r)   rr   X  r+  c                   S   r*  )Nz#im2col(): only 2D padding supportedr(   r(   r(   r(   r)   rr   Y  r+  c                   S   r*  )Nz"im2col(): only 2D stride supportedr(   r(   r(   r(   r)   rr   Z  r+  Tc                 S   <   |rt dd | D nt dd | D }t|dd  d S )Nc                 s       | ]}|d kV  qdS r   Nr(   r/   r   r(   r(   r)   	<genexpr>]      z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s       | ]}|d kV  qdS r.  r(   r/  r(   r(   r)   r0  ]  r1  c                   S   r*  )Nz<{param_name} should be greater {'than' zero, but got {param}r(   r(   r(   r(   r)   rr   _  r+  z0im2col.<locals>.check_positive.<locals>.<lambda>allr^   rt   param
param_namestrictcondr(   r(   r)   check_positive\     (zim2col.<locals>.check_positiver'  r(  r)  Fr8  r
  ry   r   c                 s       | ]}|d kV  qdS r.  r(   r/   dr(   r(   r)   r0  j  r1  zim2col.<locals>.<genexpr>r   c                         dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler(   r   r(   r)   rr   k      c                 s   s>    | ]\}}}}}d |d|  ||d    d  |  V  qdS )r    r!   Nr(   r/   r   padZdilZkerstr(   r(   r)   r0  n  s
    "
r   c                 s   r-  r.  r(   )r/   cr(   r(   r)   r0  u  r1  c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r   , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.rB  r(   r(  r'  output_sizer)  r   r
  r(   r)   rr   v  s    r   r   rK   r    ry      T)r^   rt   r  r   r4  rC  ziprM   r&  r#  r   rG  permuter   r   squeeze)r   r'  r(  r)  r
  r:  r  batched_inputZ	batch_dimr   Zinput_hZinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wZblocks_row_indicesZblocks_col_indicesZpadded_inputr   Znum_blocks_rowZnum_blocks_colr(   rN  r)   im2colM  sd   




 




r^  rO  c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dv outdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
sf|d}|S )%Nr!   c                   S   r*  )Nzonly 2D output_size supportedr(   r(   r(   r(   r)   rr     r+  zcol2im.<locals>.<lambda>c                   S   r*  )Nzonly 2D kernel supportedr(   r(   r(   r(   r)   rr     r+  c                   S   r*  )Nzonly 2D dilation supportedr(   r(   r(   r(   r)   rr     r+  c                   S   r*  )Nzonly 2D padding supportedr(   r(   r(   r(   r)   rr     r+  c                   S   r*  )Nzonly 2D stride supportedr(   r(   r(   r(   r)   rr     r+  Tc                 S   r,  )Nc                 s   r-  r.  r(   r/  r(   r(   r)   r0    r1  z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   r2  r.  r(   r/  r(   r(   r)   r0    r1  c                   S   r*  )Nz9{param_name} should be greater than zero, but got {param}r(   r(   r(   r(   r)   rr     r+  z0col2im.<locals>.check_positive.<locals>.<lambda>r3  r5  r(   r(   r)   r:    r;  zcol2im.<locals>.check_positiver'  r(  r)  Fr<  r
  rO  )r!   ry   c                 s   r>  r.  r(   r?  r(   r(   r)   r0    r1  zcol2im.<locals>.<genexpr>r   c                      rA  )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: rB  r(   rD  r(   r)   rr     rE  r   r    c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r   z and kernel_size=r(   r(   )r'  r   r(   r)   rr     s
    c                 S   s:   g | ]\}}}}}d |d|  ||d    d  |  qS r    r!   r(   rF  r(   r(   r)   r1     s    "zcol2im.<locals>.<listcomp>rK   c                      4   d d d d d d  dd  d	S 
NzGiven output_size=rJ  rK  rL  rM  z , expected input.size(-1) to be 	 but got rK   .r(   r(   Lr(  r'  rO  r)  r   r
  r(   r)   rr         c                      r`  ra  r(   r(   rd  r(   r)   rr     rf  ry   r   rP  c                 S   s   g | ]
\}}|d |  qS r!   r(   )r/   or   r(   r(   r)   r1     s    
accumulaterQ  )r^   rt   r  r   r4  rR  rM   r   rS  r&  r#  rO   r  prodru   _unsafe_index_putr   rG  rT  )r   rO  r'  r(  r)  r
  r:  r  Zprod_kernel_sizecolrU  out_hout_wrV  rW  rX  rY  rZ  r[  r\  r]  Zindices_rowZindices_colZoutput_padded_sizer   idxr(   rd  r)   col2im  s   




 



"

rq  maskc                 C   s$   | | | |  jt| d}|S )Nr   )type_ascloner?   suggest_memory_format)rb   rr  rd   rC   r(   r(   r)   native_dropout_backward  s   	rv  
input_size	dimensionr   c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   r#  r   rK   r    r4   Tri  )r  r^   Zsqueeze_copyr?   r   r$  r#  Zint32ZunfoldflattenZmovedimr  ru   rl  r  )	r   rw  rx  r   r  rI   rp  rP   r  r(   r(   r)   unfold_backward  s   
r{  epsc              	   C   st   |d ur|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS )Nr\   r   r(   nan)r^   r_   logical_andr   float)rb   rl   r|  lohir(   r(   r)   logit_backward%  s   r  trainc                 C   s&   |r|dkrt | ||d S |  S rh   )ru   native_dropoutrt  )r   r   r  r(   r(   r)   dropout:  s   r  out0out1c                 C   s   |r6|dkr6|dkrt | t j| t jdfS | jjstdt | |k}||  tdd|   }||fS | t j| t jdfS )Nr   r    r   z?result type Float can't be cast to the desired output type Longr\   )	r^   r   boolr   is_floating_pointr  	rand_liker  r   )r   r   r  Z	bool_maskresr(   r(   r)   r  D  s   r  half_to_floatc                 C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr*t
| }ntj| |dd}t
| | }|tj||dd }|sJ||}|S Nr2   r   Tr  )r  r   r^   halfr?   r@   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr6   r   r]   amaxr   )r0   rI   r  r9   r<   Zunnormalizedx_maxr   r(   r(   r)   _softmaxU  s   


r  c           	      C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr'| }ntj
| |dd}| | }ttjt||dd}|| }|sL||}|S r  )r  r   r^   r  r?   r@   r  r  r6   r   r  r   r   r]   )	r0   rI   r  r9   r<   Zshiftedr  Zshifted_logsumexpr   r(   r(   r)   _log_softmaxl  s    


r  c                 C      t j|| |dS Nrc   r^   subrl   r   rc   r(   r(   r)   rsub_Tensor     r  c                 C   r  r  r  r  r(   r(   r)   rsub_Scalar  r  r  rK   indicespadding_idxscale_grad_by_freqsparsec                 C   sJ   |   dks
J d|jdkr!| d|}|jdkr|d}|S | | S )Nr!   z'weight' must be 2-Dr    r   )rI   r  Zindex_selectrT  )r   r  r  r  r  r   r(   r(   r)   	embedding  s   	


r  num_weightsc                 C   s   t j| t jjd\}}| |} t|tj}|r8||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr  Tri  rK   r   )r?   r@   r  r  r6   r   r^   longr  r   ru   rl  rM   rO   r  masked_fillr   )rb   r  r  r  r  r9   r<   countsonesZgrad_weights_scalerr  r   grad_weightr(   r(   r)   embedding_dense_backward  s&   	


r  c                 C   s   d}| D ]}||9 }q|S rS   r(   )r0   rC   ir(   r(   r)   rk    s   
rk  split_sizesc           	         s   t ttj  k fdd t}g }d}ddlm} t|D ]'}| }t 	|dd  ||| j  k |
 || ||7 }q&|S )Nc                      s   dt  dj   S )NzSplit sizes add up to z but got the tensor's size of )r   r   r(   rI   rl   r  r(   r)   rr     s    z"split_with_sizes.<locals>.<lambda>r   )expect_truec                   S   r*  )NzCsplit_with_sizes expects split_sizes have only non-negative entriesr(   r(   r(   r(   r)   rr     r+  )r^   Z_check_with
ValueErrorr   r   r  %torch.fx.experimental.symbolic_shapesr  rL   Z_check_is_sizeappendr   )	rl   r  rI   Z
num_splitsZsplitsZ	start_idxr  r  lengthr(   r  r)   split_with_sizes  s&   
r  
split_size.c                 C      t j| ||S r4   )ru   splitr   )r   r  rI   r(   r(   r)   unsafe_split  r  r  c                 C   r  r4   )ru   r  default)r   r  rI   r(   r(   r)   unsafe_split_with_sizes  s   r  c                    s   | j }|| } dkr|dksJ | fS |  d   }ddlm} ||} fddt|D }  | |  |d< t| ||S )Nr   r    )	guard_intc                       g | ]} qS r(   r(   r/   r  r  r(   r)   r1      rs   zsplit.<locals>.<listcomp>rK   )r   r  r  rL   r^   r  )rl   r  rI   r  dim_sizechunksr  r  r(   r  r)   r    s   r  tensor_indices_or_sectionsc                    s   |j jdksJ |jtjksJ |  t dkp dk fdd  dkr9| }t|t	s3J | 
||S dd |D }| 
||S )Ncpur    r   c                      s   d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr(   r(   Z	split_dimr(   r)   rr     s    zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>c                 S   s   g | ]}|  qS r(   )itemr  r(   r(   r)   r1         zCtensor_split_tensor_indices_or_sections_py_impl.<locals>.<listcomp>)r#  typer   r^   r%  rI   rt   r  r-   r   tensor_split)rl   r  rI   sectionsr  r(   r  r)   /tensor_split_tensor_indices_or_sections_py_impl  s   
r  mat1mat2c                 C   H   |   s|  st|}t|}|t|| }|dkr|S |||   S rh   )r  
is_complexintr^   mm)rl   r  r  rY   rc   r   r(   r(   r)   addmm  s   r  use_geluc                 C   s<   t | ||||}|r| jrtj|ddS t|S t|S )Nr   )r   )r  is_cudaru   gelurelu)rl   r  r  rY   rc   r  r   r(   r(   r)   _addmm_activation3  s   

r  vecc                 C   r  rh   )r  r  r  r^   mv)rl   r  r  rY   rc   r   r(   r(   r)   addmvG  s   r  r   rstdgammaNCHxWgroupoutput_maskc
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td u pJ  k fdd t \}
}t|dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d r:d|
  }d urt|d|

d	}t|d|

d	}t|dd|
}n&||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r_|	|
|	|
d  |d j
dgd
 }|	d	 rk|j
dgd
}|||fS )NF)Zallow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr(   r(   )r  r  r  r(   r)   rr   i  r  z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got rD  r(   )r  r  r   r(   r)   rr   m      c                      s$   d  dd ur   S d S )NzExpect gamma to have z elements but got rK   )r   r(   )r  r  r(   r)   rr   q  s   $ r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r(   r(   )r  r  r(   r)   rr   w  rs   r!   rp   r\   rK   r    r#  r   )r?   Zcheck_same_deviceZcheck_same_shaper^   rt   r   r   divmodr   viewr   rM   r   r  r#  rO   r6   r   )rb   r   r   r  r  r  r  r  r  r  ZcpgZ_remZdsdbd_inputZd_gammad_biassZds_valZdb_valc1c2c3r(   )r  r  r  r  r  r   r)   native_group_norm_backwardT  s   
 
""



$

r  out2c
                C   d   t | |||||||||	
}|
||f}t|D ]\}}|d ur/t|| |j t||| dd q|S r   )r  	enumerater   r   r   )rb   r   r   r  r  r  r  r  r  r  r  r  r  r   rP   r  rC   r(   r(   r)   native_group_norm_backward_out  s   
r  c                 C   s   | d ur	|  |S | S r4   r6   )r0   r   r(   r(   r)   _maybe_cast  s   
r  grad_outnormalized_shapebiasc           !         sL  |j }| }	t|j  fdd| |||fD \}
}}}|
d us$J |	t| }||d  }|d | }g }g }t|	D ]}||krJ|| q>|| q>t|}t|}|dks`|dkr|d ri|	|nd |d rw|	||d  nd |d r|	||d  fS d fS t
|| }t
|| }|| | }|d ur|
| }n|
}|| }t||d}t||}t||d}t||}|| | }d }d }d } |d r|| | }|d r|d urt|dkrt|
| |d}n|
| }|d r|d urt|dkrt|
|d} n|
 } t||jt||jt| |jfS )Nc                 3   s*    | ]}|d ur|   n|V  qd S r4   )r6   r  r.   r8   r(   r)   r0    
    
z-native_layer_norm_backward.<locals>.<genexpr>r   r    r!   TF)r   rI   r?   get_computation_dtyper   r  rL   r  rk  r  rO   r^   r   r   rt  r  )!r  r   r  r   r  r   r  r  input_shapeZ
input_ndimgrad_out_cast
input_castweight_castZ	bias_castaxisZ
inner_dimsZ
outer_dimsZinner_dim_indicesZouter_dim_indicesr  r  MZx_hatZ
grad_x_hatabr  r  r  rE   r  Zd_weightr  r(   r8   r)   native_layer_norm_backward  sl   





r   c             	   C   s`   t | |||||||}||	|
f}t|D ]\}}|d ur-t|| |j t||| dd q|S r   )r   r  r   r   r   )r  r   r  r   r  r   r  r  r  r  r  r   rP   r  rC   r(   r(   r)   native_layer_norm_backward_out   s   
r  running_meanrunning_varmomentum
functionalc	                 C   sT  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d ur]|| d| |  }|s]|| |d ur|  | jd  }t
||	}|||d   }|| d| |  }|s|| nT|d ur|d usJ |j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n
| d	}| d	}t||  d }t||  d }| | | }|d ur| }t||  d }|| }|d ur	| }t||  d }|| }| jjdkr|j| jd}|j| jd}|j| jd||||fS )
Nr   r!   r   T)rI   Z
correctionr  r    )r   rv   r  r   )r	  rL   rI   r?   r  r   r6   r^   Zvar_meanrsqrtrT  r   r   r   r   r#  r  r  rO   rz  )r   r   r  r  r  r   r  r|  r  Zreduction_dimsr9   new_running_meannew_running_varZ	input_accZ
biased_varr   r  r   	save_mean	save_rstdnZsqueezed_varZunbiased_varinvstdr(   r(   r)   native_batch_norm_helper;  st   





r  r   r
  save_invstdc              
   C   ,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  r|  r   r
  r  rN   r(   r(   r)   native_batch_norm  s   
r  c              
   C   sv   |d u r|d u rt | |||||S |d u rtd|d u r"td|r0t | |||||||S t | ||||||S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)ru   _native_batch_norm_legitr  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  r|  r(   r(   r)   native_batch_norm_decomposition  s&   r  c                    s|   |  |}|| d |   dkr4|dkr4 fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr    r   c                    r  r(   r(   r   r  r(   r)   r1     rs   z(unsafe_chunk_py_impl.<locals>.<listcomp>)r   r^   opsru   r  r  r  r   )tensorr  rI   r  r  r(   r  r)   unsafe_chunk_py_impl  s   
r  c              
   C   s   t j| ||||d||S r  )ru   r  r  )r   r   r  r  r  r  r|  r(   r(   r)   r    s   
r  c              
   C   r  r  r  r  r(   r(   r)   r    s   
r  c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r  r  )
r   r   r  r   r  r|  r   r
  r  rN   r(   r(   r)   !_native_batch_norm_legit_no_stats  s   	
r  c              
   C   sP   t | |||||||d	\}}	}
}}|d usJ d|d us!J d||	|
||fS )NTz#new_running_mean should not be Nonez"new_running_var should not be Noner  )r   r   r  r  r  r   r  r|  r   r
  r  r  r	  r(   r(   r)   #_native_batch_norm_legit_functional  s   r  c                 C   sB   |d u sJ t | |k jt jd}|| |  d|  }||fS )Nr   r\   )r^   r  r6   uint8rs  )r   r   r   rr  r  r(   r(   r)   _fused_dropout_decomposition)  s   r  c                 C   s   t | tjjr
| jS d S r4   )r-   r^   Z_subclassesZ
FakeTensorZfake_device)r  r(   r(   r)   device_hint3  s   r  c                 C   sD   |d ur | j jdkr ddlm} | }d|_|j}||| |S | S )Nmetar   )FakeTensorModeT)r#  r  Ztorch._subclasses.fake_tensorr!  Zin_kernel_invocationZfake_tensor_converterZfrom_meta_and_device)r0   common_devicer!  Z	fake_mode	converterr(   r(   r)   wrap_output_with_input_device_:  s   r$  )r   layoutr#  
pin_memorynon_blockingr   r#  r&  r'  r   c          	      C   s   |r|t jksJ d|rJ d|d u r!|d u r!|d u r!|  S d}t| }|d urI|| jkrI|d urB|jdkrBt j| |} d}t j| |} |d urX|sXt j| |} d}|r_t	| |} |d urjt j| |dS | S )NTODOFr  Tr   )
r^   stridedrt  r  r#  r  _primsZconvert_element_typeZ
device_putr$  )	r0   r   r%  r#  r&  r'  r   Zdtype_convertedr"  r(   r(   r)   _to_copyG  s&   
r+  c                 C   s
   t | S r4   )ru   aliasr7   r(   r(   r)   nop_decompositionn  s   
r-  Zout3exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )ru   r  r  r^   r  )r   r   r  r  r  r   r.  r/  r  r  rI  r(   r(   r)   cudnn_batch_normv  s"   
r0  c                 C   sD   t |D ]\}}|dkr|| jk r| j| || ks| |} q| S rS   )r  r  r   rM   )r0   broadcast_maskr  rr  r(   r(   r)   _broadcast_batch_norm_backward  s
   $
r2  c
           &         s  |j }
|d ur|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dks9J dd}tt|||  }|}|}|rV|d urS|d usUJ n|d ur^|d us`J |}t|| }dg| }|| ||< g }t	|D ]}||kr|
| qzt||}d| }t||}t|||  |}t|| |}tt|| || |} |d u rt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s&    | ]}|d ur|  n|V  qd S r4   r  r.   r8   r(   r)   r0    s
    
z-native_batch_norm_backward.<locals>.<genexpr>r!   z$rank of the input must be at least 2r    r\   )r   r?   r  r   rI   rk  r	  r^   r  rL   r  r2  r   r   r6   r  )&r  r   r   r  r  r
  r  r  r|  r  r  Zweight_dtyper  r  r  Zrunning_mean_castZrunning_var_castZsave_mean_castZsave_invstd_castr  Z
input_rankr  Znum_featuresr   r  r1  Zreduction_axesr  r   Zgrad_output_sumZdot_pZ	grad_meanZ
proj_scaleZ
grad_scaleZprojrP   r  Z	grad_biasr(   r8   r)   native_batch_norm_backward  s   
	



r3  c
                C   r  r   )r3  r  r   r   r   )r  r   r   r  r  r
  r  r  r|  r  r  r  r  r   rP   r  rC   r(   r(   r)   native_batch_norm_backward_out  s&   
r4  save_varreserveSpacec	           	      C   s    t || |||||d|g d
S )NT)TTT)ru   r3  )	r   rb   r   r  r  r
  r5  r/  r6  r(   r(   r)   cudnn_batch_norm_backward*  s   r7  c                    s  | j  | jttdv fdd | jdd  D ]}t|dkfdd qd |d  dkrjd |d  dkrjtdd	 tdd  |D }td
d	 tdd  ||D }tjj	| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt
|d|
f }|	s|stj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]\}}|d u r|d|d d |f }q||d|d d |f  }q|||  S )Nr=  c                      
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r(   r(   r  r(   r)   rr   O     
 z%adaptive_avg_pool2d.<locals>.<lambda>r   r   c                      s   dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape rc  rB  r(   rD  r(   r)   rr   T  s    rK   c                 s   s    | ]	\}}|| V  qd S r4   r(   )r/   r  rh  r(   r(   r)   r0  Z      z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s&    | ]\}}}||d  |  V  qdS )r    Nr(   )r/   r  rh  r  r(   r(   r)   r0  [  s    
c                 S   s   t j| | |ddS )NtruncZrounding_moder^   divr  r  rI  r(   r(   r)   start_index`  s   z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr    r<  r=  r>  r@  r(   r(   r)   	end_indexc      z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkp"|| dk }|r+|d7 }n|dkr3|d8 }t j| t jd}|d| }|rbt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nry  r    r   rK   r"  )r^   r$  r%  rM   Zscalar_tensorr   r#  minimum)in_sizeout_sizeZorangeZi0Z	maxlengthZin_size_modadaptive	range_maxrp  maxvali1r  )r#  rB  rA  r(   r)   compute_idxf  s(   

z(adaptive_avg_pool2d.<locals>.compute_idx.r   )r   rK   rp   c                 S   s`   t |tr	| |fS |dk sJ ||dk}|dkrt|d}t| |d} t|| }| |fS )Nr   rK   r   r   r   )r-   r   rM   rO   r^   r  )valsr  rH  rG  rI   rr  r(   r(   r)   
maybe_mask  s   

z'adaptive_avg_pool2d.<locals>.maybe_mask)rG  rI   r   )r#  r   r  r^   rt   rC  rR  nnr  Z
avg_pool2drO   r   r   rL   )r   rO  r@  r
  ZkernelrK  ZidxhZlength_hZrange_max_hZ
adaptive_hZidxwZlength_wZrange_max_wZ
adaptive_wrL  rM  retr  jr(   )r#  rB  r  r   rA  r)   adaptive_avg_pool2dE  sN   

(  



&rQ  r  r  c                C      t | |||d|dS )NTinplacerc   
_index_addr0   rI   r  r  rc   r(   r(   r)   
index_add_  s   	rX  c                C   rR  )NFrS  rU  rW  r(   r(   r)   	index_add  s   
rY  rT  c                   s"  t | jtjdkfdd jdkrdnd|jdkr*|ndtkfdd  dkr]t | jttkpQt 	t
  fdd |  }| jdk}|ri| dn| }d f }|rwtjntj}	|	|||dd	}
|r| S |r|
dS |
 S )
Nr    c                         d j  dS Nz(Index should have dimension 1 or 0 (got r   r9  r(   r  r(   r)   rr         z_index_add.<locals>.<lambda>r   c                      s   d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r(   r(   )rI   
index_sizetensor_sizer(   r)   rr         c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)r  r(   )rc   python_typer(   r)   rr     r`  r4   Tri  )r?   canonicalize_dimsr  r^   rt   r   Zdtype_to_typer   r  Zis_weakly_lesser_typer  rM   ru   
index_put_	index_putrT  r  )r0   rI   r  r  rT  rc   zero_dimr   rp  re  r   r(   )rc   rI   r  r^  rb  r_  r)   rV    s6   	

rV  c                 C      t | |||ddS )NTrT  _index_copyr0   rI   r  r  r(   r(   r)   index_copy_  s   rl  c                 C   rg  )NFrh  ri  rk  r(   r(   r)   
index_copy  r   rm  c          
         s   t | j|}t jdk fdd | jdk}|r | dn| }d|  f }|r.tjntj}||||}	|r;| S |rB|		dS |	
 S )Nr    c                      rZ  r[  r9  r(   r\  r(   r)   rr    	  r]  z_index_copy.<locals>.<lambda>r   r4   )r?   rc  r  r^   rt   rM   ru   rd  re  rT  r  )
r0   rI   r  r  rT  rf  r   rp  re  r   r(   r\  r)   rj    s   

rj  c                 C   sL   t | d| }t t |  }| jr| d}n|}|t | |fS )Nr(   r  )r^   rD  r  r]   r   r  r   )rl   r{   r`   r   r(   r(   r)   log_sigmoid_forward	  s   rn  r   lowhighc                 C   s$   t j| jt|t|| j| j|dS )N)ro  rp  r   r#  r   )primsZ_uniform_helperr   r   r   r#  )r0   ro  rp  r   r(   r(   r)   r   	  s   r   c                 C   s   |  t| |||S r4   )r   r   )rl   ro  rp  r   r(   r(   r)   uniform_.	  s   rr  c                 C   s   t | d }|d ur"t|d u dd  tt ||kdd  |S |d urjt|d u dd  tt ||kdd  g }t|D ]%\}}t||krZ|| |d  t|  qB|t| |d  |  qB|S tddd  d S )	Nr!   c                   S   r*  Nz9Must specify exactly one of output_size and scale_factorsr(   r(   r(   r(   r)   rr   9	  r+  z.upsample_compute_output_size.<locals>.<lambda>c                   S   r*  N r(   r(   r(   r(   r)   rr   ;	  r+  c                   S   r*  rs  r(   r(   r(   r(   r)   rr   A	  r+  c                   S   r*  rt  r(   r(   r(   r(   r)   rr   C	  r+  Fc                   S   r*  rs  r(   r(   r(   r(   r)   rr   L	  r+  )r  r^   rt   r  r  r  r   )rw  rO  scale_factorsZspatial_dimensionsr  r  r(   r(   r)   upsample_compute_output_size4	  s.   rw  c                 C   s   | d u rd S | | S r4   r(   )scalesrp  r(   r(   r)   get_scale_valueP	  s   ry  c                 C   *   t |  ||}t|d}tj| ||S rh   )rw  r   ry  ru   upsample_nearest1dr  r   rO  rv  osizerd   r(   r(   r)   upsample_nearest1d_vecV	     
r~  c                 C   rz  rh   )rw  r   ry  ru   _upsample_nearest_exact1dr  r|  r(   r(   r)   _upsample_nearest_exact1d_vec`	  r  r  c                 C   6   t |  ||}t|d}t|d}tj| |||S Nr   r    )rw  r   ry  ru   upsample_nearest2dr  r   rO  rv  r}  scale_hscale_wr(   r(   r)   upsample_nearest2d_vecj	     

r  c                 C   r  r  )rw  r   ry  ru   _upsample_nearest_exact2dr  r  r(   r(   r)   _upsample_nearest_exact2d_vecu	  r  r  c                 C   B   t |  ||}t|d}t|d}t|d}tj| ||||S r   )rw  r   ry  ru   upsample_nearest3dr  r   rO  rv  r}  Zscale_dr  r  r(   r(   r)   upsample_nearest3d_vec	  s
   


r  c                 C   r  r   )rw  r   ry  ru   _upsample_nearest_exact3dr  r  r(   r(   r)   _upsample_nearest_exact3d_vec	  s   



r  c                 C   s   g }t |}|r
dnd}t|D ]I}|| }| j| |  }	|| d ur,|	|	||   n|	| }
tj|tj| jd}|| |
 tj}t|d | D ]}|	d}qL|
| qt|S )Nr   r   r"  r    rK   )r  rL   r   r^   r$  r   r#  r6   r%  rM   r  rC  )r   rO  rx  exactr  Znum_spatial_dimsr  r@  r}  isizerd   Zoutput_indicesZinput_indicesrN   r(   r(   r)   !_compute_upsample_nearest_indices	  s   $r  rx  c                 C   s"   t | ||f\}t| d d |fS r4   r  ru   _unsafe_indexr   rO  rx  Z	l_indicesr(   r(   r)   r{  	  s   r{  c                 C   s&   t | ||fdd\}t| d d |fS NT)r  r  r  r(   r(   r)   r  	  s   
r  c                 C   sV   t | d d ||f}t| }| j\}}}}| jjdkr#|dk r#tj}|j	|d}|S )Ncudar   r   )
ru   r  r?   ru  r   r#  r  r^   r   r  )r   	h_indices	w_indicesr   r   rN   
n_channelsr(   r(   r)   _upsample_nearest2d_common	  s   
r  scales_hscales_wc                 C   s    t | |||f\}}t| ||S r4   r  r  r   rO  r  r  r  r  r(   r(   r)   r  	  s   	
r  c                 C   s$   t | |||fdd\}}t| ||S r  r  r  r(   r(   r)   r  	  s   	
r  scales_dc           	      C   s2   t | ||||f\}}}t| d d |||f}|S r4   r  	r   rO  r  r  r  Z	d_indicesr  r  r   r(   r(   r)   r  
  s
   

r  c           	      C   s6   t | ||||fdd\}}}t| d d |||f}|S r  r  r  r(   r(   r)   r  
  s
   
r  c                    sb   |r|rd n|rd n|rd nd t   dks!J t  fddtdt  D S )NrP  r   ry   r!   r   c                    s    g | ]}t ||   qS r(   rB  r  Z
group_sizeparamsr(   r)   r1   0
  s    z!gather_params.<locals>.<listcomp>)r  rL   )r  
has_biaseshas_projectionsr(   r  r)   gather_params%
  s   r  c                 C   sh   |r!| d|  |d|  }}| d| d  |d| d  }}n| | || }}d\}}||||fS )Nr!   r    NNr(   )r  hiddensr  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr(   r(   r)   params_hiddens5
  s   $r  c                 C   s2   ||ksJ | | d|||  | dd|S rh   )r  r   )r  last_batch_size
batch_sizer  r(   r(   r)   update_hidden_for_packed@
  s   r  c              	   C   s4   ||kr| S ||k sJ t | |d||| fS rh   )r^   concatr   )r  r  r  Z
inp_hiddenr(   r(   r)    update_hidden_for_packed_reverseF
  s   r  c                 C   s$  |d }|d }|r|d nd }	|r|d nd }
g }g }|r"|d n|d }| dd|}t| t|}|r>|d d d }|D ]-} | jd }||krLn|rVt||||}nt||||}|| |||	||
}|}|| q@|ru|  n	|| |  t	|d}|st	|dn|}||fS )Nr   r    r!   ry   rK   )
r   r^   r  r	  r   r  r  r  reverser   )inphiddenr  r  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputr  r  r  	split_inpr  r   
hidden_outr(   r(   r)   one_layer_rnn_dataT
  s@   


r  c                        fdd}|S )Nc                    s    t ||||  S r4   r   linearr  r  r  r  r  r  nonlinearityr(   r)   rE   
  s   zrnn_cell.<locals>.innerr(   r  rE   r(   r  r)   rnn_cell
  s   r  c                    r  )Nc                    s$   t | ||}  t ||||  S r4   r  r  r  r(   r)   rE   
  s   zrnn_cell_data.<locals>.innerr(   r  r(   r  r)   rnn_cell_data
  s   r  c                 C   s   |d }|d }|r|d nd }|r|d nd }	t | ||}
|r&|
dn|
}
|d}g }|
D ]}|||||||	}|| q1|rH|  t|d}||dfS )Nr   r    r!   ry   )	r   r  fliprM   r  r  r^   r   rT  )r  r  r  r  r  r  r  r  r  r  precomputed_inputr  r  r  r   r(   r(   r)   one_layer_rnn
  s   
r  c                 C   s   |d }|d }|r|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d }}}||	d|	dffS )Nr   r    r!   ry   F)
r^   Zzerosr   rM   r  r  ru   Zmkldnn_rnn_layerr  rT  )r  r  r  r  r  Zw0Zw1Zw2Zw3hxcxr  modeZhidden_size
num_layersr  batch_firstr  ZoutputsrR   hycyr(   r(   r)   mkldnn_one_layer_lstm
  sN   


r  c
                 C   s   |r|  ddn| } g }
t|D ]^}t||||\}}}}|r'||d k r'|nd}|	| |||\}}|
| |rI|	| |||dd\}}|
| |rXt||g| d } n|} |dkrn|rn||d k rntj| |dd} q|rw|  ddn| } | |
fS )Nr   r    r   T)r  )r  )	transposerL   r  r  r^   r   rI   r  )r   r  r  r  r  r  r  r  r  layer_fnfinal_hiddensr  r  r  r  r  Zfwd_inpZ
fwd_hiddenZbwd_inpZ
bwd_hiddenr(   r(   r)   _rnn_helper
  s,   



r  c	                 C   R   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Fr  )	unbindr  r  r   r  r  r^   r   stackr   r  r  r  r  r  r  r  r  r  r   r  r(   r(   r)   rnn_tanh_input     
r  c	                 C   r  r  )	r  r  r  r   r  r  r^   r  r  r  r(   r(   r)   rnn_relu_input!  r  r  c	                 C   T   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Fr  r  )	r  r  r  r   r  r  r^   r  r  datar  r  r  r  r  r  r  r  r  r   r  r(   r(   r)   rnn_relu_data@  &   
r  c	                 C   r  r  )	r  r  r  r   r  r  r^   r   r  r  r(   r(   r)   rnn_tanh_datac  r  r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d u r;|nt ||d }||fS )Nr   r   r    r!   ry   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimZgatesZchunked_gatesZin_gateZforget_gateZ	cell_gateZout_gater  r  r(   r(   r)   	lstm_cell  s   r  c              
   C   s   |d }|d }|r|d nd }|r|d nd }t |dkr"|d nt |dkr,|d nd }	|d d}
|d d}t| ||}|rJ|dn|}g }|D ]} t| |
||||	dd\}
}||
 qP|rk|  t	|d}||

d|
dffS )Nr   r    r!   ry   rP  r   r  )r  rM   r   r  r  r  r  r  r^   r   rT  )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r(   r(   r)   one_layer_lstm  s$   *r  c              
   C   s
  |d }|d }|r|d nd }|r|d nd }	t |dkr"|d nt |dkr,|d nd }
g }g }|r8|d n|d }t| t|}|rM|d d d }|d }|d }|dd||dd|}}|D ]l} | jd }t| ||} ||k r||d||| |d||| f |dd||dd|}}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| qf|r|  ||f}n|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r    r!   ry   rP  r   rK   r  )r  r^   r  r	  r   r   r   r  r  r  r  r  rR  r   )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zorig_hxZorig_cxr  r  r  r  Zhidden0Zhidden1r   r(   r(   r)   one_layer_lstm_data  s\   *

r  c                 C   s   dd }|| ||rt S tS )a   Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._has_mkldnn`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t jjsdS | gt| tt| }dd |D }t|dkr#dS | }|t dkr0dS dd |D }|D ]}|t j	t j
fvrF dS q9| jrLdS |d d|d dk}|r^dS d	S )
NFc                 S      h | ]}|j qS r(   r  r/   tr(   r(   r)   	<setcomp>      zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r    r  c                 S   r  r(   r   r  r(   r(   r)   r  
  r  r   r!   T)r^   _CZ_has_mkldnnr	  r   from_iterabler  popr#  r  Zbfloat16requires_gradr   )	r   r  r  ZtensorsZdevicesr#  Zdtypesr   r  r(   r(   r)   
use_mkldnn  s(   z2select_one_layer_lstm_function.<locals>.use_mkldnn)r  r  )r   r  r  r  r(   r(   r)   select_one_layer_lstm_function  s   r  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t|d dt|d dfS )Nr!   lstm expects two hidden statesr   r    )	r  r  r   r	  rR  r  r  r^   r  )r   r  r  r  r  r  r  r  r  r  r  r   r  r(   r(   r)   	lstm_impl   s$   $"r   c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	|d dt	|d dfS )Nr!   r  r   r    F)r  )
r  r  r   r	  rR  r  r   r  r^   r  r  r(   r(   r)   lstm_data_implB  s"   $
"r  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nry   r    r!   r   )r  r   r  r   r   r  r  r  r  r  r  Zchunked_igatesZchunked_hgatesZ
reset_gateZ
input_gateZnew_gater(   r(   r)   gru_cellc  s   r  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nry   r    r   r!   r  r  r(   r(   r)   gru_cell_datal  s   r  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r  r^   r  )r  r  r  r  r  r  r  r  r  r   r  r(   r(   r)   gru_impl_datau  s   r  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r  r^   r  )r   r  r  r  r  r  r  r  r  r   r  r(   r(   r)   gru_impl  s   
r  c                 C   s:   t |  ||}t|d}t|d}tjj| ||||S r  )rw  r   ry  r^   r  ru   _upsample_bilinear2d_aar   rO  align_cornersrv  r}  r  r  r(   r(   r)   upsample_bilinear2d_aa_vec  s   


r
  c                 C   s4   t |  ||}t|d}t|d}t| ||||S r  )rw  r   ry  upsample_bilinear2dr  r(   r(   r)   upsample_bilinear2d_vec  s   

r  r	  c           $      C   sf  | j \}}}}|d }	|d }
|	dkr+|r|d |	d  }n|d ur&d| n||	 }nd}|
dkrI|r<|d |
d  }n|d urDd| n||
 }nd}tj|	| j| jd}tj|
| j| jd}|rj|| }|| }n||d  d jdd}||d  d jdd}|tj}t|j|d dtj}|tj}t|j|d dtj}|	d}|	d}|	d}t
| d d ||g}t
| d d ||g}t
| d d ||g}t
| d d ||g}|| }d| }|| }d| }t||t|| } t||t|| }!t| |t|!| }"t| }#| jjd	kr+|d
k r+tj}#|"j|#d}"|"S )Nr   r    r\   r   r"  r   rz   r}   r     r   )r   r^   r$  r   r#  r   r6   r%  ceilrM   ru   r  r   r?   ru  r  r   r  )$r   rO  r	  r  r  Zn_batchr  Zin_hZin_wrn  ro  Zh_scale_factorZw_scale_factorr  rP  r0   rR   Zx_floorZx_ceilZy_floorZy_ceilZx_viewZx_floor_viewZx_ceil_viewv1v2Zv3Zv4Zxscale2Zxscale1Zyscale2Zyscale1q1q2r   r   r(   r(   r)   r    sV   




r  c                 C   s  |d }|d }|d }|d }| }| }| }|dk r.|d| d f }|d| d f }d}|dk rD|dd |f }|dd |f }d}|dk rb|d| d d d f }|d| d d d f }d}|dk r~|dd |d d f }|dd |d d f }d}d|   d  }	|	||f }
|	|df }|	||f }tj| ddgd d f ddgf |
|ddgd d f || ddgd d f ddgf |gdd}|	d|f }|	d|f }tj|ddgf |||ddgf |gdd}|	||f }|	|df }|	||f }tj| ddgd d f ddgf ||ddgd d f || ddgd d f ddgf |gdd}tj|||gd	dS )
Nr   r    r!   ry   .r   rK   rp   r   )rI   r^   r   repeat)r   r)  pad_left	pad_rightpad_topZ
pad_bottomZ	input_midZinput_mid_tbZinput_mid_lrZbatch_dims_no_repeatZrepeat_top_leftZrepeat_top_middleZrepeat_top_rightZtop_rowsZrepeat_middle_leftZrepeat_middle_rightZmiddle_rowsZrepeat_bottom_leftZrepeat_bottom_middleZrepeat_bottom_rightZbottom_rowsr(   r(   r)   replication_pad2d  sl   """"r  r  r  c                 C   s   | j |j kS r4   rD  )r  r  r(   r(   r)   is_same_sizer  ro   r  c                 G   rj   r4   )ru   r  )r0   r   rA   r(   r(   r)   _reshape_aliasw  s   r  c                 C   rj   r4   )ru   r  )r0   r  r(   r(   r)   _index}  ro   r  c                 C   sV  |   }d}|dk rd}|d ur,|dkr&dg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
krb|dkrb| dd}||fS |d ur|| j}t|||
|}t||k|d}| }n	||k | }|tjj
kr| }||fS |tjj
kr| | }||fS )Nr    r!   r   r(   r   )rI   r   r  r^   r_   rM   gatherrT  r   r%   rm   r   expandr   r6   r'   r&   )rl   r   r   r   r   Zn_dimsr   r   wr   Zsafe_target_r   r   Zwsumr(   r(   r)   _nll_loss_forward  sB   


r  c                 C   s   |   dkr|   dksJ d|  dksJ d|   dko%|  dk}|s?| jd |jd ks?J d| j d|j d| jd	 }|d u s_|  dkrT| |ks_J d
| d|j t| ||||S )Nr   r!   r   r    r   r   r   r   rK   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rI   r   r   r  )rl   r   r   r   r   r   Z	n_classesr(   r(   r)   nll_loss_forward  s    	
r  c                 C   s   t | ||||S r4   )r  )rl   r   r   r   r   r(   r(   r)   nll_loss2d_forward  s   	r   Ac                 C   s    |d |  |d  |  |  d S )Nr!   ry   r    r(   r0   r!  r(   r(   r)   _upsample_cubic_convolution1  rC  r#  c                 C   s(   ||  d|  |  d|  |  d|  S )NrP     r   r(   r"  r(   r(   r)   _upsample_cubic_convolution2  s   (r%  r  c                 C   s4   d}t | d |t| |td|  |t d|  |fS )Ng      r\   r   )r%  r#  )r  r!  r(   r(   r)    _upsample_get_cubic_coefficients  s   r&  coeffstsc                 C   s    t |}tdd t| |D S )Nc                 s   s    | ]	\}}|| V  qd S r4   r(   )r/   r  r  r(   r(   r)   r0    r;  z+_upsample_cubic_interp1d.<locals>.<genexpr>)r&  _sum_tensorsrR  )r'  r(  Zcoeffs2r(   r(   r)   _upsample_cubic_interp1d  s   r*  c                 C   s   t tj| S r4   )r   r^   add)r(  r(   r(   r)   r)    s   r)  	num_stepsc                 C   sB   | dkrt jd||dS |s| d |  nd}t j| || ||dS )Nr    r   ry  )Zstepsr#  r   )r^   r  Zlinspace)r,  r	  r   r#  r  r(   r(   r)   _linspace_from_neg_one  s   r-  thetahr  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr    )r    r    r    r"  )r   r!   constantr   rG  r  rm   r    r    )r!   r   	r   r#  r-  r  r^   r  rN  r  rG  )	r.  r/  r  r	  r   r#  grid_xgrid_ygrid_oner(   r(   r)   _make_base_grid_4d  s   r7  r@  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr    )r    r    r    r    r"  )r   ry   r0  r   r1  r_  )r!   r    )ry   r   r3  )r.  r@  r/  r  r	  r   r#  r4  r5  Zgrid_zr6  r(   r(   r)   _make_base_grid_5d  s   r8  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )Nr	  rK   ry   r    r   r!   )r7  r  r   rM   r   )	r.  r   r	  r  rN   r/  r  	base_gridgridr(   r(   r)   _affine_grid_generator_4d)  s    r<  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr9  rK   r   r    r   ry   )r8  r  r   rM   r   )
r.  r   r	  r  rN   r@  r/  r  r:  r;  r(   r(   r)   _affine_grid_generator_5d3  s    r=  c                 C   s@   t t|dv dd  t|dkrt| ||dS t| ||dS )N)r   rP  c                   S   r*  )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r(   r(   r(   r(   r)   rr   C  r+  z'affine_grid_generator.<locals>.<lambda>r   r9  )r^   rt   r  r<  r=  )r.  r   r	  r(   r(   r)   affine_grid_generator=  s   
r>  r;  interpolation_modepadding_mode_expand_gridc                    sJ  t dv fdd t dv fdd dtdtdtffdd	dtd
tdtdtfdddtdtdtffdddtdtdtffdd}j\ |j\}}|dkscJ ru|d| d}dtdtdtffddt jjddddt j jdd dddtdtdtdt	f fdddtdtdtffdd
|d  }	|d! }
d"kr1||	}||
}|
 |
 d }}d }}||}}|| ||  }|| ||  }|| ||  }| |  }t
fd#d$|f|||f|||f|||ffD S dkrN||	}||
}| }| }
||dS |	}|
}|
 |
 | | }sud|d}dtdtdtf
fd%d&d'tdtffd(d)	t	fd*d$td+D }t||S ),N)r   r    r!   c                      r8  )NzInvalid interpolation mode r(   r(   )r?  r(   r)   rr   \  r:  z"_grid_sampler_2d.<locals>.<lambda>c                      r8  )NzInvalid padding mode r(   r(   )r@  r(   r)   rr   _  r:  coordsr   rJ   c                    s0    r|d d n|d }|d d }| | | S Nr   r(   )rB  r   r   ofsr9  r(   r)   unnormalizeb  s   z%_grid_sampler_2d.<locals>.unnormalize	twice_low
twice_highc                 S   sv   ||kr	t | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr!   r   r    r   )r^   r   r   fmodfloorr6   Zint8r_   )rB  rF  rG  Z
coords_minZcoords_spanZcoords2extraZflipsr(   r(   r)   reflect_coordinatesm  s   
z-_grid_sampler_2d.<locals>.reflect_coordinatesc                    sf   dkr| S dkrt | d|d S  r | dd|d  }n
| dd| d }t |d|d S )Nr   r    r!   rK   r   )rB  r   Zcoords_reflected)r	  r@  rK  r(   r)   compute_coordinatesy  s   z-_grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r4   r(   )rB  r   Z	coords_un)rL  rE  r(   r)   compute_source_index  s   

z._grid_sampler_2d.<locals>.compute_source_indexr!   r    xsysc                    s,   t d| kt | k t d|k| k S rh   )r^   r~  )rN  rO  )iHiWr(   r)   in_bounds_cond  s   $z(_grid_sampler_2d.<locals>.in_bounds_condr  wsc                    sN   | |r	nd t  fdd| jtjd|jtjd|fD S )Nr    c                 3   s*    | ]}t |d  V  qdS r.  )r^   r_   r  r  )r  rI  r9  oHoWr(   r)   r0    r  z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )rC  r6   r^   r%  )rN  rO  rS  )r  r  rA  rR  rT  rU  )rI  r9  r)   clip  s
   
z_grid_sampler_2d.<locals>.clipixiyc                    s&   | ||\}}} ||f | S r4   r(   )rW  rX  r  Zidx_xZidx_yZw_)C_idxN_idxr  rV  r(   r)   get_summand  s   z%_grid_sampler_2d.<locals>.get_summand).r   ).r    r   c                 3   s"    | ]\}}} |||V  qd S r4   r(   )r/   rW  rX  r  )r[  r(   r)   r0    
    

z#_grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rS   r(   )rW  rX  r0   rR   )rL  r[  rP  rQ  r(   r)   get_value_bounded  s   

z+_grid_sampler_2d.<locals>.get_value_boundedrD  c                    sF   | d  } d | | d | d |f}t |S )Nr    r!   )r*  )rD  Ziy_ofscs)r]  ix_nwiy_nwtxr(   r)   	get_coeff  s   
z#_grid_sampler_2d.<locals>.get_coeffc                 3       | ]} |V  qd S r4   r(   )r/   rD  )rb  r(   r)   r0    r1  r   )r^   rt   r   r  r   r  r  r$  r#  r   rI  r)  roundrM   rC  rL   r*  )r  r;  r?  r@  r	  rA  rM  rN   twor0   rR   rW  rX  Zix_neZiy_neZix_swZiy_swZix_seZiy_seZw_nwZw_neZw_swZw_seZ
ix_nearestZ
iy_nearesttyr'  r(   )r  rY  r  rZ  rA  r  r	  rV  rL  rb  r[  r]  rP  rQ  rR  r?  r_  r`  rT  rU  r@  rK  ra  rE  r)   _grid_sampler_2dK  sx   
 ( 




	





 

rg  c                 C   s   t | ||||dS )N)r;  r?  r@  r	  )rg  )r  r;  r?  r@  r	  r(   r(   r)   grid_sampler_2d  s   
rh  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr!   r    c                      s   d    d   S )Nzmatrix @ vector expected, got r  rp   r(   rl   r  r(   r)   rr   
  r  zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r0   r    z), vec (r   )r   r(   ri  r(   r)   rr     s   * rp   )r^   rt   rI   r   r   ri  r(   ri  r)   r    s   r  c                 C   s   |   d}|d ur+|d | d }d| |  ||  |  |    |   }nd| |  | |  |  |     }|d urI|| }t||S r  )r   r]   r   r   )rl   r   r   Z
pos_weightr   r   Z
log_weightr   r(   r(   r)    binary_cross_entropy_with_logits  s   

rj  tensor1tensor2c                 C   s   | j |j kr
| |fn|| f\}}|j dkr|j dksdS |jr!dS | j dkr(dS | dkr0dS |j}| }tdd t|d d |d	d
 |d	d
 D S )Nry   r!   FTr   c                 s   s"    | ]\}}}||| kV  qd S r4   r(   )r/   Zst1Zst2s2r(   r(   r)   r0  ;  r\  zshould_fold.<locals>.<genexpr>r   r    rK   )r  r  r   r   r
  r4  rR  )rk  rl  t1t2Zt1_shapeZ	t1_strider(   r(   r)   should_fold+  s    
"rp  c                 C   sj  |   }|  }|dkr|dksJ |dkr |dkr t| |S |dkr.|dkr.t| |S |dkrD|dkrDttt| d|dS |dkrR|dkrRt| |S t| |r||k}|r`|jn| }|sf|n	|dkrn| 	 n| }|j
}t|d d }ttj|}	|  dk}
|
r||j
d  ||	|d }|
r|||}|r|j S |S |||S |dkr|dkr|dkr| dnd}| d}| j
d d }|dkr|dn|d}|dkr|dnd}g }t|d D ]
}||| q|dkr:|dkr:|d |d kr:|d dkr'| jr't| d|S |d dkr:|jr:t| |dS tt||}|||g }t|}| ||||}|dk}|ro||g }||||d}n|||g }|||||}|}|dkr|| |dkr|| |r||d|S |||S tddd	  d S )
Nr   r    r!   rK   r   ry   Fc                   S   r*  )Nz/both arguments to matmul need to be at least 1Dr(   r(   r(   r(   r)   rr     r+  zmatmul.<locals>.<lambda>)rI   r^   dotr  rT  r  rM   rp  r   r  r   r	  r   operatorr   r  r   r  r  r   rL   r  r   Zbroadcast_shapesrk  r  bmmrt   )rk  rl  Zdim_tensor1Zdim_tensor2r  rn  ro  Zsizes_1Zoutput_shapeZfolded_dim1Zt2_is_matrixZ	t1_foldedr   r  m1Zbatch_tensor1m2r   Zbatch_tensor2r  Zexpand_batch_portionZtensor1_expand_sizeZexpand_batch_productZtensor1_expandedZ
vector_rhsZtensor2_expand_sizeZtensor2_expandedr(   r(   r)   r   A  s   
	










r   r  r  c                    s  j \}}|\}}ddd}	dd }
|	|||}|	|||}tj|jd|dddtj|jdd|dd tj|jddd|df}tj|jdddd|f}|
|||}| }|| |jtjd}|
|||}| }|| }|jtjd}|d ||d |d f}|d ||d |d f fd	d
fddtfdd|D }t	||}t
}|j|d}|S )Nc                 S   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S Nr    r   r(   )rE  rF  r	  rd   r(   r(   r)   compute_scale  s    z1upsample_bicubic2d_default.<locals>.compute_scalec                 S   s   |r| | S | |d  d S rC  r(   )rd   Z	dst_indexr	  r(   r(   r)   rM    s   z8upsample_bicubic2d_default.<locals>.compute_source_indexr  r    r   r!   c                    s8   t | dd }t |dd }t ||gS r  )r^   r   ru   r  )rO  rN  Zy_idxZx_idx)rY  rZ  r  rP  rQ  r(   r)   load_bounded  s   z0upsample_bicubic2d_default.<locals>.load_boundedc                    s"   t  fddD }t|S )Nc                 3   s    | ]} |V  qd S r4   r(   )r/   Zx_ofs)rx  rR   r(   r)   r0    s    zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>)rC  r*  )rR   Zcoeffs_x)ixs_ofsrx  t_x)rR   r)   get_x_interp  s   
z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   rc  r4   r(   )r/   Zy_ofs)r{  r(   r)   r0    r1  z-upsample_bicubic2d_default.<locals>.<genexpr>r   r4   )r   r^   r$  r#  r  rI  r6   r%  rC  r*  r?   ru  r  )r  rO  r	  r  r  r  r  rT  rU  rw  rM  Zheight_scaleZwidth_scaleZout_yZout_xZreal_xZin_xrW  Zreal_yZin_yZt_yrX  Ziys_ofsZcoeffs_yr   r   r(   )	rY  rZ  r  r{  rP  rQ  ry  rx  rz  r)   upsample_bicubic2d_default  s6   	


r|  rv  c                 C   s   t t|t| dkdd  |d u r2|d usJ ttttf tdd t| jdd  |D }|r6|nd\}}t	| ||||S )Nr    c                   S   r*  )Nz:Must specify exactly one of output_size and scale_factors.r(   r(   r(   r(   r)   rr     r+  z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s$    | ]\}}t t|| V  qd S r4   )r   r   )r/   r  rd   r(   r(   r)   r0    s
    
z)upsample_bicubic2d_vec.<locals>.<genexpr>r!   r  )
r^   rt   r  r   r   r  rC  rR  r   r|  )r  rO  r	  rv  r  r  r(   r(   r)   upsample_bicubic2d_vec  s   
r}  r{   r~   r  c                C   s(   t j| ||d}t j| ||d}||fS )Nr  )r^   aminr  )rl   rI   r  r~  r  r(   r(   r)   aminmax  s   r  r   c                C   s"   t jtt| d| |||dS )Nr   r   )ru   r   r^   r_   isnan)rl   rI   r  r   r(   r(   r)   nansum  s   "r  r   r%  r#  r&  r%  c             	   C   s   t jjd| d||||dS )Nr   r    r  ru   r$  Z
start_step)r  r   r%  r#  r&  r(   r(   r)   arange_default#     
r  c             	   C   s   t jj| |d||||dS )Nr    r  r  )r  r  r   r%  r#  r&  r(   r(   r)   arange_start2  r  r  c                  O   s   ddl m} || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper  )rA   rB   r  r(   r(   r)   out_dtype_decompA  s   r  marginc           	         s  t t jd jd  t |dkp|dkdd  t jdko, dkfdd t jdko? kfdd d urdt t jdko\  k fdd dt jdd	}||  }|	d}|dkr|n|| }d ur|  }t j
 jd
}t |k|d}|tjjkr| S |tjjkr| |jd  S |jddS )Nr   r    r!   c                   S   r*  )Nz only p == 1 and p == 2 supportedr(   r(   r(   r(   r)   rr   W  r+  z#multi_margin_loss.<locals>.<lambda>c                         d j  S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: rD  r(   )r   r(   r)   rr   Z      c                         d  dj  S )Nz#inconsistent target size, expected rb  rD  r(   )nframer   r(   r)   rr   ^  r  c                      r  )Nz#inconsistent weight size, expected rb  rD  r(   )rI   r   r(   r)   rr   d  r  rI   r  r  rp   )r^   
atleast_2dZ
atleast_1dr   rt   r  r   rM   r  r   r$  r#  r_   r   r&   rm   r   r'   r   )	r   r   r   r  r   r   ur`   rp  r(   )rI   r   r  r   r   r)   multi_margin_lossH  sB   







r  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko |dk fdd ttdko2 k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkr|jdd }n|tjjkr| }n|jdd}|| j}||fS )Nr    r!   r   c                      r8  r  r(   r(   )orig_input_shaper(   r)   rr     r:  z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r(   r(   r  Zorig_target_shaper(   r)   rr     rs   r  rK   Tr  r  rp   r\   )r   rK   )r   r^   r  rt   r  r$  r#  r~  r_   r  anyrM   Tr   r   r&   rm   r   r   r'   r6   r   r   )r   r   r   rI   rp  Zis_endZend_idxZtarget_maskZtidx0r  Ztidx1r  r`   r(   r  r)   multilabel_margin_loss_forwardw  s@   





r  rd   querykey	dropout_p	is_causalreturn_debug_maskc             
      s  j }jd jd jd jd f\}}	}
}tto$|tjufdd t dko> dko> dkfdd t d	k fd
d tjd jd kohjd jd kdd  t|du dd  tj||
|	|gtjd}tjg tj	dtjg tj	d}}d\}}tjg tj	dtjg tj	d}}tjg j j
jd}tjjd  |d |d\}}|ddjtjd}|dd||||||||f	S )Nr   r    r!   ry   c                      r  )Nz'query must be FP32, FP64, BF16 but got r   r(   )r  r(   r)   rr     r  z4scaled_dot_product_flash_attention.<locals>.<lambda>r   c                      s"   d   d    d   S )Nz,q, k, v must be a 4 dimensional tensor, got r  rp   r(   )r  r  rm   r(   r)   rr     s   " r   c                      r8  )Nz&dropout probability must be zero, got r(   r(   )r  r(   r)   rr     r:  c                   S   r*  )Nz&q, k, v should have the same head sizer(   r(   r(   r(   r)   rr     r+  Fc                   S   r*  )Nz#return_debug_mask is not supported.r(   r(   r(   r(   r)   rr     r+  r   )r   r   )r   r#  r  r  r   )r   r   r^   rt   r  r  rI   emptyr  r  r#  r  ru   Z"_scaled_dot_product_attention_mathr  r  r  r   )r  r  rm   r  r  r  rd   r   Z	batchSizeZnum_headZqSizeZheadSizeZ	logsumexpZ	cum_seq_qZ	cum_seq_kZmax_qZmax_kZphilox_seedZphilox_offsetZdebug_attn_maskr   rN   r(   )r  r  r  rm   r)   "scaled_dot_product_flash_attention  sh   
"&


$
r  c                    s   t |  fdd}|S )Nc                     s    | i |}| d  |S rh   )r   )rA   rB   r   outplace_opr(   r)   
inplace_op  s   z$register_inplace.<locals>.inplace_opr   )Zaten_opr  r  r(   r  r)   register_inplace  s   r  c                 C   sx   |   s|  st|}t|}t||}t|tjr |dkr$|| }|dkr*|S t|tjr4|dkr8| | } | | S rv  )r  r  r  r^   rs  r-   numbersNumber)rl   Zbatch1Zbatch2rY   rc   r   r(   r(   r)   baddbmm%  s   r  c                 C   s   t j| |ddS )NrI  r=  r>  )rl   r   r(   r(   r)   floor_divide6  s   r  r   r   c                C   s.   |d u rt jj| g |dS t jj| g ||dS )Nr   r  )ru   r   Zdim_IntListZIntList_out)rl   r   r   r(   r(   r)   sum_default<  s   r  c                 C   s2   |d u rt j| tt|  S t j| |gS r4   )ru   rT  dimsr	  rL   rI   )rl   rI   r(   r(   r)   squeeze_defaultI  s   r  c                    s@   t  fddtt| jD }| jd|dd}| ||  |fS )Nc                 3   s    | ]	}| kr|V  qd S r4   r(   r  rp   r(   r)   r0  T  r;  z)_weight_norm_interface.<locals>.<genexpr>r!   Tr  )rC  rL   r  r   r   )r0   rR   rI   Zkeep_dimr   r(   rp   r)   _weight_norm_interfaceQ  s    r  )F)r   )FNrg  )r   NNr    r4   r   )rK   FFr  r2  )r    r    F)r   r\   N)r   r    Nr  )NNN)r   r   FT)r   r   Fr  )r   FF([  rF   r  rr  r  enumr   r   r   	itertoolsr   r   typingr   r   r	   r
   r   r   r   r^   Ztorch._primsr*  rq  Ztorch._prims_commonZ_prims_commonr?   Ztorch.nn.functionalrN  r  r   r   r   r   Ztorch._decompr   r  r   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   Ztorch.utilsr   r>   Ztorch.utils._pytreer   r  ZDispatchKeyr   str__annotations__Z_opsr  ru   r   r  r  rH   r  Zcompute_only_pw_cast_for_opmathZpw_cast_for_opmathZINT_TO_FLOATZpw_cast_for_int_to_realr  rO   rV   rX   ra   r  ri   fillZScalarrn   rw   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  Zpy_implZAutogradCUDA	Generatorr   r   r   r   r   r   r   r&   rm   r   r   r   rP   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  slicer  r  r  r  r   r!  r&  r^  rq  rv  r{  r  r  ZCompositeImplicitAutogradZAutogradr  r  r  Zrsubr  r  r  r  rk  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  Zunsafe_chunkr  r  r  Zno_statsr  r  Z_fused_dropoutr  r  r$  r+  r#  r   detachZliftZ
lift_freshr-  r0  r2  r3  r4  r7  Z_adaptive_avg_pool2drQ  rX  rY  rV  rl  rm  rj  rn  r   rr  rw  ry  r{  r  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zrnn_tanhr   r  Zrnn_relur  r  r  r  r  r  r  r  Zlstmr   r  r  r  Zgrur  r  r  r
  r  r  r  r  r  Z_unsafe_viewr  r  r  r  r   r#  r%  r&  r*  r)  r-  r7  r8  r<  r=  r>  rg  rh  r  rj  rp  r   Zupsample_bicubic2dr|  r}  r  r  r$  r)  r%  r  r  r  r  r  r  Z#_scaled_dot_product_flash_attentionr  r  r  r  r   r  rT  rI   r  r  Zaddbmm_ZaddbmmZaddmm_Zaddmv_Zbaddbmm_Zfill_Zgelu_r  Z
hardswish_Z	hardtanh_ZhardtanhZhardsigmoid___iand____and____ilshift__
__lshift__rd  re  Zindex_reduce_Zindex_reduce__ior____or____irshift__
__rshift____ixor____xor__Zleaky_relu_r   Zlogit_ZlogitZrelu_r  Zrenorm_ZrenormZround_rd  Zscatter_r   Zscatter_add_Zscatter_addZscatter_reduce_Zscatter_reduceZsilu_r(   r(   r(   r)   <module>   s  
 $

$ 
 

 
	




  *!	
2"
	P`
 
 
 
(


(((

	

W	

	
N
	
R		#

	

	%
	

d	
%	$f("$$






	
 
	



.2
)


  ?
2
	
	


	

M"
Y 

5


 
(
.$$


* 
'


$w
=



,

<
	e	
"