o
    I&i                    @   sH,  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZ d dlZd dlZd dlZd dlm  mZ d dlmZmZ d dlmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z'm(Z( d dl)m*Z*m+Z+m,Z, d	d
l-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3 ddl4m5Z5m6Z6 ddl2m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZA ddlmBZBmCZCmDZDmEZEmFZFmGZG ddlHmIZImJZJ eKeLZMi ZNi ZOeP ZQejIjRZRejIjSZSejIjTZTeP ZUeP ZVeP ZWeX ZYdd ZZdd Z[dd Z\e[eRj]eRj^eRj_eRj`eRjaeRjbeRjceRjdeRjeeRjfeRjgeRjheRjig ejjejkejlejmejnejoejpejqejrejsejrejtejudZvdewfddZxdd Zydd Zzde!fd d!Z{d"d# Z|d$d% Z}d&d' Z~d(d) Zd*e!jd*fd+d,Zd-d. Zdd/d0Z					*dd1d2Zdd3d4Zdd5e?dejfd6d7ZeeTjdd8d5e?dejfd9d:Zd*d;d5e?dejfd<d=ZeeRjjdd8d5e?dejfd>d?Zd*d;d5e?d@ejfdAdBZeeTjdd8d5e?d@ejfdCdDZddEe!jd*ddd*d*fdFdGZ	*ddHdIZeeRjd*ddJdKdL ZeeRjd*ddJdMdN ZeeRjeRjeRjeRjeTjgdOdP ZeeRdQr eeRje eeRjdd8ddRdSZeeRjdd8ddTdUZeeRjgddVdWZeeRjdXdY ZeeRjdZd[ ZeeRjd\d] ZeeRjd^d_ ZeeRjd`da ZeeRjdbdc ZeeRjdd8ddde ZeeTjdd8dfdg ZeeRjdd8dhdi ZeeRjdjdk ZeeRjdd8eeRjdd8eeRjdd8dldm ZeeRjdd8dndo ZeeRjdd8ddqdrZeeRjdd8e fdsdtZeeRj]dd8ddudvZ]eeRjdd8ddwdxZeeRjdd8ddydzZdd{d|ZeeRjdd}d~ZeeRjdd8ddewdewdewfddZeeRjdd8ddewdewdewfddZeeRjdd8ddewdewdewfddZeeRjdd8dd ZeeRjdd8dddZeeRjdd8dddZeeRjdd8dddZeeRjdd8dd ZeeRjdd8dd ZeeRjdd8dd ZdddZeeRjdddZdd Ze  dddZe ddd ZĐddejjfddZǐddejjfddZȐddejjfddZːdddZdd ZeejIjjdd8dd ZeeRjdd8dd ZeeRjdd8dd ZeeRjjdd8dd ZeeRjՃdd Ze ddd Zdd ZeeRjjكZeeRjjۃZeeRjjكZeeRjjۃZeeRj eeRj؃dd ZeeRj݃dd Zee1jdd8dd Zee1jdd8d@ejfddZee1jdd8ddĄ Zee1jdd8ddƄ Zee1jdd8d dǜdeew de?dedewfdd̄Zee1jdd8d dǜdewdewdeew de?dewf
ddЄZeeRjdd8d*d*dќde?de?detdetfddׄZddل Zddۄ Zdd݄ Zdd߄ ZdhZeeRje eeRjbe eeRje eeRje eeRjed*d eeRjed*d eeRje eeRje eeRj eeRj eeRje eeRj dd ZeeRjjed*d eeRj jed*d eeRjjed*d eeRjjed*d eeRjje eeRjje eeRjje eeRjje eeRj eeRjj eeRj	 eeRj
e eeRj eeRje eeRjje eeRje eeRj eeRj eeRj eeRj eeRj eeRj eeRj eeRj eeRjd*d eeRjd*d eeRj eeRj eeRj eeRj eeRj eeRjd*d eeRjd*d eeRj eeRj  eeRj eeRj  eeRj! eeRj" eeRj# eeRj$ eeRj% eeRj& eeRj'd*d eeRj(d*d eeRj) eeRj* eeRj+ eeRj, eeRj- eeRj. eeRj/ eeRj0 eeRj1 eeRj2 eeRj3 eeRj4 eeRj5 eeRj6 eeRj7 eeRj8 eeRj9 eeRj: eeRj; eeRj< eeRj= eeRj> eeRj? eeRj@ eeRjA eeRjB eeRjC eeRjD eeRjE eeRjF eeRjG eeRjH eeRjI eeRjJ eeRjK eeRjL eeRjM eeRjN eeRjO eeRjP eeRjQd*d eeRjRd*d eeRjSd*d eeRjT eeRjU eeRjV eeRjWd*d eeRjX eeRjY eeRjZd*d eeRj[d*d eeRj\d*d eeRj] eeRj^ eeRj_ eeRj` eeRja eeRjbd*d eeRjc eeRjd eeRjed*d eeRjfd*d eeRjg eeRjh eeRjid*d eeRjj eeRjk eeRjl eeRjm eeRjn eeRjo eeRjp eeRjq eeRjr eeRjs eeRjt eeRju eeRjv eeRjw eeRjxd*d eeRjyjz eeRj{jك eeRj|jك eeRj} eeRj~ eeRj eeRj eeRjj eeRjjك eeRjjك eeRj eeRj eeRj eeRj eeRj eeRj eeRjjd*d eeRj eeRjj eeRjjك eejjj eejjj eeRjjd*d eeRjdd8dddZeeRjddddZeeRd	rLeeRje eeTjdd ZeeRjdd8dewdewfddZeeRjdd8dddZdd ZeejeRjgdddd*dddZeejdddZeejdd ZeeRjdd Zdd ZeeRjdd8dd  Zdd ZeejeRjgdddddddddZdd Zdd	 ZeeRjeeZeedZeed Zd
d ZeeRjdddddddZeeRjdddddddZeeRjdddddddZeeTjjكdd ZeejeRjgdd ZeeRjdd8dddZeeRjdd8d ddZdd Zdd Zdd  ZeeRjdd8d!d ZeeRjdd8d"d# ZeeRjdd$d%ZeeRjdd&d'Zd(d) Zd*d+ ZeeRjdd8dd,d-Zee1jdd8dd.d/Zd0d1 Zd2d3 Zee1jdd*d4d5d6 ZeeRjdd8dd7d8ZeeRjdd8dewfd9d:ZddEd;dewd<ee d=etfd>d?ZeeRjdd8dd@dewd<ee fdAdBZeeRjdd8dewfdCdDZeeRjdd8dewfdEdFZeeRjdd8dewfdGdHZeeRjdd8dEdIdewd=etfdJdKZ			*d!dLeee dMf dNewdOetfdPdQZeeRjjكddRee fdSdTZeeRjjكddRee fdUdVZeeRjfjك	ddWee dXee fdYdZZfeeRjgjك	ddWee dXee fd[d\ZgeeRjjك			d"d]ee dWee dXee fd^d_ZeeRjjك			d"d]ee dWee dXee fd`daZΐdbdc ZeeRjhjك		dddetdWee dXee fdedfZeeRjуdgdh ZeeRj҃didj ZeeTjjكdkdl ZeeRjdd8ddmdnZԐdoejdeejewf fdpdqZ֐doejdejfdrdsZאdoejdejdejfdtduZؐd#dwdxZِdydz ZeeRjcjd*d{ZeeRjcdd8	*d$d|d}ZceeRjdjd*d{ZeeRjddd8d~d Zddd Zݐdd ZeeRjjd*d{ZeeRj߃dd ZeeRjjك	d%ddZeeRj^jd*d{ZeeRj^dd8		 	*	E	d&ddZ^eeRj_jd*d{ZeeRj_dd8	dddZ_dd Zdd ZddefddZeeRjd'ddddZdd Zdd Zdd ZeeRjeTjgddd*dddZeeRjddd*dddZdd Zedd ZeeRjjd*d{ZeeRjjd*d{ZeeRjjd*d{ZeeRjdEddd ZdddZeeRjdd ZeeRjdd8dddZedd Zedd ZeeRjdEddddZeeRjgdEddd ZeeTjgdEddd ZeeRj eRjjgdEe!jdJeZeeRjeTjgdEddd ZeeRjdd ZeeRjeTjgd'ddddZeeRjd'ddddZeeRjd'ddZ	eeRj
dd8d'dÐdĄZeeRjdd8d'dŐdƄZeeTjedǃ eeRjedȃZeeRjedɃZeeRjedejndˍZeeRjedejndˍZeeRjdEd͐d΍ZdϐdЄ Zdѐd҄ ZeeRjZeeRjZeeRjZeeRjZeeRjZeeRjZeeRj Z eeRj!dEdӍZ!eeRj" eeRj# eeRj$Z$eeRj%Z%eeRj&Z&eeRj'dԐdՍZ'eeRj(Z(eeRj)Z)eeRj*Z*eeRj+ eeRj,Z,eeRj-e!jd8e, eeRj. eeRj/ eeRj0 eeRjM eeRj1ddEejtd֍Z1eeRj2ddEejtd֍Z2eeRj3ddEejtd֍Z3eeRj4ddEejtd֍Z4eeRj5Z5eeRj6Z6eeRj7e5 eeRj8e6 eeRj9Z9eeRj:Z:eeRj; eeRj<dאdՍZ<eeRj eeRj=ejtdˍ eeRj>e9 eeRj?ejtdˍ eeRj@ejtdˍ eeRjAejtdˍ eeRjBejtdˍZBeeRjCejtdˍ eeRjDejtdˍ eeRjE eeRjF eeRjG eeRjH eeRjI eeRjJ eeRjK eeRjL eeRjM eeRjN eeRjO eeRjP eeRjQ eeRjR eeRjS eeRjTjedEdӍZUeeRjTjedEdӍZVeeRjTjedEdӍ eeRjWjeZXeeRjWjeZYeeRjZje! eeRjZje! eeRj[jِe9 eeRj\jِe$ eeRj]je eeRj]j^e eeRj_jeZ`eeRj_jeZaeeRjbe eeRjcje5 eeRjcje5 eeRjdje6 eeRjdje6 eeRjeje5 eeRjeje5 eeRjfje6 eeRjfje6 eeRjge: eeRjhe< eeRjie dؐdل ZjejeRjkjeRjTjeU ejeRjkjeRjTjeV ejeRjljeRjWjeX ejeRjljeRjWjeY ejeRjmjeRj_je` ejeRjmjeRj_jea dڐdۄ ZneneRjoe eneRjpe% eneRjqe& eneRjre' eneRjse( eneRjte) eneRjue* eneRjve eneRjwje eneRjwjxe eneRjye1 eneRjze2 eneRj{e3 eneRj|e4 eneRj}e! eneRj~e eneRje eeRje% eeRje& eeRje( eeRje) eeRje* eneRjeRj eneRjeRj eneRjeRj eneRjeRj eneRjeRj eeRjdܐd݄ ZeeRjjwdސd߄ ZeeRjjwdd ZeeRjdd Ze' D ]\ZZee(ee qeeRjՃdd ZeejIjjdd ZeejIjjdd Zeedd Zeedd Zzd dlZejIjZeejdd Zeejdd Zeejdd Zeejdd Zeejdd Zeejdd Zeejdd Zeejdd Zeejdd ZejIjZeejd d Zeejdd Zeejdd Zeejdd Zeejdd	 Zeejd
d Zeejdd Zeejdd Zeejdd ZW n ey   eMd Y nw ddl/mZ e.e ddl/mZ e  dS ((      N)defaultdict)Iterable)AnyCallableDictListOptionalTupleUnion) triton_kernel_wrapper_functionaltriton_kernel_wrapper_mutation)
canonicalize_dimcanonicalize_dimscheckdtype_to_typeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDis_boolean_dtypeis_float_dtypeis_integer_dtypeNumber)magic_methodsmethod_to_operator)CeilDivFloorDivModularIndexing   )import_submodule   )configinductor_primsirtest_operators)decompositionsget_decompositions)
ExpandViewIndexingConstant	is_tritonops_wrapperPermuteView	Pointwise	ReductionSqueezeView	TensorBoxvalidate_irView)ceildivdecode_device
is_dynamicis_pointwise_usepad_listlikesympy_product)opsVc                 C   s   | s	t d| d S )Nzinductor does not support NotImplementedError)condmsg r<   CC:\wamp64\www\opt\env\Lib\site-packages\torch/_inductor/lowering.py
assert_nyiI   s   r>   c                 C   s\   t | tttfrdd | D S t|  t | tjjr*| 	 D ]}tt
| | qd S d S )Nc                 S      g | ]}t |qS r<   )add_needs_realized_inputs.0xr<   r<   r=   
<listcomp>P       z-add_needs_realized_inputs.<locals>.<listcomp>)
isinstancelisttuplesetneeds_realized_inputsaddtorch_opsOpOverloadPacket	overloadsgetattr)fnoverloadr<   r<   r=   r@   N   s   
r@   c                 C   s:   t | tjjr|  D ]	}|tt| |< qd S |t| < d S N)rF   rL   rM   rN   rO   layout_constraintsrP   )rQ   
constraintrR   r<   r<   r=   add_layout_constraintW   s
   rV   )r   r   r                     	   
         dtypec                 C   s2   t | ts| S | tv sJ d|  dt|  } | S )Nzid z missing from DTYPE_ID_LOOKUP)rF   intDTYPE_ID_LOOKUPra   r<   r<   r=   decode_dtype   s
   
re   c                 C   sB   t | trt|  pt|  S t | tjr| jdu S t | tS NT)	rF   r-   r   	get_dtyper   sympySymbol
is_integerrb   rC   r<   r<   r=   is_integer_type   s
   


rl   c                 C   s    t | trt|  S t | tS rS   )rF   r-   r   rg   boolrk   r<   r<   r=   is_boolean_type   s   

rn   type_promotion_kindc                    s0   dd   fdd|D }t |d| i\}}|S )Nc                 S   sF   t | ttjfr
| S t| dsJ t|  }tjdg| | 	 dS )Nrg   r   rd   )
rF   r   rh   ri   hasattrlenget_sizerL   zerosrg   )inpdimr<   r<   r=   construct_input   s
   z+get_promoted_dtype.<locals>.construct_inputc                       g | ]} |qS r<   r<   )rB   argrv   r<   r=   rD      rE   z&get_promoted_dtype.<locals>.<listcomp>ro   )r   )ro   argsZinps_ra   r<   ry   r=   get_promoted_dtype   s   	r|   c                 C   sh   t | ttfs| g} nt| } t| D ]}t |tjjr1| D ]}t||}|tvr0| 	| q q| S rS   )
rF   rG   rH   rL   rM   rN   rO   rP   	loweringsappend)aten_fnrQ   rR   Zother_fnr<   r<   r=   get_overloads   s   

r   c                    s   dd t  D |s|r4r4|rtjndd  D }t|d|i fddfdd D  |rrrrtt fddD  D ]\}}| |< qFtt D ]}t | t	j
rqt | t d	    |< qU S )
Nc                 S      g | ]\}}t |tr|qS r<   rF   r-   rB   irC   r<   r<   r=   rD          z"transform_args.<locals>.<listcomp>c                 S   s$   g | ]}t |tst|d r|qS rg   )rF   r   rp   rB   ar<   r<   r=   rD      s    
ro   c                    s@   t | tr
t| S t | tjrt| j d   S | S Nr   )rF   r-   to_dtyper!   Constantvalue
get_device)rx   )rz   ra   indicesr<   r=   promote   s
   

ztransform_args.<locals>.promotec                    rw   r<   r<   r   )r   r<   r=   rD      rE   c                       g | ]} | qS r<   r<   rB   r   rz   r<   r=   rD      rE   r   )	enumeraterL   rm   r|   zipbroadcast_tensorsrangerq   rF   r!   r   r%   createrG   rr   )rz   	broadcastro   convert_input_to_boolZpromoting_argsr   rC   r<   )rz   ra   r   r   r=   transform_args   s,   $
$r   c                    sD   t   fddt| }t| tfdd|D  S )a  
    Add a foreach lowering to lowerings dict.

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s*   t | dksJ  | i |}t| |S )Nr   )rq   r.   )rz   kwargsout)	decomp_fnr<   r=   wrapped   s   z+_register_foreach_lowering.<locals>.wrappedc                       i | ]}| qS r<   r<   rB   rQ   r   r<   r=   
<dictcomp>       z._register_foreach_lowering.<locals>.<dictcomp>)	functoolswrapsr   foreach_opsupdater}   )r   r   Zaten_fnsr<   )r   r   r=   _register_foreach_lowering   s   
r   c                    sB   t  fddt  tfdd D  S )a  
    Add a lowering to lowerings dict

    Arguments:
        aten_fn: torch.ops.aten.* fn we are lowering
        decomp_fn: alternate implementation on our IR
        broadcast: True to apply broadcasting to tensor inputs
        type_promotion_kind: kind of type promotion applied to tensor inputs, `None` means no type promotion
        convert_input_to_bool: some logical ops require inputs are converted to bool
    c                     s   t | } d}t| dkrt| d t tfrd}| d } tdd | D r*J dtdd | D r@td	d  D s@J t| } |rL| g} | i |}t	| |S )
NFr   r   Tc                 s       | ]}|d kV  qdS )r   Nr<   rA   r<   r<   r=   	<genexpr>      
z6_register_lowering.<locals>.wrapped.<locals>.<genexpr>zout= ops aren't yet supportedc                 s   s    | ]}t |tV  qd S rS   r   rA   r<   r<   r=   r         c                 s   s    | ]}|t v V  qd S rS   )	fallbacksr   r<   r<   r=   r     r   )
rG   rq   rF   rH   anykeysvaluesallr   r.   )rz   r   unpackedr   )r   r   r   r   ro   r<   r=   r   
  s*   
z#_register_lowering.<locals>.wrappedc                    r   r<   r<   r   r   r<   r=   r   *  r   z&_register_lowering.<locals>.<dictcomp>)r   r   r   r}   r   )r   r   r   ro   r   r<   )r   r   r   r   ro   r   r=   _register_lowering   s
   r   Fc                 C   s   t jt| |||dS )z+
    Shim to support decorator syntax.
    r   ro   r   )r   partialr   )r   r   ro   r   r<   r<   r=   register_lowering.  s   	r   c                 C   s   g }t jt| t|tddD ];\}}|dkr|| q|dkr)|| qtjj	|| t
t|jt
t|jk rG|| q|| qtt|S )z
    Broadcasting logic based on symbolic shapes.

    We give the shapes 0 and 1 concrete values, while all other shapes
    are symbolic sympy formulas.
    r   )	fillvalue)	itertoolszip_longestreversedrh   Integerr~   r7   graphsizevarsguard_equalsrq   expandZfree_symbolsrH   )r   boutputrC   yr<   r<   r=   broadcast_symbolic_shapes@  s    r   c                    s   t dd | D s| S tdd | D r-|pt| dtjifdd  fdd| D S td	d | D }g }| D ]A}t|ttfrZ|	t
t|| | t|  q:t|tjrv|	t
t|| | t|  q:|	| q:|S )
Nc                 s   s"    | ]}t |tjttfV  qd S rS   )rF   rh   Exprrb   floatrA   r<   r<   r=   r   Y       z$promote_constants.<locals>.<genexpr>c                 s   s"    | ]}t |tttjfV  qd S rS   )rF   rb   r   rh   ri   rA   r<   r<   r=   r   [  r   ro   c                    s0   t | tjrt|  td S t|  td S rS   )rF   rh   ri   r!   r&   r1   r   rk   rd   r<   r=   
const_func`  s   z%promote_constants.<locals>.const_funcc                    rw   r<   r<   rA   )r   r<   r=   rD   f  rE   z%promote_constants.<locals>.<listcomp>c                 s   s"    | ]}t |ttfr|V  qd S rS   )rF   r-   r%   rA   r<   r<   r=   r   g  r   )r   r   r|   r   DEFAULTnextrF   rb   r   r~   r%   r   r!   r   rg   r   rG   rr   rh   r   r&   )inputsoverride_return_dtypeexr   rC   r<   )r   ra   r=   promote_constantsX  s8   
r   c                    s*   d ddt t f fdd}|S )Nalphar   c              	      sD  t |	}r| d ur| dkrt|}t|d | |d< n| d u s#J dd |D |d  	p7|d   t|d  jdk|dd  D ]!}t|t	j
sjtt| ksjJ d d d|  qI fd	d
}sd }|D ]}| jdkr| } nq}|s|d  }p|}tj| |dS )Nr   c                 S      g | ]}|  qS r<   make_loaderrA   r<   r<   r=   rD     rE   z1make_pointwise.<locals>.inner.<locals>.<listcomp>r   cudazndim mismatch  c                    s   t  t ksJ d  d tjkr&d ur& fddD  S r:r:tjkr: fddD  S  fddD  S )Nzwrong ndim r   c                       g | ]}| qS r<   r<   rB   loadindexr<   r=   rD     rE   zCmake_pointwise.<locals>.inner.<locals>.inner_fn.<locals>.<listcomp>c                    r   r<   r<   r   r   r<   r=   rD     rE   c                    r   r<   r<   r   r   r<   r=   rD     rE   )rq   rL   rm   float64r   )ra   rQ   is_cudaloadersoverride_fn_when_cuda_float64override_fn_when_input_boolrangesr   r=   inner_fn  s   $z/make_pointwise.<locals>.inner.<locals>.inner_fndevicera   r   r   )r   rG   mulrr   rg   r1   r   typerF   r!   BaseConstantrq   r*   r   )r   r   otherr   r   r   allow_alpharQ   override_devicer   r   r   )ra   r   r   r   r=   inner  sF   
	zmake_pointwise.<locals>.innerr   r-   )rQ   r   r   r   r   r   r   r<   r   r=   make_pointwise}  s   &-r   c                    s&   dddt t t  f fdd}|S )Nr   r   r   c                    s|  dd }t tjjjdkptjjjtv }tjjjD ]}|jD ]}|jdkr*|jtv s,d}qqd }|D ]}t	|t
tfr?|} nq2|d usHJ dg }|D ]}t	|t
tfs`||gt |  qL|| qL|t| }	d gt | }
|	 D ]9\\}}}g }|D ]&\}} r|d| i}n| }||
|< |jdkr|r|r||  q|rtj| qwtd	d
 |
D sJ |
S )Nc                 S   st   t t}t| D ]/\}}t|  }d }|D ]}t|tr#|j } nq|d us,J d|||f ||f q|S )Nz.foreach op should have at least one tensor arg)	r   rG   r   r2   rF   r-   datar   r~   )Z	arg_pairsr   r   rz   use_foreachr   tr<   r<   r=   
group_args  s   



z9make_foreach_pointwise.<locals>.inner.<locals>.group_argsr   call_functionTz1at least one input must be a list to a foreach opr   r   c                 s   s    | ]}|d uV  qd S rS   r<   rA   r<   r<   r=   r         z8make_foreach_pointwise.<locals>.inner.<locals>.<genexpr>)rq   r7   r   current_nodeuserstargetinplace_foreach_opsopr   rF   rG   rH   r~   r   itemsr   realizeZregister_listr   )r   r   r   Zrealize_outputsnodeuserZa_list_inputinputZbroadcast_inputsgroupsoutputsr   r   groupZbuffer_listZ
output_indrz   r   r   pw_fnr<   r=   r     sX   

z%make_foreach_pointwise.<locals>.innerr   )r  r   r   r<   r  r=   make_foreach_pointwise  s   "Er  rC   c                    s>   |    kr|rt| S | S  fdd}t| d| S )Nc                    s   t j|  dS )N)	src_dtype)r6   r   rk   ra   r  r<   r=   	_to_dtype     zto_dtype.<locals>._to_dtyper   )rg   cloner   )rC   ra   copyr  r<   r  r=   r     s
   r   ro   c                 C      t | |ddS NTr
  r   rC   ra   r<   r<   r=   _convert_element_type	     r  r  c                   sv   |    kr|rt| S | S dd }||   }| }||kr-td|    d  d fdd}t| d| S )	Nc                 S   s   | j r	t| jS t| jS rS   )Zis_floating_pointrL   Zfinfobitsiinford   r<   r<   r=   _get_primitive_bitwidth  s   z1to_dtype_bitcast.<locals>._get_primitive_bitwidthzbitcast z to different bitwidth type z is not supported yet.c                    s   t |  S rS   )r6   to_dtype_bitcastrk   rd   r<   r=   _to_dtype_bitcast     z+to_dtype_bitcast.<locals>._to_dtype_bitcastr  )rg   r	  r9   r   )rC   ra   r
  r  Zsrc_bitsZdst_bitsr  r<   rd   r=   r    s   r  c                 C   s<   |j s|  j rttjtjjj	j
| |S t| |ddS r  )
is_complexrg   r-   r   r!   ZComplexViewrL   r6   atenviewra   r  r  r<   r<   r=   _view_dtype%  s
   r  r   c                C   s8   t |}|  |kr|rt| S | S ttj| |S rS   )r1   r   r	  r-   r   r!   Z
DeviceCopy)rC   r   r
  r<   r<   r=   	to_device.  s   r  c                 C   r  r  )r  )rC   r   r<   r<   r=   _device_put5  r  r  Tc	                 C   s   |p| j }t|}	|rtd| }
|durt|}t|	|||r!|
nd|d}	t| |||d|	}	tt|rAttt|d|d|	 |	S )z3A pointwise function that maps ops.{name} to inputsZ
libdevice_N)r   r   r   r   r   )ro   r   )__name__r(   r   r   rp   primsrP   )r   namer   ro   r   r   r   r   use_libdevice_for_f64rQ   Zfn_libdevicer<   r<   r=   register_pointwise:  s<   


r#  c                 C   s   t ||d}t| |}|S )Nr   )r  r   )r   Zpointwise_lowering_fnr   rQ   r<   r<   r=   register_foreach_pointwised  s   
r%  )r   ro   c                    s  dd }t |ttfrt||}t |ttfrt||}| ||g t d  d tjd}dd t D }t|t	 fdd|D  D ]\}}| |< qFt
t D ]}t  | tjrqt | t |d	    |< qUt||d
 d	 t d |t d |S )Nc                  W   
   t j|  S rS   )r6   wherer   r<   r<   r=   rQ   p     
zwhere.<locals>.fnr   r   r  c                 S   r   r<   r   r   r<   r<   r=   rD   |  r   zwhere.<locals>.<listcomp>c                    r   r<   r<   r   r   r<   r=   rD   }  rE   r   r  )rF   r   rb   constant_liker|   r   r   r   r   r   r   rq   r!   r   r%   r   rG   rr   r   r   )r:   r   r   rQ   ra   r   r   rC   r<   r   r=   r'  n  s&   
$
$
r'  c                  G   s   t | dkrt| d ttfrt| d  S ttdd | D g }g }| D ]$}| }t |t |ks?t	dd t
||D rDt||}|| q%|S )Nr   r   c                 S   r   r<   )rr   rA   r<   r<   r=   rD     rE   z%broadcast_tensors.<locals>.<listcomp>c                 s   s4    | ]\}}|d kr|d kp|d ko|d kV  qdS r   Nr<   rB   r   r   r<   r<   r=   r     s    $
z$broadcast_tensors.<locals>.<genexpr>)rq   rF   rG   rH   r   r   reducer   rr   r   r   r   r~   )r   r   r   rC   sizesr<   r<   r=   r     s   
r   c                 C   s   | S rS   r<   rk   r<   r<   r=   nop  s   r.  
lift_freshc                 C   s   t | tsJ |d u rtt| jS tt|  |}tt |t	s%|fn|}g }t
|  D ]\}}||v rCtjjt|dsH|| q0||  krTt| |S | S Nr   )rF   r-   r,   r   r   r   rq   rr   rI   rH   r   r7   r   r   evaluate_exprrh   Eqr~   r  )rC   ru   dims	new_shapedsr<   r<   r=   squeeze  s   
r7  c                 C   s   t t| |S rS   )r	  r7  )rC   ru   r<   r<   r=   squeeze_copy  r  r8  c                 C   2   t | |}t| tsJ t|tsJ |j| _| S rS   )r7  rF   r-   r   rC   ru   valr<   r<   r=   squeeze_  
   
r<  c                 C   2   t | rt| dtjdS td}t|tjd| S )NFrd   isinfr  rl   	full_likerL   rm   r(   r   rC   rQ   r<   r<   r=   r?       r?  c                 C   r>  )NFrd   isnanr  r@  rB  r<   r<   r=   rD    rC  rD  c                 C   $   t | rt| S td}t|| S )Nceilrl   r	  r(   r   rB  r<   r<   r=   rF       rF  c                 C   rE  )NfloorrG  rB  r<   r<   r=   rI    rH  rI  c                 C   rE  )NroundrG  rB  r<   r<   r=   rJ    rH  rJ  c                 C   rE  )NtruncrG  rB  r<   r<   r=   rK    rH  rK  c                 C   s   t | g\} t| tjrt| t|S t| tsJ t|ttfs$J t| 	 t|kr0| S t
dd | 	 D satjjt| 	 }|dkrat
dd |D sa| tjjt||  tt| jt|S )Nc                 s        | ]}t jjj|V  qd S rS   r7   r   r   	shape_envZis_unbacked_symintrB   r6  r<   r<   r=   r         zexpand.<locals>.<genexpr>r   c                 s   rL  rS   rM  rO  r<   r<   r=   r     s    
)r   rF   r!   r   r%   r   rH   r-   rG   rr   r   r7   r   r   	size_hintr5   
mark_reuser   )rC   r-  Zx_size_productr<   r<   r=   r     s    r   c                 C   sL   t |}|D ]}d||< q| }t|D ]\}}|dkr t||}qt||S )Nr   )rG   r   	unsqueezer   )r   shapeZbroadcast_dimensionsr6  Zbroadcast_dimensionvidxrC   r<   r<   r=   broadcast_in_dim  s   


rW  c                 C   s   t | | S rS   )r   rr   rC   r   r<   r<   r=   	expand_as  r  rY  c                    sP  t |   tt kr%tdgtt      t| t  } tt|  ks1J t |  }d}ttD ]}| dkrId}|| |  ||< q?|rat||  | 	 dS t
dd t D rrt| |S  fdd	}tjjt }|dkr| tjjt||  |  tj| 	 |  |t |d
S )Nr   Fr   Tra   r   c                 s   s$    | ]\}}|d kp|d kV  qdS r*  r<   r+  r<   r<   r=   r   1     " zrepeat.<locals>.<genexpr>c                    sv   t | t ks
J t| } tt D ]"}| dkr6 | dkr*td| |< qt| | d | | |< q| S Nr   r   )rq   rG   r   rh   r   r   )r   r   Zold_sizerepeatsx_loaderr<   r=   r   6  s   zrepeat.<locals>.inner_fnr   )rG   rr   rq   rh   r   r  r   emptyrg   r   r   r   r   r7   r   r   rQ  r5   rR  r   r*   r   )rC   r^  new_sizeZzero_tensorr   r   Zold_size_productr<   r]  r=   repeat  s8    
rb  c                 C   s2   t | tsJ t |ttfsJ tt| j|S rS   )rF   r-   rG   rH   r/   r   r   )rC   r-  r<   r<   r=   r  Q  s   r  c                 C   s6   t | tsJ t |ttfsJ tt| jt|S rS   )rF   r-   rG   rH   r)   r   r   )rC   r3  r<   r<   r=   permuteZ  s   rc              c                 C   s|   t | tsJ t| |d}|  | }tjjt	|| dr"d}tjjt	|| dr1d}tt
j| j||||S r   )rF   r-   _validate_dimrr   r7   r   r   r1  rh   Ltr!   	SliceViewr   r   )rC   ru   startendstepdim_sizer<   r<   r=   slice_a  s   rl  c                    st  t |ts|f}t |ts|f} fdd|D }t  dkr%t S t|}t|}|dks5|dkr|dkr=td|dkr^|dkr^t t  g}t||d}t|t	  S ||krltd| d| |dd }|dd }t |d |d }	t|	||S |\t
jj   |d     fd	d
}
tj    |
  dS )z
    This is based on torch._refs.roll(), but uses ModularIndexing().

    We can't use the ref here because it is based on multiple calls to
    torch.cat() that this will result in terrible code.
    c                    s   g | ]}t  |qS r<   )re  rB   r5  r   r<   r=   rD   z      zroll.<locals>.<listcomp>r   r   z`shifts` requiredz*shifts and dimensions must align. shifts: z, dims: Nc                    s4   t | } t|   tdt| <  | S r0  )rG   r   rh   r   r   r   )a_loaderru   sizerh  r<   r=   rQ     s
   zroll.<locals>.fnr   )rF   r   r5   rr   r	  rq   RuntimeErrorr  rollrG   r7   r   r   evaluate_static_shaper   r*   r   r   rg   )r   Zshiftsr3  Z
len_shiftsZlen_dimsZflatZrolledZtail_shiftsZ	tail_dimsZfirst_dim_rolledrQ   r<   )r   rp  ru   rq  rh  r=   rs  m  sF   
	
rs  c              	   C   s   t | trt | jtjr| j } |   t| s"td|  dt	| \}}t
|j|jdd |D dd |D t|p@d}tt||S )Nzunrealized as_strided(z, ...)c                 S      g | ]}t |qS r<   rh   r   rO  r<   r<   r=   rD     ro  zas_strided.<locals>.<listcomp>c                 S   ru  r<   rv  rO  r<   r<   r=   rD     ro  r   )rF   r-   r   r!   BaseViewunwrap_viewr   is_storage_and_layoutr9   as_storage_and_layoutFixedLayoutr   ra   rh   r   ZReinterpretView)rC   rq  stridestorage_offsetstorageZ
old_layoutZ
new_layoutr<   r<   r=   
as_strided  s   

r  c                 C   s$   t | tsJ t| |||j| _| S rS   )rF   r-   r  r   )rC   rq  r|  r}  r<   r<   r=   as_strided_  s   r  c                 C   s   t | |||}t|S rS   )r  r	  )rC   rq  r|  r}  resultr<   r<   r=   as_strided_copy  s   r  c                    s   g d}D ]} |||    f d d }qdd D  fdd}td  }d d | < tjd  d  ||dS )Nr   r   c                 S   r   r<   r   rB   rt   r<   r<   r=   rD     rE   z!pointwise_cat.<locals>.<listcomp>c              	      s  t |  tj}g g ttD ]ldkr t dtjn
t  d tj}t  d tj}t ||}t ||}dkrI|}ntd krT|}nt 	||}
| t|    d 8  < 
t |fddd q fdd  dS )Nr   r   c                      s     S rS   r<   r<   )r   idx_loadinputs_loadersr<   r=   <lambda>  s    z1pointwise_cat.<locals>.inner_fn.<locals>.<lambda>        c                    s6   | t d krt|  |   | d S d S )Nr   r   )rq   r6   r'  )r   )get_masked_valr   masked_loadsmasksr<   r=   r    s   
z7pointwise_cat.<locals>.inner_fn.<locals>.get_masked_val)r6   
index_exprrL   int64r   rq   constantgeltand_r~   rG   masked)rV  idx_dimrh  ri  Z
start_condZend_condmaskru   r   r  Zinputs_ranges)r  r   r  r  r  r=   r     s8   

zpointwise_cat.<locals>.inner_fnr   )r~   rr   rG   r*   r   r   rg   )r   ru   Zprev_endrt   r   ra  r<   r  r=   pointwise_cat  s   1

r  c                    s2  t dd | D r/| D ]}|  qt dd | D r&ttjg| R  \} }ttjj| |S t| dkr;t| d S t	| d |d}t
| dtji  fdd| D } d	tffd
dt| tjkrt dd tjjD }t fdd| D }tfdd| D }|s|r|rt| |S ttj| |S )Nc                 s   s    | ]
}|  tju V  qd S rS   )rg   rL   uint8rB   r   r<   r<   r=   r     s    zcat.<locals>.<genexpr>c                 s   s     | ]}t | d kV  qdS )rX   N)rq   rr   r  r<   r<   r=   r     rP  r   r   ro   c                    s   g | ]}t | qS r<   r  r  rd   r<   r=   rD      ro  zcat.<locals>.<listcomp>returnc                    s   t | rt j| dd\}}t j| S t| tr-t| jt jr( | j	 S  | jS t| t j
r8 | jS t| t jr@dS dS )NF)freezeT)r!   ry  rz  ConcatKernelZcan_realize_into_without_copyrF   r-   r   rw  rx  
StorageBoxr*   )rC   r~  r{   should_lower_cat_inputr<   r=   r  "  s   



z#cat.<locals>.should_lower_cat_inputc                 s   s    | ]}t |V  qd S rS   )r3   )rB   Zuser<   r<   r=   r   8  r   c                 3       | ]} |V  qd S rS   r<   r  r  r<   r=   r   9  r   c                 3   r  rS   r<   r  r  r<   r=   r   :  r   )r   r   require_channels_lastr  catfallback_handlerdefaultrq   r	  re  r|   r   r   rm   r   Zmax_pointwise_cat_inputsr7   r   r   r   r  r-   r!   r  r   )r   ru   r   r{   Zpointwise_usesZall_pointwise_inputsZany_pointwise_inputsr<   )ra   r  r=   r    s.   

r  offsetdim1dim2c                    s   |   ttdtdtkfdd tjjt	|d}|r<t
t |  d}nt
t  | d}d |rT| df nd|f fddtD }||  fdd	}ttj| ||S )
N)rV  rankc                      s   d  d S )Nz(diagonal dimensions cannot be identical z, r<   r<   r  r  r<   r=   r  J      zdiagonal.<locals>.<lambda>r   )r   r   c                    s    g | ]\}}| fvr|qS r<   r<   )rB   r   r6  r  r<   r=   rD   Y       zdiagonal.<locals>.<listcomp>c                    s   | d }dgt  }d}tD ]&}|kr | d  ||< q|kr-| d  ||< q| | ||< |d7 }q|t d ksBJ |S )Nr   r   r   r   )rq   r   )rV  Zdiag_idxZoriginal_idxZcur_dimr5  Zbase_idxr  r  Znum_dimsZoriginal_shaper<   r=   	reindexer\  s   
zdiagonal.<locals>.reindexer)rr   rq   r   r   r7   r   r   r1  rh   rf  maxminr   r~   r-   r!   GenericViewr   )r   r  r  r  Zoffset_negativeZ	diag_sizer-  r  r<   r  r=   diagonalB  s&   
r  c                 C   s   t t| |||S rS   )r	  r  )r   r  r  r  r<   r<   r=   diagonal_copyo     r  c                 C   $   t | }t||||}t|| |S rS   )r	  r  	mutate_to)r   srcr  r  r  r   r   r<   r<   r=   diagonal_scattert     
r  c                 C   s,   t ||  | }tt| |||d |S r0  )r/   Zhandle_negative_indexrr   r7  rl  )rC   ru   rV  r<   r<   r=   select|  s   r  c                 C   s   t | |d}tjj|  | }t|tjrtjj|}t|t	tj
fr1|g|| d |  }g }d}|D ]}|| }|t| ||| |}q7|S Nr   r   )re  r7   r   r   rt  rr   rF   rh   r   rb   r   r~   rl  )rC   r-  ru   x_sizer  rh  rq  ri  r<   r<   r=   split  s   r  c                 C   s   t | ||S rS   )r  )rC   r-  ru   r<   r<   r=   split_with_sizes     r  c                 C   sJ   t | |d}tjj|  | }g }t|D ]}|t| || q|S r   )	re  r7   r   r   rt  rr   r   r~   r  )rC   ru   r  r  r   r<   r<   r=   unbind  s   r  c                    s   |   }t|}t|| |dkrtt| d|dS |  }tjj}||| |	d t
|| d }||dkrK| |t|| | g |d   || d d  |}	 fdd}
ttj| |	|
S )Nr   )ri  r   c                    s:   | d |     }g | d   ||  d d R S )Nr   r   r<   )rV  Zdim_idxru   rj  r<   r=   r    s   &zunfold.<locals>.reindexer)rr   rq   r   rl  rS  r7   r   r   	guard_leqguard_ltr   rQ  rR  r   r-   r!   r  r   )rC   	dimensionrq  rj  r-  ndimrk  r   Znew_dim_sizeout_sizer  r<   r  r=   unfold  s   
(r  c                 C   s4   t | |d}t|  }||td t| |S r0  )re  rG   rr   insertrh   r   r  )rC   ru   r4  r<   r<   r=   rS    s   
rS  c                 C   r9  rS   )rS  rF   r-   r   r:  r<   r<   r=   
unsqueeze_  r=  r  c                 C   sR   t |tsJ t|  }|dk r||| 7 }d|  kr$|| k s'J  J |S r   )rF   rb   rq   rr   )rC   ru   r  r  r<   r<   r=   re    s    re  r   c                 C   sT   t | |d}tjj|  | d }t| |d|}t| |||d }t|t|S )Nr   r   )	re  r7   r   r   rt  rr   rl  r   sigmoid)rC   ru   Znew_lenr   r   r<   r<   r=   glu  s
   r  c               !   C   sj  t jjr2t jjjt jjjt jjjt jjjt	j
jt jjjg} tt jjjdtdtdtfdd}tt jjjjdtdtdtdtfdd}tt jjjjdtdtdtdtfd	d
}tt jjjdtdtdtfdd}tt jjjjdtdtdtdtfdd}tt jjjdtdtdtfdd}tt	j
jdtdtdtdtdtdtdtdtdtt dtdtdtdtd td!td"tf d#d$}tt jjjd d%dtd&td'td(tdtf
d)d*}tt jjjjd d%dtd+td&td'td(tdtfd,d-}	tt jjjd d%dtd&td'td(tdtf
d.d/}
t jjr,| t jjj tt jjjdtd0td1tdtfd2d3}t|  d S 	 d S )4NrC   weightbiasc
           
      S   s$   t tj| |||||||||	
S rS   )r-   r   r!   ZConvolutionUnary)
rC   r  r  paddingr|  dilationr   attrscalars	algorithmr<   r<   r=   convolution_unary  s   z5register_onednn_fusion_ops.<locals>.convolution_unaryr   c                 S   *   t tj| |||||||||	|
||S rS   )r-   r   r!   ZConvolutionBinaryrC   r   r  r  r  r|  r  r   binary_attrZbinary_alpha
unary_attrunary_scalarsZunary_algorithmr<   r<   r=   convolution_binary
  "   z6register_onednn_fusion_ops.<locals>.convolution_binaryc                 S   r  rS   )r-   r   r!   ZConvolutionBinaryInplacer  r<   r<   r=   convolution_binary_inplace,  r  z>register_onednn_fusion_ops.<locals>.convolution_binary_inplacewr   c              
   S      t tj| |||||S rS   )r-   r   r!   ZLinearUnary)rC   r  r   r  r  r  r<   r<   r=   linear_unaryN  s   z0register_onednn_fusion_ops.<locals>.linear_unaryr   c              	   S      t tj| ||||S rS   )r-   r   r!   ZLinearBinary)rC   r   r  r   r  r<   r<   r=   linear_binaryV     z1register_onednn_fusion_ops.<locals>.linear_binaryc                 S   s&   t tj| |||||||||	|
S rS   )r-   r   r!   ZConvolutionTransposeUnary)rC   r  r  r  Zoutput_paddingr|  r  r   r  r  r  r<   r<   r=   convolution_transpose_unaryZ  s   z?register_onednn_fusion_ops.<locals>.convolution_transpose_unaryw0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                 S   s4   t tjtj| |||||||||	|
|||||S rS   )pytreetree_mapr-   r   r!   ZMkldnnRnnLayer)rC   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r<   r<   r=   mkldnn_rnn_layerx  s*   z4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layerr  packed_weightw_scalew_zpc                 S   s2   t tj| |||||||||	|
||||||S rS   )r-   r   r!   ZQConvPointWisePT2E)rC   x_scalex_zpr  r  r  r  r|  r  r  r   o_inv_scaleo_zero_pointoutput_dtyper  r  r  r<   r<   r=   qconvolution_unary  s*   z6register_onednn_fusion_ops.<locals>.qconvolution_unaryaccumc                 S   s<   t tj| |||||||||	|
|||||||||||S rS   )r-   r   r!   ZQConvPointWiseBinaryPT2E)rC   r  r  r  Zaccum_scaleZaccum_zpr  r  r  r  r|  r  r  r   r  r  r  r  r   r  r  Zunary_algorithmmr<   r<   r=   qconvolution_binary  s4   z7register_onednn_fusion_ops.<locals>.qconvolution_binaryc                 S   r  rS   )r-   r   r!   ZQLinearPointwisePT2E)rC   r  r  r  r  r  r  r  r  r  r  r  r  r<   r<   r=   qlinear_unary  r  z1register_onednn_fusion_ops.<locals>.qlinear_unarypacked_worig_wc                 S   s.   t tj| |||}|d urt||}|S rS   )r-   r   r!   ZMKLPackedLinearrK   )rC   r  r  r   Z
batch_sizer  r<   r<   r=   mkl_packed_linear&  s   
z5register_onednn_fusion_ops.<locals>.mkl_packed_linear)rL   Z_CZ_has_mkldnnr6   ZmkldnnZ_convolution_pointwiseZ_convolution_pointwise_Z _convolution_transpose_pointwiseZ_linear_pointwiser  r  r  ZonednnZqconv2d_pointwiser   r-   binaryrm   r   rb   Zqlinear_pointwiseZhas_mklr~   ZmklZ_mkl_linearr@   )Zcpu_needs_realized_inputsr  r  r  r  r  r  r  r  r  r  r   r<   r<   r=   register_onednn_fusion_ops  s  
	!!
	
()	
3
!r  c                    s   |rt    fdd}|S )Nc                     s$   t tjtjj g| R i |S rS   )r  r  r-   r   r!   FallbackKernelrz   r   kernelr<   r=   handlerA  s   z!fallback_handler.<locals>.handler)r   rK   )r  add_to_fallback_setr  r<   r  r=   r  =  s   
r  c                   C      t d d S )NzjTorchinductor does not support code generation for complex operators. Performance may be worse than eager.)warningswarnr<   r<   r<   r=   _warn_complex_not_supportedI  s   r  r   c                 C   s0   |   r|r|jtjjjjkrdS t  dS dS )z0Do not support reading or writing to this tensorFT)r  r   rL   r6   r  r  ra   r  r   parentr<   r<   r=   unsupported_input_tensorR  s   r  c                 C   s   t | |rdS | jotjS )z2Do not support writing tensor but can read from itT)r  Zis_cpur   Zdisable_cpp_codegenr  r<   r<   r=   unsupported_output_tensor]  s   
r  r   c                 C   sh   | j tjju r	dS | j tjju rdS dd }tj| ji | jD ]}||| ddr, dS q || | ddS )NFc                 S   sp   t | tjjs	dS d| jvrdS t| jd D ]}t |tjjs"q|r-t	||r, dS qt
||r5 dS qdS )NFr;  T)rF   rL   fxNodemetar  Ztree_leaves_subclasses
FakeTensorr  r  )r   r  	is_outputr  r<   r<   r=   check_skip_conditionm  s   


zCfallback_node_due_to_unsupported_type.<locals>.check_skip_condition)r  T)	r   r  view_as_complexr  lift_fresh_copyr  Zarg_tree_leavesrz   r   )r   Zallow_cpu_inputsr  rx   r<   r<   r=   %fallback_node_due_to_unsupported_typed  s   r  c                    s   | t vsJ d|  |r:ttdr:t| gr:tjr#| tjj	j
v s:tjjjr2dtjj_td td|  d fdd}t| tjjrY|  D ]}t| |}|| qKd S t| tjjtjjfrj||  d S td	|  d
t|  )Nz*both a fallback and a decomp for same op: CIFznA make_fallback error occurred in suppress_errors config, and suppress_errors is being disabled to surface it.zmake_fallback(a.  ): a decomposition exists, we should switch to it. To fix this error, either add a decomposition to core_aten_decompositions (preferred) or inductor_decompositions, and delete the corresponding `make_fallback` line. Get help from the inductor team if unsure, don't pick arbitrarily to unblock yourself.c                    s.   t |   d urt|   t| d dt| S Nr  )r@   rV   r   r  )op_overloadlayout_constraintr<   r=   register_fallback  s   

z(make_fallback.<locals>.register_fallbackzUnsupported fallback z with type )r#   rm   osgetenvr$   r   fallback_randomrL   _decompZdecompositions_for_rngZextra_random_decompsZ_dynamoZsuppress_errorslogwarningAssertionErrorrF   rM   rN   rO   rP   Z
OpOverloadZHigherOrderOperatorrr  r   )r   r  r  r   Zolr  r<   r  r=   make_fallback  s6   




r(  c                 C   s$   d}| D ]}|| }qt |tjdS )z
    TorchInductor offset calculation differs from PyTorch eager offset
    calculation for random ops (tl.rand vs torch.rand). In future, we should
    strive for same impl for tl.rand and torch.rand.
    r   rd   tensorrL   r  )rT  Znumelr6  r<   r<   r=   philox_rand_offset  s   
r+  c           	         sd   t | | t j|  | |  fdd}tj| |t| d}t	| }||fS )Nc                    sV   t g tj}t g tj}t t | tj|}t ||}t | S rS   )r6   r   rL   int32rK   r  rand)r   Zseed_index_exprZoffset_index_exprZrand_index_exprr  ra   Zoffset_loader
random_posseed_loaderr<   r=   r     s   zphilox_rand.<locals>.inner_fnr   )
r!   r{  FlexibleLayoutcontiguous_stridesmake_indexerr   r*   r   rG   r+  )	rq  seedr  r|  r   ra   r   Zrandom_values_nodeZoffset_noder<   r.  r=   philox_rand  s&   
r5  c              	   C   s.   t jrttjtjtj	j
| ||S td)Nz&should be handled in replace_random.py)r   r#  r  r  r-   r   r!   r  r  native_dropoutr  r'  )rC   pr  r<   r<   r=   r6    s   r6  c                 G   s>   t js|  tdksJ d|   tj| g|R   | S NcpuzTthis should be handled in decomps unless config.fallback_random or the device is CPU)r   r#  r   rL   r   r   r!   ZInplaceBernoulliFallbackrC   rz   r<   r<   r=   
bernoulli_  s   r;  c                 G   s4   t js|  tdksJ dtt| g|R  S r8  )r   r#  r   rL   r   r;  r	  r:  r<   r<   r=   bernoulli_p   s   r<  c                 C   s   t  rS   r'  r{   r<   r<   r=   _foobar	  s   r?  c                 C   r	  )Nz1using triton random, expect difference from eager)r%  info)saltr<   r<   r=   _warn_triton_random  r  rB  c                   C   s   t tjj d S rS   )rB  r7   r   Zcreation_timer<   r<   r<   r=   warn_triton_random     rC  c                  O   F   | dd d urt| i |S tjr|dd  t| i |S tdN	generatorz-should have been handled in replace_random.py)getfallback_rand_generatorr   r#  popfallback_rand_defaultr'  r  r<   r<   r=   r-       r-  c                  O   rE  rF  )rH  fallback_randn_generatorr   r#  rJ  fallback_randn_defaultr'  r  r<   r<   r=   randn)  rL  rO  c                 C   s   t |}t j| |S rS   )r!   get_stride_orderExternKernelrequire_stride_order)Zinput_tensorr|  stride_orderr<   r<   r=   inductor_force_stride_order3  s   
rT  c                 C      t d)Nz.should be handled in fuse_seed_creation_pass()r=  )r   r<   r<   r=   inductor_seed9     rV  c                 C   s   t   tt| t|S rS   )rC  r-   r   r!   ZRandomSeedsr1   )countr   r<   r<   r=   inductor_seeds>  s   rY  c                    s(    fdd}t j  |g dS )Nc                    s   t   S rS   )r6   Z	load_seedget_namer>  r   seedsr<   r=   r   F  r  z&inductor_lookup_seed.<locals>.inner_fnr   )r*   r   r   rg   )r\  r   r   r<   r[  r=   inductor_lookup_seedD  s   r]  r  rq  r4  r  c                   s   t jrJ  dv sJ g | } tj}| }tj||| tj| |d	 |
  fdd}tj|||g | d}|  |S )N)r-  rO  r^  c                    s"   t t g t| tjS rS   )rP   r6   r  rL   r,  r   r  r/  r0  r<   r=   r   ]  s   z!inductor_random.<locals>.inner_fnr   )r   r#  rL   float32r   r!   r{  r1  r2  r3  r   r*   r   r   )rq  r4  r  r  ra   r   r   r  r<   r_  r=   inductor_randomQ  s(   
ra  lowhighc                   sp   t jrJ g |}tj}| }tj|||tj||d	 |
  fdd}tj|||g |dS )Nr^  c                    s"   t g t | tj S rS   )r6   Z	randint64r  rL   r,  r   rc  rb  r/  r0  r<   r=   r   z  s   z"inductor_randint.<locals>.inner_fnr   )r   r#  rL   r  r   r!   r{  r1  r2  r3  r   r*   r   )rb  rc  rq  r4  r  ra   r   r   r<   rd  r=   inductor_randintm  s"   
re  	out_int32rightr   
boundariesrg  rh  c                   s   t   dks
J t| rt s ttjjdd|  |dS      d   }| 	 }|  |r;t
jnt
j fdd}tj|||  dS )	Nr   Fr  rf  r   c                    s"   | }t |  }|S rS   )r6   	bucketizerZ  )r   r;  r   ri  Zboundaries_sizeZindex_dtypeZinput_loaderrh  r<   r=   r     s   zbucketize.<locals>.inner_fnr   )rq   rr   r'   r  r  rk  Tensorr   r   r   rL   r,  r  r*   r   )r   ri  rg  rh  Zboundaries_loaderr   r   r<   rl  r=   rk    s$   rk  c                 O   $   t tjtjj||f\}}||fS rS   )r  tree_map_onlyr!   IRNoderQ  Zrequire_stride1r{   rz   r   r<   r<   r=   require_dense     rr  c                 O   rn  rS   )r  ro  r!   rp  rQ  require_contiguousrq  r<   r<   r=   rt    rs  rt  c                 O   rn  rS   )r  ro  r!   rp  rQ  r  rq  r<   r<   r=   r    rs  r  c                    sF   dd  t  fddt|jD } fdd| D }||fS )Nc                 S   s2   t | tjrt|jd  }tj| |S | S Nr;  )rF   r!   rp  rP  r  r|  rQ  rR  )rx   fx_argrS  r<   r<   r=   apply_constraint  s   z1constrain_to_fx_strides.<locals>.apply_constraintc                 3       | ]
\}} ||V  qd S rS   r<   rB   rx   rv  rw  r<   r=   r         
z*constrain_to_fx_strides.<locals>.<genexpr>c                    "   i | ]\}}| |j | qS r<   r   rB   krU  rw  fx_noder<   r=   r        " z+constrain_to_fx_strides.<locals>.<dictcomp>rH   r   rz   r   r  rz   r   r<   r  r=   constrain_to_fx_strides  s   
r  ztorchvision::roi_align)r  c                    sJ   fdd t  fddt|jD } fdd| D }||fS )Nc                    s  t | tjs| S |jd }|js| S t| }|r-|d dkr-ttt	t
|  }d jtjjtjjfv } fdd}t | tsGJ t | jtjtjfr]|| s]||  r]| S  fdd}t | jtjr| j r|rt
| j d	kr|| r| S tj| |S )
Nr;  r   r   r\   c                    s   t jj|  d   dkS )Nr   r   )r7   r   r   rQ  rr   rk   	ALIGNMENTr<   r=   
is_aligned  s   z=sdpa_constraint.<locals>.apply_constraint.<locals>.is_alignedc                    sF   t  fddtt  d D }tjj  d dko"|S )Nc                 3   s.    | ]}t jj |   d kV  qdS r   N)r7   r   r   rQ  
get_strider   )r  rC   r<   r=   r     s
    
zYsdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned_backward.<locals>.<genexpr>r   r   )r   r   rq   r  r7   r   r   rQ  )rC   Zaligned_stridesr  rk   r=   is_aligned_backward  s   zFsdpa_constraint.<locals>.apply_constraint.<locals>.is_aligned_backwardrX   )rF   r!   rp  r  r   rP  r|  rG   r   r   rq   rr   r   r  0_scaled_dot_product_efficient_attention_backwardr  ,_scaled_dot_product_flash_attention_backwardr-   r   rg  r%   rx  r  is_input_bufferrQ  rR  )rx   rv  Zmeta_valrS  Zis_backwardr  r  )r  r  r=   rw    s:   

z)sdpa_constraint.<locals>.apply_constraintc                 3   rx  rS   r<   ry  rz  r<   r=   r   '  r{  z"sdpa_constraint.<locals>.<genexpr>c                    r|  r<   r}  r~  r  r<   r=   r   *  r  z#sdpa_constraint.<locals>.<dictcomp>r  r  r<   r  r=   sdpa_constraint  s   6
r  c                 C   sn   |}|   |  krt||   }|  | kr t||  }|  | kr3t||  }t|S t|S rS   )r   r  rg   r   rr   r   r	  )selfr  non_blockingrC   r   r<   r<   r=   r
    s   r
  )memory_formatc                C   s&   t j|  |  |  t|  dS )Nr   )r*   r   r   rg   r   rG   rr   )rC   r  r<   r<   r=   r	    s   
r	  r  c                   s(    fdd}t jt| || gdS )Nc                    s   t j| d    dS )Nr   rd   r6   r  r   ra   rh  rj  r<   r=   rQ   	     ziota.<locals>.fnr   )r*   r   r1   )lengthrh  rj  ra   r   Zrequires_gradrQ   r<   r  r=   iota  s   
r  ru   r   c                    s   |   |  ks
J |  t|  d tjjtdr'| 	    tjj
d tjj| 	    tt| | 	 }|  fdd}tj|  |   |t| 	 dS )Nr   c              	      s6   t t t |   tjt tj| | S rS   )r6   r'  eqr  rL   r,  rV  ru   r   
src_loaderr_  r<   r=   r   	  s   z select_scatter.<locals>.inner_fnr   )rg   r   re  r7   r   r   r1  rh   rf  rr   r  r  r   rS  r*   r   r   rG   )rC   r  ru   r   r   r<   r  r=   select_scatter	  s    

r  c                    s$    |  ks
J  t d    d ur-tjjt	dr- d ur@tjjt	dr@ d u rFdd u sVtjj
   rXt }t d  | < t||}|  fdd}tj   |t dS )Nr   r   c              
      s4  dkrkrdkr| S t |  tj}t|  t|    < g }dkr?|t |t t	tj krT|t 
|t t	tj dkrt|t t t|   dtjt dtjj |sxJ tt j|}t | fddtrdnd}t ||| S )Nr   r   c                          S rS   r<   r<   )src_idxr  r<   r=   r  i	      z1slice_scatter.<locals>.inner_fn.<locals>.<lambda>r  )r6   r  rL   r  rG   r   r~   r  rh   r   r  r  r   r  r   r,  r  r  rl   r'  )rV  r  r  Zsrc_valru   rk  ri  r  rh  rj  rC   r_  )r  r=   r   D	  sR   zslice_scatter.<locals>.inner_fnr   )rg   r   re  rr   r7   r   r   r1  rh   rf  Zstatically_known_leqrG   r   r   r*   r   r   )rC   r  ru   rh  ri  rj  Zsrc_sizer   r<   r  r=   slice_scatter0	  s.    
.
r  c                 C   s*   t | ttfrt| dkrt| d S | S r   )rF   rG   rH   rq   _unwraprk   r<   r<   r=   r  z	  s   r  ra   r   layout
pin_memoryc                   s  t |d tjfv d|  t | d tt tr ptjnp%t g }t tj	r6 fdd}nBt t
tfrE fdd}n3t dksZt d t
tfrlt dkrl|tt   fdd}ntjtj |d	S tjt|||d
S )Nlayout=r  c                       t  S rS   r  r   r   ra   r<   r=   r   	  r  ztensor.<locals>.inner_fnc                    r  rS   r6   r  r   r  r<   r=   r   	  r  r   r\   c                    s8    fdd t dkrtdS  dt S )Nc              	      sr   | |k sJ ||  dkrt |  S ||  d |  }t t t d tjt |tj | | ||S )Nr   r   r   )r6   r  r'  r  r  rL   r  )rh  ri  mid)binary_searchr   ra   r   r<   r=   r  	  s   z/tensor.<locals>.inner_fn.<locals>.binary_searchr   )rq   r6   r  r   r  )r  r   r=   r   	  s   rZ  r   )r>   rL   stridedrF   r  rb   r  get_default_dtyperh   r   r   rq   r~   r   r7   r   Zadd_tensor_constantr*  r*   r   r1   )r   ra   r   r  r  r   r   r<   r  r=   r*  	  s,   *r*  c                 C   s@   t | tr|d urt| |} |d urt| |} | S t| ||dS )NrZ  )rF   r-   r   r  r*  )r   ra   r   r<   r<   r=   	as_tensor	  s   


r  c                 C      t | tjdS )Nrd   r)  )r   r<   r<   r=   long_tensor	  r  r  c                 C   s0   t jjjd jj}t|| }t j||_	|S ru  )
r7   r   r   r  r   exprr!   ZDynamicScalarregister_bufferr!  )r   symbufferr<   r<   r=   _local_scalar_dense	  s   r  c                    s   | t | ttfstdrjt ttfr  fdd}n"t tjr. fdd}nt dks8J 	 fdd}t
j| |t|dS )Nr   c                       t  S rS   r  r   ra   r   r<   r=   r   	  r  z_full.<locals>.inner_fnc                    r  rS   r  r   r  r<   r=   r   	  r  r   c                    s    g S rS   r<   r   )value_loaderr<   r=   r   	  s   r   )rF   rb   r   rp   r   rh   r   rq   rr   r   r*   r   rG   )
fill_valuer   ra   rq  r   r<   )ra   r   r  r=   _full	  s    r  c                 K   s   t t|| fi |S rS   create_tensor_liketensor_constructor)rC   r  r   r<   r<   r=   rA  	     rA  c                    s    d d d d dd d fdd
}|S )NF)namesra   r   r  r  r  c                    s   t | d u d t |d tjfv d|  t | d t|}|p#t }t|dkr;t|d tttj	fr;t|d }dd |D }t
 |||S )Nnamed tensorsr  r  r   r   c                 S   ru  r<   rv  rO  r<   r<   r=   rD   
  ro  z5tensor_constructor.<locals>.inner.<locals>.<listcomp>)r>   rL   r  r1   r  rq   rF   rG   rH   Sizer  )r  ra   r   r  r  r  rq  r  r<   r=   r    
  s   	"z!tensor_constructor.<locals>.innerr<   )r  r   r<   r  r=   r  	  s   r  )r  ra   r  r   r  r  c                 G   sX   t | d u d t|}t|dkr"t|d tttjfr"t|d }t|d ||||dS )Nr  r   r   ra   r  r   r  )	r>   r1   rq   rF   rG   rH   rL   r  empty_strided)r  ra   r  r   r  r  rq  r<   r<   r=   r`  
  s   
"r`  c                    s   dddddd fdd
}|S )zZ
    Shim to convert X_like(...) into X(...).  For example zeros_like() into zeros().
    NF)ra   r   r  r  r  c                   sj   t | d t |d tjfv d|  |d u r|  }nt|}|p%|  }t|  } |||||dS )Nr  r  r  )r>   rL   r  rg   re   r   rG   rr   )rC   ra   r   r  r  r  rq  creation_fnr<   r=   _constant_like.
  s   

z*create_tensor_like.<locals>._constant_liker<   )r  r  r<   r  r=   r  )
  s   
r  c                 C   s   t t| S rS   r  r  r<   r<   r=   r)  @
  r  r)  c                    s   d d d d d fdd
}|S )Nr  c                   sp   t |ttfs	J t| d t|d tjfv d|  t|p#|  }|p)|  }dd |D }t	 |||S )Nr  r  c                 S   ru  r<   )rh   r   rO  r<   r<   r=   rD   R
  ro  z7new_constant.<locals>._new_constant.<locals>.<listcomp>)
rF   rG   rH   r>   rL   r  re   rg   r   r  rC   rq  ra   r  r   r  r  r<   r=   _new_constantJ
  s   z#new_constant.<locals>._new_constantr<   )r  r  r<   r  r=   new_constantI
  s   r  r  c                C   s4   |d u r|   }|d u r|  }t|d ||||dS Nr  rg   r   r  r  r<   r<   r=   	new_emptyX
  s   r  c                C   s   t | ttfs	J t |tttd fsJ t| d t|d tjfv d|  t|p/t }|p7t	dj
}td||| d}|  |jj}dgt|  |j_t |tjsYJ dd | D } |ridd |D ntj| }tj||| |d	|_|S )
Nr  r  r  r   )r  r   ra   rq  c                 S   ru  r<   rv  rO  r<   r<   r=   rD   s
  ro  z!empty_strided.<locals>.<listcomp>c                 S   ru  r<   rv  rO  r<   r<   r=   rD   u
  ro  )r   ra   rq  r|  )rF   rG   rH   r   r>   rL   r  re   r  r*  r   r  r   r   rq   r   r!   ComputedBufferr1  r2  r{  r  )rq  r|  ra   r  r   r  Z	pointwiser  r<   r<   r=   r  c
  s.   
r  c                C   s4   |d u r|   }|d u r|  }t||||||dS r  r  )rC   rq  r|  ra   r  r   r  r<   r<   r=   new_empty_strided
  s   r  c                 C   s2   dd |D }t tt||jd}tj| |S )Nc                 S      g | ]	}t jj|qS r<   )r7   r   r   rQ  rO  r<   r<   r=   rD   
      z copy_strided.<locals>.<listcomp>)key)sortedr   rq   __getitem__r!   rQ  rR  )rC   r|  rS  r<   r<   r=   copy_strided
  s   r  c                 K   s*   | dd usJ dt|| fi |S )Nra   z(dtype should be handled by decomposition)rH  r  )rq  r  r   r<   r<   r=   full
  s   r  c                    s   t | tsJ | tjksJ |  tdk}t|  | |  |  fdd}t	j
|  |  || dS )Nr   c                    s4   t | } t| dkrt|   |  < | S r   )rG   rq   r6   indirect_indexingr  ru   index_loaderrq  r_  r<   r=   rQ   
  s   zgather.<locals>.fnr   )rF   r-   rg   rL   r  rr   rq   re  r   r*   r   r   )rC   ru   r   Zsparse_gradr  rQ   r<   r  r=   gather
  s   r  c                    s   |rJ t | tsJ t |tsJ dt| v sJ |  |  t| |  g | dd   fdd}tj| 	 |  |dS )Nrb   r   c                    s\   t | t ksJ |  d  | d  }t|d gg | d   }|S )Nz != r   )rq   r6   r  )rV  Z	var_indexZ
weight_idxindices_loaderZindices_ndimra  Zweight_loaderZweight_sizer<   r=   rQ   
  s   "
zembedding.<locals>.fnr   )
rF   r-   strrg   r   rq   rr   r*   r   r   )r  r   Zpadding_idxZscale_grad_by_freqsparserQ   r<   r  r=   	embedding
  s    r  c                    s   t dd  D sJ ddd  D  tdd  D r"tddd t D }t|d	ks5J d
d gt  }t|t fdd|D  D ]\}}| |krXtd|||< qJ||fS )Nc                 s   s4    | ]}|d ur|  tjtjtjtjfv V  qd S rS   )rg   rL   r  r,  rm   r  r   r<   r<   r=   r   
  s    z.check_and_broadcast_indices.<locals>.<genexpr>z)indices must be int64, byte or bool. Got c                 S   s   g | ]
}|d ur|  qS rS   r   r   r<   r<   r=   rD   
      z/check_and_broadcast_indices.<locals>.<listcomp>c                 s   s,    | ]}|d ur|  tjtjfv V  qd S rS   )rg   rL   rm   r  r   r<   r<   r=   r   
  s    "zFallback for bool indicesc                 S   r   r<   r   r   r<   r<   r=   rD   
  r   r   z"requires at least 1 non-None indexc                    r   r<   r<   r   r   r<   r=   rD   
  rE   z.Fallback when indices is on a different device)r   r   r9   r   rq   r   r   r   )r   r   Z
valid_idxsZnew_indicesr   rC   r<   r  r=   check_and_broadcast_indices
  s"   
$
r  c              	      s   dt dd  D ]\}}	|	| dkrdq	fddtD g 	tt d  d }
r> nd |
  |
d    f	dd}|fS )	NFr   Tc                    s    g | ]\}}|d u r | qS rS   r<   )rB   r   r;  r  r<   r=   rD     r  z2index_output_size_and_inner_fn.<locals>.<listcomp>r   c           	   	      s  t | t ks
J t t ksJ t }g }d }r"dn|}d}td d D ]E}||kr8||7 }| d u rR|t | k sFJ || |  |d7 }q.| }|d us\J | }|tj|| |||  | d q.g || |d  }d u r|S |S )Nr   r   r   r   )rq   r   r~   r6   r  )	rV  r  	new_indexfirst_tensor_indexZstart_offsetZnext_idxr   loaderrq  )	r   indexed_sizer   indices_loadersnon_consecutive_tensorsoutput_sizetensor_indicestensor_sizer_  r<   r=   rQ     s<   

z*index_output_size_and_inner_fn.<locals>.fn)r   r   rq   )r  r   r  r  r  r  r_  r   previouscurrentr  rQ   r<   )
r   r  r   r  r  r  r  r  r_  r  r=   index_output_size_and_inner_fn
  s$    


!r  c           
   
      s   t  ttfs	J |  }t |  \ }t|dks J ddd  D }t |d   }|   fddtt D }d|v rOd|vrOt	dfddtt D }t
 ||||||d\}}	tj|  |  |	|d	S )
Nr   z Must have at least one valid idxc                 S       g | ]}|d ur|  nd qS rS   r   r   r<   r<   r=   rD   9  r  zindex_impl.<locals>.<listcomp>c                    s    g | ]} | d ur| qS rS   r<   r   r   r  r<   r=   rD   A  r  z0index is out of bounds for dimension with size 0c                    r   r<   r<   r   r  r<   r=   rD   E  rE   r  r   )rF   rG   rH   r   r  r   rq   rr   r   
IndexErrorr  r*   r   rg   )
rC   r   r   r_  r  r  r  r  r  r   r<   r  r=   
index_impl3  s6   
r  c                 C   sB   zt | |ddW S  ty    |   ttjjdd| | Y S w )NTr  Frj  )r  r9   r   r  r  r   rm  rC   r   r<   r<   r=   r   Y  s   c                 C   r  NFr  )r  r  r<   r<   r=   _unsafe_indexe  r  r  c                 C      t t| |||S rS   )
index_put_r	  rC   r   r   
accumulater<   r<   r=   	index_putr  r  r  c                 C   s   t t| |||ddS r  )index_put_impl_r	  r  r<   r<   r=   _unsafe_index_putw  s   r  c                 C   sB   |  |   krt||   }|rt| |}t| t|d || S r   )r   r  rK   r  r'  )r  r   r   r  r<   r<   r=   index_put_as_masked_fill|  s
   
r  c                 C   sl   t  }t|r,|s|r,dtj_|sdnd}tjjjdd  }r(| d| }|tj_	t
| ||| | S )NTzindex put with accumulate.zdeterministic index put.stack_tracez Found from : 
 )rL   $are_deterministic_algorithms_enabledr'   r7   r   Zdisable_cudagraphsr   r  rH  Zdisable_cudagraphs_reasonr!   ZIndexPutFallback)r  r   r   r  Zdeterministicr;   r   r<   r<   r=   index_put_fallback  s   r  c                 C      t | |||ddS )NTr  r  r  r   r   r  r<   r<   r=   r    r  r  c                 C   r  r  r  r  r<   r<   r=   _unsafe_index_put_  r  r  c                 C   s   | t jt jt jhv S rS   )rL   r  rm   bfloat16rd   r<   r<   r=   ,needs_fallback_due_to_atomic_add_limitations     r  c              
      sP  |  dkr9t|dkr9|d  tjtjhv r9|d }tt| t|  D ]}t|d}q)t	| |g||S t
 rDt| |||S |D ]}|d ur_| tjtjhv r_t| |||  S qF|   t }|rt|  r|dkrzt| dg} t| |||} |dkrt| g } | S t||  }zt||  \}}	W n ty   t| ||| Y S w dd |D }
t| tsJ |   |dkrt| dg} t||	d   } fddtt|D }t ||	||
|d |d\}}t||}tj|  |  | |||rdnd d	}td t| |}tj||_ |dkr&t| g } | S )
Nr   r   r   c                 S   r  rS   r   r   r<   r<   r=   rD     r  z#index_put_impl_.<locals>.<listcomp>c                    r   r<   r<   r   r  r<   r=   rD     rE   r  
atomic_addr   ra   r   r   output_indexerZscatter_mode)!	get_numelrq   rg   rL   rm   r  r   rr   rS  r  r  r  r  r  r   r  r   r9   rF   r-   r   rG   r  r   r!   Scatterr   r  MutationLayoutr7   r   r  r!  )r  r   r   r  r   r  r{   r   Zx_ndimr  r  r  r  Zexpected_vals_sizer   scatterr  r<   r  r=   r    s   




r  )ro   r   c           	         s   dd | ||fD \}}}|   |  ksJ | tjtjhv s#J | |  | | |  fdd}tj| 	 |  ||  d}t
||   S )Nc                 s   s    | ]}t |d V  qdS )r   N)r  rA   r<   r<   r=   r     r   z,masked_scatter_with_index.<locals>.<genexpr>c                    sH    }t  tj} fdd}t ||d}t |||S )Nc                     s    } t | }|gS rS   )r6   r  )Zsource_idx_valr   )rV  source_idx_loadersource_loadersource_numelr<   r=   load_source_val  s   
zDmasked_scatter_with_index.<locals>.inner_fn.<locals>.load_source_valr   )r6   r   rL   rm   r  r'  )rV  Zself_valZmask_valr  Z
source_valZmask_loaderZself_loaderr  r  r  r  r=   r     s
   z+masked_scatter_with_index.<locals>.inner_fnr   )rr   rg   rL   rm   r  r   r  r*   r   r   r  )	r  r  Z
source_idxsourceZ	self_flatZ	mask_flatZsource_flatr   Zresult_flatr<   r  r=   masked_scatter_with_index  s    r  c                 C   r  rS   )r	  r  copy_)r  r  rq  r|  r}  r   Zoutput_viewr<   r<   r=   as_strided_scatter"  r  r  c                 K   s   t t| |||fi |S rS   )scatter_r	  )rC   ru   r   r  r   r<   r<   r=   r  *  r  r  r,  include_selfr,  r  c             	   C   s   | dkrdnd}|d |hvsPt |tr$| jtdjkr$t| sP| dkr>|dkr>t |tr>| tdkr>tj	j
sP||krL| tjtjhv sPt r^tj| ||||||d |S d S )Naten.scatter_rK   sumr   aten.scatter_reduce_r9  r  )rF   r-   r   r   rL   r   r  rg   r   cppZfallback_scatter_reduce_sumrm   r  r  r!   ZScatterFallback)rQ   r  ru   r   r  r,  r  Z	reduce_tyr<   r<   r=   scatter_fallback/  s,   

r"  r,  c                C   sR   |dv sJ t d| ||||d}|r|S |dkrd}n|dkr!d}t| ||||S )N>   rK   multiplyNr  r#  rK   r  r$  prod)r"  scatter_reduce_)r  ru   r   r  r,  fallback_resultr<   r<   r=   r  S  s   r  c                 C   r  rS   )scatter_add_r	  rC   ru   r   r  r<   r<   r=   scatter_addf  r  r*  c                 C   s   t | |||dS )Nr  )r&  r)  r<   r<   r=   r(  k  rD  r(  c                 K   s   t t| ||||fi |S rS   )r&  r	  )rC   ru   r   r  reduction_typer   r<   r<   r=   scatter_reducep  s   r,  )r  c             	      s  |dv sJ t d |||d}|r|S ttsJ dt| v s&J t }|dkr6tdgttrIt dkrItdgt|tr\t| dkr\t|dg}t  	  |
 ttrr
 nd  fdd}fd	d
}	dd }
|stj  fdd| |d d}td t|}tj||_tj  |	| ||
|d}td t|}tj||_|dkrtg S )N>   r  meanr%  amaxaminNr   r  rb   r   r   c                    s@     }t|}t| }t| |dkrdn|  | < |S r  )rr   rq   rG   r6   r  )rV  rT  r  Zindirect_idx)ru   r  r  r<   r=   r    s   z'scatter_reduce_.<locals>.output_indexerc                    s   r| S t   S rS   r6   r  rg   r  )r  r  r  r<   r=   rQ     s   zscatter_reduce_.<locals>.fnc                 S   s   | dkrdS | d u sJ d S )Nr  r
  r<   r#  r<   r<   r=   backend_reduce_str  s   z+scatter_reduce_.<locals>.backend_reduce_strc                    s   t d  S r   r0  r   )r  r<   r=   r    r  z!scatter_reduce_.<locals>.<lambda>r  )r"  rF   r-   r  rg   rq   rr   r  re  r   r   r!   r  r   r  r  r7   r   r  r!  )r  ru   r   r  r,  r  r'  r  r  rQ   r1  Zzero_outr  r  r<   )ru   r  r  r  r  r=   r&  u  sx   




r&  scales_x.nexactc           
         s   |    |  |   d  |  d   }dd D t|ks)J |}dd t|D t|D ]
\}}|rC||< q9 fddfdd}	tj|  | 	 |	g ||dS )	Nc                 S   r  r<   )r7   r   r   rt  r   r<   r<   r=   rD     r  z&upsample_nearestnd.<locals>.<listcomp>c                 S   s   g | ]\}}|| qS r<   r<   )rB   r   or<   r<   r=   rD     s    c                    s\   t | tj}  rt | t dtj} t | t |tj} t | tj} t j	| |ddS )N      ?Fr  )
r6   r  rL   r`  rK   r  r   r   r,  r  )rC   scalerq  )r4  r<   r=   scale_fn  s   z$upsample_nearestnd.<locals>.scale_fnc                    sB   |  d  }| d   }g |fddt | D S )Nc                    s   g | ]\}}} |||qS r<   r<   )rB   r   r6  rq  )r8  r<   r=   rD     r   z2upsample_nearestnd.<locals>.fn.<locals>.<listcomp>)r   )rV  rC   r   )i_sizesr3  r8  scalesr_  r<   r=   rQ     s
    zupsample_nearestnd.<locals>.fnr   )
realize_hintr   rr   rq   r   r   r*   r   r   rg   )
rC   r  r2  r3  r4  batchZo_sizesr   r7  rQ   r<   )r4  r9  r3  r8  r:  r_  r=   upsample_nearestnd  s(   
r=  r:  c                 C   s   t | ||fddS )Nr   r3  r=  rC   r  r:  r<   r<   r=   upsample_nearest1d	  r  rA  c                 C   s   t | ||fdddS )Nr   Tr3  r4  r?  r@  r<   r<   r=   _upsample_nearest_exact1d  r	  rC  scales_hscales_wc                 C   s   t | |||fddS )Nr   r>  r?  rC   r  rD  rE  r<   r<   r=   upsample_nearest2d  s   rG  c                 C   s   t | |||fdddS )Nr   TrB  r?  rF  r<   r<   r=   _upsample_nearest_exact2d  s   rH  scales_dc                 C   s   t | ||||fddS )NrW   r>  r?  rC   r  rI  rD  rE  r<   r<   r=   upsample_nearest3d!  s   rK  c                 C   s   t | ||||fdddS )NrW   TrB  r?  rJ  r<   r<   r=   _upsample_nearest_exact3d,  s   rL  c                    s   t  fdd|D S )Nc                 3   s    | ]	}t | V  qd S rS   r  r   rd   r<   r=   r   :      z$_create_constants.<locals>.<genexpr>rH   )ra   rz   r<   rd   r=   _create_constants9  s   rO  align_cornersc              
      s   |    |  |  \}}	
|\}}tjj		tjj

dd ddd}	dd dd d	d
 fddfdd|		| ||	
| |dd  	
f
dd}
tj| 	 | 
 |
||t|t|gdS )Nc                 S   s   | t t jjkrt jS t jS rS   )rL   r  r,  r  r  )maxvalr<   r<   r=   get_int_dtypeN  s   z1upsample_bicubic2d_default.<locals>.get_int_dtypec                 S   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S r\  r<   )Zin_sizer  rP  r7  r<   r<   r=   compute_scaleS  s    z1upsample_bicubic2d_default.<locals>.compute_scalec                 S   sJ   t |tj}t | tj} |rt | |S t dtj}| ||  | S )Nr6  )r6   r  rL   r`  r  r   )r7  Z	dst_indexrP  Zdst_index_ieZhalfr<   r<   r=   compute_source_indexY  s   z8upsample_bicubic2d_default.<locals>.compute_source_indexc                 S   s8   t |d |d dtjd\}}}||  | |  |  | S )Nr   rW   r   rd   rO  rL   r`  )rC   AZ_Ap2Z_Ap3_1r<   r<   r=   cubic_convolution1b  s    z6upsample_bicubic2d_default.<locals>.cubic_convolution1c                 S   sD   t |d| d| d| tjd\}}}}||  | |  | |  | S )NrX   rY   r\   rd   rU  )rC   rV  Z_AZ_4AZ_5AZ_8Ar<   r<   r=   cubic_convolution2f  s   z6upsample_bicubic2d_default.<locals>.cubic_convolution2c                    sb   d}t dtj}t | ||} | |}t || } ||}t |||}||||fS )Ng            ?)r6   r  rL   r`  rK   sub)r   rV  rW  Zc0c1Zx2c2c3)rX  rY  r<   r=   get_cubic_upsample_coefficientsl  s   

zCupsample_bicubic2d_default.<locals>.get_cubic_upsample_coefficientsc                    sH    |}| d |d  | d |d   | d |d   | d |d   S )Nr   r   r   rW   r<   )Zxsr   cs)r_  r<   r=   cubic_interp1dw  s   @z2upsample_bicubic2d_default.<locals>.cubic_interp1dc                 S   s   t |t || S rS   )r6   maximumminimum)rU  r  r  r<   r<   r=   clamp     z)upsample_bicubic2d_default.<locals>.clampc           
         s   | \ }}
|}t |}t ||
|}t |}t ||} 	fddt |d t |d tfdddD }tfdddD fdd	tfd
d|D }	|	|S )Nc                    sr   t dtj}t d tj}t d tj}t j| ||dd}t j|||dd} ||gS )Nr   r   Fr  )r6   r  rL   r,  r  )fyr  Z_0ZiHm1ZiWm1iyix)crd  iHiWr3  r_  r<   r=   load_bounded  s   z<upsample_bicubic2d_default.<locals>.fn.<locals>.load_boundedr   c                 3       | ]	}t  |V  qd S rS   r6   rK   rB   Zofs)rg  r<   r=   r     rM  z9upsample_bicubic2d_default.<locals>.fn.<locals>.<genexpr>)r   r   r   r   c                 3   rm  rS   rn  ro  )rh  r<   r=   r     rM  c                    s"   t  fddD }|S )Nc                 3   s    | ]} |V  qd S rS   r<   rA   )rl  r   r<   r=   r     r   zOupsample_bicubic2d_default.<locals>.fn.<locals>.get_x_interp.<locals>.<genexpr>rN  )r   Zcoeffs_x)ra  ixs_ofsrl  t_x)r   r=   get_x_interp  s   
z<upsample_bicubic2d_default.<locals>.fn.<locals>.get_x_interpc                 3   r  rS   r<   )rB   r   )rr  r<   r=   r     r   )r6   rI  r[  r   rH   )
rV  ZoyoxZreal_xZin_xZreal_yZin_yZt_yZiys_ofsZcoeffs_y)
rP  rd  rT  ra  rR  height_scalerj  rk  width_scaler_  )ri  rr  rh  rp  rg  rl  r3  rq  r=   rQ     s   

	
z&upsample_bicubic2d_default.<locals>.fnr   rS   )r;  r   rr   r7   r   r   rt  r*   r   r   rg   rh   r   )rC   r  rP  rD  rE  NCZoHZoWrS  rQ   r<   )rP  rd  rT  rX  rY  ra  r_  rR  rt  rj  rk  ru  r_  r=   upsample_bicubic2d_default=  s.   
	 rx  c                    s   t |dksJ |\}}|  |  ^ } dd  fdd}tj|  |  |g |  |  | dS )NrX   c                 S   sj   |}t |d tj}t | tj} t | t |tj} t |t t |t | } t j| |ddS )Nr   Fr  )r6   r  rL   r,  r[  absr  )rC   rq  r  Zsize_numr<   r<   r=   reflect  s    z!reflection_pad2d.<locals>.reflectc                    s8   | ^ }}}| }|}g |||S rS   r<   )rV  r   rC   r   hleftrz  topr  r_  r<   r=   rQ     s   zreflection_pad2d.<locals>.fnr   )rq   r   rr   r*   r   r   rg   )rC   r  rh  botr<  rQ   r<   r{  r=   reflection_pad2d  s   r  c                    s   t |dksJ |\ | ^ }tjjd tjjd |  |  ^ }}} fdd}tj| 	 | 
 |t| dS )NrX   r   c                    s  | ^  }} fdddd | |	 | 	| }}d  | d 	 | }}d  f}d	 
 f}t ||}	t |	fddd	dfd
d	}
|
|||d	f |
|||
 d f |
||df| |
|| d f| |
|||df|d	f |
|||df|
 d f |
||| d f|d	f |
||| d f|
 d f S )Nc                    s   g  | |S rS   r<   rX  )r   grad_loaderr<   r=   load_from_output  re  z?reflection_pad2d_backward.<locals>.fn.<locals>.load_from_outputc                 S   sP   | \}}}t |tj}t |tj}t |tj}t t ||t ||S rS   )r6   r  rL   r,  r  r  r  le)Zindex_ranger   ZlbZubr<   r<   r=   index_range_condition  s
   
zDreflection_pad2d_backward.<locals>.fn.<locals>.index_range_conditionr   r   c                      s
    S rS   r<   r<   )center_xcenter_yr  r<   r=   r       
 z7reflection_pad2d_backward.<locals>.fn.<locals>.<lambda>r  c                    s   |d |d k }t |tr|rd S |}|d ur2|d |d k }t |tr*|r*d S t||}t| fddd}t|d S )Nr   r   c                      s
    S rS   r<   r<   )r  out_xout_yr<   r=   r    r  zKreflection_pad2d_backward.<locals>.fn.<locals>.accumulate.<locals>.<lambda>r  )rF   rm   r6   r  r  rK   )r  r  Zindex_range1Zindex_range2Zupper_less_than_lower1r:   Zupper_less_than_lower2g)gradr  r  )r  r  r=   r    s   z9reflection_pad2d_backward.<locals>.fn.<locals>.accumulater   rS   )r6   r  r  )rV  rC   r   Ztop_reflect_xZleft_reflect_yZbot_reflect_xZright_reflect_yZrange_cxZrange_cyr:   r  r  r  r|  r}  rh  r~  r  )r   r  r  r  r  r  r=   rQ     s0   """$z%reflection_pad2d_backward.<locals>.fnr   )rq   rr   r7   r   r   rt  r   r*   r   r   rg   rG   )grad_outputrC   r  r{   Zh_gradZw_gradrQ   r<   r  r=   reflection_pad2d_backward  s   B
r  c                    s:   |   |   fdd}tj|  |  |dS )Nc                    sF   t | } t| tksJ  D ]}| d | |  | |< q| S r0  )rG   rq   )rV  ru   r3  r-  r_  r<   r=   r  #  s
   zrev.<locals>.loaderr   )r   rr   r*   r   r   rg   )rC   r3  r  r<   r  r=   rev  s   r  c              	      st  t |d dks
J tdd |D rt| S |  }tttt|d d d |dd d  t |t   g  D ]\}}t|tj	rP|j
sPtjj|n|}||f q<t|d  }g t |d  D ]\\}}	}
|
 |t|
| |	  qmt |t |ksJ t|   fddfdd	}|  tj|  |  ||d
S )Nr   r   c                 s   r   r  r<   )rB   r7  r<   r<   r=   r   6  r   z"constant_pad_nd.<locals>.<genexpr>r   c                    s~   g }t  d  D ]\}\}}}|dkr|t|d |dkr+|t|| qttj|}t| fddS )Nr   c                      r  rS   r<   r<   )r   r_  r<   r=   r  X  r  z/constant_pad_nd.<locals>.mask.<locals>.<lambda>)	r   r~   range_mask_lowrange_mask_highr   r,  r6   r  r  )r   r  rV  rb  rc  r  )boundsr  
mask_sizesr3  r_  r   r=   r  P  s   "zconstant_pad_nd.<locals>.maskc                    sZ   t | d  }t| d   D ]\}\}}|||  qt|t| ks)J |S rS   )rG   r   r~   rq   )r   r  rV  rb  rc  )bounds_precompr  r3  r<   r=   	offset_fnZ  s
   z"constant_pad_nd.<locals>.offset_fnr   )rq   r   r	  rr   rG   r   r   rF   rh   r   	is_numberr7   r   r   Zlookup_precomputed_sizer~   r   r   rg   r   r*   r   r   )rC   r  r  r-  lr|  Z	l_precompr  rb  rc  rq  r  r<   )r  r  r  r  r  r3  r_  r=   constant_pad_nd3  s>   *


r  r   c                 C   s&   t t | tjt t|tjS rS   )r6   r  r  rL   r  rh   r   )r   rb  r<   r<   r=   r  j  s   r  c                 C   s    t t | tjt |tjS rS   )r6   r  r  rL   r  )r   rc  r<   r<   r=   r  q  s   r  c                 C   s   t t| |t| |S rS   )r6   r  r  r  )r   rc  rb  r<   r<   r=   
range_maskx  s   r  rZ  c              	      sX     ^ } rd ndrd nd f	dd}|S )Nr   r   c                    sr   | ^  t t   t	  }r+t | 
fddS t | fddS )Nc                      s   t g  S rS   )constant_boundary_condition_2dr<   )ihiwpad_fill_valueprefixrC   r<   r=   r    s    z>constant_boundary_condition_2d.<locals>.load.<locals>.<lambda>c                      s   g  S rS   r<   r<   )r  r  r  r_  r<   r=   r    rE   )r6   r  r  r  )r   r  	r  r|  r  r  Z	padding_hZ	padding_wr  rC   r_  )r  r  r  r=   r     s   	z,constant_boundary_condition_2d.<locals>.loadrr   r   )rC   r  r  r  r{   r   r<   r  r=   r    s   r  c                 C   s   t | d||   || d  || d  || }|r|t | d||   || d  d|| d   || }tjj|d ||  |  ||  dkra|d8 }tjjd|||  |  ||   tjj|| dkrztjj|| d}||fS |}||fS )Nr   r   r   F)r   r7   r   r   rQ  r  r   )rC   r   kernel_sizer|  r  	ceil_modeZx_outZx_altr<   r<   r=   pooling_size  s    ,0*$r  rj  c                    s  dkrddg|dkrddg}s t  d t dt dt |d}t| ts/J t dks7J tdks?J tdksGJ t|dksOJ t|  dv sYJ |   |  ^ }}t|d |\}}	td |\}
}d sd s|	s|rt| tdn| 	 t
|||
g } d  d  }|dkstdd |D rt|  ||S  fd	d
}tj|  |  tj|dd|d}tj|  tjtj|dd|d}||fS )Nr   r   r   rW   rX   z-inf   c                 s       | ]}|d kV  qdS r*  r<   rm  r<   r<   r=   r     r   z*max_pool2d_with_indices.<locals>.<genexpr>c                    s   | ^ }}}d }d }t t d t d D ]R\}}|d  | d  }|d  | d  }g |||}	|r]t| | tj}
|d u rR|
}ntt|	||
|}|d u rd|	}qt	|	|}q|ro|S |S r  )
r   productr   r6   r  rL   r  r'  gtrb  )rV  return_indexr  bhbwrQ  Zmaxindexr  r  r;  r   r  r  r|  r  r_  r<   r=   rQ     s$   $z#max_pool2d_with_indices.<locals>.fnF)r  r   T)r4   rF   r-   rq   rr   r;  r  r  r   r   rG   r    fallback_max_pool2d_with_indicesr*   r   r   rg   r   r   rL   r  )rC   r  r|  r  r  r  r<  r|  h_out
ceil_mode1w_out
ceil_mode2ra  window_sizerQ   r1r2r<   r  r=   max_pool2d_with_indices  sV   



r  c                    s  dkrddg|dkrddg}st |tsJ tdks#J tdks+J tdks3J t|dks;J t| dv sEJ |   z|  }W n tyZ   d }Y nw t |trt |jjtr|jj}	t	j
d t	j|	 |	 |	 d|	d}
|
  |
 }nz| }W n ty   d }Y nw |d ur|d dkp|d uo|d dk}tjptjptj}tdd |D s|r|st| ||||S |  | ^ }}
|  ^ }| |   t| }tfd	d
td d D tfdd
td d D 		 }|dkr)t| ||||S |  	
fdd}tj|  |  ||dS )Nr   r   r   r  )r   ra   rq  )r!  r  r   c                 s   r  r*  r<   rm  r<   r<   r=   r   C  r   z3max_pool2d_with_indices_backward.<locals>.<genexpr>c              	      8   g | ]}t |d   t d | d   d    dqS r   r   r  rB   r|  r  r|  r<   r=   rD   S      *z4max_pool2d_with_indices_backward.<locals>.<listcomp>c              	      8   g | ]}t |d   t d| d   d    d qS r   r   r  rB   r  r  r<   r=   rD   Y  r  r  c                    sV  | ^ }}}t |
 | tj}|d  }|d  }t t|d  d  d tj}t t|d  d  d tj}t t|d d tj}t t|d d tj}t |t dtj}t |t dtj}t |t tj}t |t tj}d }	tD ]}
t	D ]}t 	|t |
tj}t 	|t |tj}g |t j
t |t |t dtjd ddt j
t |t |t dtjd dd}|} |}t ||}|	d u rt ||t dtj}	qt t t ||t |||}t |t 	|	||	}	qq|	d us)J |	S )Nr   r   Fr  r   r  )r6   r  rL   r,  r   rb  r  rc  r   rK   r  r[  r  r'  r`  r  r  )rV  r  r|  r  Z
index_testphstartpwstartphendpwendgradientph_pw_phpwZ
grad_indexZindex_actualZ	grad_partr   r  r  h_window_sizer  Zindices_sizer  r  pooled_heightpooled_widthr|  w_window_sizewidthr<   r=   rQ   i  sl     


#z,max_pool2d_with_indices_backward.<locals>.fnr   )rF   r-   rq   rr   r;  r  AttributeErrorr   r*   r!   r  r1  r   rg   Zdecide_layoutr   Zcoordinate_descent_tuningZmax_autotuneZmax_autotune_pointwiser   )fallback_max_pool2d_with_indices_backwardr   rG   r  r   r   )r  rC   r  r|  r  r  r  r   Z	gO_strider   Zx_bufferZx_strideZis_channels_lastZautotuner<  heightr{   ra  r  rQ   r<   r  r=    max_pool2d_with_indices_backward  s   	

 ;r  c                    s(   |   ^ }}}|    fdd}|S )Nc              
      s   |\|\ |\}}t t t   tjt |tjt t  tjt |tj}t | fdddS )Nc                      s   g    S rS   r<   r<   )h_start_indexr  r  r  w_start_indexr_  r<   r=   r    r  z3pad_adaptive_loader.<locals>.load.<locals>.<lambda>r  )r6   r  r  r  rL   r  r  )r  Z
incrementsZstart_indicesZend_indicesh_end_indexw_end_indexr  r_  )r  r  r  r  r  r=   r     s$   z!pad_adaptive_loader.<locals>.loadr  )rC   r{   r|  r  r   r<   r  r=   pad_adaptive_loader  s   r  c                    s(   |\|\  fdd}|S )Nc                    s   | ^ }}}|} |}|}|}d }	t td td D ]\}
}|||
|g||g||g}|	d u r>|}	q&t||	}	q&|	S r  r   r  r   r6   rK   )rV  r  r  r  r  r  r  r  r  totalr  r  r;  Zh_end_index_fnZh_start_index_fnkernel_maxesZw_end_index_fnZw_start_index_fnr<   r=   fn_sum  s"   $z)_adaptive_pooling_idx_sum.<locals>.fn_sumr<   )r  Zstart_index_fnsZend_index_fnsr  r<   r  r=   _adaptive_pooling_idx_sum  s   r  c                    s  t tsJ t|dksJ    ^ }}}tjj|}tjj|}|\}}||kr9||kr9t	S |dksA|dkrTg |||}t
|  dS || dkrm|| dkrm|| || g}t|S t|| d |}	t|| d |}
t|||g } }dd }dd }tj|||d	}tj|||d	}tj|||d	}tj|||d	}|	|
 }|d
krt|S t|	|
g||g||g tt fdd}tj |||d}|S )Nr   r   rZ  r   c                 S      t | | |S rS   r   r   out_diminp_dimr<   r<   r=   start_index
     z)_adaptive_avg_pool2d.<locals>.start_indexc                 S   s   t | d | | d |S r0  r  r  r<   r<   r=   	end_index  r  z'_adaptive_avg_pool2d.<locals>.end_indexr  r  r  c                    s   t  | t | S rS   )r6   truedivr  r  r  ones_loaderrC   r<   r=   rQ   #  s   z _adaptive_avg_pool2d.<locals>.fnr   )rF   r-   rq   r;  rr   r7   r   r   rt  r	  r`  rg   r   
avg_pool2dr0   rG   r   r   fallback_adaptive_avg_pool2dr  r  	ones_liker*   r   )rC   r  r<  Zh_inZw_inr  r  Zo_sizer  h_kernel_maxw_kernel_maxra  ra   r  r  r  r  r  r  r  rQ   rvr<   r  r=   _adaptive_avg_pool2d  sT   

r  c                    s"      ^ }}}tjj|}tjj|}|^ }}}	|| dkr9||	 dkr9t|| ||	 gddS t||}
t||	}dd fdd}tj	||d}tj	|||d}tj	|	|d}tj	||	|d}t
|
|g||g||g  fd	d
}tj  |t|d}|S )Nr   r   )divisor_overridec                 S   r  rS   )r   r  r<   r<   r=   r  D  r  z0upsample_nearest2d_backward.<locals>.start_indexc                    s    | d ||S r0  r<   r  )r  r<   r=   r  G  r  z.upsample_nearest2d_backward.<locals>.end_indexr  c                    s    | t S rS   )r  r  )r  rC   r<   r=   rQ   V  r  z'upsample_nearest2d_backward.<locals>.fnr   )r;  rr   r7   r   r   rt  r  r0   r   r   r  r*   r   r   rg   rG   )rC   r  Z
input_sizerD  rE  r<  Zinp_hZinp_wZout_hZout_wr  r  r  r  r  r  r  rQ   r  r<   )r  r  rC   r=   upsample_nearest2d_backward2  s8   

r  r<   c                    s  ss
ddgt dt dt dt| ts J tdks(J tdks0J tdks8J t|  dv sBJ |   |  ^ }}}	t|d|\}
}t|	d|\}}d spd sp|sp|rxt| dd}n|  d}t	||
|g }| 
  d d  }|dkrt| |||S fd	d
|r|r|rd| n
dd d    fdd}ntt| d|rЈnd fdd}tj|   ||d}|S )Nr   r   r  r   r  TFr  c           	         s   | ^ }}}d }t t d t d D ]3\}}|d  | d  }|d  | d  }|g |||}|d u rC|}qt||}q|S r  r  )	rV  r  r  r  r  r  r  r  r;  )r  r  r|  r<   r=   r    s   $zavg_pool2d.<locals>.fn_sumrZ  c                    s   t | t  S rS   )r6   r   r  r  )ra   r  r7  r_  r<   r=   rQ     r  zavg_pool2d.<locals>.fnc                    s   t  |  | S rS   r6   r  r  )r  r  r_  r<   r=   rQ     r  r   )r4   rF   r-   rq   rr   r;  r  r  r   rG   rg   fallback_avg_pool2dr  r*   r   r   )rC   r  r|  r  r  count_include_padr  r<  r|  r  r  r  r  r  had_paddingra  r  rQ   r  r<   )ra   r  r  r  r  r7  r|  r_  r=   r  h  sf   






r  c                    s  d u sdksJ dssddgt | tsJ t |ts$J tdks,J tdks4J tdks<J t| dv sFJ |   | ^ }td|\}	}
td|\}}|  d pwd pw|
pw||  ^ }	
t| }| }t	fddt
d d D t	fddt
d d D  }|d	krt| ||S fd
d  	
fdd}tj|  |||d}|S )Nr   zdivisor must be not zeror   r  r   c              	      r  r  r  r  r  r<   r=   rD     r  z'avg_pool2d_backward.<locals>.<listcomp>c              	      r  r  r  r  r  r<   r=   rD     r  r  c              	      sX  t d tj}t d tj}t d tj}t d tj}t d tj}t d tj}t t | ||}t t |||}	t t ||t t  tj|}
t t |	|t t tj|}t 	|t dtj}t 	|	t dtj}	t |
t  tj}
t |t tj}t t |
|t ||	}|S )z{
        This computes the scaling factor that we will divide an element
        by when `count_include_pad=False`
        r   r   )
r6   r  rL   r,  r[  r   rc  rK   r  rb  )r  r  Zstride_hZstride_wZpad_hZpad_wZkernel_hZkernel_wZhstartZwstartZhendZwendZdivide_factor)r  r  r  r|  r  r<   r=   !compute_pool_size_without_padding  s,   

z>avg_pool2d_backward.<locals>.compute_pool_size_without_paddingc                    sR  | ^ }}}|d  }|d  }t t|d  
d  
d tj}t t|d  
d  
d tj}t t|
d d tj}t t|
d d tj}t |t dtj}t |t dtj}t |t tj}t |t 	tj}d }tD ]}	tD ]}
t 	|t |	tj}t 	|t |
tj}d ur}nssd d  }n ||}t 
g |t jt |t |t dtjddt jt |t |t dtj	dd|}t t ||t ||}|d u rt ||t dtj}qt |t 	|||}qq|d us'J |S )Nr   r   Fr  r  )r6   r  r   rL   r,  rb  r  rc  r   rK   r  r  r[  r  r  r'  r`  )rV  r  r|  r  r  r  r  r  r  r  r  r  r  r7  partr  )r  r  r  r  r  r  r  r  r  r  r|  r  r<   r=   rQ   )  sv     
	


*zavg_pool2d_backward.<locals>.fnr   )rF   r-   rq   rr   r;  r  r   rG   rg   r  r   fallback_avg_pool2d_backwardr*   r   r   )r  rC   r  r|  r  r  r  r  r<  r  r  r  r  r{   ra  ra   r  rQ   r  r<   )r  r  r  r  r  r  r  r  r  r  r  r|  r  r  r=   avg_pool2d_backward  sf   "Ar  c                 C   s   |   }t|tr|g}n|stt|}t|dkr*t|dv s(J d| g S t|}tt|D ]5}|| dk rL||  t|rHt|nd7  < d||   krZt|k sin t|dkrg|| dksiJ q4tt|t|ksxJ d|S )Nr   )r<   r   r  zinvalid axis: r   zreduction axis not unique)rr   rF   rb   r   rq   rH   rG   rI   )rC   axisrq  r   r<   r<   r=   _validate_reduction_axiss  s    
 :r  c          
         s   |d ur	t | |} |  tt| |}g }g g }g ttD ]}||v r5| ||  q"| ||  q" fdd}r_t}	D ]	}t	d|	|< qTn|}	| 
  t|  |pn|  |  ||	|dS )Nc                    s   t |t ks
J rt  t ksJ  fddD  t  t ks)J d gt  t |  }tt t|D ]\}}|||< q@|S )Nc                    r   r<   r<   r   r   r<   r=   rD     rE   z9_make_reduction_inner.<locals>.loader.<locals>.<listcomp>)rq   r   chainr   )r   Zreduction_indexr  rV  varZinner_loaderkeepdimsZkept_idxZreduced_idxrq  r   r=   r    s   
z%_make_reduction_inner.<locals>.loaderr   )r   	dst_dtyper  r   r   reduction_ranges)r   rr   rI   r  r   rq   r~   rG   rh   r   r   dictr   rg   )
rC   r  r  ra   r   Z
kept_sizesZreduced_sizesr   r  ra  r<   r  r=   _make_reduction_inner  s<   



r  r+  c                    s   dd d fdd}|S )NFrd   c                   sB   t | ||| d}tjd| d|}t|jjtr|  |S )Nr  r  ra   r   )r+  Z
input_noder<   )r  r+   r   rF   r   r   )rC   r  r  ra   r   r  r   r+  r<   r=   r     s   zmake_reduction.<locals>.innerNFr<   )r+  r   r   r<   r	  r=   make_reduction  s   r  rd   c                   s   |d ur	t | |} |   t| |}|  }|tjtjfv r$t | tj} t| ||}t	 fdd|D }t
||  |  }t|t| }t t|||S )Nc                 3       | ]} | V  qd S rS   r<   r   rq  r<   r=   r     r   zmean.<locals>.<genexpr>)r   rr   r  rg   rL   float16r  r   sum_r5   r!   r&   r   r%   r   rG   div)rC   r  keepdimra   r  
sum_resultdenomr<   r  r=   r-    s   

r-  c           
         s   |d u rd}|    t| |}t| |dd}|r|  tt| |}t|||}t fdd|D }|r>t	|| d}t
||  |  }t|t|  }t||}	|s\|	S |r`|nt||}|	|fS )Nr   T)r  c                 3   r  rS   r<   r   r  r<   r=   r     r   z var_mean_sum_.<locals>.<genexpr>r   )rr   r  r-  r   squarer[  r  r5   rh   ZMaxr!   r&   rg   r   r%   r   rG   r  r7  )
rC   r  
correctionr  return_meanZx_meanZdiffsr  r  Zx_varr<   r  r=   var_mean_sum_  s&   

r  c                 C   sV   t | |}t| ||d d d}|d }t|d }t|tjo*t|tjk o*t|dkS )Nr  r   r  r   )	r  r  r5   rF   rh   r   rb   r   Zunroll_reductions_threshold)rC   r  r  r   r   Zreduction_numelr<   r<   r=   use_two_step_variance  s   


r  c                   s    d u rd t | ||d d d}|d}|d |d tjjd|fd|  d|\}}}	|  |  |  t| |}t	fdd	|D d
d  fdd}
t
|
|}|rj|  ||fS |S )Nr   r  r   r  r  Zwelford_reduce)Z	inner_fnsr+  ra   c                 3   r  rS   r<   r   r  r<   r=   r     r   z$var_mean_welford_.<locals>.<genexpr>c                 S   s4   t | tjr| jstt| tj|S t	| |S rS   )
rF   rh   r   r  r6   r   r  rL   r  r  r  r<   r<   r=   get_constant_or_index_expr  s   z5var_mean_welford_.<locals>.get_constant_or_index_exprc                    s4    }}t d}| t |||  S r   )r6   r  rb  )r   ri  rv  zero)r  ra   r  rnumelr<   r=   r8  !  s   

z#var_mean_welford_.<locals>.scale_fnr<   )r  rJ  r!   ZWelfordReductionr   rg   r   rr   r  r5   r   )rC   r  r  r  r  r   r  r-  m2r{   r8  r  r<   )r  ra   r  r  rq  r=   var_mean_welford_  s6   




r  )r  r  c                C   2   t | ||drt| |||ddS t| |||ddS )Nr  r  Fr  r  r  r  r  r  r  rC   r  r  r  r<   r<   r=   var_/     

r#  c                C   r  )Nr  Tr   r!  r"  r<   r<   r=   var_mean;  r$  r%  c                 C   st   |dk rt t| | |S |dkrtd|S |dkr| S t | |d |}t||}|d dkr8t|| }|S )Nr   r   r   )pow_recursiver6   
reciprocalr  r   )rC   r   ra   r  r<   r<   r=   r&  G  s   r&  c                 C      t | |S rS   )r6   powr   r   r<   r<   r=   
pow_nativeV  r  r+  )r   c                    sV  t trtkrt tS t trdkrt S t tr,dkr,t S tdd  fD }t|}t toQd  k oIdk n  pQ|oQdk}|ro   fdd	}t	j
    |  d
S t  tr dkr}tdS  dkrt rtS |rt  trt S t trt S t S t S )Nr6  r   c                 s   s$    | ]}t |tjr| V  qd S rS   )rF   r!   r-   rg   rA   r<   r<   r=   r   n  r[  zpow.<locals>.<genexpr>i    r   c                    s   t |   S rS   )r&  rg   r  r   r   r  r<   r=   rQ   x  re  zpow.<locals>.fnr   r   )rF   r   rb   r)  sqrtr	  r   r   r   r*   r   r   rg   rr   r   rA  r   exp2fallback_pow_scalarfallback_pow_tensor_scalarfallback_pow_tensor_tensorr+  )r   r   ra   Zis_integer_powZembed_exponentrQ   r<   r-  r=   r)  d  s@   
"







r)  c                 C   s   t | tr	| j}n| }t |tr|j}t |tjs3tj|  |  |	 | 
 dj}t |tjs3J t |tjrN| sNt |jtjsN|  |j|_| S tjj|||d | S )Nr   unsafe_alias)rF   r-   r   r!   r  r*   r   r   rg   r   rr   r  Z	NopKernelr   r  Zrealize_into)changedr;  r4  Zchanged_datar<   r<   r=   r    s2   

r  c                 C   s   t | t| |S rS   )r  rA  )rC   r  r<   r<   r=   fill_  rD  r6  c                 C   s4   t ||  }t||  }t||  }t| |S rS   )r  r   r   rg   r   rr   r  )dstr  r  r<   r<   r=   r    s   
r  c                 C   r(  rS   )r6   floordivr*  r<   r<   r=   r8    r  r8  c                 C   r(  rS   )r6   truncdivr*  r<   r<   r=   r9    r  r9  c                 C   s   t | ot |}t| ot|}|dkr(|rJ d|r!t| |S tt| |S |dkr@|r2J d|r9t| |S tt| |S t| |S )NrI  z5floordiv operands can not be boolean at the same timerK  z5truncdiv operands can not be boolean at the same time)rl   rn   r8  rI  r  r9  rK  )r   r   Zrounding_modeZboth_integerZboth_booleanr<   r<   r=   div_mode  s   
r:  c                 C   s8   t | ot |}|rt| |S ttjj}t|| |S rS   )rn   logical_andr(   r  r   r  r   )r   r   Z	both_boolrQ   r<   r<   r=   r     s
   
r   c                 C   s:   t dd | |fD }|rt| |S dd }t|| |S )Nc                 s   s     | ]}t |pt|V  qd S rS   )rn   rl   rA   r<   r<   r=   r     rP  zdiv_prim.<locals>.<genexpr>c                  W   r&  rS   r  r   r<   r<   r=   rQ     r(  zdiv_prim.<locals>.fn)r   r9  r   r   r   Zis_integralrQ   r<   r<   r=   div_prim  s
   
r=  c                 C   s4   t | pt| }|rdd }ndd }t|| |S )Nc                 S   r(  rS   )r6   modr*  r<   r<   r=   rQ     r  zfmod.<locals>.fnc                 S   r(  rS   )r6   fmodr*  r<   r<   r=   rQ     r  )rn   rl   r   r<  r<   r<   r=   r?    s
   
r?  c                 C   s:   |   }t|st|rt| t } dd }t|| S )Nc                 S   s
   t | S rS   )r6   rsqrtrk   r<   r<   r=   _rsqrt  r(  zrsqrt.<locals>._rsqrt)rg   r   r   r   rL   r  r   )rC   ra   rA  r<   r<   r=   r@  
  s
   r@  c                C   B   t |  st|  r|d u rtj}td|d}|| |||dS )Nr  r  rd   r   rg   r   rL   r  r  rC   r  r  ra   rQ   r<   r<   r=   r       

r  c                C   rB  )Nr%  r  rd   rC  rD  r<   r<   r=   r%  !  rE  r%  c                 C   s   t | tj} td| ||dS )Nr   r  r  )r   rL   rm   r  rC   ru   r  r<   r<   r=   
reduce_any,  s   rH  c                 C   2   |d urt | ||dt| ||dfS t | d |dS NrF  )reduce_amaxreduce_argmaxrG  r<   r<   r=   
reduce_max2  
   rM  c                 C   rI  rJ  )reduce_aminreduce_argminrG  r<   r<   r=   
reduce_min=  rN  rQ  xor_sumr  r  argmaxr  argmin
logical_or)r   r   c                 C   r  r  r#  r   INT_TO_FLOATr   r<   r<   r=   register_pointwise_numericW  s   rY  c                 C   s   t | tjddS )NT)ro   r"  rV  rX  r<   r<   r=    register_pointwise_numeric_ldf64]  s
   rZ  r$  logical_not)r   )ro   r   r   identityc                    s,   | t |< t|   fdd}t| | d S )Nc                     sB    | i |}g }t | d |D ]\}}|t||dd q|S )Nr   Tr3  )r   r~   r  )rz   r   resultsZmut_resultsrx   r  outplace_opr<   r=   rQ     s
   z$register_foreach_inplace.<locals>.fn)inplaceable_foreach_opsr   rK   r   )aten_opZoutplace_aten_opr_  rQ   r<   r^  r=   register_foreach_inplace  s   
rb  c                    s   t | d d fdd}|S )Nr  c                     s.    | i |}t || d  }t| d |S r   )r   rg   r  )rz   r   r  r^  r<   r=   rQ     s   zregister_inplace.<locals>.fn)r   )ra  r_  rQ   r<   r^  r=   register_inplace  s   
rc  c                 C   s"   t jj }| |jjjv sJ | S rS   )rL   Z_guardsZTracingContextrH  Z	fake_moderN  Zvar_to_range)r   r  r  Ztracing_contextr<   r<   r=   sym_constrain_range-  s   rd  c                 C   &   t jjjd }t|tjsJ |jjS ru  	r7   r   r   r  rF   rL   ZSymIntr   r  r   ru   r;  r<   r<   r=   sym_size4  s   rh  c                 C   re  ru  rf  rg  r<   r<   r=   
sym_strideE  s   ri  c                 C   s   |   S rS   )r  rn  r<   r<   r=   	sym_numelM  rW  rj  c                 O   rU  )NzHelpful for debuggingr8   )r  rz   r   r<   r<   r=   foobarV  rW  rk  c                 C   s   |    t| S rS   )r   r	  rk   r<   r<   r=   _realize[  s   rl  c                 C   s    |    |   t| | | S rS   )r   r!   ZAccumulateGrad)variableZnew_gradr<   r<   r=   accumulate_grad_a  s   rn  c                 C   s"   t j| ||d dd | D S )N)
kernel_idxgridZkernel_argsc                 S   s    i | ]\}}t |tr||qS r<   r   )rB   r  r;  r<   r<   r=   r   m  r  z'triton_kernel_wrap_.<locals>.<dictcomp>)r!   ZUserDefinedTritonKernelr   ro  rp  r   r<   r<   r=   triton_kernel_wrap_j  s   rr  c                    s$    fdd|  D }t| ||dS )Nc                    s&   i | ]\}}|| v rt |n|qS r<   r	  )rB   r  rC   tensors_to_cloner<   r=   r   r  s    z&triton_kernel_wrap.<locals>.<dictcomp>rq  )r   rr  )ro  rp  r   ru  r<   rt  r=   triton_kernel_wrapp  s   
rv  c                 C   s   t tj| S rS   )r-   r   r!   ZWait)r   r<   r<   r=   wait}  r  rw  c                 C      t j| ||||S rS   )r!   Z	Broadcastr   )r   r  tagranks
group_sizer<   r<   r=   r     r	  r   c                 C   rx  rS   )r!   Z	AllReducer   r   	reduce_opry  rz  r{  r<   r<   r=   	allreduce  r	  r~  c                 C   s    t tjtj| |||S rS   )r-   r   r!   ZAllGatherIntoTensorrQ  rt  )Zshardry  rz  r{  r<   r<   r=   all_gather_into_tensor  s
   r  c              	   C   r  rS   )r-   r   r!   ZReduceScatterTensorr|  r<   r<   r=   reduce_scatter_tensor  s   r  c                 C   rx  rS   )r!   ZAllReduceCoalescedr   r|  r<   r<   r=   all_reduce_coalesced  r	  r  c                 C   s"   t j| |||}tttj|S rS   )r!   ZAllGatherIntoTensorCoalescedr   rG   mapr-   )r  ry  rz  r{  r  r<   r<   r=    all_gather_into_tensor_coalesced  s   r  c                 C   s$   t j| ||||}tttj|S rS   )r!   ZReduceScatterTensorCoalescedr   rG   r  r-   )r  ZreduceOpry  rz  r{  r  r<   r<   r=   reduce_scatter_tensor_coalesced  s   
r  c              
   C   r  rS   )r-   r   r!   ZAllToAllSingle)r  Zoutput_split_sizesZinput_split_sizesry  rz  r{  r<   r<   r=   all_to_all_single  s
   r  c                 C   s"   t | } tjtjj| || | S rS   )r	  r!   _CollectiveKernelcreate_inplace_c10d_functionalall_reduce_r  rt   r}  
group_namer<   r<   r=   _all_reduce  s
   r  c                 C      t jtjj| || | S rS   )r!   r  r  r  r  r  r  r<   r<   r=   _all_reduce_  s   r  c                 C   s(   dd | D } t jtjj| || | S )Nc                 S   r?   r<   rs  r  r<   r<   r=   rD     rE   z)_all_reduce_coalesced.<locals>.<listcomp>r!   r  r  r  all_reduce_coalesced_r  r   r}  r  r<   r<   r=   _all_reduce_coalesced  s   r  c                 C   r  rS   r  r  r<   r<   r=   _all_reduce_coalesced_  s   r  c                 C   s   t jt jtjj| ||S rS   )r!   r-   r   r  create_out_of_placer  r  r  )rt   r{  r  r<   r<   r=   _all_gather_into_tensor  s   r  c              	   C   s"   t tjjtjtjj	| ||S rS   )
r  r  r!   r-   r   r  r  r  r  r  )r   r{  r  r<   r<   r=   !_all_gather_into_tensor_coalesced  s   r  c              	   C   s    t jt jtjj| |||S rS   )r!   r-   r   r  r  r  r  r  )rt   r}  r{  r  r<   r<   r=   _reduce_scatter_tensor  s   r  c              
   C   s$   t tjjtjtjj	| |||S rS   )
r  r  r!   r-   r   r  r  r  r  r  )r   r}  r{  r  r<   r<   r=    _reduce_scatter_tensor_coalesced  s   r  c                 C   s   t jtjj|  | S rS   )r!   Z_WaitKernelZcreate_waitr  wait_tensorr  )rt   r<   r<   r=   _wait_tensor  s   r  zRInductor support for distributed collectives depends on building torch.distributedr  )quantized_loweringsrS   )NNNNF)F)r   r   rd  r   r  )r   r   r   r  )Trf   )r   NNr   )NN)r   FF)r   F)NNN)NrZ  )Nr   r   F)NNNN)r<   r   FTNr
  (  r   r   loggingr!  r
  collectionsr   collections.abcr   typingr   r   r   r   r   r	   r
   rh   rL   Ztorch.fxZtorch.utils._pytreeutilsZ_pytreer  Z*torch._higher_order_ops.triton_kernel_wrapr   r   Ztorch._prims_commonr   r   r   r   r   r   r   r   r   r   Ztorch.fx.experimental.sym_noder   r   Ztorch.utils._sympy.functionsr   r   r   Z_dynamo.utilsr    r   r    r!   r"   decompositionr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   Zvirtualizedr6   r7   	getLoggerr  r%  r}   rT   rI   r   r  Ztr_c10dr   rJ   r   r   r  r`  r>   r@   rV   r  r  r  ZbmmZconvolutionZconvolution_backwardr  r  mmrG  rH  Zupsample_bicubic2dZ_int_mmr  Zint8Zint16r,  r  r  r`  r   Z	complex32Z	complex64rm   r  rc   rb   re   rl   rn   r|   r   r   r   r   r   r   r   r   r   r  ra   r   Zconvert_element_typer  r  r  r  r   r  Z
device_putr  r#  r%  r'  r   aliasdetachZdetach_ZliftZview_ofr.  rp   r/  r7  r8  r<  r?  rD  rF  rI  rJ  rK  r   rW  rY  rb  Z_unsafe_viewZreshaperc  slicerl  rs  rH   r  r  r  r  r  r  r  r  r  r  r  r  rS  r  re  r  r  r  	lru_cacher  r  r  r  r  r  r  r  r(  r+  Zrngprimsr5  r6  r;  Z	bernoullir7  r<  r?  rB  rC  r-  r  rK  rG  rI  rO  rN  rM  randintZforce_stride_orderrT  r4  rV  r\  rY  Zlookup_seedr]  randomr  ra  re  rk  rr  rt  r  r  ZFALLBACK_ALLOW_LISTZ_adaptive_avg_pool2d_backwardZ
_cudnn_rnnZ_cudnn_rnn_backwardZcumsumZcumprodZ_embedding_bagZ_embedding_bag_forward_onlyZ_fused_moving_avg_obs_fq_helperZ*_fused_moving_avg_obs_fq_helper_functionalZgrid_sampler_2d_backwardZrandpermr  Z'_scaled_dot_product_efficient_attentionr  Z#_scaled_dot_product_flash_attentionr  Z_flash_attention_forwardZ_flash_attention_backwardZ_efficient_attention_forwardZ_efficient_attention_backwardsortZstableZ(_sparse_coo_tensor_with_dims_and_tensorsZ_thnn_fused_lstm_cellZtopkZupsample_bicubic2d_backwardZ
_scaled_mmr  Zupsample_linear1dZupsample_trilinear3dZupsample_linear1d_backwardZupsample_trilinear3d_backwardZ_adaptive_avg_pool3dZadaptive_max_pool2dZadaptive_max_pool3dZaddbmmZaddmvZ_addmm_activationZ
avg_pool3dZ
block_diagZ_cdist_forwardZcummaxZcumminZdigammaZ_efficientzerotensorZ*_embedding_bag_per_sample_weights_backwardZfractional_max_pool2dZfractional_max_pool3dfrexpZgeqrfZhistcZi0ZigammaZigammacisinZkthvalueZlinalg_cholesky_exZlinalg_crossZ_linalg_detZlinalg_householder_productZlinalg_inv_exZlinalg_ldl_factor_exZlinalg_ldl_solveZ	linalg_luZlinalg_lu_factor_exZlinalg_lu_solveZlinalg_matrix_expZ	linalg_qrZ_linalg_slogdetZ_linalg_solve_exZlinalg_solve_triangularZ_linalg_svdZlogcumsumexpZ	lu_unpackZmax_pool3d_with_indicesZmax_unpool2dZmax_unpool3dZmedianr  Z	nanmedianZormqrZ_pdist_forwardZpixel_shuffleZpixel_unshuffleZ	polygammaputZreflection_pad1dZreplication_pad1dresizeZresize_Z	resize_asZ
resize_as_ZsearchsortedZspecial_airy_aiZspecial_bessel_j0Zspecial_bessel_j1Zspecial_bessel_y0Zspecial_bessel_y1Zspecial_chebyshev_polynomial_tZspecial_chebyshev_polynomial_uZspecial_erfcxZspecial_hermite_polynomial_hZspecial_hermite_polynomial_heZspecial_i0eZ
special_i1Zspecial_i1eZspecial_laguerre_polynomial_lZspecial_modified_bessel_i0Zspecial_modified_bessel_i1Zspecial_modified_bessel_k0Zspecial_modified_bessel_k1Zspecial_ndtriZ!special_scaled_modified_bessel_k0Z!special_scaled_modified_bessel_k1Zspecial_spherical_bessel_j0Zspecial_zetaZtakeZ
_trilinearuniformZ_adaptive_avg_pool3d_backwardZadaptive_max_pool2d_backwardZadaptive_max_pool3d_backwardZavg_pool3d_backwardZ_cdist_backwardZ_embedding_bag_dense_backwardZfractional_max_pool2d_backwardZfractional_max_pool3d_backwardZ_linalg_check_errorsZ max_pool3d_with_indices_backwardZ_pdist_backwardZreflection_pad1d_backwardZreplication_pad1d_backwardZreplication_pad2d_backwardZsoft_margin_loss_backwardZlinalg_pinvZatol_rtol_tensorZsegment_reduceZ_segment_reduce_backwardZangleZcholesky_inverseZcholesky_solveZ_fft_r2cZ	histogramZbin_ctZ_histogramdd_bin_edgesZ_histogramdd_from_bin_ctsZindex_reduceZmasked_scatterZmasked_scatter_backwardZ	to_sparseZ
_to_sparseZtriangular_solvegcdZ_linalg_eighrs   r  ZnonzeroZ_primsZ	rng_primsZrun_and_save_rng_stateZrun_with_rng_stateZexponentialr
  r	  r  r  r  r  r  r*  Zscalar_tensorr  Z
LongTensorr  r  r  rA  r  r`  r  r)  Z
empty_liker  Z
zeros_liker  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r"  r  r*  r(  r,  r&  r   r=  rA  rC  rK  rL  rO  rx  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r-  r  r  r  r  r#  r%  r&  r+  r)  ZTensor_Tensorr2  ZScalarr0  ZTensor_Scalarr1  r  r6  r  r8  r9  r  r:  r   r=  Ztrue_dividerm  rW  r?  r@  r  r  r%  r   rH  r  rM  r  rQ  rR  r.  rK  r/  rO  rS  rL  rT  rP  rK   rY  rZ  expr/  expm1Zrelur  r.  r  r[  cossinry  Zbitwise_andZbitwise_left_shiftZbitwise_notZ
bitwise_orZbitwise_right_shiftZbitwise_xorlgammaerfZspecial_erflog1ptantanhr;  r[  rU  logical_xorrb  rc  Z	clamp_minZ	clamp_maxnegr'  	remaindersignZsignbitZ	_neg_viewr  r  r  r  r  necoshsinhacosacoshasinasinhatan2atanatanhcopysignerfcZerfinvhypotlog10	nextafterZ_foreach_addZforeach_add_listZforeach_add_scalarZ_foreach_mulZforeach_mul_listZforeach_mul_scalarZ_foreach_subZ_foreach_negZ_foreach_absZ_foreach_powZScalarAndTensorZ_foreach_divZforeach_div_listZforeach_div_scalarZ_foreach_sqrtZ_foreach_maximumZ_foreach_minimumZ_foreach_clamp_minZ_foreach_clamp_maxZ_foreach_reciprocalZ_foreach_signZ_foreach_copyrb  Z_foreach_add_Z_foreach_mul_Z_foreach_div_rc  Zadd_Zbitwise_and_Zbitwise_left_shift_Zbitwise_not_Zbitwise_or_Zbitwise_right_shift_Zbitwise_xor_Zmul_Zdiv_ZTensor_modeZlogical_and_Zlogical_not_Zlogical_or_Zlogical_xor_Zsub_Zrelu_Zsigmoid___and__
__lshift____or__
__rshift____xor____iand____ilshift____ior____irshift____ixor__rd  rh  ri  rj  r   methodfuncrk  Z_inductor_testr   rl  Zinductorrn  rr  rv  Z)torch.distributed._functional_collectivesZc10d_functionalr  rw  r   Z
all_reducer~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  ImportErrorr@  r  r  Zregister_quantized_opsr<   r<   r<   r=   <module>   sv   $04 
		"4
'8I
-














1
:F3,
	


		  Y
%/
&


	




	
	


&.


?



I
8







G&

Z
!$($h/""




l
T*6"*
P
 


E0
]
	 (1
+


/























	
	

