o
    I&i                 -   @   s+  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d d	lmZmZmZmZm Z m!Z!m"Z" d d
l#m$Z$m%Z%m&Z&m'Z'm(Z( d dl)m*Z*m+Z+ d dl,m-Z. ej/j0Z0ej12dddZ3dd Z4defddZ5dd Z6dd Z7e4e0j8e0j9ge( dddej:ddfddZ;e4e0j<j=e0j<j>ge( dd Z?e4e0j@j=e0j@j>ge( ddd d!Z@e4e0jAe( d"d# ZAe4e0jBj=e0jBj>e0jCj=e0jCj>ge(d$d%d&d' ZDe4e0jEj=e0jEj>ge( d(d) ZEd*d+ ZFe4e0jGj=e0jGj>ge( d,d- ZHe4e0jIj=e0jIj>ge( d.d/ ZJe4e0jKjLdd0d1d2ZMe4e0jKj=ejNdddd3d4d5ZOe4e0jPj=ejNdddd3d6d7ZQe4e0jPjRejNdddd3d8d9ZSe4e0jTj=ddddd3d:d;ZUe4e0jVj=e0jVj>ge( d<d= ZWe4e0jXj=dd>d?ZYd@dA ZZe4e0j[j=dBdC Z\e4e0j]		ddDedEedFedGee dHee^ f
dIdJZ_e4e0j`				ddKejdLejdGee dMee dNeeja dOebfdPdQZce4e0jdj=dRdSdTedUeedVedWejdXe^dYebdZefd[d\Zfe4e0jgj=dRdSdTedUeedVedWejdXe^dYebdZefd]d^Zhe( e4e0jij=d_d` Zje4e0jkj=dddd dddadbedXe^dcee d%ee ddee deeedfebdZefdgdhZle4e0jmj=e0jmjnge( didj Zoe4e0jmjpddkdlZqe4e0jrj=e0jrjnge( dmdn Zse4e0jrjpddodpZte4e0juj=dqdr Zve4e0juj>dsdt Zwe4e0jxj=dudv Zye4e0jxjzdwdx Z{e4e0j|j=ddddddydzd{Z}e4e0j~j=dd|d}Z~e4e0jj=dd~dZe4e0jj=dddZe4e0jj=dd Ze4e0jjzdd ZdTede^fddZdTedede^fddZ	Rddede^debfddZddede^de^fddZdededebde^fddZ	dde^dedDede^fddZde^fddZe4e0jj=e0jjge(dd		Rddede^debfddZdedZefddZe4e0je( dTededebdZefddZe4e0je( ddTededebdZefddZe4e0je( ddTedebdZefddZe4e0je( ddTedebdZefddZe4e0jj=ddedebdebfddZe4e0jj=e0jj>ge( dDededZefddZe4e0jj=ddedebfddZe4e0jj=e0jj>ge(dddddddTedebdebdZe	eeef fddZe4e0jj=e0jj>ge( dddedededebdZef
ddÄZe4e0jj=e0jj>ge(dddŃdRdƜdedebdZe	eeef fddɄZe4e0jj=e0jj>ge(ddddRdd˜dedebdebdZe	eeef fdd̈́Ze4e0jj=e0jj>ge( dRddΜdedededebdebdZefddфZe4e0je(dddŃ	R	RddededebdebdZe	eeef f
ddՄZde^dZe	ebebf fdd؄Ze4e0jj=e0jj>ge(ddڃ	ېddede^dZe	eef fdd݄Ze4e0jj=e0jjge(dddddedZe	eeeef fddZe4e0jj=		R	ddedebdebdee^ fddZdededZe	eee eee f fddZdededee^ dZe	eef fddZdDededZebfddZe4e0jdRdddddddededebdebdee dee dee dee dZe	eeeef fddZe4e0jj=e0jj>gdRddddededebdebdebdee dZefddZe4e0je(dd	R		ddTededebdebdebdZe	eef fddZe4e0jj=dd Ze4e0je( 	R	ddDedededebdebdZefdd Zdd Zdd Ze4e0je( dd Ze4e0je( dd Zd	d
 Ze4e0je(ddd Ze4e0je(ddd Zdd Ze4e0jÃe( dd Ze4e0jŃe( dd Ze4e0jj=e0jje0jj=e0jjge(ddd Zʐdd Ze4e0j̃e( dd Ze4e0j΃e( dd Ze4e0jj=e0jje0jj=e0jjge(ddd Ze4e0jӃe( ddTed!edZefd"d#Ze4e0jփe( d$edTed!eԐd%edZef
d&d'Ze4e0jj=e0jj>ge( d(d(d)d*d+Ze4e0jj=e0jj>ge( dd0d,d-Ze4e0jjԃdd/d0Ze4e0jjރdd1d2Ze4e0jj=		dِd3d4Ze4e0je( d5d6 Zd7d8 Zdd:d;Z	dd<ejdEejd=e
eee eef d>e
eee eef d?e
eee eef d@ebdAeedBee
eee eef  fdCdDZdEdF Ze4e0jj=d<ejdEejdGejd=eee d>eee d?eee d@ebdBeee dAeefdGdHZejjrej12dIddZe4ej/jjj=dJdK Ze4ej/jjj=dLdM ZejjrOej12dNddZe4ej/jjdOdP Zej12dQddZe4ej/jjj=dRdS Ze4ej/jjj=dTdU Zej12dVddZe4ej/jj	W	X	Y	ddZd[Z d\d] Ze4e0jj=	W	X		R	dd^d_Zd`da Ze4e0jj=dbdc Ze4e0je( 	W	X		R	ddddeZe4e0j	e(ddfdg Z
e4e0jj=dhdi Ze4e0jj=djdk Ze4e0jj=dldm Ze4e0je(ddndo Zdpede^fdqdrZe4e0je(dd%dsdt Ze4e0je(ddudv Ze4e0je(dd%dwdx Ze4e0je(ddydz Ze4e0jjdd{d|Ze4e0jj=e0jj>ge( d}d~ Ze4e0j j=e0j j>ge( dddeedeefddZ e4e0j!je0j"jgdd Z#e4e0j$j=gdd Z%e4e0j&j=e0j&j>ge( d(d(d)ddZ'dd Z(e(g e0j)e0j*e0j+e0j,e0j-e0j.e0j/e0j0e0j1e0j2e0j3e0j4e0j5e0j6e0j7e0j8e0j9e0j:e0j;e0j<e0j=e0j>e0j?e0j@e0jAe0jBe0jCe0jDe0jEe0jFe0jGe0jHe0jIe0jJe0jKe0jLe0jMddddZNe(g e0jOe0jPe0jQe0jRe0jSe0jTe0jUe0jVe0jWe0jXe0jYe0jZe0j[e0j\e0j]e0j^e0j_e0j`e0jae0jbe0jce0jde0jee0jfe0jge0jhe0jie0jje0jke0jle0jme0jne0joe0jpe0jqe0jre0jse0jtddddZue4e0jvjwgdd Zxdd Zye4e0jze0j{gdd Z|e4e0j}e0j~gdd Ze4e0jje0jjgdYddZe4e0jje0jjgdd Ze4e0jje0jjgdYddZe4e0jj=gdddddZe4e0jj=gdddddZe4e0jge( dd Ze4e0jgdd Ze4e0jgdd Ze4e0jj=dd Ze4e0je( dd Ze4e0jj=		 				dddZe4e0jj=dd ZdՐddZe4e0jj=e0jj>ge( dddddZe4e0jj=e0jj=gdd Ze4e0jjpe0jje0jjpe0jje0jj=e0jjge(d$d%dddZe4e0jj=dd Ze4e0jj=dd Ze4e0jj=dd Ze4e0jje0jje0jje0jje0jj=e0jj=e0jj=gdd Ze4e0jje0jje0jje0jjgdYdÐdĄZe4e0jj=e0jjgdŐdƄ ZdǐdȄ Ze4e0jje0jjgdɐdʄ Ze4e0jje0jjgdːd̄ Ze4e0jj=d͐d΄ Ze4e0jje0jjgdϐdЄ Ze4e0jje0jjgdѐd҄ Ze4e0jj=dӐdԄ Ze4e0jj=e0jj=gdѐdՐdքZe4e0jƐjdאd؄ Ze4e0jȃdِdڄ Ze4e0jʃe( dېd܄ Ze4e0j̃dݐdބ Ze4e0jj=dѐdߐdZe4e0jj=dd ZѐdddZe4e0jj=dd ZԐdd ZՐdd Z֐dd Zאdd Z	ddDedeedeedeedeedeedeedeedeedeedeedeedeedeedeedeedeedeed eedeede^debf,ddZِdd ZdDedpedeedeedeedeedeedeedeedeedeedeedeedeedeedeed eedeede^f&ddZېd	d
 Ze4e0jj=dd Ze4e0jj=	dddZe4e0je( dd Zdd Ze4e0je( dd Ze4e0je(dd%	W	X	Y	dddZe4e0je(ddd ZdDedefddZG dd deZdDededeefdd Ze4e0jj=d!d" Ze4e0je( d#d$ Ze4e0je(dd%d&d' Ze4e0jj=gd(d) Ze4e0jj=					dd*d+Ze4e0jjed,d- Ze4e0jj=d.d/ Ze4e0jj=dd0d1ZddUeed2eed3ebfd4d5Zd6d7 Zd8d9 Ze4e0jj=dѐd:d;Zdѐd<d=Zdd>d?Zd@dA Z ddBdCZddDdEZe4e0jj=dFdG Ze4e0jdHdI Ze4e0jje0jj	e0jj
e0jjge( ddJdKZe4e0jje0jj	e0jj
e0jjgddLdMZe4e0jg	N			ddOedPedQedReԐdSebdTebdUee fdVdWZe4e0jg	ddXedOedPedQededYedZed[ed\eed]eedReԐdSebd^ed_edUee fd`daZe4e0jg	N		ddOedPedQedbee dcebdSebdUee fdddeZe4e0jg		ddXedOedPedQedbee dedYed^ed_edReԐdfeeb dSebdUee fdgdhZe4e0jg	ddOedPedQedZee d[ee d\eed]eedReԐdSebdTebdUee fdidjZe4e0jg	ddXedOedPedQededYedZed[ed\eed]eedReԐdSebd^ed_edUee fdkdlZe4e0jg				ddOedPedQedGee dmee dnee doeee dReԐdpeedcebdUee dqee dree fdsdtZe4e0jg		dҐdXedOedPedQedGee dmee dnee doeedueedYedReԐd^ed_edpeedvebdUee dweee f"dxdyZe4e0jj=g						ddTejdzejdGeej dNeeja d{eej d|eej d}eej d~ebfddZ e4e0j!j"e0j!j#ge( dՐddZ$e4e0j%j"dՐddZ&e4e0j'j=e0j'j>ge( ddd0ddZ(dd Z)dd Z*e4e0j+j=e0j,j=gdddZ+e4e0j-j=e0j.j=gdҐddZ-e4e0j/j=e0j0j=g		dҐdpedee
eeej1f  dee
eeej1f  dee dee f
ddZ/e4e0j2j=e0j3j=gdԐddZ2e4e0j4j=e0j4j5e0j4je0j4j6gdddZ7e4e0j8j5dddddZ9dd Z:e4e0j;j=	dҐddZ<e4e0j=j=dd Z=e4e0j>j=dd Z>dd Z?dd Z@e4e0jAj=e0jBj=gdddZCe4e0jDj=dddZDe4e0jEj=dddZFejGZHdd ZIe4e0jJj=dd ZJe4e0jKj=dd ZKe4e0jLj=dd ZMe4e0jNj=dd ZNe4e0jOje0jOjPge( dddddZQe4e0jRj=	dҐddZSe4e0jTj=dd ZTe4e0jUj=e0jUj>ge( dԐddZUe4ej/j0jVdd ZVe4ej/j0jWdÐdĄ ZWe4e0jXe( dddddŜdƐdǄZYe4e0jZe( deedTedZefdɐdʄZ[dːd̄ Z\d͐d΄ Z]e\e0j^ e\e0j_ e\e0j` e\e0ja e\e0jb e\e0jc e\e0jd e\e0je e\e0jf e]e0jg e]e0jh e]e0ji e]e0jj e]e0jk d dl)Zd dllZd dlmZdϐdЄ Znen  dS (      N)Enum)partial)ListOptionalSequenceTupleUnion)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)corresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDIntLikemake_contiguous_strides_for
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_pytreeatenZIMPLMetac                        fdd}|S )Nc                    s$   t    fdd}t|  S )Nc                    s   t t|   d S N)r   r   opfn DC:\wamp64\www\opt\env\Lib\site-packages\torch/_meta_registrations.pyregister/      z0register_meta.<locals>.wrapper.<locals>.register)r   pytree	tree_map_r)   r,   r&   r(   r+   wrapper,   s   zregister_meta.<locals>.wrapperr*   )r'   r1   r*   r&   r+   register_meta+   s   	r2   type_promotionc                    s>   t j|d| i\}  fdd|D }t| }t|dtjiS )Ntype_promotion_kindc                    s   g | ]}t | qS r*   )r   .0xresult_dtyper*   r+   
<listcomp>A       z$elementwise_meta.<locals>.<listcomp>r3   )utilsr   r    r   r   DEFAULT)r3   args_r*   r8   r+   elementwise_meta8   s   
r@   c                 C   s(   t jt jt jt jt jt ji}|| | S r%   )torchZ	complex32halfcfloatfloatcdoubledoubleget)dtypeZfrom_complexr*   r*   r+   toRealValueTypeL   s
   rI   c                    s2   t tg|R   t k fdd d S )Nc                         d d  S )Nzoutput with shape z# doesn't match the broadcast shape r*   r*   Zbroadcasted_shape
self_shaper*   r+   <lambda>Y       z)check_inplace_broadcast.<locals>.<lambda>)tupler   rA   _check)rL   Z
args_shaper*   rK   r+   check_inplace_broadcastU   s
   rQ   Fc	           	         s  t tjrt dkdd  t tjr$t dkdd  tdd fD rMtt  d u r> ntt	 fdd npRt t tj
s[J tt tfdd t tsqJ tdkd	d  tjf|d
||dS )Nr   c                   S      dS Nz:linspace only supports 0-dimensional start and end tensorsr*   r*   r*   r*   r+   rM   m       z(meta_linspace_logspace.<locals>.<lambda>c                   S   rR   rS   r*   r*   r*   r*   r+   rM   r   rT   c                 s       | ]}t |tV  qd S r%   )
isinstancecomplex)r6   argr*   r*   r+   	<genexpr>u       z)meta_linspace_logspace.<locals>.<genexpr>c                         d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r*   r*   )default_complex_dtyperH   r*   r+   rM   ~   rN   c                      s*   dt j dt  j dt j dS )Nz4received an invalid combination of arguments - got (, ))type__name__r*   )endstartstepsr*   r+   rM      s    c                   S   rR   )Nz$number of steps must be non-negativer*   r*   r*   r*   r+   rM      rT   meta)rH   layoutdevice
pin_memoryrequires_grad)rV   rA   r   rP   dimanyr<   r   Zget_default_dtypeZis_complex_dtyperH   _check_typer   empty)	rb   ra   rc   baserH   rf   re   rg   rh   r*   )r\   rH   ra   rb   rc   r+   meta_linspace_logspace]   sH   

rn   c                    sN   t  jt jk fdd t |  dko  dk dd  |  jS )Nc                         d j  S )Nz2take(): Expected a long tensor for index, but got rH   r*   indexr*   r+   rM          zmeta_take.<locals>.<lambda>r   c                   S   rR   )Nz*take(): tried to take from an empty tensorr*   r*   r*   r*   r+   rM      rT   )rA   rP   rH   long_check_indexnumel	new_emptyshape)selfrr   r*   rq   r+   	meta_take   s   

rz   ri   c                   sh   j }j }t||kdd  t dko dk fdd tjj}|S )Nc                   S   rR   )Nz=linalg.cross: inputs must have the same number of dimensions.r*   r*   r*   r*   r+   rM      rT   zlinalg_cross.<locals>.<lambda>   c                      s"   d  d   d   S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and sizer*   ri   otherry   r*   r+   rM      s
   )ndimrA   rP   r   r   rx   rw   )ry   r   ri   Zx_dZy_d	out_shaper*   r   r+   linalg_cross   s   
r   c                 C   s$   t | d t| d tj| tjdS )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexrA   
empty_likecontiguous_formatry   r*   r*   r+   linalg_matrix_exp   s   

r   valuesindicesc                 C   sV   t j| j| j| jd}t j| j| jt jd}|  dkr'| jdkr't|| j ||fS )Nrf   rH   r   )	rA   rl   rx   rf   rH   int64rv   r   maybe_wrap_dim)ry   ri   r   r   r*   r*   r+   	cummaxmin   s
   r   c                 C   s   t || j t|  S r%   )r   r   rA   r   
contiguous)ry   ri   r*   r*   r+   logcumsumexp   s   r   c                    s  |j }t|}|| }tt|}dd t|D }	|D ]}
d|	|
< qg g }}|D ]}
|	|
 s6||
 q*||
 q*|| }t|}|  |d | }|j fdddd |||d   }||}dgt|j|d   }|	|}|
d}||d< |}tt|D ]}|||  ||d	 < q| 	|} d
d t|D }d	}|d	 }|dkr|| d ||| < ||||  9 }|d	8 }|dkst||D ]}| d	||  ||| < q| |||  S )Nc                 S      g | ]}d qS Fr*   r6   r?   r*   r*   r+   r:      rN   z_exec_fft.<locals>.<listcomp>Tc                        |  S r%   r*   r7   Zself_stridesr*   r+   rM          z_exec_fft.<locals>.<lambda>keyreverser{   r      c                 S   r   r   r*   r   r*   r*   r+   r:     rN   )r   lenlistrangeappendstridesortpermuterx   Zreshaper   
as_stridedstorage_offset)outry   	out_sizesri   forwardr   Zsignal_ndim
batch_dimsZdim_permuteZis_transformed_dimdleftrightZ	batch_endtmpinputZbatched_sizes
batch_sizeZbatched_out_sizesiZout_stridesZbatch_numelr*   r   r+   	_exec_fft   sL   





r   c                    sb   | j jsJ | j}| |}|s|S |d d  }|   |j fdddd t|| |||}|S )Nc                    r   r%   r*   r   r   r*   r+   rM     r   zmeta_fft_c2c.<locals>.<lambda>Tr   )rH   
is_complexrx   rw   r   r   r   )ry   ri   normalizationr   r   outputZsorted_dimsr*   r   r+   meta_fft_c2c  s   
r   c                 C   sR   | j jsJ t|  }|r|d }|| d d }|||< | j|t| j dS )Nr{      r   rp   )rH   is_floating_pointr   r   rw   r<   r   )ry   ri   r   Zonesidedoutput_sizesZlast_dimZlast_dim_halfsizer*   r*   r+   meta_fft_r2c"  s   r   )	generatorc                C   s   t |t| gS r%   )r   rA   Size)nr   r   r*   r*   r+   meta_randperm2  s   r   rH   re   rf   rg   c                C      t j| ||||dS Nr   rA   rl   )r   rH   re   rf   rg   r*   r*   r+   meta_randperm_default7     
r   c                C   s   t j|||||dS r   r   )highr   rH   re   rf   rg   r*   r*   r+   meta_randint@  r   r   c                C   s   t j|||||dS r   r   )lowr   r   rH   re   rf   rg   r*   r*   r+   meta_randint_lowI  s   
r   c                C   r   r   r   )r   rH   re   rf   rg   r*   r*   r+   meta_rand_defaultY     
r   c                 C   s8   | j jsJ t|  }|||d < | j|t| j dS )Nr{   rp   )rH   r   r   r   rw   rI   )ry   ri   r   Zlastdimr   r*   r*   r+   meta_fft_c2r`  s   r   c                 C   sR   t | dkrtdt|tr'|| |}|  | kr'tj	||   | S )Nr   zQmore than one element of the written-to tensor refers to a single memory location)
rA   Z_debug_has_internal_overlapRuntimeErrorrV   r   tor   r"   Zexpand_copydefault)ry   srcZnon_blockingZintermediater*   r*   r+   
meta_copy_i  s   
r   c                 C   sX   t |  }t |  }||  krdn|| ||  }||d ||| ||fS Nr   )r   r   r   ri   insert)tensorri   Zresult_sizesZresult_strides
new_strider*   r*   r+   inferUnsqueezeGeometry|  s    r   c                 C   s0   t ||  d }t| |\}}| || | S r   )r   ri   r   as_strided_)ry   ri   Zg_sizesZ	g_stridesr*   r*   r+   meta_unsqueeze_  s   r   r   weight_metabias_activation_optc                 C   s   t | j}|d ur|d|dksJ d|d| dd ks%J |d|d< t| jdks7J dd| df}| j|| jtjkrJ| jntjd	||}|S )Nr   zoutput size mismatchr   r{   r   z*we can only handle the squashed input caserp   )
r   rx   r   r   rw   rH   rA   int8int32r   )r   r   r   r   r   r   Ztransposed_stridesr   r*   r*   r+   meta_sparse_structured_linear  s   
r   compressed_Adense_Balpha	out_dtypetranspose_resultc                 C   s   |j tjtjtjhv sJ d| j |j ksJ dt|jdks$J d| j tjk}|r.dnd}|d}|d}	|  d	 ||  }
|d urQ|
|dksQJ |d ur`|r\|tjks`J d
|rf|	|
fn|
|	f}|j	||d}|S )Nz2_cslt_sparse_mm only supports fp16, bf16, and int8zinputs must have the same dtyper   z'_cslt_sparse_mm only supports 2d inputs
   	   r   r      z1out_dtype is only supported for i8i8->fp16 matmulrp   )
rH   rA   Zfloat16bfloat16r   r   rx   r   rv   rw   )r   r   r   r   r   r   Zis_int8_input_typeZcompression_factorkr   moutput_shaperesultr*   r*   r+   meta__cslt_sparse_mm  s2   	

r   T)include_selfry   ri   rr   sourcereducer   returnc                C   s   t j| t jdS Nr   )rA   r   r   ry   ri   rr   r   r   r   r*   r*   r+   meta_index_reduce  s   
r   c                C      | S r%   r*   r   r*   r*   r+   meta_index_reduce_  s   
r   c                 C   s.   t |  }|  dkr| ||< | |S )Nr   )r   r   ri   rv   rw   )ry   ri   rr   result_sizer*   r*   r+   meta_index_select  s   
r   )lengthsr   offsetsaxisunsafeinitialdatar   r   r   r   c          
         sf   |d urt d fdd}|d ur||jS |d ur/|jd d |jd d f }	||	S td)Nz?segment_reduce(): indices based reduction is not supported yet.c                    s(   t j| j d d   jdt jdS )Nr   rd   rH   rf   r   )rA   rl   rx   rH   r   )lengths_shaper   r  r*   r+   segment_reduce_lengths_tensor  s   z:meta_segment_reduce.<locals>.segment_reduce_lengths_tensorr{   r   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorrx   r   )
r  r   r   r   r   r   r   r   r  r  r*   r  r+   meta_segment_reduce  s   
r  c                 C   
   |  dS Nr*   rw   r   r*   r*   r+   meta_max     
r  c                 C   6   t | j|f}t| ||}| || j|tjdfS Nrp   r<   reduction_dimsrx   _compute_reduction_shaperw   rA   rt   ry   ri   keepdimr   r*   r*   r+   meta_max_dim  
   r  c                 C   r  r	  r
  r   r*   r*   r+   meta_min$  r  r  c                 C   r  r  r  r  r*   r*   r+   meta_min_dim*  r  r  c                 C   s4   |   r
t| j}n	t| tjd\}}tj| |dS )Nr4   rp   )r   r   rH   r   r   INT_TO_FLOATrA   r   )ry   r9   r?   r*   r*   r+   
meta_angle4  s   
r  c                 C   s$   t ||  | j |t | S r%   )rA   Z_resize_output_r   rf   copy_angle)ry   r   r*   r*   r+   meta_angle_out@  s   r  c                 C      d S r%   r*   )valr*   r*   r+   assert_asyncF     r   c                 C   r  r%   r*   )r  
assert_msgr*   r*   r+   assert_async_metaK  r!  r#  rH   re   rf   rg   r   c                 C   s   t jg ddS )Nrd   rf   r   r$  r*   r*   r+   make_dep_tokenP  s   	r&  c                 C   4   ddl m} t| ttfrtd|| ||d d S )Nr   )constrain_range'Constraining SymFloat or Symbool is nyiminmax)%torch.fx.experimental.symbolic_shapesr(  rV   r
   r	   
ValueError)r   r+  r,  r(  r*   r*   r+   sym_constrain_range\     r/  c                 C      t j| ||d |S Nr*  )r"   r/  r   r+  r,  	dep_tokenr*   r*   r+   functional_sym_constrain_rangef     r5  c                 C   r'  )Nr   )_constrain_range_for_sizer)  r*  )r-  r7  rV   r
   r	   r.  )r   r+  r,  r7  r*   r*   r+   sym_constrain_range_for_sizel  r0  r8  c                 C   r1  r2  )r"   r8  r3  r*   r*   r+   'functional_sym_constrain_range_for_sizev  r6  r9  c                 C   s   |S r%   r*   )r  r"  r4  r*   r*   r+   functional_assert_async_meta|  r!  r:  f_namec                 C   sX   |   dksJ | d| d| dks*J | d| d d| d dd S )Nr   z3: The input tensor must have at least 2 dimensions.r{   z5: A must be batches of square matrices, but they are  by 	 matrices)ri   r   )ry   r;  r*   r*   r+   r     s    r   Anamec                    s   t j jk fdd t j jk fdd t  d dk fdd t  ddk fdd d S )Nc                         dj  d j  dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.r%  r*   r?  ry   r*   r+   rM     
   z(linearSolveCheckInputs.<locals>.<lambda>c                      rA  )Nz=Expected b and A to have the same dtype, but found b of type z and A of type rB  rp   r*   rC  r*   r+   rM     rD  r{   r<  c                      s   d  d d  d dS )Nz3A must be batches of square matrices, but they are r<  r=  r{   r>  r   r*   r?  r*   r+   rM     s
   c                      s:   d d  d d  d d d d d 
S )NzIncompatible matrix sizes for z: each A matrix is r{   r=  z but each b matrix is r<  r   r*   r?  r@  ry   r*   r+   rM     s   )rA   rP   rf   rH   r   )ry   r?  r@  r*   rF  r+   linearSolveCheckInputs  s    


rG  tallow_low_precision_dtypesc                    s^   | j  t|  p|   fdd |s-t tjtjtjtjfv  fdd d S d S )Nc                          d  S )Nz<: Expected a floating point or complex tensor as input. Got r*   r*   rH   r;  r*   r+   rM         z(checkFloatingOrComplex.<locals>.<lambda>c                      rJ  )Nz*: Low precision dtypes not supported. Got r*   r*   rK  r*   r+   rM     rL  )	rH   rA   rP   r   r   rD   rF   rC   rE   )rH  r;  rI  r*   rK  r+   r     s   r   arg_namec                    s"   t |  dk fdd d S )Nr   c                          d  dS )Nz: The input tensor z! must have at least 2 dimensions.r*   r*   rM  r;  r*   r+   rM     rN   zcheckIsMatrix.<locals>.<lambda>)rA   rP   ri   )r?  r;  rM  r*   rO  r+   checkIsMatrix  s   
rP  Br   c                    sZ   t   t tr ddkn	 ddk fdd d S )Nr<  r{   c                      sH    drdnd d  d d  d d d d d d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (r<  r7   r{   r~   r^   r   r*   r?  rQ  r;  r   r*   r+   rM     s   
z#checkInputsSolver.<locals>.<lambda>)r   rP  rA   rP   r   )r?  rQ  r   r;  r*   rR  r+   checkInputsSolver  s   

*rS  r   fn_nameresult_namec                    s&   t jjk fdd d S )Nc                	      s$     d d dj  dj  	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on r%  r*   rT  r   r   rU  r*   r+   rM     s   z!checkSameDevice.<locals>.<lambda>)rA   rP   rf   )rT  r   r   rU  r*   rV  r+   checkSameDevice  s   
rW  UPLOc                    s8      }tt dko|dkp|dk fdd d S )Nr   ULc                      
   d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r*   r*   rX  r*   r+   rM        
 zcheckUplo.<locals>.<lambda>)upperrA   rP   r   )rX  ZUPLO_uppercaser*   r\  r+   	checkUplo  s
   
r_  eigenvaluesZeigenvectorsrZ  	compute_vc                 C   sp   t | d t| t| j}|r | |}||t|dd n| dg}|  | j|t| j	d}||fS )Nzlinalg.eighFZ	row_majorr   rp   )
r   r_  r   rx   rw   r   r   poprI   rH   )r?  rX  ra  rx   Zvecsvalsr*   r*   r+   meta__linalg_eigh  s   


re  r   c                 C   s   | j jtjdddS )Nr   r<  r{   )ZmTclonerA   r   	transpose)r   r*   r*   r+   cloneBatchedColumnMajor
  s   rh  r^  c                 C   s   t | S r%   )rh  )ry   r?  r^  r*   r*   r+   _cholesky_solve_helper  s   ri  c                    sP   t jdkfdd t  jdk fdd t d\}}t|||S )Nr   c                         d j  dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r*   r   r*   r+   rM     rL  z cholesky_solve.<locals>.<lambda>c                      rj  )Nz-u should have at least 2 dimensions, but has rk  rl  r*   rE  r*   r+   rM     rL  cholesky_solve)rA   rP   r   !_linalg_broadcast_batch_dims_nameri  )ry   r?  r^  Zself_broadcastedZA_broadcastedr*   rC  r+   rm    s   

rm  c                 C   s.   |   dkrtj| tjdS t| d t| S )Nr   r   cholesky)rv   rA   r   legacy_contiguous_formatr   rh  ry   r^  r*   r*   r+   ro  %  s   
ro  c                 C   s   t | d t| S )Ncholesky_inverse)r   rh  rq  r*   r*   r+   rr  .  s   
rr  check_errorsc                 C   sf   t | d t| d | j}t|}t|d}| |}||| | j|d|d  tjd}||fS )Nzlinalg.choleskyFr   r   rp   )	r   r   rx   r   r   rw   r   rA   r   )r?  r^  rs  ZA_shaper   Z	L_stridesrZ  infosr*   r*   r+   linalg_cholesky_ex6  s   



ru  tauc                    s  t jdkdd  t ddkdd  t ddkdd  t jj dkfd	d jdkr[jd d }jd d  t  |k fd
d t jjkfdd tdd t jjtjddjj	dS )Nr   c                   S   rR   )NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r*   r*   r*   r*   r+   rM   O  rT   z,linalg_householder_product.<locals>.<lambda>r<  r{   c                   S   rR   )Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r*   r*   r*   r*   r+   rM   S  rT   c                   S   rR   )Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r*   r*   r*   r*   r+   rM   W  rT   r   c                         dj  d j  S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to rl  r*   r   rv  r*   r+   rM   \  
   c                      r[  )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r*   r*   actual_batch_tau_shaper*   r+   rM   f     c                      rw  )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype rp   r*   ry  r*   r+   rM   n     
z torch.linalg.householder_productrv  Frb  r   r   rH   rf   )
rA   rP   r   r   rx   rH   rW  empty_stridedr   rf   )r   rv  Zexpected_batch_tau_shaper*   )r|  r   rv  r+   linalg_householder_productH  sD   


r  c                 C   s^   t | d t| ddd | | j}|| jt| jdd | j| jd d tjd}||fS )Nzlinalg.inv_exF)rI  rb  r<  rp   r   r   rw   rx   r   r   rA   r   )r?  rs  rZ  rt  r*   r*   r+   linalg_inv_ex_meta~  s   
r  LDpivotsinfo)	hermitianrs  r  c                C   st   t | d t| d tj| jt| jdd| j| jd}| j| jd d tj	d}| j| jd d tj	d}|||fS )Nztorch.linalg.ldl_factor_exFrb  r  r{   rp   r<  )
r   r   rA   r  rx   r   rH   rf   rw   int)ry   r  rs  r  r  r  r*   r*   r+   linalg_ldl_factor_ex_meta  s   


r  )r  c                   s   t d td t d t jdk fdd jd d }t|jkfdd ttj	fdd tj	 j	k fdd t
 \}}tj|t|d	d
 j	 jdS )Nztorch.linalg.ldl_solver   c                      rj  )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has rk  rl  r*   )rQ  r*   r+   rM        z'linalg_ldl_solve_meta.<locals>.<lambda>r{   c                      rj  )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadrx   r*   r  r*   r+   rM     r  c                      ro   )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got rp   r*   r  r*   r+   rM     rs   c                      rw  )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype rp   r*   )rQ  r  r*   r+   rM         Frb  r  )r   r   rG  rA   rP   r   rx   r<   is_integer_dtyperH   _linalg_broadcast_batch_dimsr  r   rf   )r  r  rQ  r  Zexpected_pivots_shapeB_broadcast_sizer?   r*   )rQ  r  r  r+   linalg_ldl_solve_meta  s6   







r  PrY  )pivotr  c          	         s   t  jdk fdd t j}|d }|d }t||}||d< |r+ |}n dg}||d<  |}||d< ||d<  |}|||fS )Nr   c                      rj  )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: r  r  r*   rE  r*   r+   rM     rL  z linalg_lu_meta.<locals>.<lambda>r<  r{   r   )rA   rP   r   r   rx   r+  rw   )	r?  r  sizesr   r   r   r  rZ  rY  r*   rE  r+   linalg_lu_meta  s$   





r  LU)r  rs  c          	         s   t  jdk fdd t j}|d }|d }t j|t|dd j jd}|	  t
|||d<  j|t jd	}|	   j|t jd	}|||fS )
Nr   c                      rj  )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: r  r  r*   rE  r*   r+   rM     rL  z*linalg_lu_factor_ex_meta.<locals>.<lambda>r<  r{   Frb  r  rp   )rA   rP   r   r   rx   r  r   rH   rf   rc  r+  rw   r  )	r?  r  rs  r  r   r   r  r  r  r*   rE  r+   linalg_lu_factor_ex_meta  s&   



r  )r   adjointr  c                   s   t d tj jk fdd tjtjkdd  td t |d tddkdd  tjd d jkfdd t	 \}}tj
|t|| d	 j jd
}| dkru|su| ru| }|S )Nztorch.linalg.lu_solvec                      rA  )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type r  rp   r*   )rQ  r  r*   r+   rM     rD  z&linalg_lu_solve_meta.<locals>.<lambda>c                   S   rR   )NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r*   r*   r*   r*   r+   rM     rT   zlinalg.lu_solver{   c                   S   rR   )NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr*   r*   r*   r*   r+   rM   !  rT   c                      rj  )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape r  r  r*   r  r*   r+   rM   '  r  rb  r  r   )r   rA   rP   rH   r  r   rS  r   rx   r  r  r   rf   rv   r   Zconj)r  r  rQ  r   r  r  r?   r   r*   )rQ  r  r  r+   linalg_lu_solve_meta  s<   




r  unpack_dataunpack_pivotsc                    s   t  jdk fdd |rt |jt jkdd  t j}|d }|d }t||}||d< |r9 |}n dg}|rX||d<  |}	||d< ||d<  |}
n dg}	 dg}
||	|
fS )Nr   c                      rj  )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: r  r  r*   r  r*   r+   rM   G  rL  z lu_unpack_meta.<locals>.<lambda>c                   S   rR   )Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr*   r*   r*   r*   r+   rM   L     r<  r{   r   )	rA   rP   r   rH   r   r   rx   r+  rw   )r  r  r  r  r  r   r   r   r  rZ  rY  r*   r  r+   lu_unpack_meta=  s4   





r  modec                    sd    dkrd}d}||fS  dkrd}d}||fS  dkr$d}d}||fS t d fdd ||fS )NreducedTZcompleteFrc                         d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r*   r*   r  r*   r+   rM   t  s   z _parse_qr_mode.<locals>.<lambda>rA   rP   )r  	compute_qr  r*   r  r+   _parse_qr_modeg  s"   	
r  QRr  c                 C   s   t | d t| d t|\}}| jd }| jd }t||}|r>t| j}|r*|n||d< | |}||t|dd n| dg}t| j}	|sM|sO|n||	d< | |	}
|
|	t|	dd ||
fS )Nz	linalg.qrr<  r{   Frb  r   )	rP  r   r  rx   r+  r   rw   r   r   )r?  r  r  Zreduced_moder   r   r   ZQ_shaper  ZR_shaper  r*   r*   r+   linalg_qr_meta|  s"   








r  sign	logabsdetc                 C   s   t | d t| dd | j}| |d d }| j|d d t| jd}tj|t|d| j| j	d}| j|d d tj
d}||||fS )Nzlinalg.slogdetFr<  rp   r  r{   )r   r   rx   rw   rI   rH   rA   r  r   rf   r   )r?  rx   r  r  r  r  r*   r*   r+   _linalg_slogdet  s   
r  full_matrices
compute_uvdriverc                 C   s   t | d t| d t| jd d }| jd }| jd }t||}|r]|||r*|n|g }| |}	|	|t|dd ||rB|n||g }
| |
}t| dk}||
t|
|d n| dg}	| dg}| j||g t	| j
d}|	||fS )	Nz
linalg.svdr<  r{   Frb  cudar   rp   )rP  r   r   rx   r+  rw   r   r   device_hintrI   rH   )r?  r  r  r  r   r   r   r   ZU_shaperY  ZV_shapeVZis_cudaSr*   r*   r+   _linalg_svd_meta  s$   







r  arg1arg2c                 C   sn   | j d d }|j d d }t||}t|}|| d| dg7 }t|}||d|dg7 }||fS )Nr<  r{   )rx   r   r   r   )r  r  Zarg1_batch_sizesZarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizer*   r*   r+   r    s   
r  c                 C   sV   |rt | || t| |\}}|| jkr| n| |}||jkr"|n||}||fS r%   )rG  r  rx   expand)r  r  r@  r  r  Zarg1_broadcastedZarg2_broadcastedr*   r*   r+   rn    s   rn  r   c                 C   s6   | j d d }|jdkp| jd |jko|j |k}|S )Nr{   r   )rx   r   )r   r   Zexpected_batched_rhs_shapevector_caser*   r*   r+   linalg_solve_is_vector_rhs  s
   
r  )r   rs  r   r  r  r  c                   sn  t  d t jjk fdd t }|r dn}	t |	|d t|	 \}
}t|p6| dd  |rC|
d d n|
}tj|t	|| jj
d} j} j}tj|t	|d j j
d} j|d d tjd} j|d d	 tjd}||||f}||||f}td
d |D rt||D ]\}}t||j ||j|  t||dd q|S )Nzlinalg.solvec                      s   d j  dj  dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r  r  rp   r*   r?  rQ  r*   r+   rM     rD  z"_linalg_solve_ex.<locals>.<lambda>r{   c                   S   rR   )Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r*   r*   r*   r*   r+   rM     r  r  Frp   r<  c                 s   s    | ]}|d uV  qd S r%   r*   r5   r*   r*   r+   rY   1      z#_linalg_solve_ex.<locals>.<genexpr>)	copy_fromcopy_toZexact_dtype)r   rA   rP   rH   r  	unsqueezerS  r  r  r   rf   rx   r   rw   r   allzipr   r   r   r   )r?  rQ  r   rs  r   r  r  r  r  B_ZB_broad_shaper?   Zresult_shapeZresult_rx   r   ZLU_Zpivots_Zinfo_r   resr  or*   r  r+   _linalg_solve_ex  sL   



r  )r   unitriangularr   r  r   c          	      C   s   |d u r
|  dg}t|tsJ t| ||d t|| d \}}|dd o+| }|r6t||j	}|S t
||j	rL||ddj	 |dd |S )Nr   zlinalg.solve_triangularr<  r{   )rw   rV   r   rS  rn  rg  is_contiguousZis_conjr   rx   r   Zresize_
transpose_)	r?  rQ  r^  r   r  r   r  ZA_Zavoid_copy_Ar*   r*   r+   linalg_solve_triangular_meta;  s   
r  solutioncloned_coefficientrg  c           	         s   t jdkfdd t  jdk fdd t d  jt jkrOt \}}t j|t|ddj	j
d}t j|t|dd j	 j
d}||fS  jt jks[ jt jkrjt }d	g}||fS t dd
d  ||fS )Nr   c                      rj  )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has rk  rl  r*   r   r*   r+   rM   `  r  z'triangular_solve_meta.<locals>.<lambda>c                      rj  )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has rk  rl  r*   rE  r*   r+   rM   g  r  triangular_solveFrb  r  r   c                   S   rR   )Nz+triangular_solve: Got an unexpected layout.r*   r*   r*   r*   r+   rM     rT   )rA   rP   r   rG  re   stridedr  r  r   rH   rf   
sparse_csr
sparse_bsrr   rw   )	ry   r?  r^  rg  r  Zself_broadcast_sizeZA_broadcast_sizer  r  r*   rC  r+   triangular_solve_metaU  s<   	




r  c                 C   sp   t | d t| d | | jd d }| | j}|| jt| jdd | j| jd d tjd}|||fS )Nz
linalg.detr<  Frb  r{   rp   r  )r?  Zdetr  r  r*   r*   r+   _linalg_det_meta  s   


r  c                    s  t jdkdd  t jdkdd  |rdndt j jd kfdd t j jd kfdd t jd jd kd	d  t jj d
kfdd t jjkfdd jdkrjd d }jd d t |kfdd jd d  t  |k fdd t jjkfdd t jjkfdd tdd tdd t jjtjddjjdS )Nr   c                   S   rR   )Nz3torch.ormqr: input must have at least 2 dimensions.r*   r*   r*   r*   r+   rM     rT   zormqr.<locals>.<lambda>c                   S   rR   )Nz3torch.ormqr: other must have at least 2 dimensions.r*   r*   r*   r*   r+   rM     rT   r<  r{   c                      r  )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r*   r*   left_size_conditionr*   r+   rM     rs   c                      r  )Nr  z"] must be equal to input.shape[-2]r*   r*   r  r*   r+   rM     rs   c                   S   rR   )NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r*   r*   r*   r*   r+   rM     rT   r   c                      rw  )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to rx  rl  r*   ry  r*   r+   rM     rz  c                      rw  )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to rx  rl  r*   r   r   r*   r+   rM     rz  c                      r[  )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r*   r*   r{  r*   r+   rM     r}  c                      r[  )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r*   r*   )actual_batch_other_shaper*   r+   rM     r}  c                         d j  dj  S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype rp   r*   ry  r*   r+   rM     rz  c                      r  )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype rp   r*   r  r*   r+   rM     rz  ztorch.ormqrrv  r   Frb  r  )	rA   rP   r   rx   rH   rW  r  r   rf   )r   rv  r   r   rg  Zexpected_batch_shaper*   )r  r|  r   r  r   rv  r+   ormqr  sn   	







r  c                   s   t td  k fdd j}| d k}|}| }|r3td|D ]}|o0|dk}q&ntd|D ]}|oB|dk}q8t |pI| fdd d S )Nr   c                      s   dd   dt  S )Nzpadding size is expected to be r   z, but got: r   r*   )ri   paddingr*   r+   rM         z,_padding_check_valid_input.<locals>.<lambda>r   r   c                      s    d d  d d  dj  S )Nz	Expected r   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r  r*   )ri   r   r*   r+   rM     s   )rA   rP   r   r   r   r   )r   r  ri   Z	input_dimZis_batch_modeZvalid_batch_modeZvalid_non_batch_moder   r*   )ri   r   r  r+   _padding_check_valid_input  s$   r  c                   s   d}d d}j dkrd} d7  |d7 }t|dd |\|}   |rHtk o>k  fdd tdkfdd j dkra|fS ||fS )	Nr   r   r}   r|   c                         d d d  dj  S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (r]   ) at dimension 
 of input r  r*   dim_wr   pad_lpad_rr*   r+   rM   $     z_pad1d_common.<locals>.<lambda>c                      r[   )Nz
input (W: z%) is too small. Calculated output W: r*   r*   )input_woutput_wr*   r+   rM   ,  rN   r   )r   r   r  rA   rP   rw   )r   r  is_reflection	dim_planenbatchnplaner*   )r  r   r  r  r  r  r+   _pad1d_common  s0   




r  c                 C      t | |ddS NTr  r  r   r  r*   r*   r+   meta_reflection_pad1d5     r  c                 C   r  NFr  r  r  r*   r*   r+   meta_replication_pad1d;  r  r  c                   s   d |st t|dkdd  jdkr d7  |\ }|  |r=t |k o3|k  fdd t  k fdd jS )Nr   r   c                   S   rR   )Nz padding size is expected to be 2r*   r*   r*   r*   r+   rM   D  rT   z(_pad1d_backward_common.<locals>.<lambda>r}   c                      r  r  r  r*   r  r*   r+   rM   Q  r  c                         d d   S Nz(grad_output width unexpected. Expected: , Got: r   r*   r  grad_outputr  r*   r+   rM   Y  r;   rA   rP   r   r   r   rw   rx   )r  r   r  r  r  r*   )r  r  r   r  r  r  r+   _pad1d_backward_commonA  s$   

r   
grad_inputc                 C      t | ||ddS r  r   r  r   r  r*   r*   r+   meta_reflection_pad1d_backward_     r  c                 C   r  r  r  r  r*   r*   r+   meta_replication_pad1d_backwarde  r  r  c                   s2  dd d}d}t |dd j}|dkr'd}d7  d7  |d7 }|\	
|} 
   	 |rptk oS	k 	fdd t
k ofk  
fdd tdkpydkfd	d jd
kr|fS ||fS )Nr   r   r   r|      c                      r  r  r  r*   r  r*   r+   rM     r  z_pad2d_common.<locals>.<lambda>c                         d d d  dj  S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (r]   r  r  r  r*   dim_hr   pad_bpad_tr*   r+   rM     r  c                      s   d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r*   r*   )input_hr  output_hr  r*   r+   rM     s
   r}   r  r   r   rA   rP   rw   )r   r  r  Z
dim_slicesr  r   r  r*   )r  r  r   r  r  r  r  r  r  r  r  r+   _pad2d_commonk  sB   




r  c                 C   r  r  r  r  r*   r*   r+   meta_reflection_pad2d  r  r  c                 C   r  r  r  r  r*   r*   r+   meta_replication_pad2d  r  r  c                    s   dd d}d}|j }| dkr!|d }d7  d7  |d7 }|\}}}}	|| }
|  }| }|| |	 || | tkfdd t k fdd ||j S )Nr   r   r   r  c                      r  r  r   r*   r  r*   r+   rM     r;   z%meta_pad2d_backward.<locals>.<lambda>c                      r  Nz)grad_output height unexpected. Expected: r  r   r*   r  r  r  r*   r+   rM     r;   )rx   ri   rA   rP   r   rw   )r  ry   r  r  r  rL   r  r  r  r  r  r  r  r*   )r  r  r  r  r  r+   meta_pad2d_backward  s2   
r  c             	      s  ddd d}t |dd jdk}|r+d}d7 d7  d7  |d7 }|\
|}    
   	|rtk odk fdd tk ow
k 
fd	d tk ok  fd
d t	dkpdkpdk	fdd |r||	fS |	fS )Nr}   r   r   r   r|      c                      r  r  r  r*   r  r*   r+   rM     r  z_pad3d_common.<locals>.<lambda>c                      r	  r
  r  r*   r  r*   r+   rM     r  c                      r	  )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (r]   r  r  r  r*   )dim_dr   pad_bkpad_fr*   r+   rM      r  c                      s(   d  d d d d d S )Nz
input (D:  H: r  z%) is too small. Calculated output D: r*   r*   )input_dr  r  output_dr  r  r*   r+   rM     s   r  )r   r  r  r  Z
batch_moder  r  r*   )r  r  r  r   r  r  r  r   r  r  r  r  r  r  r  r  r+   _pad3d_common  sP   





r!  c                 C   r  r  r!  r  r*   r*   r+   meta_reflection_pad3d  r  r#  c                 C   r  r  r"  r  r*   r*   r+   meta_replication_pad3d  r  r$  c                    s(  t t|dkdd  |jdksJ j|jksJ ddd |jdkr2d7 d7  d7  |\}}}}}}| }	|}
|}|	| | |
| | || | t kfdd t kfd	d t  k fd
d ||jS )N   c                   S   rR   )Nz padding size is expected to be 6r*   r*   r*   r*   r+   rM   *  rT   z%meta_pad3d_backward.<locals>.<lambda>r}   r   r   r  c                      r  r  r   r*   r  r*   r+   rM   B  r;   c                      r  r  r   r*   r  r*   r+   rM   F  r;   c                      r  )Nz(grad_output depth unexpected. Expected: r  r   r*   )r  r  r   r*   r+   rM   J  r;   r  )r  r   r  r  r  r  r  r  r  r  r  r  r*   )r  r  r  r  r   r  r  r+   meta_pad3d_backward   s<   




r&  r   pc                 C   s^   t |  dd  | d}|dkr| dgjt jdS | ||d  d fjt jdS )Nc                   S   rR   )Nz(_pdist_forward requires contiguous inputr*   r*   r*   r*   r+   rM   T  rT   z%meta__pdist_forward.<locals>.<lambda>r   r   r   r   )rA   rP   r  r   rw   r   rp  )ry   r'  r   r*   r*   r+   meta__pdist_forwardP  s   
r(  gradpdistc                 C   s8   t | dd  t | dd  t j|t jdS )Nc                   S   rR   )Nz._pdist_backward requires self to be contiguousr*   r*   r*   r*   r+   rM   c  rT   z&meta__pdist_backward.<locals>.<lambda>c                   S   rR   )Nz/_pdist_backward requires pdist to be contiguousr*   r*   r*   r*   r+   rM   f  rT   r   )rA   rP   r  r   rp  )r)  ry   r'  r*  r*   r*   r+   meta__pdist_backward_  s   r+  r   )betar   c          	         s     d}  d} d}|||ft  dkdd  t dkdd  tj j  ko=jkn   fdd  j}j|d |d td kocd kfd	d   S )
Nr   r   r   r}   c                   S   rR   Nzbatch1 must be a 3D tensorr*   r*   r*   r*   r+   rM   r  rT   zmeta_baddbmm.<locals>.<lambda>c                   S   rR   Nzbatch2 must be a 3D tensorr*   r*   r*   r*   r+   rM   s  rT   c                      s   dj  d j  dj  S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: rp   r*   )batch1batch2ry   r*   r+   rM   v      c                	      &   d d d d  d d  d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [r]   z] but got: [r   r   ].r*   r*   batch2_sizesbscontraction_sizer*   r+   rM   ~  s   )r   r  rA   rP   ri   rH   rx   rw   )	ry   r/  r0  r,  r   dim1dim2Zdim3batch1_sizesr*   )r/  r0  r6  r7  r8  ry   r+   meta_baddbmmk  s&   


r<  c                C      t |  S r%   rA   r   r   )ry   r   r*   r*   r+   meta_bernoulli  s   r?        ?c                 C   r   r%   r*   ry   r'  r   r*   r*   r+   meta_bernoulli_  r!  rB  c                 C   r=  r%   r>  rA  r*   r*   r+   meta_bernoulli_p  r  rC  c                 C   s6   t |
|  k dd  t j| t jd}t | |fS )Nc                   S   rR   )NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r*   r*   r*   r*   r+   rM     rT   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>rp   )rA   rP   ri   r   bool)ry   Zobserver_onZfake_quant_onZrunning_minZrunning_maxscaleZ
zero_pointZaveraging_constZ	quant_minZ	quant_maxZch_axisZper_row_fake_quantZsymmetric_quantmaskr*   r*   r+   $meta__fused_moving_avg_obs_fq_helper  s   
rG  c                    sn   t |  dkdd  t | dkdd  | j\ |j\t  k fdd | S )Nr   c                   S   rR   )Nza must be 2Dr*   r*   r*   r*   r+   rM     rT   zmeta_mm.<locals>.<lambda>c                   S   rR   )Nzb must be 2Dr*   r*   r*   r*   r+   rM     rT   c                	      s   d d  d d d	S )Nz/a and b must have same reduction dim, but got [r]   z] X [r4  r*   r*   ZM1ZM2Nr  r*   r+   rM     s    )rA   rP   ri   rx   rw   abr*   rH  r+   meta_mm  s   

rM  c                    s0   |rt  fddtjD S tj S )Nc                 3   s&    | ]}| vrj | nd V  qdS )r   Nr  r6   r   dimsry   r*   r+   rY     s   $ z+_compute_reduction_shape.<locals>.<genexpr>)rO   r   r   r<   compute_reduction_output_shaperx   )ry   rP  r  r*   rO  r+   r    s   r  strc                 C   s   t | tjjr| jjS dS )Nr  )rV   rA   Z_subclassesZ
FakeTensorZfake_devicer_   )r   r*   r*   r+   r    s   r  input_tensorr   r  dilationis_transposedgroupsoutput_paddingc                 C   s  dt dt dt dt dt dt fdd}dt dt dt dt dt d	t dt fd
d}	|jdd  }
| jdd  }|r<||jd  }n|jd }|jd | | jd krQtd| jd |g}t|tre|gt| }nt|dkrt|d gt| }t|tr|gt| }nt|dkr|d gt| }t|tr|gt| }nt|dkr|d gt| }d }|rt|tr|gt| }nt|dkr|d gt| }n|}tt|D ]2}|r||	|| || || |
| || ||  q|||| || || |
| ||  q|S )Nlnr'  r   r   sr   c                 S   s$   | d|  ||d   d | d S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r   r   r*   )rX  r'  r   r   rY  r*   r*   r+   _formula  s   $z+calc_conv_nd_return_shape.<locals>._formular'   c                 S   s(   | d | d|  ||d   | d S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r   r   r*   )rX  r'  r   r   rY  r'   r*   r*   r+   _formula_transposed  s   (z6calc_conv_nd_return_shape.<locals>._formula_transposedr   r   r   zInvalid channel dimensions)r  rx   r   rV   r   r   r   r   )rS  r   r   r  rT  rU  rV  rW  rZ  r[  kernel_sizerP  Zout_channelsZ	ret_shapeZoutput_padding_listr   r*   r*   r+   calc_conv_nd_return_shape  sZ   "
&




"r]  c                 C      t j| t jkS r%   rA   _prims_commonsuggest_memory_formatchannels_lasttenr*   r*   r+   is_channels_last5     re  c	              	      sH    fdd}	t  ||||||r|nd }
 |
}|j|	 d}|S )Nc                      s^   t  dkrt strtjS nt rtjS  jtjdr#tjS  jtjdr-tjS d S Nr  r   )r  re  rA   rb  r  r   preserve_formatr*   rS  r   r*   r+   pick_memory_formatE  s   z%meta_conv.<locals>.pick_memory_formatr   )r]  rw   r   )rS  r   r   r   r  rT  rU  rW  rV  rj  	shape_outr   r*   ri  r+   	meta_conv9  s   

rl  mkldnnc
              	   C   s6   t | ||||d|g }
| |
}tj}|j|d}|S )NFr   )r]  rw   rA   rb  r   )rS  r   r   r  r   rT  rV  attrscalars	algorithmrk  r   Zout_memory_formatr*   r*   r+   meta_mkldnn_convolution_defaultf  s   
rq  c                 C   s$   |  g | jd d |jd R S Nr{   r   rw   rx   )rS  r   r   rn  ro  rp  r*   r*   r+   meta_linear_pointwise_default{  s   $rt  mklc                 C   s$   |  g | jd d |jd R S rr  rs  )rS  Zpacked_weightZorig_weightr   r   r*   r*   r+   meta_mkl_linear  s   rv  onednnc              	   C   sJ   t | ||||	d|
d }|tjtjfv sJ | j||d}|jtjd}|S )NFrp   r   )r]  rA   float32r   rw   r   rb  )r7   x_scalex_zpww_scalew_zpr   r   r  rT  rV  output_scaleoutput_zero_pointoutput_dtypern  ro  rp  rk  r   r*   r*   r+   meta_qconv2d_pointwise  s   
r  c                 C   s>   t | j}|jd |d< |	tjtjfv sJ | j||	d}|S )Nr   r{   rp   )r   rx   rA   rx  r   rw   )r7   ry  rz  r{  r|  r}  r   r~  r  r  Zpost_op_nameZpost_op_argsZpost_op_algorithmr   r   r*   r*   r+   meta_qlinear_pointwise  s
   
r  	quantizedr*   r   r   c                 C   sr   t | |||||\}}}|  dkr| dnd}	tj}
|  dkr(|||g}n|	|||g}tj|| j| j|
dS Nr  r   r}   r  )#max_pool2d_checks_and_compute_shaperi   r   rA   rb  rl   rH   rf   r   r\  r   r  rT  	ceil_modenInputPlaneoutputHeightoutputWidthr  r   r   r*   r*   r+   meta_quantized_max_pool2d  s$   r  c                    s4   t   koj k fdd d S )Nc                      s8   d  d d dd   d dj   S )NzExpected a tensor of dimension z and tensor.size[z] == r]   zbut got : dimension z] = ri   rx   r*   ri   dim_sizer   r   r*   r+   rM     s    z check_dim_size.<locals>.<lambda>)rA   rP   ri   rx   )r   ri   r  r   r*   r  r+   check_dim_size  s   r  c                 C   sb  dd }|d|\}}	t t|dv dd  t|dkr#||	}
}nt|dkr3|d |d }
}n|d	|\}
}|d
|\}}t |d u pJ|dkdd  |  dkrZ| dnd}| d}| d}| d}t||||
d|}t||	||d|}t| }t| ||	|
|||dd|||||| |  dkr|||g}n||||g}t j	|| j
| j|dS )Nc                    D   t t|dv  fdd |d }t|dkr|n|d }||fS )Nr   r   c                      r  )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr*   r*   r@  r*   r+   rM   	  rs   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>r   r   rA   rP   r   r@  r  HWr*   r  r+   unpack	     

zmeta_avg_pool2d.<locals>.unpackr\  r   r   r   c                   S   rR   NzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr*   r*   r*   r*   r+   rM   	  rT   z!meta_avg_pool2d.<locals>.<lambda>r   r   r   r  c                   S   rR   Nzdivisor must be not zeror*   r*   r*   r*   r+   rM   	  rT   r  r  r<  r{   r}   r  )rA   rP   r   ri   r   pooling_output_shaper<   ra  pool2d_shape_checkrl   rH   rf   )r   r\  r   r  r  count_include_paddivisor_overrider  kHkWdHdWpadHpadWr  r  inputHeight
inputWidthr  r  r   r   r*   r*   r+   meta_avg_pool2d  sb   
	




r  c                 C   sj   t | ||||||dd|	|
|||| |  }|	}t|||d | t|||d | t|||d | d S )Nr   r}   r   )r  ri   r  )r   Z
gradOutputr  r  r  r  r  r  r  r  r  r  r  r  
mem_formatr   nOutputPlaner*   r*   r+   avg_pool2d_backward_shape_checkI	  s,   r  c                 C   s  t t|dkpt|dkdd  |d }t|dkr|n|d }	t t|dkp5t|dkp5t|dkdd  t|dkrB|n|d }
t|dkrN|	nt|dkrV|
n|d }t t|dkpgt|dkdd  |d }t|dkrx|n|d }t |d u p|dkdd  |j}| d	kr|d
 nd}|d }|d }|d }t||||
d|}t||	||d|}t|}t|| |||	|
||||||||| t j	||j
|j|dS )Nr   r   c                   S   rR   )NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr*   r*   r*   r*   r+   rM   	  rT   z*meta_avg_pool2d_backward.<locals>.<lambda>r   c                   S   rR   r  r*   r*   r*   r*   r+   rM   	  rT   c                   S   rR   )NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr*   r*   r*   r*   r+   rM   	  rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM   	  rT   r  r  r  r<  r{   r  )rA   rP   r   rx   ri   r  r<   ra  r  rl   rH   rf   )ZgradOutput_r   r\  r   r  r  r  r  r  r  r  r  r  r  
input_sizer  r  r  r  r  r  r  r*   r*   r+   meta_avg_pool2d_backwardu	  sj   "(
r  c                 C   s
  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }	t | p2t|dv dd  |s;|n|d }
|sC|nt|dkrK|
n|d }|sS|	nt|dkr[|
n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t | jd	v d
d  t | p|dkdd  | d}| d}| d}| d}| d}t||||
d|}t||||d|}t||	||d|}t| ||||	|
|||||ddd||||||ddd | jdkr| ||||fS | |||||fS )Nr   r}   c                   S   rR   NzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   	  rT   z!meta_avg_pool3d.<locals>.<lambda>r   r   r   c                   S   rR   NzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   	  rT   c                   S   rR   NzBavg_pool3d: padding must be a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   	  rT   r  r  c                   S   rR   Nz9non-empty 4D or 5D (batch mode) tensor expected for inputr*   r*   r*   r*   r+   rM   	  rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM   	  rT   r  r  r<  r{   zavg_pool3d()T)check_input_sizer  )rA   rP   r   r   r   r  pool3d_shape_checkrw   )r   r\  r   r  r  r  r  kTr  r  dTr  r  padTr  r  r  nslicesitimeiheightiwidthotimeoheightowidthr*   r*   r+   meta_avg_pool3d	  s   
  






r  c                 C   s  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t |jd	v d
d  t | p|dkdd  |d}|d}|d}|d}t||||d|}t||	||d|}t||
||d|}t|| |||	|
||||||||||||d ||jS )Nr  c                   S   rR   r  r*   r*   r*   r*   r+   rM   "
  rT   z*meta_avg_pool3d_backward.<locals>.<lambda>r   r   r   c                   S   rR   r  r*   r*   r*   r*   r+   rM   *
  rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM   2
  rT   r  c                   S   rR   r  r*   r*   r*   r*   r+   rM   :
  rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM   ?
  rT   r  r  r<  r{   zavg_pool3d_backward())	rA   rP   r   r   r   r  avg_pool3d_backward_shape_checkrw   rx   )r  r   r\  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zotime_for_shape_checkZoheight_for_shape_checkZowidth_for_shape_checkr*   r*   r+   meta_avg_pool3d_backward
  st   
  




r  c                    sZ   t  jdkp jdk fdd  jd d t| }t }t j| j j	|dS )Nr}   r  c                      ro   )Nz"Expected 3D or 4D tensor, but got r  r*   r   r*   r+   rM   h
  rs   z*meta_adaptive_avg_pool2d.<locals>.<lambda>r<  r  )
rA   rP   r   rx   rO   r<   ra  rl   rH   rf   )ry   output_sizer   r   r*   r   r+   meta_adaptive_avg_pool2dd
  s   

r  c                    s@   t  jdkp jdk fdd   jd d t| S )Nr  r  c                      ro   )Nz"Expected 4D or 5D tensor, but got r  r*   r   r*   r+   rM   z
  rs   z*meta_adaptive_avg_pool3d.<locals>.<lambda>r  )rA   rP   r   rw   rx   rO   )ry   r  r*   r   r+   meta_adaptive_avg_pool3dv
  s
   
r  c                    s    j }td|D ]t dk fdd qt|dkp$|dkfdd tj jk fdd tj}trDtj}	j
j|d	S )
Nr   r   c                      s   d j  d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyr  r*   )grad_outr   r*   r+   rM   
  s
    z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>r}   r  c                      ro   )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r  r*   r   r*   r+   rM   
  rs   c                      rw  Nzexpected dtype z! for `grad_output` but got dtype rp   r*   )r  ry   r*   r+   rM   
  r  r   )r   r   rA   rP   r   rH   r   re  rb  rw   rx   r   )r  ry   r   r   r*   )r  r   ry   r+   "meta__adaptive_avg_pool2d_backward
  s$   

r  c                 C   s   t | d tj|tjdS )NZadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkrA   r   rp  r  ry   r*   r*   r+   "meta__adaptive_avg_pool3d_backward
  s   
r  r  c                    s<   j }td|D ]tdk fdd qd S )Nr   r   c                      s     dj  d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  r  r*   rM  r  r   r*   r+   rM   
  s
   z3_adaptive_pool_empty_output_check.<locals>.<lambda>)r   r   rA   rP   r   )r  rM  r   r*   r  r+   r  
  s   r  c                    s"  j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}j d	krGd}|d7 }|d }|\}}j d
krm|||f}|}	j|tjd}
|	|
fS ||||f}t	}|j
|d}	j|tjdj
|d}
|	|
fS )Nr}   r  c                      ro   )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r  r*   r   r*   r+   rM   
  rs   z*meta_adaptive_max_pool2d.<locals>.<lambda>r   r   c                         dj  d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r  r*   r   r   r*   r+   rM   
  
   r   c                   S   rR   )NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r*   r*   r*   r*   r+   rM   
  rT   r  r}   rp   r   )r   rA   rP   r   r   r   rw   r   r<   ra  r   )r   r  r   ZdimHsizeBsizeDosizeHosizeWr   r   r   r   r*   r  r+   meta_adaptive_max_pool2d
  sD   







r  c                    sd    j }t|dv  fdd t d tj jk fdd t}jj	|dS )Nr  c                      ro   )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r  r*   r  r*   r+   rM   
  rs   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>adaptive_max_pool2d_backwardc                      rw  r  rp   r*   r  r   r*   r+   rM   
  r  r   )
r   rA   rP   r  rH   r<   ra  rw   rx   r   )r  r   r   r   r   r*   r  r+   !meta_adaptive_max_pool2d_backward
  s   



r  c                    s   j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}|d	krFd}|d7 }|}|\}}}|d
kr[||||f}	n|||||f}	|	}
j|	tjd}|
|fS )Nr  c                      ro   )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r  r*   r  r*   r+   rM   
  rs   z*meta_adaptive_max_pool3d.<locals>.<lambda>r   r   c                      r  )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r  r*   r  r*   r+   rM   
  r  r}   c                   S   rR   )NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r*   r*   r*   r*   r+   rM      rT   r  r  rp   )r   rA   rP   r   r   r   rw   r   )r   r  r   ZdimDr  r  ZosizeTr  r  r   r   r   r*   r  r+   meta_adaptive_max_pool3d
  s8   





r  c                 C   s   t | d ||jS )Nadaptive_max_pool3d_backward)r  rw   rx   )r  r   r   r*   r*   r+   !meta_adaptive_max_pool3d_backward  s   
r  c                 C   s   |d u rt d| |S )Nz:cannot repeat_interleave a meta tensor without output_size)r   rw   )repeatsr  r*   r*   r+   meta_repeat_interleave_Tensor   s   
r  c                 C   s:   | j jsJ |j jsJ t| j|j}| j|t| j dS r  )rH   r   r   rx   rw   r   )realimagr   r*   r*   r+   meta_complex'  s   r  )
fill_valuer   r  c                C   s   | j ||  ftjdS r  )rw   ri   rA   rt   )ry   r   r  r*   r*   r+   nonzero_static0  s   r  c              
      s  t tdd  g }tD ]q\ d ur|t jt jt jt jt jfv dd  jt jt jfv rv }t	|t 
j jkfdd tjD ]#t 
j j  k fdd ||d qQq| q| q|t t	jkfdd dd lm} t|j t	jk rd  t	jk sd}d	}D ]|dkrǈd urd}q|dkr҈d u rd
}qd ur nqd}|sg }g }tD ]\ d ur|  | qtD ]\ d u r|  | q||g }	g }
g }tD ]&\}d u rB|r8|
j|  q"|	j|  q"tj}q"|	| |
 S )Nc                   S   rR   )Nz#at least one index must be providedr*   r*   r*   r*   r+   rM   8  rT   z#meta_index_Tensor.<locals>.<lambda>c                   S   rR   )Nz?tensors used as indices must be long, int, byte or bool tensorsr*   r*   r*   r*   r+   rM   @  rT   c                      ro   )N)too many indices for tensor of dimension rl  r*   r   r*   r+   rM   G  rs   c                	      s$   dj  d  dj  d  S )NzThe shape of the mask z
 at index z0 does not match the shape of the indexed tensor r  r*   )r   rr   jr   ry   r*   r+   rM   L  s
    r   c                      s   dj  dt  dS )Nr  z (got r^   )r   r   r*   )r   ry   r*   r+   rM   W  r  r   Fr   T)rA   rP   rD  	enumeraterH   rt   r  r   nonzeror   ru   r   r   rx   r   selecttorch._refsZ_refsr   r    r   rw   )ry   r   r   r  refsstateZhas_contiguous_subspacerP  Ztransposed_indicesZbefore_shapeZafter_shapeZreplacement_shaperi   r*   )r   rr   r   r  r   ry   r+   meta_index_Tensor6  s   








r  c                 C   sT   d }d }d }|
d r|  | }|
d r|  | }|
d r%|  |}|||fS )Nr   r   r   rw   r   )grad_output_input_weight_Zbias_sizes_optr   r  rT  Z
transposedrW  rV  output_maskZbackend_grad_inputZbackend_grad_weightZbackend_grad_biasr*   r*   r+   meta_convolution_backward  s   

r  c                   s     d} d}| ||f} t  dkdd  t dkdd  t  d dk fdd t  d dk fd	d t|  d|ko^|  d|kd
d  | |   S )Nr   r   r}   c                   S   rR   r-  r*   r*   r*   r*   r+   rM     rT   zmeta_addbmm.<locals>.<lambda>c                   S   rR   r.  r*   r*   r*   r*   r+   rM     rT   r   c                         d  d d d S )Nz8batch1 and batch2 must have same number of batches, got r   r~   r   r*   r/  r0  r*   r+   rM     r1  c                
      6   d  d d  d d d d d d	S )Nz#Incompatible matrix sizes for bmm (r   r7   r   r~   r^   r   r*   r  r*   r+   rM     
   c                   S   rR   )Nz.self tensor does not match matmul output shaper*   r*   r*   r*   r+   rM     rT   )r   r  rA   rP   ri   rw   )ry   r/  r0  r,  r   r9  r:  r*   r  r+   meta_addbmm  s$   

r  c                    r$   )Nc                    s    fdd}t |  S )Nc                    s<   t | dd }tt|dd}tt| t |d d S )N.r   Z	_foreach_ 
_scalar_op)rR  splitgetattrr"   replacer   r   r   )r'   Zop_nameZ	scalar_opr(   r*   r+   r,     s   z8register_meta_foreach.<locals>.wrapper.<locals>.register)r.   r/   r0   opsr(   r+   r1     s   z&register_meta_foreach.<locals>.wrapperr*   )r  r1   r*   r  r+   register_meta_foreach  s   r  r  c                    s  t td tfdd td t dkdd  d}tdd  D ]8\ t trG|d7 }t t k fdd q)t trat   dkoY  dk fdd q) g }t	D ]!fdd	t	|D }|
| g ||d  R i | qh|S )
Nr   c                      s   dt  d  dS )Nz1The first argument must be List[Tensor], but got r   r  r_   r*   r>   r*   r+   rM     r  z,_meta_foreach_out_of_place.<locals>.<lambda>c                   S   rR   )Nz*Tensor list must have at least one tensor.r*   r*   r*   r*   r+   rM     rT   r   c                      s    dd  d dt   dS )Nzself and argument-r   z must match in length, but got r~   r  r  r*   )rX   iargnelemr*   r+   rM   "  s
   c                      s   d    d   dS )Nz.scalar tensor expected to be 0 dim but it has z dimensions and 
 elements.)ri   rv   r*   )rX   r*   r+   rM   *  
   c                    s   g | ]} |  qS r*   r*   rN  )r>   elemr*   r+   r:   4  r  z._meta_foreach_out_of_place.<locals>.<listcomp>)rA   rP   rV   r   r   r  r   ri   rv   r   r   )r  r>   kwargsZnlistsr   Z	each_argsr*   )rX   r>   r  r  r  r+   _meta_foreach_out_of_place  s8   *




(r  c                 O   s   t |d| i| d S Nr  )r  )r  r>   r  r*   r*   r+   _meta_foreach_inplace:  s   +r  c                    s(   t t t fdd dd  D S )Nc                         dt   S Nz'exponent must be a tensor list but got r	  r*   exponentr*   r+   rM   o  rL  z5meta__foreach_pow_scalar_and_tensor.<locals>.<lambda>c                 S      g | ]}t |qS r*   rA   r   )r6   er*   r*   r+   r:   q  r;   z7meta__foreach_pow_scalar_and_tensor.<locals>.<listcomp>rA   rP   rV   r   )ry   r  r*   r  r+   #meta__foreach_pow_scalar_and_tensori  s
   
r  c                    sX   t ttot t fdd t tdko"tt k fdd d S )Nc                         dt  dt   dS )Nz9The first two arguments of must be List[Tensor], but got r~   r  r	  r*   r   ry   r*   r+   rM   w  r  z3_check_foreach_binop_tensor_lists.<locals>.<lambda>r   c                      r  )Nz>self and other must be non-empty and match in length, but got r~   r  r  r*   r  r*   r+   rM   ~  r  )rA   rP   rV   r   r   ry   r   r*   r  r+   !_check_foreach_binop_tensor_listst  s   r   c                  G   s   t | dtjiS r  )r  r"   Z	clamp_minr
  r*   r*   r+   meta__foreach_binop_scalar  s   r!  c                  G   s   t | dtji d S r  )r  r"   Z
clamp_min_r
  r*   r*   r+   meta__foreach_binop__scalar  s   r"  c                    s~   t tdd  fD  fdd t t dkdd  t t tko3t tkdd  dd	  D S )
Nc                 s   rU   r%   rV   r   r6   lr*   r*   r+   rY     rZ   z.meta__foreach_addcop_scalar.<locals>.<genexpr>c                      "   dt   dt  dt  S )Nz,All arguments must be List[Tensor], but got r]   , and r	  r*   ry   tensor1tensor2r*   r+   rM        z-meta__foreach_addcop_scalar.<locals>.<lambda>r   c                   S   rR   Nz$input tensor list must not be empty.r*   r*   r*   r*   r+   rM     rT   c                   S   rR   Nz0All input tensor lists must have the same lengthr*   r*   r*   r*   r+   rM     rT   c                 S   r  r*   r  r6   rY  r*   r*   r+   r:     r;   z/meta__foreach_addcop_scalar.<locals>.<listcomp>rA   rP   r  r   ry   r)  r*  Zscalarr*   r(  r+   meta__foreach_addcop_scalar  s   	r1  c                    s   t tdd fD ot t j fdd t tdkdd  t ttko:ttkdd  d S )Nc                 s   rU   r%   r#  r$  r*   r*   r+   rY     rZ   z.meta__foreach_addcop_tensor.<locals>.<genexpr>c                	      s,   dt  dt  dt  dt   S )Nzi_foreach_addc*_ op expects arguments of type: List[Tensor], List[Tensor], List[Tensor], tensor, but got: r]   r'  r	  r*   ro  ry   r)  r*  r*   r+   rM     s   z-meta__foreach_addcop_tensor.<locals>.<lambda>r   c                   S   rR   r,  r*   r*   r*   r*   r+   rM     rT   c                   S   rR   r-  r*   r*   r*   r*   r+   rM     rT   )rA   rP   r  rV   r   r   )ry   r)  r*  ro  r*   r2  r+   meta__foreach_addcop_tensor  s   
r3  c                    st   t tdd  fD  fdd t t dkdd  t t tko3t tkdd  d S )Nc                 s   rU   r%   r#  r$  r*   r*   r+   rY     rZ   z/meta__foreach_addcop__scalar.<locals>.<genexpr>c                      r&  )Nz?All arguments of _foreach_addc*_ must be List[Tensor], but got r]   r'  r	  r*   r(  r*   r+   rM     r+  z.meta__foreach_addcop__scalar.<locals>.<lambda>r   c                   S   rR   r,  r*   r*   r*   r*   r+   rM     rT   c                   S   rR   r-  r*   r*   r*   r*   r+   rM     rT   r/  r0  r*   r(  r+   meta__foreach_addcop__scalar  s   r4  )
grad_scale	found_infc       	            s4   | |||||fD ] t t t fdd qd S )Nc                      r  r  r	  r*   r%  r*   r+   rM     rL  z#meta__fused_adam_.<locals>.<lambda>r  )ry   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizer5  r6  r*   r7  r+   meta__fused_adam_  s   
rD  c       	            sZ   | |||||fD ] t t t fdd qdd }|| ||||||||fS )Nc                      r  r  r	  r*   r7  r*   r+   rM     rL  z"meta__fused_adam.<locals>.<lambda>c                 S   s   dd | D S )Nc                 S   r  r*   r  )r6   rH  r*   r*   r+   r:     r;   z=meta__fused_adam.<locals>.empty_like_list.<locals>.<listcomp>r*   )Ztensor_listr*   r*   r+   empty_like_list  s   z)meta__fused_adam.<locals>.empty_like_listr  )ry   r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  rC  r5  r6  rE  r*   r7  r+   meta__fused_adam  s   
rF  c                    s   t   dkdd  t  dkdd  t  jt ju  fdd t jt ju fdd t  ddk fd	d  j ddft jd
S )Nr   c                   S   rR   )Nza must be a 2D tensorr*   r*   r*   r*   r+   rM     rT   zmeta__int_mm.<locals>.<lambda>c                   S   rR   )Nzb must be a 2D tensorr*   r*   r*   r*   r+   rM     rT   c                      ro   )Nzexpected self to be int8, got rp   r*   )rK  r*   r+   rM   !  rs   c                      ro   )Nzexpected mat2 to be int8, got rp   r*   )rL  r*   r+   rM   %  rs   r   r   c                
      r  )Nz'Incompatible matrix sizes for _int_mm (r   r7   r   r~   r^   r   r*   rJ  r*   r+   rM   )  r  rp   )rA   rP   ri   rH   r   r   rw   r   rJ  r*   rJ  r+   meta__int_mm  s   



 rG  c                    sp   t   dkdd  t  jt ju  fdd  d} d} j|d ||d  d	|d ft jd
S )Nr   c                   S   rR   )Nzw must be a 2D tensorr*   r*   r*   r*   r+   rM   3  rT   z2meta__convert_weight_to_int4pack.<locals>.<lambda>c                      ro   Nzexpected w to be int32, got rp   r*   r{  r*   r+   rM   6  rs   r   r      r       rp   )rA   rP   ri   rH   r   r   rw   )r{  Zinner_k_tilesr   r   r*   rI  r+    meta__convert_weight_to_int4pack1  s   




rL  c                    s   t  dkdd  t   dkdd  t jt ju fdd t  jt ju  fdd jd dd	 jd
S )Nr   c                   S   rR   )Nzx must be a 2D tensorr*   r*   r*   r*   r+   rM   G  rT   z*meta__weight_int4pack_mm.<locals>.<lambda>r  c                   S   rR   )Nzw must be a 4D tensorr*   r*   r*   r*   r+   rM   H  rT   c                      ro   )Nzexpected x to be bf16, got rp   r*   r   r*   r+   rM   K  rs   c                      ro   rH  rp   r*   rI  r*   r+   rM   O  rs   r   rJ  rp   )rA   rP   ri   rH   r   r   rw   r   )r7   r{  Zq_group_sizeZq_scale_and_zerosr*   )r{  r7   r+   meta__weight_int4pack_mmE  s   



"rM  c           	         s  t  dkfdd t  dkfdd t ddkfdd t tjdd  t tjdd  t |d	kd
d  t  dv  fdd d}d}jd d }jd d }tt 	||}|
||g |S )Nr   c                         d    dS )Nz1cdist only supports at least 2D tensors, X1 got: Dr|   r*   )x1r*   r+   rM   X  rN   z$meta_cdist_forward.<locals>.<lambda>c                      rN  )Nz1cdist only supports at least 2D tensors, X2 got: rO  r|   r*   )x2r*   r+   rM   \  rN   r{   c                      r  )Nz4X1 and X2 must have the same number of columns. X1: r{   z X2: r   r*   )rP  rQ  r*   r+   rM   `  r1  c                   S   rR   )Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r*   r*   r*   r*   r+   rM   d  rT   c                   S   rR   )Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r*   r*   r*   r*   r+   rM   h  rT   r   c                   S   rR   )Nz)cdist only supports non-negative p valuesr*   r*   r*   r*   r+   rM   j  rT   )Nr   r   c                      r[  )Nz%possible modes: None, 1, 2, but was: r*   r*   )compute_moder*   r+   rM   m  r]  r<  )rA   rP   ri   r   r<   is_float_dtyperH   rx   r   broadcast_shapesextendrw   )	rP  rQ  r'  rR  r1r2batch_tensor1batch_tensor2r   r*   )rR  rP  rQ  r+   meta_cdist_forwardT  s@   









rZ  c                 C   s   |j d }|j d }|j d }|j d d }|j d d }	tt||	}
|
 }|||g t|
}|dksE|dksE|dksE|dkrJt|S |t|j krV|	|}tj
|tjdS )Nr{   r<  r   r   )rx   r   rA   rT  copyrU  mathprod
zeros_liker  r   r   )r)  rP  rQ  r'  Zcdistc1rV  rW  rX  rY  r  Ztensor1_expand_sizeZbatch_productr*   r*   r+   meta_cdist_backwardx  s   



 

r`  c	                    s<  t  jt jt jfv  fdd t jt jt jfv fdd t tjfdd d}	|rEt |	dkdd  |	d8 }	|	d}
t	d\}}}d urt ||kd	d  t jjkfd
d t j
dkfdd t    k fdd fdddd fdd}tdkrʈ  d}  }||krĈ |	d}nT d}nN||
|}|||fv s|s d}nd}|	}jd }||kr|rt |dkdd  |d8 }|jd }n| }|
|||fS )Nc                      ro   )Nz(expected indices to be long or int, got rp   r*   r   r*   r+   rM     rs   z$meta_embedding_bag.<locals>.<lambda>c                      ro   )Nz(expected offsets to be long or int, got rp   r*   )r   r*   r+   rM     rs   c                      ro   )Nz/expected weight to be floating point type, got rp   r*   )r   r*   r+   rM     rs   r   r   c                   S   rR   Nz1include_last_offset: numBags should be at least 1r*   r*   r*   r*   r+   rM     rT   r}   c                   S   rR   )Nz@embedding_bag: per_sample_weights only supported with mode='sum'r*   r*   r*   r*   r+   rM     rT   c                      rA  )Nzexpected weight (z) and per_sample_weights (z) to have same dtyperp   r*   )per_sample_weightsr   r*   r+   rM     r;   c                      rj  )Nz1expected per_sample_weights to be 1D tensor, got rO  rl  r*   )rc  r*   r+   rM     rL  c                      s   d   d    dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (r^   rv   r*   )r   rc  r*   r+   rM     s   c                    s    | ||o| ddkS Nr   r   r   r   rE  r   padding_idx)is_fast_path_index_selectr*   r+   is_fast_path_index_select_scale  s   z;meta_embedding_bag.<locals>.is_fast_path_index_select_scalec                 S   s<   | j tjks| j tjko| ddko|ddko|dk S Nr   r   )rH   rA   rD   rB   r   )r   r   rh  r*   r*   r+   ri    s   z5meta_embedding_bag.<locals>.is_fast_path_index_selectc                    s"   |d ur| |||S  | ||S r%   r*   rg  )ri  rj  r*   r+   is_fast_path  s   z(meta_embedding_bag.<locals>.is_fast_pathcpuc                   S   rR   rb  r*   r*   r*   r*   r+   rM     rT   )rA   rP   rH   rt   r  r<   rS  r   rw   r   r   rv   r  rx   )r   r   r   Zscale_grad_by_freqr  sparserc  Zinclude_last_offsetrh  Znum_bagsr   ZMODE_SUMZ	MODE_MEANZMODE_MAXrl  
offset2bagbag_sizemax_indicesZfast_path_sumZnumBagsr*   )r   ri  rj  r   rc  r   r+   meta_embedding_bag  s~   










rr  c                 G   sB   t | ||g|R  \}}}}t|dkr|| }||||fS )Nrm  )rr  r  rw   r   )r   r   r   r>   r   ro  rp  rq  r*   r*   r+   meta_embedding_bag_forward_only  s   rs  c                 C   s.   |r|S | j js| j jr| j S |rtjS | j S r%   )rH   r   r   rA   rt   )r   rH   promote_int_to_longr*   r*   r+   _get_reduction_dtype  s   ru  rp   c                C   s6   t | |dd}t| j|}t| ||}| j||dS )NT)rt  rp   )ru  r<   r  rx   r  rw   )r   rP  r  rH   r  r   r*   r*   r+   meta_nansum  s   rv  c                 C   s$   t | jtt|  }| |S r%   )r<   rQ  rx   rO   r   ri   rw   )r   r   r*   r*   r+   meta_median  s   
rw  c                 C   sL   t | dkrtd t| j|f}t| ||}| || j|tjdfS )Nr  zmedian CUDA with indices outputrp   )	r  r<   alert_not_deterministicr  rx   r  rw   rA   rt   )r   ri   r  r   r*   r*   r+   meta_median_mode_dim   s   
ry  c                 C   r   r%   r*   r   r*   r*   r+   meta_logical_not_6  r!  rz  c                    sd   t t|  kdd  t|   }d| t| j   fddttD }| |S )Nc                   S   rR   )NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr*   r*   r*   r*   r+   rM   ?  rT   zmeta_repeat.<locals>.<lambda>r  c                    s   g | ]
} | |  qS r*   r*   rN  Zpadded_sizer  r*   r+   r:   F  r1  zmeta_repeat.<locals>.<listcomp>)rA   rP   r   ri   rO   rx   r   rw   )ry   r  Znum_new_dimensionsZtarget_sizer*   r{  r+   meta_repeat;  s   
r|  c                 C   r   r%   r*   r   r*   r*   r+   
meta_zero_J  r!  r}  c                 C      t |tjrt| j|j | S r%   rV   rA   r   rQ   rx   r  r*   r*   r+   meta_binop_inplaceO  s   r  c                 C   r~  r%   r  )ry   r   r   r*   r*   r+   meta_binop_inplace_alpha`  s   	r  c                 K      t | tjdS Nr3   )r@   r   r=   )ry   r  r*   r*   r+   
meta_roundn  s   r  c                    sl   t tj fdd tt jr&t tj fdd d S t tt fdd d S )Nc                           dj  S )Nz7: Expected input tensor to have an integral dtype. Got rp   r*   )rT  ry   r*   r+   rM   x  rN   z#shift_dtype_check.<locals>.<lambda>c                      r  )Nz6: Expected shift value to have an integral dtype. Got rp   r*   rT  r  r*   r+   rM   }  rN   c                      s     d S )Nz): Expected shift value to be an int. Got r*   r*   r  r*   r+   rM     rL  )rA   rP   r<   r  rH   rV   r   r   rT  ry   r  r*   r  r+   shift_dtype_checku  s   

r  c                 C      t d| | t| |tjdS )Nrshiftr  r  r@   r   r=   r  r*   r*   r+   meta_rshifts     r  c                 C   r  )Nlshiftr  r  r  r*   r*   r+   meta_lshifts  r  r  c                 C      |  | jS r%   rs  r   r*   r*   r+   	meta_zero     r  c                 C   r   r%   r*   ry   r  r*   r*   r+   
meta_fill_  r!  r  c                 C   
   t | S r%   r  r  r*   r*   r+   	meta_fill     
r  c                 C   r   r%   r*   r   r*   r*   r+   
meta_relu_  r!  r  c                 C   r  r%   r  ry   r   r   
accumulater*   r*   r+   meta_index_put  r  r  c                 C   s   t | j|j | S r%   )rQ   rx   )ry   rF  valuer*   r*   r+   meta_masked_fill_  s   r  c                 C   s:   t |jt jt jfv dd  t | j|jkdd  | S )Nc                   S   rR   )NzMask must be bool or uint8r*   r*   r*   r*   r+   rM     rT   z&meta_masked_scatter_.<locals>.<lambda>c                   S   rR   )Nzdmasked_scatter: expected self and source to have same dtypes but got {self.dtype} and {source.dtype}r*   r*   r*   r*   r+   rM     rT   )rA   rP   rH   rD  uint8)ry   rF  r   r*   r*   r+   meta_masked_scatter_  s   
r  c                 C   s*   t | |\} }tj| tjd}t|||S r   )r    rA   r   r   r  )ry   rF  r   r   r*   r*   r+   meta_masked_scatter  s   r  c                 C   s
   |  |S r%   r
  )ry   rF  r  r*   r*   r+   meta_masked_scatter_backward  r  r  c                 C   r   r%   r*   r  r*   r*   r+   meta_index_put_  r!  r  c                 C   r  r%   )viewrx   r   r*   r*   r+   
meta_alias  r  r  c                    s   t |  dkdd  t | dkdd  |  }|  |d |d |d } d }||ft  d koB d k fdd |}|sqd urqt  dkd	d  t  kfd
d |S )Nr}   c                   S   rR   r-  r*   r*   r*   r*   r+   rM     rT   z)common_meta_baddbmm_bmm.<locals>.<lambda>c                   S   rR   r.  r*   r*   r*   r*   r+   rM     rT   r   r   r   c                	      r2  r3  r*   r*   r5  r*   r+   rM     s    c                   S   rR   )Nzself must be a 3D tensorr*   r*   r*   r*   r+   rM     rT   c                      s   d  d   S )Nz*Expected an input tensor shape with shape z but got shape: r   r*   )r  self_baddbmmr*   r+   rM     r  )rA   rP   ri   r   rw   )r/  r0  Zis_bmmr  r;  Zres_rowsZres_colsr   r*   )r6  r7  r8  r  r  r+   common_meta_baddbmm_bmm  s*   


r  c                 C   s   t | |dS )NT)r  )ry   mat2r*   r*   r+   meta_bmm  r  r  c                 C   s<   | | }| | }|dkrt |dk t |dk kr|d8 }|S re  )rD  )r7   yqr  r*   r*   r+   div_rtn  s
    r  c                 C   sZ   t | | | ||d   d |r|d nd |d }|r+|d | | | kr+|d8 }|S rk  )r  )	inputSize
kernelSizer  r  r   rT  r  Z
outputSizer*   r*   r+   pooling_output_shape_pad_lr	  s*   
	r  c                    s^   t |dkdd  t dkfdd t  d k fdd t|  |||S )Nr   c                   S   rR   )Nzstride should not be zeror*   r*   r*   r*   r+   rM     rT   z&pooling_output_shape.<locals>.<lambda>c                      r[  )Nz'pad must be non-negative, but got pad: r*   r*   )padr*   r+   rM      r]  r   c                      rJ   )Nz7pad should be at most half of kernel size, but got pad=z and kernel_size=r*   r*   r  r  r*   r+   rM   #  rN   )rA   rP   r  )r  r  r  r   rT  r  r*   r  r+   r    s   
r  c              	      sN     }tdkodkdd  t|dko|dkdd  t|dko+|dkdd   ddko= ddk}|tjkrWt|dkoQ|oQ d	dkd
d  n"t|d	krf ddkrf|pr|dkor|or d	dk fdd td 
kod 	k	
fdd tdkodkfdd d S )Nr   c                   S   rR   )NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r*   r*   r*   r*   r+   rM   @  rT   z$pool2d_shape_check.<locals>.<lambda>c                   S   rR   )Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r*   r*   r*   r*   r+   rM   D  rT   c                   S   rR   )Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r*   r*   r*   r*   r+   rM   H  rT   r   r   r  r}   c                   S   rR   )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r*   r*   r*   r*   r+   rM   P  rT   c                         d    S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r*   r  r*   r+   rM   W  rL  c                      s   d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r*   r*   )r  r  r  r  r*   r+   rM   \  s    c                      s*   d d  d d d d dS NzGiven input size: (r7   z). Calculated output size: (z). Output size is too smallr*   r*   )r  r  r  r  r  r  r*   r+   rM   b  s    )ri   rA   rP   r   rb  )r   r  r  r  r  r  r  	dilationH	dilationWr  r  r  r  r  r   r   Z
valid_dimsr*   )r   r  r  r  r  r  r  r  r  r  r  r+   r  *  sB   

r  r  r  r  r  r  r  r  pTpHpW	dilationTr  r  r  r  r  r  r  r  r  c              
      s  	j }tdkodkodkfdd tdko&dko& dk fdd tdko<dko<dkfdd t|dv 	fdd t|D ]|dkradkraqVt	dk	fd	d qV|rt
kokok
fd
d td kod kod kfdd tdkodkodk
fdd d S )Nr   c                         d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r*   r*   )r  r  r  r*   r+   rM        z$pool3d_shape_check.<locals>.<lambda>c                      r  )Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r*   r*   )r  r  r  r*   r+   rM     r  c                      r  )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: z, dilationW: r*   r*   )r  r  r  r*   r+   rM     r  r  c                      r  )Nz/: Expected 4D or 5D tensor for input, but got: r  r*   )rT  r   r*   r+   rM     rN   r  c                      s     dj  d dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)rx   r   r*   rT  r   r   r*   r+   rM     s
   c                      s*   d d  d d d d dS )Nzinput image (T: r  r  z ) smaller than kernel size (kT:  kH:  kW: r^   r*   r*   )r  r  r  r  r  r  r*   r+   rM     s   r   c                      s(   d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: r  r  z padT: z padW: z padH: r*   r*   )r  r  r  r  r  r  r*   r+   rM     s   r   c                      s6   d d d  d d d d d dS r  r*   r*   )r  r  r  r  r  r  r  r*   r+   rM     s   )r   rA   rP   r   r   )r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rT  r  r   r*   )r  r  r  r  r  r  rT  r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r+   r  h  sJ   	"r  c                 C   s   | j }t| |||||||	|
|||||||||||| t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | d S )Nr  r}   r   r   r   r  r  )r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rT  r   r*   r*   r+   max_pool3d_backward_shape_check  s@   r  c                 C   s   | j }t| ||||||||	|
|ddd|||||||d t|||d | t|||d | t|||d | t|||d | d S )Nr   Tr  r}   r   r  )r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rT  r   r*   r*   r+   r    s:   r  c                 C   sB  dd }|d|\}}t t|dv dd  t|dkr#||}	}
n|d|\}	}
|d	|\}}|d
|\}}| d}| d}| d}t| }|t jkr^t |  dkdd  n|t jkrpt |  dv dd  nt ddd  t	||||	||}t	||||
||}t
| |||	|
|||||||||| |||fS )Nc                    r  )Nr  c                      r  )Nzmax_pool2d: r  r*   r*   r  r*   r+   rM   @  rs   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>r   r   r  r  r*   r  r+   r  =  r  z3max_pool2d_checks_and_compute_shape.<locals>.unpackr\  r  c                   S   rR   )NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr*   r*   r*   r*   r+   rM   J  rT   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>r   r   r  rT  r  r<  r{   r  c                   S   rR   )NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr*   r*   r*   r*   r+   rM   [  rT   r  c                   S   rR   )Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr*   r*   r*   r*   r+   rM   `  rT   Fc                   S   rR   )Nz?Unsupport memory format. Supports only ChannelsLast, Contiguousr*   r*   r*   r*   r+   rM   e  rT   )rA   rP   r   r   r<   ra  rb  ri   r   r  r  )r   r\  r   r  rT  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r*   r*   r+   r  9  sb   	









r  c                    s   t |||||\}tj jk fdd |jfdd}	|	  |	| t}
tjjjj	|
dS )Nc                      rw  )NzExpected dtype z  for `gradOutput` but got dtype rp   r*   r  r*   r+   rM     r  z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>c                    s:   t | d   t | d  t | d  d S )Nr}   r   r   )r  )rH  )r  r   r  r  r*   r+   _check_dim_size  s   z>meta_max_pool2d_with_indices_backward.<locals>._check_dim_sizer  )
r  rA   rP   rH   r   r<   ra  rl   rx   rf   )r  ry   r\  r   r  rT  r  r   r  r  r   r*   )r  r  r   r  r  ry   r+   %meta_max_pool2d_with_indices_backward  s.   

r  c                 C   s   t | |||||\}}}|  dkr| dnd}	t| }
|  dkr*|||g}n|	|||g}tj|| j| j|
dtj|tj	| j|
dfS r  )
r  ri   r   r<   ra  rA   rl   rH   rf   r   r  r*   r*   r+   meta_max_pool2d_with_indices  s2   
r  c           	         s  t d tjtjkfdd ttdkfdd \}}tjdv fdd tjjkfdd t	d	jD ] t
 d
k fdd qG }jdkrr|
d
}||||f}|S |
d
}|
d	}|||||f}|S )NZmax_unpooling2d_forward_outc                      ro   )Nz2elements in indices should be type int64 but got: rp   r*   ra  r*   r+   rM     rs   z#meta_max_unpool2d.<locals>.<lambda>r   c                         dt   dS )NzMThere should be exactly two elements (height, width) in output_size, but got r  r  r*   r  r*   r+   rM        r  c                      rj  )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.rl  r*   )self_r*   r+   rM     r  c                      rw  NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: r  r*   )r   r  r*   r+   rM     r~  r   r   c                      r  )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got r   being empty.r  r*   )r   r  r*   r+   rM     s
   r}   )r<   rx  rA   rP   rH   r   r   r   rx   r   r   r   rw   )	r  r   r  r  r  ry   	nchannelsr   r  r*   )r   r   r  r  r+   meta_max_unpool2d  s@   






	



r  c                    s  t jt jkdd  t jdv fdd t tdkfdd t tdkfdd t tdkfdd t jjkfd	d td
jD ]t dk fdd qXt d dkod
 dkod dkfdd d S )Nc                   S   rR   )Nz(elements in indices should be type int64r*   r*   r*   r*   r+   rM   	  rT   z._max_unpooling3d_shape_check.<locals>.<lambda>r  c                      rj  )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with r  rl  r*   r  r*   r+   rM     rL  r}   c                      r  )NzVThere should be exactly three elements (depth, height, width) in output_size, but got r  r  r*   r  r*   r+   rM     r  c                      r  )NzRThere should be exactly three elements (depth, height, width) in stride, but got: r  r  r*   rf  r*   r+   rM     rN   c                      r  )NzSThere should be exactly three elements (depth, height, width) in padding, but got: r  r  r*   )r  r*   r+   rM     rN   c                      rw  r  r  r*   )r   r   r*   r+   rM      r~  r   r   c                      s     dj  d dS )NzI: Expected input to have non-zero size for non-batch dimensions, but got r  r  r  r*   r  r*   r+   rM   )  s
   r   c                      r[  )Nz5strides should be greater than zero, but got stride: r*   r*   rf  r*   r+   rM   2  r]  )	rA   rP   rH   r   r   r   rx   r   r   )r   r   r  r   r  rT  r*   )rT  r   r   r   r  r  r   r+   _max_unpooling3d_shape_check  s@   







	"
r  c                 C   s   t d t| ||||d |  }|\}}}| jdkr,|d}	||	|||f}
|
S |d}|d}	|||	|||f}
|
S )NZmax_unpooling3d_forward_outzmax_unpooling3d()r  r   r   )r<   rx  r  r   r   r   rw   )r  r   r  r   r  ry   Zodepthr  r  r  r   r  r*   r*   r+   meta_max_unpool3d6  s   





r  c                 C   s  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }t | p2t|dv dd  |s;|n|d }	|sC|nt|dkrK|	n|d }
|sS|nt|dkr[|	n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t | jd
v dd  | jdkr| dnd}| d}| d}| d}| d}t||||	||}t||||
||}t||||||}t| |||||	|
|||||||||||||d | jdkot| t j	k}| jdkr:| 
d}|  o2|jt j	d}||||f}n|||||f}| |}| j|t jd}|r_|jt j	d}|jt j	d}||fS )Nr  c                   S   rR   NzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   Z  rT   z.meta_max_pool3d_with_indices.<locals>.<lambda>r   r   r   c                   S   rR   NzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   b  rT   c                   S   rR   NzImax_pool3d: padding must either be a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   j  rT   c                   S   rR   NzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr*   r*   r*   r*   r+   rM   r  rT   r  c                   S   rR   r  r*   r*   r*   r*   r+   rM   z  rT   r  r  r  r<  r{   zmax_pool3d_with_indices()r  r   rp   )rA   rP   r   r   r   r  r  r<   ra  channels_last_3dr  r  rw   r   r   )r   r\  r   r  rT  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rb  input_channels_last_checkr   r   r   r*   r*   r+   meta_max_pool3d_with_indicesN  s   

  







r  c                 C   s^  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t |jd
v dd  |d}|d}|d}|d}| d}| d}| d}t|| ||||	|
|||||||||||||||d |jdkot|t jk}|jdkr|	d}|
  o|j
t jd}||j}|r-|jt jd}|S )Nr  c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>r   r   r   c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   r  c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   r  r  r<  r{   z"max_pool3d_with_indices_backward()r  r  r   )rA   rP   r   r   r   r  r<   ra  r  r  r  rw   rx   r   )r  r   r\  r   r  rT  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rb  r  r  r*   r*   r+   %meta_max_pool3d_with_indices_backward  s   
  









r  gridc                    s   t j jk fdd t jt jko jt jk fdd t jd  jd k fdd t  jd jd k fdd tdjD ]t j dkfd	d qPd S )
Nc                      rw  )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on r%  r*   r  r   r*   r+   rM   !  rz  z+check_grid_sampler_common.<locals>.<lambda>c                      rw  )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )re   r*   r  r*   r+   rM   (  rz  r   c                      rw  )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r  r*   r  r*   r+   rM   /  rz  r{   r   c                      s   dj d  d j S )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )r   rx   r*   r  r*   r+   rM   6  s   c                      r  )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  r  r*   r  r*   r+   rM   ?  r  )rA   rP   rf   re   r  rx   r   r   )r   r  r*   )r  r   r   r+   check_grid_sampler_common  s,   
r  c                   @   s   e Zd ZdZdZdZdS )GridSamplerInterpolationr   r   r   N)r`   
__module____qualname__ZBILINEARZNEARESTBICUBICr*   r*   r*   r+   r  F  s    r  interpolation_modec                    sP   t jdkoj jk fdd t jdko |tjjk dd  d S )Nr  c                      rw  )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r  r  r*   r  r*   r+   rM   O  s
   z'check_grid_sampler_3d.<locals>.<lambda>c                   S   rR   )Nz<grid_sampler(): bicubic interpolation only supports 4D inputr*   r*   r*   r*   r+   rM   Z  rT   )rA   rP   r   r  r  r  )r   r  r  r*   r  r+   check_grid_sampler_3dL  s   

r  c           
      C   s:   |d }|rt j|t jd}nd }t j|t jd}	||	fS Nr   r   )rA   r^  r   r   
r  r   r  r  padding_modealign_cornersr  Zinput_requires_gradr  	grad_gridr*   r*   r+   grid_sampler_2d_backward_meta^  s   
r  c           
      C   s\   t | | t| || | jd }| jd }|jd }|jd }|jd }	| |||||	fS )Nr   r   r   r}   )r  r  rx   rw   )
r   r  r  r  r  rI  CZout_DZout_HZout_Wr*   r*   r+   grid_sampler_3dq  s   
	




r  r  c           
      C   sP   t || t||| |d }|rtj|tjd}nd }tj|tjd}	||	fS r  )r  r  rA   r^  rp  r   r  r*   r*   r+   grid_sampler_3d_backward  s   
r  c                 O   s:   | dd }|st|}||d< tj| g|R i |S )NrH   )rG   r<   Z	get_dtyperA   rl   )r   r  r>   r  rH   r*   r*   r+   full  s
   
r  c                 C   s   |t jkrJt |d u dd  t jd|d u r| jn|||d u r"| jn||d}| jr8||  | 	 | 
  n||  |  d |d |S tjj| |||||d}|d |S )Nc                   S   rR   )Nz9memory format option is only supported by strided tensorsr*   r*   r*   r*   r+   rM     rT   zzeros_like.<locals>.<lambda>r   r   Tr$  )rA   Z
sparse_coorP   rl   rH   rf   	is_sparseZsparse_resize_and_clear_r   
sparse_dim	dense_dimri   Z_coalesced_r"   r   r   fill_)ry   rH   re   rf   rg   r   r  r*   r*   r+   r^    s:   
	

	r^  c                    s     }t|dkdd   dkr n |   }t |kp'|k  fdd dkr7n| t }t } |    }| = | = |||S )Nr   c                   S   rR   )Nz-select() cannot be applied to a 0-dim tensor.r*   r*   r*   r*   r+   rM     rT   zmeta_select.<locals>.<lambda>c                      s   d d   d  S )Nzselect(): index z! out of range for tensor of size z at dimension r   r*   ri   rr   ry   r*   r+   rM     s
    )ri   rA   ru   r   r   r   r   r   )ry   ri   rr   r   r   new_sizer   Znew_storage_offsetr*   r  r+   meta_select  s$   
r  c                 C   r  r%   r<   Zclone_preserve_strides)ry   r   ri   rr   r*   r*   r+   meta_select_scatter  r  r  c                 C   r  r%   r  )ry   r   ri   rb   ra   stepr*   r*   r+   meta_slice_scatter  r  r  dim_post_exprwrap_scalarc                 C   sb   |dkr
|sJ d}| }|d }| |k s| |kr'J d|  d| d| d| dk r/| |7 } | S )Nr   r   zdim z out of bounds (r]   r^   r*   )ri   r  r  r+  r,  r*   r*   r+   r     s   ,r   c                 C   s   |   dkrdS | j| S re  r  )rH  ri   r*   r*   r+   ensure_nonempty_size  s   r  c                    st   t  d}t  d}t||kdd  t|D ] kr7tttk fdd qd S )Nr   c                   S   rR   )NzDIndex tensor must have the same number of dimensions as input tensorr*   r*   r*   r*   r+   rM     rT   z$gather_shape_check.<locals>.<lambda>c                      s$   d dj  dj  d   S )Nz!Size does not match at dimension z expected index  to be smaller than self  apart from dimension r  r*   ri   r   rr   ry   r*   r+   rM     s    )r,  ri   rA   rP   r   r  )ry   ri   rr   	self_dimsZ
index_dimsr*   r  r+   gather_shape_check  s   r  c                    sR   t ||  }  dk}|s#t jtjk fdd t| |  |  j	S )Nr   c                      ro   )Nz2gather(): Expected dtype int64 for index, but got rp   r*   rq   r*   r+   rM   #  rs   zmeta_gather.<locals>.<lambda>)
r   ri   rv   rA   rP   rH   rt   r  rw   rx   )ry   ri   rr   Zsparse_gradwrapped_dimZis_index_emptyr*   rq   r+   meta_gather  s   

r   c                 C   s   |r*| dkrdS | dkrdS | dkrdS | dkrdS | d	kr d
S t ddd  d S | dkr0dS | dkr6dS t ddd  d S )NsumZ
REDUCE_ADDr]  ZREDUCE_MULTIPLYmeanZREDUCE_MEANZamaxZREDUCE_MAXIMUMZaminZREDUCE_MINIMUMFc                   S   rR   )Nz=reduce argument must be either sum, prod, mean, amax or amin.r*   r*   r*   r*   r+   rM   8  rT   z#get_operator_enum.<locals>.<lambda>addmultiplyc                   S   rR   )Nz/reduce argument must be either add or multiply.r*   r*   r*   r*   r+   rM   @  rT   r  )reduce_use_new_optionsr*   r*   r+   get_operator_enum*  s,   r  c                    sT   |  dkrt|jtjk fdd |d ur(t|j|jk fdd d S d S )Nr   c                      
     dS )Nz"(): Expected dtype int64 for indexr*   r*   method_namer*   r+   rM   I  r]  z,scatter_gather_dtype_check.<locals>.<lambda>c                      r  )Nz0(): Expected self.dtype to be equal to src.dtyper*   r*   r	  r*   r+   rM   O  r]  )rv   rA   rP   rH   rt   )r
  ry   rr   src_optr*   r	  r+   scatter_gather_dtype_checkE  s   



r  c                 C   s
   t | dS r   )r,  r|   r*   r*   r+   ensure_nonempty_dimS  s   
r  c                    s     dkrd S tt t kdd  d}t }t|D ]}t|}| kr2q&|t|kr=d} nq&|s[d ur[t|D ]}t|}|t|krZd} nqHd urtt t kdd  t|  fdd d S t|  fdd d S )	Nr   c                   S   rR   NzCIndex tensor must have the same number of dimensions as self tensorr*   r*   r*   r*   r+   rM   ]  rT   z%scatter_shape_check.<locals>.<lambda>FTc                   S   rR   r  r*   r*   r*   r*   r+   rM   w  rT   c                      s&   dj  dj  d  dj   S )NExpected index r  r  z and to be smaller than src r  r*   ri   rr   ry   r  r*   r+   rM   {  s    c                      s   dj  dj  d   S )Nr  r  r  r  r*   r  r*   r+   rM     s    )rv   rA   rP   r  ri   r   r  )ry   ri   rr   r  Zis_wrong_shaper  r   Zindex_d_sizer*   r  r+   scatter_shape_checkX  sH   

r  c                 C   sD   t ||  }td| || t| ||| |d ur t|| d S d S )Nscatter)r   ri   r  r  r  )ry   ri   rr   r   r  r  r  r*   r*   r+   scatter_meta_impl  s   r  c                 C   s   t | |||d | | jS Nr  r  rw   rx   ry   ri   rr   r   r*   r*   r+   meta_scatter_add  s   r  c                 C   s   t | |||d | S r  r  r  r*   r*   r+   meta_scatter_add_  r6  r  c                 C   s0   t |tjr|nd }t| |||| | | jS r%   )rV   rA   r   r  rw   rx   ry   ri   rr   Zsrc_or_valuer   r   r*   r*   r+   meta_scatter  s   
r  c                 C   s(   t |tjr|nd }t| |||| | S r%   )rV   rA   r   r  r  r*   r*   r+   meta_scatter_  s   	r          queryr   r  	dropout_p	is_causalreturn_debug_maskrE  c                 C   s  |  d}|  d}|  d}	|  d}
| d}t| dkrqtj||	||
f| j| jddd}tj||	|ftj| jddd}||tjdtjddtjdtjddddtjdtj	ddtjdtj	ddtjd| j| jdf	S | dd}t
|dd}tj|||	ftj| jd}|r|
d	krd
nd}t|	| }|d
krd
}n|dkrd}tj|||	|f| j| jd}n
tjd| j| jd}||d d |	|tjdtj	ddtjdtj	dd|f	S )Nr   r   r   r}   rm  rH   rf   r*   rd   @         )r   r  rA   rl   rH   rf   rg  rD   r   rt   r   r\  ceil)r  r   r  r  r   r!  rE  r   	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_k	attention	logsumexpZquery_tblocksize_cmax_seqlen_k
debug_maskr*   r*   r+   meta__scaled_dot_product_flash  s~   







r0  r  r,  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                 C   s  t |dkr2t|dddd}t|dddd}t|dddd}|||fS |d}|d}|d}t |dkrL|dn|}t |dkrY|dn|	}tj||||fd|j|jd}tj||||fd|j|jd}tj||||fd|j|jd}|||fS )Nrm  r   r   r   r}   r   r   r   r}   r"  )r  rA   r   rg  r   empty_permutedrH   rf   )r  r  r   r  r   r,  r1  r2  r3  r4  r  r   r5  r6  rE  grad_qgrad_kgrad_vr   r'  r)  len_qZlen_kr*   r*   r+   'meta__scaled_dot_product_flash_backward  s:   







r=  	attn_biascompute_log_sumexpc                 C   s   |  dd} | dd}| dd}| d}| d}	|d}
| d}| d}|d}tj||	||| j| jd}|rHt|	d d nd}tj|||ftj| jd}| dd}tjdtj	d	d}tjdtj	d	d}||||fS )
Nr   r   r   r<  r{   r"  rK  r*   rd   )
rg  r   rA   rl   rH   rf   r\  r&  rD   rt   )r  r   r  r>  r?  r  r   rE  rQ  MrI  r'  KKvr  logsumexp_dim
logsum_expseedoffsetr*   r*   r+   "meta__scaled_dot_product_efficientK  s(   





rG  grad_input_maskc                 C   s  | d}| d}| d}| d}| d}| d}tj||||fd|j|jd}tj||||fd|j|jd}tj||||fd|j|jd}d }|d ur|
d r| d}|d dkrb|n|d |d  }t|  }||d< tj||j|jd}|d	d |f }||||fS )
Nr   r   r   r}   r7  r"  r{   r   .)r   rA   r8  rH   rf   r   rl   )r  r  r   r  r>  r   r,  r5  r6  r  rH  r   rE  r   r'  r3  r)  Z
head_dim_vr4  r9  r:  r;  	grad_biaslastDimlastDimAligned	new_sizesr*   r*   r+   +meta__scaled_dot_product_efficient_backwardw  sF   









 
rM  c                 C   s   |  d}|  d}|  d}|  d}| d}t| }tj|||ftj| jd}|	rX|dkr3dnd}t|| }|dkrCd}n|dkrId}tj||||f| j| jd}n
tjd| j| jd}||tjd	tj	d
dtjd	tj	d
d|fS )Nr   r   r   r}   r"  r#  r$  r%  r*   rd   )
r   rA   r   rl   rD   rf   r\  r&  rH   rt   )r  r   r  r1  r2  r3  r4  r  r   r!  rE  r   r(  r'  r)  r*  r+  r,  r-  r.  r/  r*   r*   r+   meta__flash_attention_forward  s<   






rN  c                 C   s(   t |}t |}t |}|||fS r%   r  )r  r  r   r  r   r,  r1  r2  r3  r4  r  r   r5  r6  rE  
grad_querygrad_key
grad_valuer*   r*   r+   meta__flash_attention_backward  s   



rR  cu_seqlens_qcu_seqlens_kmax_seqlen_qcustom_mask_typecausal_diagonalseqlen_kc                 C   s   |  d}|  d}| d}|  d}|  d}| d}tj||||| j| jd}|	r6t|d d nd}tj|||ftj| jd}tjdtjdd}tjdtjdd}||||||fS )	Nr   r   r<  r{   r"  rK  r*   rd   )	r   rA   rl   rH   rf   r\  r&  rD   rt   )r  r   r  r   rS  rT  rU  r  rV  r?  rE  rW  rX  rQ  r@  rI  r'  rA  rB  r  rC  rD  rE  rF  r*   r*   r+   !meta__efficient_attention_forward
  s    





rY  r.  bias_requires_gradnum_splits_keyc                 C   s   t |}t |}t |}|d urE|d}|d dkr |n|d |d  }t| }||d< t j||j|jd}|dd |f }nt jd|jd}||||fS )Nr{   r   r   r"  .r*   r%  )rA   r   r   r   rl   rH   rf   )r  r  r   r  r   rS  rT  rU  r.  r,  r  r5  r6  rV  rZ  rE  r[  rO  rP  rQ  rJ  rK  rL  rI  r*   r*   r+   "meta__efficient_attention_backward5  s   



 r\  r  scale_ascale_bscale_resultuse_fast_accumc                    s8  dd }dd }	dd }
t  dko  dk fdd	 t | d
d	  t |	 j  dd	  t dd dkfdd	 t  dd dko_ dd dk fdd	 t |
joq|
 j fdd	 |d ur|nj}t jd d|jdt jdt j	jdfS )Nc                 S   s   | d | d ko| d dkS re  r*   rf  r*   r*   r+   is_row_majori     z$meta_scaled_mm.<locals>.is_row_majorc                 S   s   |d dko|d | d kS re  r*   )rx   r   r*   r*   r+   is_col_majorl  rb  z$meta_scaled_mm.<locals>.is_col_majorc                 S   s   | t jt jfv S r%   )rA   Zfloat8_e4m3fnZfloat8_e5m2rp   r*   r*   r+   is_fp8_typeo  r-   z#meta_scaled_mm.<locals>.is_fp8_typer   c                      s   d   d    S )Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r|   r*   r  ry   r*   r+   rM   t  r  z meta_scaled_mm.<locals>.<lambda>c                   S   rR   )Nzself must be row_majorr*   r*   r*   r*   r+   rM   x  rT   c                   S   rR   )Nzmat2 must be col_majorr*   r*   r*   r*   r+   rM   |  rT   r   r   r   c                      s   d  d S )NzBExpected self.size(0) to be divisible by 16, but got self.size(1)=r   r   r*   r   r*   r+   rM     rN   c                      ro   )Nz>Expected both dimensions of mat2 to be divisble by 16 but got r  r*   )r  r*   r+   rM     rs   c                      rw  )Nz8Expected both inputs to be fp8 types but got self.dtype=z and mat2.dtype=rp   r*   re  r*   r+   rM     r  r"  r*   )
rA   rP   ri   r   rx   r   rH   rl   rf   rx  )ry   r  r   r   r]  r^  r_  r`  ra  rc  rd  Z
_out_dtyper*   re  r+   meta_scaled_mm^  sB   

"
rf  c                 C   s    t | ||||dd | | jS NT)r  r  ry   ri   rr   r   r   r   r*   r*   r+   meta_scatter_reduce_two  s   ri  c                 C   s   t | ||||dd | S rg  r  rh  r*   r*   r+   meta_scatter_reduce__two  s   rj  c                   sh   t d    k odkn   fdd   dkr&t j|t j jdS t j d|t j jdS )Nr   r   c                      r  )Nz@The probabilty distributions dimensions must be 1 or 2, but got r|   r*   r  r*   r+   rM     rL  z"meta_multinomial.<locals>.<lambda>r   r"  )rA   rP   ri   rl   rt   rf   r   )r   Znum_samplesreplacementr   r*   r  r+   meta_multinomial  s   
rl  c                 C   s   d}| D ]}||9 }q|S r   r*   )vsr  vr*   r*   r+   multiply_integers  s   
ro  c                    s   t tkfdd d  t t k fdd t tdd dd  D o9tdd D fdd d d \}}||gR S )Nc                         d  dt  S )Nz%It is expected output_size equals to , but got size r  r*   )num_spatial_dimsr  r*   r+   rM     r  z'upsample_common_check.<locals>.<lambda>r   c                      rp  )Nz$It is expected input_size equals to rq  r  r*   )expected_input_dimsr  r*   r+   rM     r  c                 s       | ]}|d kV  qdS r   Nr*   r.  r*   r*   r+   rY     r  z(upsample_common_check.<locals>.<genexpr>c                      r[   )NzDInput and output sizes should be greater than 0, but got input size z and output size r*   r*   )r  r  r*   r+   rM     s
    )rA   rP   r   r  )r  r  rr  r  Zchannelsr*   )rs  r  rr  r  r+   upsample_common_check  s   

*rv  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      r  )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r*   r  r*   r+   rM     rL  z$upsample_nearest1d.<locals>.<lambda>rr  r   
rA   rP   rv   ro  r   rv  rw   r   r<   ra  )r   r  scalesfull_output_sizer*   r  r+   upsample_nearest1d     


r{  c           	         s   t   dkpt  dd   fdd t  |dd} |}t } j	\}}}} j
jdkr?|dk r?t j}|j|d	}|S )
Nr   r   c                      r  Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r*   r  r*   r+   rM     rL  z$upsample_nearest2d.<locals>.<lambda>r   rw  r  r  r   )rA   rP   rv   ro  r   rv  rw   r<   ra  rx   rf   r_   r   r   )	r   r  scales_hscales_wrz  r   r   r?   Z
n_channelsr*   r  r+   upsample_nearest2d  s   



r  r  r  r~  r  c                    st   t ||dd tjdkfdd tdD ]t  k fdd q|jt	dS )Nr   rw  r  c                      ro   )NzFExpected grad_output to be a tensor of dimension 4 but got: dimension rl  r*   r  r*   r+   rM     rs   z-upsample_nearest2d_backward.<locals>.<lambda>c                
      s&   d d   d d  S )NzCExpected grad_output to have the same shape as output; output.size(z) = z but got grad_output.size(r   r*   rz  r  r   r*   r+   rM     s   r   )
rv  rA   rP   r   r   r   rw   r   r<   ra  )r  r  r  r~  r  r*   r  r+   upsample_nearest2d_backward  s   

	r  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   r   c                      r  )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r*   r  r*   r+   rM     rL  z$upsample_nearest3d.<locals>.<lambda>r}   rw  r   rx  )r   r  Zscales_dr~  r  rz  r*   r  r+   upsample_nearest3d  r|  r  c           
      C   s   t | t j| t jd}}|d urQ|d urQt|tsJ t|ts$J |j}| }	t||}t||}|||	 |||	 t	||d t	||d ||fS ||fS )Nrp   )r  r  )
rA   r   r   rV   r   rx   r   r   r   r   )
ry   stableri   
descendingr   r   rn  r   r   Z
out_strider*   r*   r+   	meta_sort$  s   	

r  )ri   r  c                C   s   t | |||dd S )N)r  ri   r  r   )r  )ry   r  ri   r  r*   r*   r+   meta_argsort?  s   r  c                    s  t jdkfdd t jjkfdd dd urPt jdkfdd t  kfdd t jjkfdd t jdkfd	d d
   t   k fdd t tfddfD dd  d S )Nr   c                          j  dS Nz != 2rl  r*   input_gatesr*   r+   rM   G  rs   z%rnn_cell_checkSizes.<locals>.<lambda>c                         j  d j  S N != r  r*   )hidden_gatesr  r*   r+   rM   J      r   c                      r  )Nz != 1rl  r*   )
input_biasr*   r+   rM   N  rs   c                      s      d  S r  rd  r*   )
gates_sizer  r*   r+   rM   Q  r  c                      r  r  r  r*   )hidden_biasr  r*   r+   rM   U  r  c                      r  r  rl  r*   )prev_hiddenr*   r+   rM   W  rs   r   c                
      s,      dd d d d  d
S )Nr  r   z * z // z (aka r^   )rv   r   r*   )expected_prev_hidden_numelfactorr  r  r  r*   r+   rM   [  s   , c                 3   s    | ]	}|j  j kV  qd S r%   r%  r5   r  r*   r+   rY   ^  s
    

z&rnn_cell_checkSizes.<locals>.<genexpr>c                   S   rR   )Nz%expected all inputs to be same devicer*   r*   r*   r*   r+   rM   b  rT   )rA   rP   r   rx   r   rv   r  )r  r  r  r  r  r  r*   )r  r  r  r  r  r  r  r  r+   rnn_cell_checkSizesD  s8   





r  c                 C   sL   t | |||d| tj| tjd}tj|tjd}tj|tjd}|||fS )Nr  r   )r  rA   r   r   )r  r  cxr  r  	workspacehycyr*   r*   r+   _thnn_fused_lstm_cell_metaf  s
   
r  c                 C   s(  t |dk}|rt |}|d }| jd }n|
r| jd n| jd }|
r)| jd n| jd }d}|r4dnd}|dkr<|n|}|rG||| g}n|
rP|||| gn|||| g}| |}|	| ||g}|d u rptjd| jd}n||}||	| ||g}|rdnd}| j|tjd}|||||fS )Nr   r   r{   r   r%  rp   )r   rx   rw   rA   rl   rf   r  )r   r   Zweight_stride0Z
weight_bufhxr  r  hidden_sizeZ	proj_size
num_layersbatch_firstZdropouttrainbidirectionalbatch_sizesZdropout_stateZis_input_packed
seq_length
mini_batchZbatch_sizes_sumZnum_directionsZout_sizer   r   Z
cell_shaper  r  Zreserve_shapeZreserver*   r*   r+   
_cudnn_rnnq  s2   

r  c                 C   s   |r| j d n| j d }|r| j d n| j d }|
}|r!|||gn|||g}| |}|d u r8tjd| jd}n||j }|d u rKtjd| jd}n||j }tjd| jtjd}||||fS )Nr   r   r%  r   )rx   rw   rA   rl   rf   r  )r   Zw0Zw1Zw2Zw3hx_Zcx_r   r  r  r  r  
has_biasesr  r  r  r  r  Zoutput_chanelsr   r   r  r  r  r*   r*   r+   mkldnn_rnn_layer  s    
r  c                    sT   | j dkrt dkp dk fdd d S t|  dk fdd d S )Nr   r{   c                      rJ  )Nz4: Expected reduction dim -1 or 0 for scalar but got r*   r*   ri   rT  r*   r+   rM     rL  z'zero_numel_check_dims.<locals>.<lambda>c                      rN  )Nz: Expected reduction dim z to have non-zero size.r*   r*   r  r*   r+   rM     rN   )r   rA   ru   r   )ry   ri   rT  r*   r  r+   zero_numel_check_dims  s   
r  c                    sF   |d urt || }t||  d S t| dk fdd d S )Nr   c                      r  )Nz@: Expected reduction dim to be specified for input.numel() == 0.r*   r*   r  r*   r+   rM     r]  z%check_argmax_argmin.<locals>.<lambda>)r   ri   r  rA   rP   rv   )r@  ry   ri   r*   r  r+   check_argmax_argmin  s   

r  c                 C   sD   t d| | t| j|d ur|fnd }t| ||}| j|tjdS )Nargmaxrp   )r  r<   r  rx   r  rw   rA   r   )ry   ri   r  rP  rx   r*   r*   r+   argmax_argmin_meta  s   r  c                 C   s   t jd||||dS )Nr*   r   r   )rY  rH   re   rf   rg   r*   r*   r+   scalar_tensor  r   r  c                 C   s   t ||  dd}t|dko||  dkr| |ndkdd  |  dkr*dn| |}t|dko8||kdd  t| j}t|dkrL|||< | || j|tj	dfS )	NT)r  r   r   c                   S   rR   )Nzselected index k out of ranger*   r*   r*   r*   r+   rM     rT   ztopk_meta.<locals>.<lambda>c                   S   rR   )Nzk not in range for dimensionr*   r*   r*   r*   r+   rM     rT   rp   )
r   ri   rA   rP   r   r   rx   r   rw   r   )ry   r   ri   ZlargestsortedZ	sliceSizeZtopKSizer*   r*   r+   	topk_meta  s   $
r  c                 C   s   | d ur| n|}t | dkdd  | }| d ur(t |  |kdd  |d ur8t | |kdd  t | |kdd  t | |kdd  t | dkdd  t | |d	 |d
  d kdd  d S )Nr   c                   S   rR   Nr   r*   r*   r*   r*   r+   rM     rT   z(checkLSTMBackwardSizes.<locals>.<lambda>c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   r   r   r  c                   S   rR   r  r*   r*   r*   r*   r+   rM     rT   )rA   rP   ri   r   rv   )grad_hygrad_cyr  r  r  Zdefined_gradZexp_sizer*   r*   r+   checkLSTMBackwardSizes  s   ,r  c           	      C   s`   | d u r
|d u r
dS t | |||| tj|td}tj|td}|r)|jdddnd }|||fS )NNNNr   r   F)r  )r  rA   r   legacy_contiguous_memory_formatr  )	r  r  r  r  r  Zhas_biasZ
grad_gatesZgrad_cxrI  r*   r*   r+   #_thnn_fused_lstm_cell_backward_impl  s   
r  c                 C   sf   d }d }d }|d r| |  }|d s|d r.| |d| df}| |d}|||fS )Nr   r   r   r{   r  )r  r  r  r  r  Zgrad_weightrI  r*   r*   r+   linear_backward+  s   
r  c                    s   t jdkrjd ||  dksJ dj d| dd   fdd	}jd ||  }jd
 | }jd | }g jd d |||R }|}|j| d}|S )Nr   r  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 S   r^  r%   r_  rc  r*   r*   r+   re  >  rf  z,meta_pixel_shuffle.<locals>.is_channels_lastc                      sL    rt dkrtjS tjS jtjdrtjS jtjdr$tjS d S rg  )r  rA   r   rb  r  rh  r*   re  ry   r*   r+   rj  A  s   z.meta_pixel_shuffle.<locals>.pick_memory_formatr<  r{   r   )r   rx   rw   r   )ry   Zupscale_factorrj  r  ZHrZWrr   r   r*   r  r+   meta_pixel_shuffle8  s   & 
r  c                 C   sZ   |  | j}| |j}| |j}| |j}| |j}| |j}|||||||fS r%   rs  )r   Zweight0Zweight1Zweight2Zweight3r  Zcx_tmpr   Zhy_Zcy_Zgrad_output_r_optZgrad_hy_r_optZgrad_cy_r_optr   r  r  r  r  r  r  r  r  r  Zdiff_xZdiff_hxZdiff_cxZdiff_w1Zdiff_w2Zdiff_br*   r*   r+   mkldnn_rnn_layer_backwardV  s   r  )	out_int32r   c                C   s   t j| |rt jnt jd S r  )rA   r   r   r   r   )ry   Z
boundariesr  r   r*   r*   r+   meta_bucketizey  s
   r  c                    sd   t   |dd}t  dkptdd   dd  D  fdd  |jt	 d	S )
Nr   rw  r   c                 s   rt  ru  r*   )r6   r   r*   r*   r+   rY     r  z.meta_upsample_bilinear2d_aa.<locals>.<genexpr>r   c                      r  r}  r   r*   r  r*   r+   rM     rL  z-meta_upsample_bilinear2d_aa.<locals>.<lambda>r   )
rv  r   rA   rP   rv   r  rw   r   r<   ra  )r   r  r  r~  r  rz  r*   r  r+   meta_upsample_bilinear2d_aa  s   
(

r  c                 C   s\   t | dkdd  t | dkdd  t |jjdd  t |jjdd  d S )Nr   c                   S   rR   )Nz%found_inf must be a 1-element tensor.r*   r*   r*   r*   r+   rM     rT   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>c                   S   rR   )Nz%inv_scale must be a 1-element tensor.r*   r*   r*   r*   r+   rM     rT   c                   S   rR   )Nz!found_inf must be a float tensor.r*   r*   r*   r*   r+   rM     rT   c                   S   rR   )Nz!inv_scale must be a float tensor.r*   r*   r*   r*   r+   rM     rT   )rA   rP   rv   rH   r   )ry   r6  Z	inv_scaler*   r*   r+   *_amp_foreach_non_finite_check_and_unscale_  s   r  c                 C   s   t |  }| |S r%   )r   r   rw   )ry   nanZposinfZneginfr   r*   r*   r+   
nan_to_num  s   
r  c                 C   s   | j tjtjtjtjhvsJ d| j  d| j}t||}t||}||kr)| S t| 	 }t| 
 }|| || ||< ||< || || ||< ||< | || | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)re   rA   r  Z
sparse_cscr  Z
sparse_bscr   r   r   r   r   r   )ry   Zdim0r9  ndimsr   r   r*   r*   r+   r    s&   

r  c                 C   sz   | j }| jr"|  }|  }|dkr|dks!J d| d| dn|  dks0J d| dt| d|dk r:dS dS )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is rO  r   )r   r  r  r  ri   r  )ry   r  r  r  r*   r*   r+   t_  s   
r  )r  r   sidesorterc                C   s@   |rt jnt j}t|t jrt j||d S t jd|| jdS )Nrp   r*   r"  )	rA   r   r   rV   r   r   r   rl   rf   )Zsorted_sequencery   r  r   r  r  rH   r*   r*   r+   meta_searchsorted  s   r  r   c                 C   s4   t | dkdd  t|tjd\}}t j||dS )Nr   c                   S   rR   )Nz,polygamma(n, x) does not support negative n.r*   r*   r*   r*   r+   rM     rT   z meta_polygamma.<locals>.<lambda>r  rp   )rA   rP   r   r   r  r   )r   ry   r?   r9   r*   r*   r+   meta_polygamma  s   
r  c                 C      t | t dd }|S )Nc                 S   r  r  r@   r   r  r   r*   r*   r+   _f  s   z)_create_unary_float_meta_func.<locals>._fr2   r   funcr  r*   r*   r+   _create_unary_float_meta_func     r  c                 C   r  )Nc                 S   s   t | |tjdS r  r  )r7   r  r*   r*   r+   r    s   z*_create_binary_float_meta_func.<locals>._fr  r  r*   r*   r+   _create_binary_float_meta_func  r  r  c                  C   s4  i } dD ]}t | }|D ]}|| vr|| | |< qq|  D ]y\}}t|tjjr*qt|ts1J |tjj	j
| tj| drR|t d v rQt| dq|jrVq| dv r]qd| v rjt|| qd| v rwt|| qd| v rt|| qd	| v rt|| qt|| qd S )
N)rd   Zpost_autogradZpre_autogradZCompositeImplicitAutogradrd   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   zaten::rot90zaten::constant_pad_ndzaten::as_strided_scatterzaten::clonezaten::copy_zaten::empty_stridedzaten::_to_copyzmkldnn::zmkl::zonednn::zquantized::)r   itemsrV   rA   Z_opsZHigherOrderOperatorr   Zpy_impl_CZDispatchKeyr#   Z%_dispatch_has_kernel_for_dispatch_keyr@  r   Zis_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)Zactivate_meta_tabler_   registryZopoZop_overloadr)   r*   r*   r+   activate_meta  sJ   	r  r   )NN)NNNFr  )TrE  )r   )rZ  T)FF)TT)r  )FTN)TFF)TF)r   )r@  N)r   rR  r%   )r*   r   r  F)r*   r   FTN)Fr   FNFr{   )NF)r{   F)NNNNN)r   NNr   )NNF)r  FFN)r  FN)FN)FNNN)NNNNNF)Nr{   FNN)NNNN)r{   TT(o  r\  enumr   	functoolsr   typingr   r   r   r   r   rA   Ztorch._prims_commonr`  r<   r	   r
   r   Ztorch._decompr   r   r   r   Z
torch._opsr   Ztorch._primsr   r   r   r   r   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   r   r  r   r    Ztorch.utilsr!   r.   r  r"   ZlibraryLibraryr  r2   r@   rI   rQ   ZlinspaceZlogspacer  rn   Ztaker   r   rz   r   r   ZcummaxZcumminr   r   r   Z_fft_c2cr   Z_fft_r2cr   ZrandpermZgenerator_outr   rt   r   randintr   r   r   Zrandr   Z_fft_c2rr   r  r   r   Z
unsqueeze_r   Z_sparse_semi_structured_linearrR  r   Z_cslt_sparse_mmrH   rD  r   Zindex_reducer  r   Zindex_reduce_r   Zindex_selectr   Zsegment_reducer  r,  Z	unary_outr  ri   r  r+  r  r  r  r  r  Z_assert_asyncr   msgr#  Z_make_dep_tokenr&  r/  Z_functional_sym_constrain_ranger5  r8  Z(_functional_sym_constrain_range_for_sizer9  Z_functional_assert_asyncr:  r   rG  r   rP  rS  rW  r_  Z_linalg_eighr`  re  rh  ri  rm  ro  rr  ru  r  Zlinalg_inv_exr  Zlinalg_ldl_factor_exr  Zlinalg_ldl_solver  Z	linalg_lur  Zlinalg_lu_factor_exr  Zlinalg_lu_solver  Z	lu_unpackr  r  Z	linalg_qrr  r  r  Z_linalg_svdr  r  rn  r  r  Zlinalg_solve_triangularr  r  r  Z_linalg_detr  r  r  r  Zreflection_pad1dr  Zreplication_pad1dr  r   Zreflection_pad1d_backwardr  Zreplication_pad1d_backwardr  r  Zreflection_pad2dr  Zreplication_pad2dr  Zreflection_pad2d_backwardr  Zreplication_pad2d_backwardr  r!  Zreflection_pad3dr#  Zreplication_pad3dr$  Zreflection_pad3d_backwardZreplication_pad3d_backwardr&  Z_pdist_forwardrD   r(  Z_pdist_backwardr+  Zbaddbmmr<  Z	bernoullir?  Z
bernoulli_rB  r'  rC  Z_fused_moving_avg_obs_fq_helperrG  mmrM  r  r  r]  re  Zconvolutionrl  r  Z_has_mkldnnr  rm  Z_convolution_pointwiserq  Z_linear_pointwisert  Zhas_mklr  ru  Z_mkl_linearrv  r  rw  Zqconv2d_pointwiser  Zqlinear_pointwiser  r  r  Z
max_pool2dr  r  Z
avg_pool2dr  r  Zavg_pool2d_backwardr  Z
avg_pool3dr  Zavg_pool3d_backwardr  Z_adaptive_avg_pool2dr  Z_adaptive_avg_pool3dr  Z_adaptive_avg_pool2d_backwardr  Z_adaptive_avg_pool3d_backwardr  r  Zadaptive_max_pool2dr  r  r  Zadaptive_max_pool3dr  r  r  Zrepeat_interleaver  rW   r  r  rr   Z_unsafe_indexr  Zconvolution_backwardr  Zaddbmmr  r  Z_foreach_absZ_foreach_acosZ_foreach_asinZ_foreach_atanZ_foreach_ceilZ_foreach_cosZ_foreach_coshZ_foreach_erfZ_foreach_erfcZ_foreach_expZ_foreach_expm1Z_foreach_fracZ_foreach_floorZ_foreach_lgammaZ_foreach_logZ_foreach_log10Z_foreach_log1pZ_foreach_log2Z_foreach_negZ_foreach_reciprocalZ_foreach_roundZ_foreach_sigmoidZ_foreach_signZ_foreach_sinZ_foreach_sinhZ_foreach_sqrtZ_foreach_tanZ_foreach_tanhZ_foreach_truncZ_foreach_zeroZ_foreach_addZ_foreach_subZ_foreach_mulZ_foreach_divZ_foreach_clamp_minZ_foreach_clamp_maxZ_foreach_lerpr  Z_foreach_abs_Z_foreach_acos_Z_foreach_asin_Z_foreach_atan_Z_foreach_ceil_Z_foreach_cos_Z_foreach_cosh_Z_foreach_erf_Z_foreach_erfc_Z_foreach_exp_Z_foreach_expm1_Z_foreach_frac_Z_foreach_floor_Z_foreach_lgamma_Z_foreach_log_Z_foreach_log10_Z_foreach_log1p_Z_foreach_log2_Z_foreach_neg_Z_foreach_reciprocal_Z_foreach_round_Z_foreach_sigmoid_Z_foreach_sign_Z_foreach_sin_Z_foreach_sinh_Z_foreach_sqrt_Z_foreach_tan_Z_foreach_tanh_Z_foreach_trunc_Z_foreach_zero_Z_foreach_add_Z_foreach_sub_Z_foreach_mul_Z_foreach_div_Z_foreach_clamp_min_Z_foreach_clamp_max_Z_foreach_lerp_Z_foreach_copy_r  Z_foreach_powZScalarAndTensorr  r   Z_foreach_maximumZ_foreach_minimumr!  Z_foreach_maximum_Z_foreach_minimum_r"  Z_foreach_addcdivZScalarZ_foreach_addcmulr1  Z_foreach_addcdiv_Z_foreach_addcmul_r3  r4  Z_fused_adam_rD  Z_fused_adamrF  Z_int_mmrG  Z_convert_weight_to_int4packrL  Z_weight_int4pack_mmrM  Z_cdist_forwardrZ  Z_cdist_backwardr`  Z_embedding_bagrr  Z_embedding_bag_forward_onlyrs  ru  Znansumrv  ZmedianZ	nanmedianrw  Z
dim_valuesr  r   ry  Zlogical_not_rz  repeatr|  Zzero_r}  Zmul_Zdiv_Zlogical_and_Zlogical_or_Zlogical_xor_r  Zadd_Zsub_r  roundZdecimalsr  r  
__rshift__r  
__lshift__r  zeror  r  r  fillr  Zrelu_r  Z	index_putZ_unsafe_index_putr  Zmasked_fill_r  Zmasked_scatter_r  Zmasked_scatterr  Zmasked_scatter_backwardr  Z
index_put_r  aliasr  r  Zbmmr  r  r  r  r  r  r  r  r  Z max_pool2d_with_indices_backwardr  Zmax_pool2d_with_indicesr  Zmax_unpool2dr  r  Zmax_unpool3dr  Zmax_pool3d_with_indicesr  Z max_pool3d_with_indices_backwardr  r  r  r  Zgrid_sampler_2d_backwardr  r  r  r  r^  r  r  Zselect_scatterr  Zslice_scatterr  r   r  r  gatherr   r  r  r  r  r  Zscatter_addr  Zscatter_add_r  r  r   r  r   Zvalue_reducer  Zscatter_r  Z#_scaled_dot_product_flash_attentionr0  Z,_scaled_dot_product_flash_attention_backwardr=  Z'_scaled_dot_product_efficient_attentionrG  Z0_scaled_dot_product_efficient_attention_backwardrM  Z_flash_attention_forwardrN  Z_flash_attention_backwardrR  Z_efficient_attention_forwardrY  Z_efficient_attention_backwardr\  Z
_scaled_mmrf  Zscatter_reducetwoZtwo_outri  Zscatter_reduce_rj  Zmultinomialrl  ro  rv  r{  Z_upsample_nearest_exact1dr  Z_upsample_nearest_exact2dr  Z"_upsample_nearest_exact2d_backwardZSymIntr  Z_upsample_nearest_exact3dr   r  Zvalues_stabler  Zargsortr  r  Z_thnn_fused_lstm_cellr  r  r  r  r  r  Zargminr  r  Ztopkr  r   r  r  r  r  Zpixel_shuffler  r  Z	bucketizeZ
Tensor_outr  Z_upsample_bilinear2d_aar  r  r  r  r  Zsearchsortedr  Z	polygammar  r  r  Zspecial_airy_aiZspecial_bessel_y0Zspecial_bessel_y1Zspecial_modified_bessel_i0Zspecial_modified_bessel_i1Zspecial_modified_bessel_k0Zspecial_modified_bessel_k1Z!special_scaled_modified_bessel_k0Z!special_scaled_modified_bessel_k1Zspecial_chebyshev_polynomial_tZspecial_chebyshev_polynomial_uZspecial_hermite_polynomial_hZspecial_hermite_polynomial_heZspecial_laguerre_polynomial_lZtorch._refs.nn.functionalZtorch._refs.specialr  r*   r*   r*   r+   <module>   s   $

	8	6








	

!



#
	

	









	

	



)


"

2
&
*
7
(
"
%


	
;

/
Z&5 ?'$,



e
	
'
"M,
H
TN


.

*(c 	
 !"#$%)*	
 !"#$%&*




$
#h	










!
T	
]>	
6G+
!7
/

ge( 

	,$/	








	U	
3'	
7	
5	
	
&	
$1



"
7'
"


"	

C