o
    H&i,'                     @   sR  d dl mZmZ d dlZd dlm  mZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZ dZejedZ ejeddZ!ejeddZ"ejeddZ#dd Z$d%ddZ%dej&fddZ'dej&fddZ(dd Z)dd Z*dd  Z+d!d" Z,e+ Z-e, Z.d#d$ Z/dS )&    )OptionalTupleN)_prims)DispatchKey)autograd_not_implemented)HigherOrderOperator)CUDARngStateHelpermake_contiguous_strides_for)backwards_not_supported)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_device_dtyperngprimsZDEFZIMPLZCompositeExplicitAutogradAutogradZMetac                 C   s"   t d| j d| j d| j d)Nz"You are trying to functionalize a z RNG operator but zE does not use Philox/counter-based RNG. Therefore, functionalizing a zo RNG operator is not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU.)RuntimeErrortype)device r   AC:\wamp64\www\opt\env\Lib\site-packages\torch/_prims/rng_prims.pythrow_on_non_cuda   s
   r   c           	      C   s   t | t| | t| | ttjjj	| }|j
}|r!||_t| t| ||fD ]}||_tjjj|_||_||_||_q-d S N)rngprimZdefinerngprim_implimplrngprim_meta_implgetattrtorchZ_opsopsr   defaultZ_tagsrngprim_autograd_implr
   __doc__Z_prims_commonZRETURN_TYPEZNEWreturn_typeschema	impl_atenZprim_meta_impl)	namer%   r&   	impl_metadoctagsZprim_packetZprimpr   r   r   register_rng_prim&   s   
r,   shapec                 C   s   t tjdtjdS )Nr   dtype)r   Z
TensorLiker   Ztensorint64)r-   r   r   r   philox_rand_offset_meta=   s   r1   c                 C   s   d}| D ]}||9 }qt j|t jd}d}d}d}t jt j }|j| }|| d | }	t|	|j| }	|d ||	 |  d | }
|
S )N   r.         )	r   Zscalar_tensorr0   cudaZget_device_propertiesZcurrent_deviceZmax_threads_per_multi_processorminZmulti_processor_count)r-   Znumel_scalarZdim_sizeZnumel
block_sizeZunrollZcurand4_engine_callsZdevice_propertyZblocks_per_smZ	grid_sizeoffsetr   r   r   philox_rand_offsetC   s   

r9   c                  C   s   d} d}dt jdt jdt jdtttdf  dtd	tfd
d}dt jdt jdt jdtttdf  dtd	tfdd}t| |||dt j	j
fd d S )NZphilox_randzphilox_rand(SymInt[] size, Tensor seed, Tensor offset, int[]? stride, Device? device=None, ScalarType? dtype=None) -> (Tensor, Tensor)r-   seedr8   stride.r   r/   c                 S   s6   |d u sJ t | }tj| |||d}t| }||fS )N)r-   stridesr/   r   )r	   r   Z
TensorMetar1   )r-   r:   r8   r;   r   r/   random_valuesr   r   r   _philox_rand_meta_   s   	z/register_philox_rand.<locals>._philox_rand_metac                 S   s   |d u sJ |j dkrg }n|g}|j dkrt|tj| t|| tj| ||d}W d    n1 s9w   Y  |t| fS )Ncpur5   )r   r/   )	r   r   r   randomZfork_rngr   Zset_torch_state_tensorZrandr9   )r-   r:   r8   r;   r   r/   devicesr=   r   r   r   _philox_randp   s   	

z*register_philox_rand.<locals>._philox_randz$Philox based stateless rand operator)r'   r%   r&   r(   r)   r*   )r   SizeTensorr   r   intr   r   r,   TagZnondeterministic_seeded)r'   r%   r>   rB   r   r   r   register_philox_rand[   sH   


rG   c                 C   sl   | dr| d}t|trt|}|jS dd | D }tdd |D r)dS tdd |D r4dS d S )	Nr   c                 S   s    h | ]}t |tjr|jjqS r   )
isinstancer   rD   r   r   ).0argr   r   r   	<setcomp>   s     zget_device.<locals>.<setcomp>c                 s       | ]}|d kV  qdS )r5   Nr   rI   devr   r   r   	<genexpr>       zget_device.<locals>.<genexpr>r5   c                 s   rL   )r?   Nr   rM   r   r   r   rO      rP   r?   )getrH   strr   r   r   any)argskwargsr   rA   r   r   r   
get_device   s   



rV   c                     s   t dtjtdd tjdd tjdd tjfdd	 t fd
d} t	 fdd}S )Nrun_and_save_rng_stateTZdeferred_errorc                 _   s   t j | |i |fS r   )r   r5   get_rng_stateoprT   rU   r   r   r   	impl_cuda   s   z5register_run_and_save_rng_state_op.<locals>.impl_cudac                 _   s   t  | |i |fS r   )r   rY   rZ   r   r   r   impl_cpu   s   z4register_run_and_save_rng_state_op.<locals>.impl_cpuc                    sH    d}t ||}||v sJ d| || }|| g|R i |S N)r5   r?   zBackend not supported for rV   )r[   rT   rU   impl_mapr   r   r]   r\   r   r   impl_backend_select   s
   

z?register_run_and_save_rng_state_op.<locals>.impl_backend_selectc                    s>   |   |g|R i |W  d    S 1 sw   Y  d S r   r   )moder[   rT   rU   )rb   r   r   impl_fake_tensor_mode   s   $zAregister_run_and_save_rng_state_op.<locals>.impl_fake_tensor_modec                    s~   | j r4 |g|R i |}t| jj|g|R }t| jj|}| jd||}t||d | jdS |g|R i |S Ncall_function)Zconstanttracer)enable_tracingpytreetree_maprg   unwrap_proxycreate_proxyr   )rc   r[   rT   rU   out
proxy_argsproxy_kwargs	out_proxy)rb   rW   r   r   impl_proxy_dispatch_mode   s   zDregister_run_and_save_rng_state_op.<locals>.impl_proxy_dispatch_mode)
r   py_implr   r   r   CUDACPUBackendSelectr   r   )rd   rq   r   )rb   r]   r\   rW   r   "register_run_and_save_rng_state_op   s   






rv   c                     s   t dtjtdd tjdd tjdd  tfdd	} tj fd
d}t	dd }S )Nrun_with_rng_stateTrX   c                 _   s8   t j }t j|   ||i |}t j| |S r   )r   r5   rY   set_rng_stater?   	rng_stater[   rT   rU   Zcurrent_staterm   r   r   r   r\      s
   
z1register_run_with_rng_state_op.<locals>.impl_cudac                 _   s.   t  }t |  ||i |}t | |S r   )r   rY   rx   ry   r   r   r   r]      s
   

z0register_run_with_rng_state_op.<locals>.impl_cpuc           	         s   | j rIt   ||g|R i |}W d    n1 sw   Y  t| jj||g|R }t| jj|}| jd ||}t||d | jdS  ||g|R i |S re   )rh   r   ri   rj   rg   rk   rl   r   )	rc   rz   r[   rT   rU   rm   rn   ro   rp   )rw   r   r   rq      s   z@register_run_with_rng_state_op.<locals>.impl_proxy_dispatch_modec                    sJ    d}t ||}||v sJ d| || }|| |g|R i |S r^   r_   )rz   r[   rT   rU   r`   r   r   ra   r   r   rb      s
   

z;register_run_with_rng_state_op.<locals>.impl_backend_selectc                 _   s6   |  ||i |W  d    S 1 sw   Y  d S r   r   )rc   rz   r[   rT   rU   r   r   r   rd      s   $z=register_run_with_rng_state_op.<locals>.impl_fake_tensor_mode)
r   rr   r   r   r   rs   rt   r   ru   r   )rq   rb   rd   r   )r]   r\   rw   r   register_run_with_rng_state_op   s   







r{   c                   C   s
   t   d S r   )rG   r   r   r   r   register_rng_prims  s   
r|   r   )0typingr   r   r   Ztorch.utils._pytreeutilsZ_pytreeri   r   Ztorch._Cr   Ztorch._higher_order_ops.utilsr   Z
torch._opsr   Ztorch._prims_commonr   r	   Ztorch._prims_common.wrappersr
   Ztorch._subclasses.fake_tensorr   Z"torch.fx.experimental.proxy_tensorr   r   r   Ztorch.typesr   r   Zrngprim_namespaceZlibraryLibraryr   r   r"   r   r   r,   rC   r1   r9   rG   rV   rv   r{   rW   rw   r|   r   r   r   r   <module>   sD    


7-9