import torch
from torch import Tensor

from .optimizer import (
    Optimizer,
    _use_grad_for_differentiable,
    _default_to_fused_or_foreach,
    _differentiable_doc,
    _foreach_doc,
    _maximize_doc,
    _view_as_real,
)
from typing import List, Optional

__all__ = ["Rprop", "rprop"]


class Rprop(Optimizer):
    def __init__(
        self,
        params,
        lr=1e-2,
        etas=(0.5, 1.2),
        step_sizes=(1e-6, 50),
        *,
        foreach: Optional[bool] = None,
        maximize: bool = False,
        differentiable: bool = False,
    ):
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 < etas[0] < 1.0 < etas[1]:
            raise ValueError(f"Invalid eta values: {etas[0]}, {etas[1]}")

        defaults = dict(
            lr=lr,
            etas=etas,
            step_sizes=step_sizes,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
zRprop.__setstate__c           
      C   s  d}|d D ]x}|j d u rq|t|O }|| |j }|jr$td|| | j| }	t|	dkrhd|	d< tj|tj	d|	d< |j
jrZ| |t|d |d |	d	< n| ||d |	d	< ||	d  ||	d	  |	d  d
7  < q|S )NFr   z'Rprop does not support sparse gradientsr   stepZmemory_formatprevr   	step_sizer   )gradtorch
is_complexappendZ	is_sparseRuntimeErrorr'   lenZ
zeros_likepreserve_formatZdtypenewZ
resize_as_Zfill_complex)
r   r(   r   gradsprevsr   has_complexpr-   r'   r"   r"   r#   _init_group,   s6   




zRprop._init_groupc                 C   s   d}|durt   | }W d   n1 sw   Y  | jD ]9}g }g }g }g }|d \}}	|d \}
}|d }|d }| |||||}t|||||
|||	|||d |d q |S )zPerforms a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r   )step_size_minstep_size_maxetaminusetaplusr   r   r   r8   )r.   Zenable_gradr%   r:   r   )r   closureZlossr(   r   r6   r7   r   r=   r>   r;   r<   r   r   r8   r"   r"   r#   r)   S   s<   

z
Rprop.step)r   r   r   )N)__name__


Rprop.__doc__ = r"""Implements the resilient backpropagation algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \theta_0 \in \mathbf{R}^d \text{ (params)},f(\theta)
                \text{ (objective)},                                                             \\
            &\hspace{13mm}      \eta_{+/-} \text{ (etaplus, etaminus)}, \Gamma_{max/min}
                \text{ (step sizes)}                                                             \\
            &\textbf{initialize} :   g^0_{prev} \leftarrow 0,
                \: \eta_0 \leftarrow \text{lr (learning rate)}                                   \\
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \textbf{for} \text{  } i = 0, 1, \ldots, d-1 \: \mathbf{do}            \\
            &\hspace{10mm}  \textbf{if} \:   g^i_{prev} g^i_t  > 0                               \\
            &\hspace{15mm}  \eta^i_t \leftarrow \mathrm{min}(\eta^i_{t-1} \eta_{+},
                \Gamma_{max})                                                                    \\
            &\hspace{10mm}  \textbf{else if}  \:  g^i_{prev} g^i_t < 0                           \\
            &\hspace{15mm}  \eta^i_t \leftarrow \mathrm{max}(\eta^i_{t-1} \eta_{-},
                \Gamma_{min})                                                                    \\
            &\hspace{15mm}  g^i_t \leftarrow 0                                                   \\
            &\hspace{10mm}  \textbf{else}  \:                                                    \\
            &\hspace{15mm}  \eta^i_t \leftarrow \eta^i_{t-1}                                     \\
            &\hspace{5mm}\theta_t \leftarrow \theta_{t-1}- \eta_t \mathrm{sign}(g_t)             \\
            &\hspace{5mm}g_{prev} \leftarrow  g_t                                                \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to the paper
    `A Direct Adaptive Method for Faster Backpropagation Learning: The RPROP Algorithm
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.1417>`_.
    """ + rf"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        etas (Tuple[float, float], optional): pair of (etaminus, etaplus), that
            are multiplicative increase and decrease factors
            (default: (0.5, 1.2))
        step_sizes (Tuple[float, float], optional): a pair of minimal and
            maximal allowed step sizes (default: (1e-6, 50))
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}

    """


def rprop(
    params: List[Tensor],
    grads: List[Tensor],
    prevs: List[Tensor],
    step_sizes: List[Tensor],
    # kwonly args with defaults are not supported by functions compiled with
    # torchscript, so these stay as keyword arguments with defaults
    foreach: Optional[bool] = None,
    maximize: bool = False,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
):
    r"""Functional API that performs rprop algorithm computation.

    See :class:`~torch.optim.Rprop` for details.
    """
    if foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")

    if foreach and not torch.jit.is_scripting():
        func = _multi_tensor_rprop
    else:
        func = _single_tensor_rprop

    func(
        params,
        grads,
        prevs,
        step_sizes,
        step_size_min=step_size_min,
        step_size_max=step_size_max,
        etaminus=etaminus,
        etaplus=etaplus,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
    )


def _single_tensor_rprop(
    params: List[Tensor],
    grads: List[Tensor],
    prevs: List[Tensor],
    step_sizes: List[Tensor],
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    for i, param in enumerate(params):
        grad = grads[i]
        grad = grad if not maximize else -grad
        prev = prevs[i]
        step_size = step_sizes[i]

        if torch.is_complex(param):
            grad = torch.view_as_real(grad)
            prev = torch.view_as_real(prev)
            param = torch.view_as_real(param)
            step_size = torch.view_as_real(step_size)
        if differentiable:
            sign = grad.mul(prev.clone()).sign()
        else:
            sign = grad.mul(prev).sign()
        sign[sign.gt(0)] = etaplus
        sign[sign.lt(0)] = etaminus
        sign[sign.eq(0)] = 1

        # update step sizes with step size updates
        step_size.mul_(sign).clamp_(step_size_min, step_size_max)

        # for dir<0, dfdx=0
        # for dir>=0 dfdx=dfdx
        grad = grad.clone(memory_format=torch.preserve_format)
        grad[sign.eq(etaminus)] = 0

        # update parameters
        param.addcmul_(grad.sign(), step_size, value=-1)
        prev.copy_(grad)
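

# Worked example of the element-wise rule implemented above (numbers illustrative):
# with etaplus=1.2 and etaminus=0.5, an element whose previous gradient was 0.3 and
# whose current gradient is 0.2 has prev * grad > 0, so its step size grows by 1.2x
# and the parameter moves by -sign(grad) * step_size. If the next gradient flips sign,
# prev * grad < 0: the step size shrinks by 0.5x, that gradient entry is zeroed so no
# step is taken this iteration, and the zeroed value is stored as prev, which routes
# the following iteration into the eq(0) branch that leaves the step size unchanged.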


def _multi_tensor_rprop(
    params: List[Tensor],
    grads: List[Tensor],
    prevs: List[Tensor],
    step_sizes: List[Tensor],
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    if len(params) == 0:
        return

    assert not differentiable, "_foreach ops don't support autograd"

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, prevs, step_sizes]
    )
    for (
        grouped_params,
        grouped_grads,
        grouped_prevs,
        grouped_step_sizes,
    ), _ in grouped_tensors.values():
        # Handle complex params
        if has_complex:
            _view_as_real(
                grouped_params, grouped_grads, grouped_prevs, grouped_step_sizes
            )

        signs = torch._foreach_mul(grouped_grads, grouped_prevs)
        if maximize:
            torch._foreach_neg_(signs)

        # At this point, signs contains the new grads multiplied by the previous
        # grads, so we can use that to set the new prevs to the new grads
        torch._foreach_copy_(grouped_prevs, grouped_grads)
        if maximize:
            torch._foreach_neg_(grouped_prevs)

        torch._foreach_sign_(signs)
        for sign in signs:
            sign[sign.gt(0)] = etaplus
            sign[sign.lt(0)] = etaminus
            sign[sign.eq(0)] = 1

        # update step sizes with step size updates
        torch._foreach_mul_(grouped_step_sizes, signs)
        for step_size in grouped_step_sizes:
            step_size.clamp_(step_size_min, step_size_max)

        # Zero out the gradient entries whose sign flipped; grouped_prevs now holds
        # the (possibly negated) new grads, so mask it in place.
        grouped_prevs = list(grouped_prevs)
        for i in range(len(grouped_prevs)):
            grouped_prevs[i][signs[i].eq(etaminus)] = 0

        # explicitly del signs as it's not used after this to save memory
        del signs

        # update parameters
        grad_signs = [grad.sign() for grad in grouped_prevs]
        torch._foreach_addcmul_(
            grouped_params, grad_signs, grouped_step_sizes, value=-1
        )
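

# A hypothetical consistency check (illustrative, not part of the module): the foreach
# and single-tensor paths are meant to produce identical updates, so a quick sanity
# test could compare them directly on matching inputs:
#
#     kwargs = dict(step_size_min=1e-6, step_size_max=50.0, etaminus=0.5, etaplus=1.2,
#                   maximize=False, differentiable=False, has_complex=False)
#     p1 = torch.randn(5)
#     p2 = p1.clone()
#     g = torch.randn(5)
#     _single_tensor_rprop([p1], [g.clone()], [torch.zeros(5)],
#                          [torch.full((5,), 1e-2)], **kwargs)
#     _multi_tensor_rprop([p2], [g.clone()], [torch.zeros(5)],
#                         [torch.full((5,), 1e-2)], **kwargs)
#     assert torch.allclose(p1, p2)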