o
    <Æ&i—-  ã                   @   sÎ   d dl Z d dlZd dlmZ ddlmZ ddlmZ ddl	m
Z ddl	mZ d d	l mZmZ d d
lmZmZmZ g d¢ZG dd„ deƒZG dd„ deƒZeeee ef ZG dd„ deƒZG dd„ deƒZdS )é    N)Ú	Parameteré   )ÚModule)ÚCrossMapLRN2dé   )Ú
functional)Úinit)ÚTensorÚSize)ÚUnionÚListÚTuple)ÚLocalResponseNormr   Ú	LayerNormÚ	GroupNormc                       s|   e Zd ZU dZg d¢Zeed< eed< eed< eed< ddedededed
df
‡ fdd„Zde	d
e	fdd„Z
dd„ Z‡  ZS )r   a‹  Applies local response normalization over an input signal.

    The input signal is composed of several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    .. math::
        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

    Args:
        size: amount of neighbouring channels used for normalization
        alpha: multiplicative factor. Default: 0.0001
        beta: exponent. Default: 0.75
        k: additive factor. Default: 1

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> lrn = nn.LocalResponseNorm(2)
        >>> signal_2d = torch.randn(32, 5, 24, 24)
        >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
        >>> output_2d = lrn(signal_2d)
        >>> output_4d = lrn(signal_4d)

    )ÚsizeÚalphaÚbetaÚkr   r   r   r   ç-Cëâ6?ç      è?ç      ð?ÚreturnNc                    ó&   t ƒ  ¡  || _|| _|| _|| _d S ©N©ÚsuperÚ__init__r   r   r   r   ©Úselfr   r   r   r   ©Ú	__class__© úIC:\wamp64\www\opt\env\Lib\site-packages\torch/nn/modules/normalization.pyr   2   ó
   

zLocalResponseNorm.__init__Úinputc                 C   ó   t  || j| j| j| j¡S r   )ÚFZlocal_response_normr   r   r   r   ©r   r%   r"   r"   r#   Úforward9   ó   ÿzLocalResponseNorm.forwardc                 C   ó   dj di | j¤ŽS ©Nz){size}, alpha={alpha}, beta={beta}, k={k}r"   ©ÚformatÚ__dict__©r   r"   r"   r#   Ú
extra_repr=   ó   zLocalResponseNorm.extra_repr)r   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ú__constants__ÚintÚ__annotations__Úfloatr   r	   r)   r1   Ú__classcell__r"   r"   r    r#   r      s   
 $r   c                       sv   e Zd ZU eed< eed< eed< eed< ddededededd	f
‡ fd
d„Zdedefdd„Zde	fdd„Z
‡  ZS )r   r   r   r   r   r   r   r   r   Nc                    r   r   r   r   r    r"   r#   r   G   r$   zCrossMapLRN2d.__init__r%   c                 C   r&   r   )Ú_cross_map_lrn2dÚapplyr   r   r   r   r(   r"   r"   r#   r)   N   r*   zCrossMapLRN2d.forwardc                 C   r+   r,   r-   r0   r"   r"   r#   r1   R   r2   zCrossMapLRN2d.extra_repr)r   r   r   )r3   r4   r5   r8   r9   r:   r   r	   r)   Ústrr1   r;   r"   r"   r    r#   r   A   s   
 $r   c                       s   e Zd ZU dZg d¢Zeedf ed< eed< e	ed< 			dde
dede	d
e	dd	f
‡ fdd„Zddd„Zdedefdd„Zdefdd„Z‡  ZS )r   aÆ  Applies Layer Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated over the last `D` dimensions, where `D`
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over
    the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``).
    :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
    :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    .. note::
        Unlike Batch Normalization and Instance Normalization, which applies
        scalar scale and bias for each entire channel/plane with the
        :attr:`affine` option, Layer Normalization applies per-element scale and
        bias with :attr:`elementwise_affine`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.
        bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
            :attr:`elementwise_affine` is ``True``). Default: ``True``.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 1.
        bias:   the learnable bias of the module of shape
                :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
                The values are initialized to 0.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> # NLP Example
        >>> batch, sentence_length, embedding_dim = 20, 5, 10
        >>> embedding = torch.randn(batch, sentence_length, embedding_dim)
        >>> layer_norm = nn.LayerNorm(embedding_dim)
        >>> # Activate module
        >>> layer_norm(embedding)
        >>>
        >>> # Image Example
        >>> N, C, H, W = 20, 5, 10, 10
        >>> input = torch.randn(N, C, H, W)
        >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
        >>> # as shown in the image below
        >>> layer_norm = nn.LayerNorm([C, H, W])
        >>> output = layer_norm(input)

    .. image:: ../_static/img/nn/layer_norm.jpg
        :scale: 50 %

    )Únormalized_shapeÚepsÚelementwise_affine.r?   r@   rA   çñhãˆµøä>TNÚbiasr   c                    s®   ||dœ}t ƒ  ¡  t|tjƒr|f}t|ƒ| _|| _|| _| jrEt	t
j| jfi |¤Žƒ| _|r>t	t
j| jfi |¤Žƒ| _n|  dd ¡ n|  dd ¡ |  dd ¡ |  ¡  d S )N©ÚdeviceÚdtyperC   Úweight)r   r   Ú
isinstanceÚnumbersÚIntegralÚtupler?   r@   rA   r   ÚtorchÚemptyrG   rC   Úregister_parameterÚreset_parameters)r   r?   r@   rA   rC   rE   rF   Úfactory_kwargsr    r"   r#   r   ¬   s   


zLayerNorm.__init__c                 C   s4   | j rt | j¡ | jd urt | j¡ d S d S d S r   )rA   r   Úones_rG   rC   Úzeros_r0   r"   r"   r#   rO   Â   s   
ýzLayerNorm.reset_parametersr%   c                 C   r&   r   )r'   Z
layer_normr?   rG   rC   r@   r(   r"   r"   r#   r)   È   ó   ÿzLayerNorm.forwardc                 C   r+   )NzF{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}r"   r-   r0   r"   r"   r#   r1   Ì   ó
   ÿÿzLayerNorm.extra_repr)rB   TTNN©r   N)r3   r4   r5   r6   r7   r   r8   r9   r:   ÚboolÚ_shape_tr   rO   r	   r)   r>   r1   r;   r"   r"   r    r#   r   Y   s    
 Mÿÿÿ
r   c                       s   e Zd ZU dZg d¢Zeed< eed< eed< eed< 			ddedededed
d	f
‡ fdd„Z	ddd„Z
ded
efdd„Zd
efdd„Z‡  ZS )r   aÉ  Applies Group Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The input channels are separated into :attr:`num_groups` groups, each containing
    ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
    :attr:`num_groups`. The mean and standard-deviation are calculated
    separately over the each group. :math:`\gamma` and :math:`\beta` are learnable
    per-channel affine transform parameter vectors of size :attr:`num_channels` if
    :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into
        num_channels (int): number of channels expected in input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 6, 10, 10)
        >>> # Separate 6 channels into 3 groups
        >>> m = nn.GroupNorm(3, 6)
        >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
        >>> m = nn.GroupNorm(6, 6)
        >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
        >>> m = nn.GroupNorm(1, 6)
        >>> # Activating the module
        >>> output = m(input)
    )Ú
num_groupsÚnum_channelsr@   ÚaffinerX   rY   r@   rZ   rB   TNr   c                    sœ   ||dœ}t ƒ  ¡  || dkrtdƒ‚|| _|| _|| _|| _| jr<ttj	|fi |¤Žƒ| _
ttj	|fi |¤Žƒ| _n|  dd ¡ |  dd ¡ |  ¡  d S )NrD   r   z,num_channels must be divisible by num_groupsrG   rC   )r   r   Ú
ValueErrorrX   rY   r@   rZ   r   rL   rM   rG   rC   rN   rO   )r   rX   rY   r@   rZ   rE   rF   rP   r    r"   r#   r     s   

zGroupNorm.__init__c                 C   s&   | j rt | j¡ t | j¡ d S d S r   )rZ   r   rQ   rG   rR   rC   r0   r"   r"   r#   rO     s   þzGroupNorm.reset_parametersr%   c                 C   r&   r   )r'   Z
group_normrX   rG   rC   r@   r(   r"   r"   r#   r)     rS   zGroupNorm.forwardc                 C   r+   )Nz8{num_groups}, {num_channels}, eps={eps}, affine={affine}r"   r-   r0   r"   r"   r#   r1   "  rT   zGroupNorm.extra_repr)rB   TNNrU   )r3   r4   r5   r6   r7   r8   r9   r:   rV   r   rO   r	   r)   r>   r1   r;   r"   r"   r    r#   r   Ñ   s   
 -ÿÿ
r   )rL   rI   Ztorch.nn.parameterr   Úmoduler   Z
_functionsr   r<   Ú r   r'   r   r	   r
   Útypingr   r   r   Ú__all__r   r8   rW   r   r   r"   r"   r"   r#   Ú<module>   s    3x