o
    b&ii                     @   s   d dl mZ d dlT d dlmZ d dlmZmZ d dlZ	d dl
Z
d dlZd dlZdZddeeee	jf dee fd	d
Zdd Zdd ZdddZdS )    )binary_dilation)*)Path)OptionalUnionNi  fpath_or_wav	source_src                 C   sb   t | ts
t | trtjt| dd\}}n| }|dur$tj||td}t|tdd}t	|}|S )a  
    Applies preprocessing operations to a waveform either on disk or in memory such that  
    The waveform will be resampled to match the data hyperparameters.

    :param fpath_or_wav: either a filepath to an audio file (many extensions are supported, not 
    just .wav), either the waveform as a numpy array of floats.
    :param source_sr: if passing an audio waveform, the sampling rate of the waveform before 
    preprocessing. After preprocessing, the waveform'speaker sampling rate will match the data 
    hyperparameters. If passing a filepath, the sampling rate will be automatically detected and 
    this argument will be ignored.
    N)sr)Zorig_srZ	target_srT)increase_only)

isinstancestrr   librosaloadZresamplesampling_ratenormalize_volumeZaudio_norm_target_dBFStrim_long_silences)r   r   wav r   <C:\wamp64\www\opt\env\Lib\site-packages\resemblyzer/audio.pypreprocess_wav   s   r   c                 C   s<   t jj| tttt d ttt d td}|t	j
jS )z
    Derives a mel spectrogram ready to be used by the encoder from a preprocessed audio waveform.
    Note: this not a log-mel spectrogram.
      )yr	   Zn_fftZ
hop_lengthZn_mels)r   featureZmelspectrogramr   intZmel_window_lengthZmel_window_stepZmel_n_channelsastypenpZfloat32T)r   framesr   r   r   wav_to_mel_spectrogram*   s   r   c           	      C   s   t t d }| dt| t| |   } tjdt|  gt| t tj	R  }g }t
jdd}tdt| |D ]}|| }||j||d |d  td q:t|}d	d
 }||t}t|t}t|ttd }t||}| |dk S )a+  
    Ensures that segments without voice in the waveform remain no longer than a 
    threshold determined by the VAD parameters in params.py.

    :param wav: the raw waveform as a numpy array of floats 
    :return: the same waveform with silences trimmed away (length <= original wav length)
    r   Nz%dh   )moder      )Zsample_ratec                 S   sl   t t |d d | t |d f}t j|td}||d  |d |   ||d < ||d d  | S )N   r!   )Zdtype)r   ZconcatenateZzerosZcumsumfloat)arraywidthZarray_paddedretr   r   r   moving_averageT   s   ("z*trim_long_silences.<locals>.moving_averager"   T)Zvad_window_lengthr   lenstructpackr   round	int16_maxr   Zint16	webrtcvadZVadrangeappendZ	is_speechr$   Zvad_moving_average_widthboolr   ZonesZvad_max_silence_lengthrepeat)	r   Zsamples_per_windowZpcm_waveZvoice_flagsZvadZwindow_startZ
window_endr'   Z
audio_maskr   r   r   r   9   s"   	,


r   Fc                 C   sn   |r|rt dtt| t d }dt|t  }|| }|dk r'|s-|dkr/|r/| S | d|d   S )Nz,Both increase only and decrease only are setr!      r   
   )
ValueErrorr   sqrtmeanr,   log10)r   Ztarget_dBFSr
   Zdecrease_onlyZrmsZ	wave_dBFSZdBFS_changer   r   r   r   d   s   r   )N)FF)Zscipy.ndimage.morphologyr   Zresemblyzer.hparamspathlibr   typingr   r   numpyr   r-   r   r)   r,   r   Zndarrayr   r   r   r   r   r   r   r   r   <module>   s    $+