o
    IÆ&iK^  ã                   @   sˆ  U d dl Z d dlZd dlmZmZmZ d dlZdZdZdZ	dZ
e j d¡dkZdZdZdZe j dd¡dkZe j d¡dkZdZdZdZe j d	d
¡dkZe j dd¡ZdZdZdZdZdZdZdZdZdZ dZ!dZ"i i i i i i dœZ#ee$ee$ef f e%d< i Z&ee$ee$ef f e%d< dZ'e j dd¡dkZ(dZ)dZ*dZ+dZ,g d¢Z-dZ.dZ/dZ0e j d¡dkZ1e j d¡dkZ2e j d¡dkZ3e j dd¡ 4¡ Z5dZ6e j d¡dkZ7e j d¡dkZ8e j d¡dkZ9e j d¡dkZ:e j d¡dkZ;e j d ¡dkZ<e=e j d!d¡ƒZ>e j d"d¡dkZ?e j d#d
¡dkZ@e j d$d¡dkZAe j d%¡dkZBd&ZCd'ZDd(ZEdZFdZGdZHe j d)¡dkZIe j d*¡dkZJe j d+d,¡ZKd-ZLd&ZMd(ZNdZOdZPdZQe j d.d
¡dkZRdZSdZTdZUd/d0„ ZVdZWdZXd1ejYv pyd2ejYv ZZeVƒ peZZ[d3Z\d4d5„ Z]e]ƒ Z^eVƒ rºd d6l_m`Z` zear§e` be jc dea ed7e jf¡d8¡¡Zgne` bd8¡ZgW n ehy¹   dZgY nw dZgd9Zie j d:d¡dkZje j d;d
¡dkZkdZldZmdZndZoe j d<d,¡Zpepd,kZqepdkrêd,nepZre j d=d¡ZsdZte j d>d
¡dkZueve%d?< dZweve%d@< G dAdB„ dBƒZxG dCdD„ dDƒZyG dEdF„ dFƒZzG dGdH„ dHƒZ{G dIdJ„ dJƒZ|dKhZ}er5d dLl~T d dMlm€Z€ e€eje‚ ƒ dS )Né    N)ÚAnyÚDictÚTYPE_CHECKINGFTZTORCHINDUCTOR_FX_GRAPH_CACHEÚ1ZTORCHINDUCTOR_SIZE_ASSERTSZTORCHINDUCTOR_NAN_ASSERTSZTORCHINDUCTOR_MEMORY_PLANNINGÚ0ZTORCHINDUCTOR_MEMORY_POOLZintermediates)Zbatch_linearZbatch_linear_lhsZbatch_layernormZ
batch_tanhZ
batch_reluZbatch_sigmoidÚpre_grad_fusion_optionsÚpost_grad_fusion_optionsZ"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCK)Zreorder_compute_for_overlapZ
sink_waitsZraise_commsÚdefaulti,  é   ZTORCHINDUCTOR_MAX_AUTOTUNEZ$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISEZTORCHINDUCTOR_MAX_AUTOTUNE_GEMMZ(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITONi    Z#TORCHINDUCTOR_SEARCH_AUTOTUNE_CACHEZTORCHINDUCTOR_SAVE_ARGSZ!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCZ#TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICEZ'TORCHINDUCTOR_COORDINATE_DESCENT_TUNINGZ5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONSZ'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUSZ!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONZTORCHINDUCTOR_FORCE_LAYOUT_OPTZ TORCHINDUCTOR_KEEP_OUTPUT_STRIDEZTORCHINDUCTOR_WARN_MIX_LAYOUTé   iÐ  é   ZTORCHINDUCTOR_DEBUG_FUSIONZTORCHINDUCTOR_BENCHMARK_FUSIONZ#TORCHINDUCTOR_ENABLED_METRIC_TABLESÚ é@   ZTORCHINDUCTOR_BENCHMARK_KERNELc                   C   s   t tjdƒ S )NZgit_version)ÚhasattrÚtorchÚversion© r   r   úAC:\wamp64\www\opt\env\Lib\site-packages\torch/_inductor/config.pyÚ	is_fbcode  s   r   ÚdevÚgitÚforkc                  C   s^   dt jv rtt jd ƒS tjdkstƒ rdS tt dƒr"tt  d¡ƒnt  	¡ } | s*J ‚t
d| ƒS )a8  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform or it's a fbcode build
    3. decide by the number of CPU cores
    ZTORCHINDUCTOR_COMPILE_THREADSÚwin32é   Úsched_getaffinityr   é    )ÚosÚenvironÚintÚsysÚplatformr   r   Úlenr   Ú	cpu_countÚmin)r"   r   r   r   Údecide_compile_threads$  s   
ÿý
r$   )ÚparutilÚ.zfb/cacheé
   ZTORCHINDUCTOR_SHAPE_PADDINGZTORCHINDUCTOR_PERMUTE_FUSIONZTORCHINDUCTOR_PROFILEZTORCHINDUCTOR_PROFILE_OUTPUTZTORCHINDUCTOR_FREEZINGÚfreezingÚfreezing_discard_parametersc                   @   sV   e Zd ZdZdZdZdZdZdej	 
dd¡fZdZdZdZdZdZdZd	ZdZdZdS )
ÚcppéÿÿÿÿTFNi   ZCXXzg++Úoriginal_atené   )Ú__name__Ú
__module__Ú__qualname__ÚthreadsZno_redundant_loopsZdynamic_threadsZsimdlenZmin_chunk_sizer   r   ÚgetZcxxZenable_kernel_profileZweight_prepackÚinject_relu_bug_TESTING_ONLYZinject_log1p_bug_TESTING_ONLYZ
vec_isa_okÚdescriptive_namesZmax_horizontal_fusion_sizeZfallback_scatter_reduce_sumZenable_unsafe_math_opt_flagr   r   r   r   r*   {  s$    ú
r*   c                   @   s   e Zd ZU dZdZdZdZdZdZdZ	dZ
dZdZdZdZdZdZej d¡dkZdZej dd¡dkZdZdd	d	d
œZdZdZeed< dZdS )ÚtritonFTé   Z!TORCHINDUCTOR_UNIQUE_KERNEL_NAMESr   r,   Z#TORCHINDUCTOR_PERSISTENT_REDUCTIONSi   i   )ÚXÚYÚZr-   Úspill_thresholdN)r.   r/   r0   Z
cudagraphsZcudagraph_treesZslow_path_cudagraph_assertsZ!cudagraph_trees_history_recordingZfast_path_cudagraph_assertsZskip_cudagraph_warmupZdebug_sync_graphZdebug_sync_kernelZdense_indexingZ	max_tilesZautotune_pointwiseZautotune_cublasLtZ tiling_prevents_pointwise_fusionZ tiling_prevents_reduction_fusionr   r   r2   Zunique_kernel_namesr4   Zpersistent_reductionsZdivisible_by_16Z	max_blockZstore_cubinr:   r   Ú__annotations__r3   r   r   r   r   r5   ³  s0   
 ÿr5   c                   @   s0   e Zd ZdZej dd¡dkZeƒ Z	dZ
dZdS )Úaot_inductorr   ZAOT_INDUCTOR_DEBUG_COMPILEr   r   N)r.   r/   r0   Zoutput_pathr   r   r2   Zdebug_compiler   Zabi_compatibleZserialized_in_specZserialized_out_specr   r   r   r   r<     s    r<   c                
   @   sh   e Zd ZU dZdZdZdZdZdZdZ	e
j de
j e
j e
j ej¡d¡¡¡ZdZdZdZeed< dS )ÚcudaNz-O1FZTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/Úcutlass_only_evt_capable_ops)r.   r/   r0   Úarchr   Zcompile_opt_levelZenable_cuda_ltoZenable_ptxas_infoZenable_debug_infoZuse_fast_mathr   r   r2   ÚpathÚabspathÚjoinÚdirnamer   Ú__file__Zcutlass_dirZcutlass_max_profiling_configsZcuda_cxxr>   Úboolr;   r   r   r   r   r=   !  s"   
 ÿþ
r=   c                   @   sx   e Zd Zej dd¡dkZdZdZdZ	dZ
dZdZdZdZej dd¡dkZej dd¡dkZej d	d¡ZdZdZdS )
ÚtraceZTORCH_COMPILE_DEBUGr   r   NFTZINDUCTOR_POST_FUSION_SVGZINDUCTOR_ORIG_FX_SVGZINDUCTOR_DOT_GRAPH_SHAPE_SVG)r.   r/   r0   r   r   r2   ÚenabledZ	debug_dirÚ	debug_logZinfo_logZfx_graphZfx_graph_transformedZir_pre_fusionZir_post_fusionZoutput_codeZgraph_diagramZdraw_orig_fx_graphZdot_graph_shapeZcompile_profileZ
upload_tarr   r   r   r   rF   X  s    
rF   ztrace.upload_tar)Ú*)Úinstall_config_module)ƒr   r   Útypingr   r   r   r   ÚdebugZdebug_check_inf_and_nanZdisable_progressZverbose_progressr   r2   Zfx_graph_cacheZcpp_wrapperZdceZstatic_weight_shapesZsize_assertsZnan_assertsZpick_loop_ordersZinplace_buffersZallow_buffer_reuseZmemory_planningZmemory_poolZbenchmark_harnessZepilogue_fusionZepilogue_fusion_firstZpattern_matcherZpost_grad_custom_pre_passZpost_grad_custom_post_passZpre_grad_custom_passZsplit_cat_fx_passesZ efficient_conv_bn_eval_fx_passesZgroup_fusionZbatch_fusionr   Ústrr;   r   Zreorder_for_localityZdynamic_scale_rblockZforce_fuse_int_mm_with_mulZuse_mixed_mmZforce_mixed_mmZ reorder_for_compute_comm_overlapZ'reorder_for_compute_comm_overlap_passesZestimate_op_runtimeZintra_node_bwZinter_node_bwZmax_autotuneZmax_autotune_pointwiseZmax_autotune_gemmÚupperZmax_autotune_gemm_backendsZunbacked_symint_fallbackZsearch_autotune_cacheZ	save_argsZautotune_in_subprocZautotune_multi_deviceZcoordinate_descent_tuningZ'coordinate_descent_check_all_directionsr   Z coordinate_descent_search_radiusZlayout_optimizationZforce_layout_optimizationZkeep_output_strideZwarn_mix_layoutZrealize_reads_thresholdZrealize_bytes_thresholdZrealize_acc_reads_thresholdZfallback_randomZimplicit_fallbacksZaggressive_fusionZdebug_fusionZbenchmark_fusionZenabled_metric_tablesZmax_fusion_sizeZmax_pointwise_cat_inputsZunroll_reductions_thresholdZcomment_originZconv_1x1_as_mmZsplit_reductionsZbenchmark_kernelZconstant_and_index_propagationZalways_keep_tensor_constantsZassert_indirect_indexingr   Zjoint_graph_constant_foldingZdebug_index_assertsÚ__version__Zis_nightly_or_sourceZdeveloper_warningsZworker_start_methodr$   Zcompile_threadsZlibfb.pyr%   Ú__package__Zget_dir_pathr@   rB   ÚreplaceÚsepZglobal_cache_dirÚ
ValueErrorZkernel_name_max_opsZshape_paddingZpermute_fusionZprofiler_mark_wrapper_callZgenerate_intermediate_hooksZdebug_ir_tracebackZ_raise_error_for_testingZ_profile_varZprofile_bandwidthZprofile_bandwidth_regexZprofile_bandwidth_outputZdisable_cpp_codegenr(   rE   r)   r*   r5   r<   r=   rF   Z_save_config_ignoreZtorch.utils._config_typingZtorch.utils._config_modulerJ   Úmodulesr.   r   r   r   r   Ú<module>   s  
 úÿþÿÿÿÿ
€ÿ8Y77þ