o
    Z&iT                     @   s  d Z ddlmZ ddlmZmZ ddlmZ ddlZ	ddl
Z
ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZ ddgddgddgddgddgddggZg dZddgddgddgddgddgddggZg dZg dZdd Ze
j de!e	j"gdd Z#dd Z$dd Z%dd Z&dd Z'd d! Z(e
j de!e	j"gd"d# Z)d$d% Z*e
j d&g d'd(d) Z+d*d+ Z,dS ),zG
Testing for export functions of decision trees (sklearn.tree.export).
    )StringIO)finditersearch)dedentN)RandomState)is_classifier)GradientBoostingClassifier)NotFittedError)DecisionTreeClassifierDecisionTreeRegressorexport_graphvizexport_text	plot_tree      )r   r   r   r   r   r      )r   r   r         ?r   r   )r   r   r   r   r   r   c               
   C   s  t ddddd} | tt t| d d}d}||ksJ t| ddgd d	}d
}||ks-J t| ddgd d	}d}||ks>J t| ddgd d}d}||ksOJ t| ddgd d}d}||ks`J t| dddddd dd}d}||kstJ t| ddd d}d}||ksJ t| ddd dd}d}||ksJ t ddddd} | jtttd} t| ddd d }d!}||ksJ tddd"dd} | tt t| ddd dddd#}d$}||ksJ t dd%} | tt t| dd d&}d'}d S )(Nr   r   gini	max_depthmin_samples_split	criterionrandom_stateout_filea  digraph Tree {
node [shape=box, fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="x[0] <= 0.0\ngini = 0.5\nsamples = 6\nvalue = [3, 3]"] ;
1 [label="gini = 0.0\nsamples = 3\nvalue = [3, 0]"] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label="gini = 0.0\nsamples = 3\nvalue = [0, 3]"] ;
0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
}feature0feature1feature_namesr     digraph Tree {
node [shape=box, fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="feature0 <= 0.0\ngini = 0.5\nsamples = 6\nvalue = [3, 3]"] ;
1 [label="gini = 0.0\nsamples = 3\nvalue = [3, 0]"] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label="gini = 0.0\nsamples = 3\nvalue = [0, 3]"] ;
0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
}z
feature"0"z
feature"1"a  digraph Tree {
node [shape=box, fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="feature\"0\" <= 0.0\ngini = 0.5\nsamples = 6\nvalue = [3, 3]"] ;
1 [label="gini = 0.0\nsamples = 3\nvalue = [3, 0]"] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label="gini = 0.0\nsamples = 3\nvalue = [0, 3]"] ;
0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
}yesnoclass_namesr     digraph Tree {
node [shape=box, fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="x[0] <= 0.0\ngini = 0.5\nsamples = 6\nvalue = [3, 3]\nclass = yes"] ;
1 [label="gini = 0.0\nsamples = 3\nvalue = [3, 0]\nclass = yes"] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label="gini = 0.0\nsamples = 3\nvalue = [0, 3]\nclass = no"] ;
0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
}z"yes"z"no"a  digraph Tree {
node [shape=box, fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="x[0] <= 0.0\ngini = 0.5\nsamples = 6\nvalue = [3, 3]\nclass = \"yes\""] ;
1 [label="gini = 0.0\nsamples = 3\nvalue = [3, 0]\nclass = \"yes\""] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label="gini = 0.0\nsamples = 3\nvalue = [0, 3]\nclass = \"no\""] ;
0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
}TFsans)filledimpurity
proportionZspecial_charactersroundedr   fontnamea  digraph Tree {
node [shape=box, style="filled, rounded", color="black", fontname="sans"] ;
edge [fontname="sans"] ;
0 [label=<x<SUB>0</SUB> &le; 0.0<br/>samples = 100.0%<br/>value = [0.5, 0.5]>, fillcolor="#ffffff"] ;
1 [label=<samples = 50.0%<br/>value = [1.0, 0.0]>, fillcolor="#e58139"] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label=<samples = 50.0%<br/>value = [0.0, 1.0]>, fillcolor="#399de5"] ;
0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
}r   )r   r%   r   zdigraph Tree {
node [shape=box, fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="x[0] <= 0.0\ngini = 0.5\nsamples = 6\nvalue = [3, 3]\nclass = y[0]"] ;
1 [label="(...)"] ;
0 -> 1 ;
2 [label="(...)"] ;
0 -> 2 ;
})r   r(   r   Znode_idsa;  digraph Tree {
node [shape=box, style="filled", color="black", fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="node #0\nx[0] <= 0.0\ngini = 0.5\nsamples = 6\nvalue = [3, 3]", fillcolor="#ffffff"] ;
1 [label="(...)", fillcolor="#C0C0C0"] ;
0 -> 1 ;
2 [label="(...)", fillcolor="#C0C0C0"] ;
0 -> 2 ;
})Zsample_weight)r(   r)   r   a  digraph Tree {
node [shape=box, style="filled", color="black", fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="x[0] <= 0.0\nsamples = 6\nvalue = [[3.0, 1.5, 0.0]\n[3.0, 1.0, 0.5]]", fillcolor="#ffffff"] ;
1 [label="samples = 3\nvalue = [[3, 0, 0]\n[3, 0, 0]]", fillcolor="#e58139"] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label="x[0] <= 1.5\nsamples = 3\nvalue = [[0.0, 1.5, 0.0]\n[0.0, 1.0, 0.5]]", fillcolor="#f1bd97"] ;
0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
3 [label="samples = 2\nvalue = [[0, 1, 0]\n[0, 1, 0]]", fillcolor="#e58139"] ;
2 -> 3 ;
4 [label="samples = 1\nvalue = [[0.0, 0.5, 0.0]\n[0.0, 0.0, 0.5]]", fillcolor="#e58139"] ;
2 -> 4 ;
}Zsquared_error)r(   Zleaves_parallelr   rotater+   r,   aT  digraph Tree {
node [shape=box, style="filled, rounded", color="black", fontname="sans"] ;
graph [ranksep=equally, splines=polyline] ;
edge [fontname="sans"] ;
rankdir=LR ;
0 [label="x[0] <= 0.0\nsquared_error = 1.0\nsamples = 6\nvalue = 0.0", fillcolor="#f2c09c"] ;
1 [label="squared_error = 0.0\nsamples = 3\nvalue = -1.0", fillcolor="#ffffff"] ;
0 -> 1 [labeldistance=2.5, labelangle=-45, headlabel="True"] ;
2 [label="squared_error = 0.0\nsamples = 3\nvalue = 1.0", fillcolor="#e58139"] ;
0 -> 2 [labeldistance=2.5, labelangle=45, headlabel="False"] ;
{rank=same ; 0} ;
{rank=same ; 1; 2} ;
}r   )r(   r   zdigraph Tree {
node [shape=box, style="filled", color="black", fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label="gini = 0.0\nsamples = 6\nvalue = 6.0", fillcolor="#ffffff"] ;
})	r
   fitXyr   y2wr   
y_degraded)clf	contents1	contents2 r8   IC:\wamp64\www\opt\env\Lib\site-packages\sklearn/tree/tests/test_export.pytest_graphviz_toy    s   




r:   constructorc                 C   sl   t ddddd}|tt t|| ddgd d}d}||ks!J t|| d	d
gd d}d}||ks4J d S )Nr   r   r   r   r   r   r   r!   r"   r#   r$   r&   )r
   r/   r0   r1   r   )r;   r5   r6   r7   r8   r8   r9   /test_graphviz_feature_class_names_array_support-  s    r<   c                  C   s`  t ddd} t }tt t| | W d    n1 sw   Y  | tt d}tjt	|d t| d dgd W d    n1 sEw   Y  d}tjt	|d t| d g d	d W d    n1 sgw   Y  d
}tjt
|d t| ttj W d    n1 sw   Y  t }tt t| |g d W d    d S 1 sw   Y  d S )Nr   r   )r   r   z?Length of feature_names, 1 does not match number of features, 2matchar    z?Length of feature_names, 3 does not match number of features, 2)r?   bczis not an estimator instancer%   )r
   r   pytestraisesr	   r   r/   r0   r1   
ValueError	TypeErrorZtree_
IndexError)r5   outmessager8   r8   r9   test_graphviz_errorsc  s,   "rK   c                  C   s   t ddd} | tt t }t| |d tddd} | tt | jD ]
}t|d |d q$td|	 D ]
}d|
 v s@J q6d S )Nfriedman_mser   )r   r   r   r   )Zn_estimatorsr   z\[.*?samples.*?\])r   r/   r0   r1   r   r   r   Zestimators_r   getvaluegroup)r5   dot_dataZ	estimatorfindingr8   r8   r9   test_friedman_mse_in_graphviz  s   
rQ   c            	      C   s4  t d} t d}t| d|df| d|jdddftdd	d
dtd
d	dfD ]l\}}}||| dD ]^}t|d |dd}td|D ]}t	t
d|  |d
 ksZJ qGt|rbd}nd}t||D ]}t	t
d|  |d
 ks|J qitd|D ]}t	t
d|  |d
 ksJ qq8q+d S )Nr      )   r   )     )rS   )rT   )sizerL   r   r   )r   r   r   r   r   )rU   r   T)r   	precisionr*   zvalue = \d+\.\d+z\.\d+zgini = \d+\.\d+zfriedman_mse = \d+\.\d+z<= \d+\.\d+)r   zipZrandom_samplerandintr   r
   r/   r   r   lenr   rN   r   )	Zrng_regZrng_clfr0   r1   r5   rX   rO   rP   patternr8   r8   r9   test_precision  s8   

$$$r]   c                  C   s   t ddd} | tt d}tjt|d t| dgd W d    n1 s'w   Y  d}tjt|d t| dgd	 W d    d S 1 sHw   Y  d S )
Nr   r   rW   z,feature_names must contain 2 elements, got 1r=   r?   r@   zWhen `class_names` is an array, it should contain as many items as `decision_tree.classes_`. Got 1 while the tree was fitted with 2 classes.rC   )r
   r/   r0   r1   rD   rE   rF   r   )r5   err_msgr8   r8   r9   test_export_text_errors  s   "r_   c                  C   s"  t ddd} | tt td }t| |ksJ t| dd|ks$J t| dd|ks.J td }t| dd	|ks>J td
 }t| dd|ksNJ ddgddgddgddgddgddgddgg}g d}t ddd} | || td }t| dd|ksJ ddgddgddgddgddgddgg}ddgddgddgddgddgddgg}tddd}||| td }t|dd|ksJ t|ddd|ksJ dgdgdgdgdgdgg}tddd}||| td }t|ddgd|ksJ t|dddgd|ksJ d S )Nr   r   rW   zh
    |--- feature_1 <= 0.00
    |   |--- class: -1
    |--- feature_1 >  0.00
    |   |--- class: 1
    r.   
   z
    |--- feature_1 <= 0.00
    |   |--- weights: [3.00, 0.00] class: -1
    |--- feature_1 >  0.00
    |   |--- weights: [0.00, 3.00] class: 1
    T)show_weightsz\
    |- feature_1 <= 0.00
    | |- class: -1
    |- feature_1 >  0.00
    | |- class: 1
    r   )spacingr   r   )r   r   r   r   r   r   r   rU   z{
    |--- feature_1 <= 0.00
    |   |--- class: -1
    |--- feature_1 >  0.00
    |   |--- truncated branch of depth 2
    zy
    |--- feature_1 <= 0.0
    |   |--- value: [-1.0, -1.0]
    |--- feature_1 >  0.0
    |   |--- value: [1.0, 1.0]
    )decimals)rc   ra   zq
    |--- first <= 0.0
    |   |--- value: [-1.0, -1.0]
    |--- first >  0.0
    |   |--- value: [1.0, 1.0]
    first)rc   r    )rc   ra   r    )r
   r/   r0   r1   r   lstripr   r   )r5   expected_reportZX_lZy_lZX_moZy_moregZX_singler8   r8   r9   test_export_text  sn   	.((rh   c                 C   sl   t ddd}|tt td }t|| ddgd|ks J td }t|| d	d
gd|ks4J d S )Nr   r   rW   zX
    |--- b <= 0.00
    |   |--- class: -1
    |--- b >  0.00
    |   |--- class: 1
    r?   rA   r@   zk
    |--- feature_1 <= 0.00
    |   |--- class: cat
    |--- feature_1 >  0.00
    |   |--- class: dog
    catdogrC   )r
   r/   r0   r1   r   re   r   )r;   r5   rf   r8   r8   r9   2test_export_text_feature_class_names_array_support$  s    rk   c                 C   s   t ddddd}|tt ddg}t||d}t|dks J |d	  d
ks*J |d  dks4J |d  dks>J |d  dksHJ |d  dksRJ d S )Nr   r   Zentropyr   
first featsepal_widthr@   rS   r   z:first feat <= 0.0
entropy = 1.0
samples = 6
value = [3, 3]r   z(entropy = 0.0
samples = 3
value = [3, 0]True  z(entropy = 0.0
samples = 3
value = [0, 3]rU     False)r
   r/   r0   r1   r   r[   get_text)pyplotr5   r    nodesr8   r8   r9   test_plot_tree_entropy@  s   
rs   fontsize)Nr`      c                    s   t ddddd}|tt ddg}t|| d}t|dks!J  d ur2t fd	d
|D s2J |d  dks<J |d  dksFJ |d  dksPJ |d  dksZJ |d  dksdJ d S )Nr   r   r   r   rl   rm   )r    rt   rS   c                 3   s    | ]	}|   kV  qd S N)Zget_fontsize).0nodert   r8   r9   	<genexpr>g  s    z&test_plot_tree_gini.<locals>.<genexpr>r   z7first feat <= 0.0
gini = 0.5
samples = 6
value = [3, 3]r   z%gini = 0.0
samples = 3
value = [3, 0]rn   z%gini = 0.0
samples = 3
value = [0, 3]rU   ro   )r
   r/   r0   r1   r   r[   allrp   )rq   rt   r5   r    rr   r8   ry   r9   test_plot_tree_giniV  s&   
r|   c                 C   s>   t  }tt t| W d    d S 1 sw   Y  d S rv   )r   rD   rE   r	   r   )rq   r5   r8   r8   r9   test_not_fitted_treer  s   
"r}   )-__doc__ior   rer   r   textwrapr   numpynprD   Znumpy.randomr   Zsklearn.baser   Zsklearn.ensembler   Zsklearn.exceptionsr	   Zsklearn.treer
   r   r   r   r   r0   r1   r2   r3   r4   r:   markZparametrizelistarrayr<   rK   rQ   r]   r_   rh   rk   rs   r|   r}   r8   r8   r8   r9   <module>   s@    (	(  
5 *Y

