import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import VarianceThreshold
# Load a 20k-row sample of the Santander customer-satisfaction training data
# and carve out a stratified 80/20 train/test split.
data = pd.read_csv(
    '/content/drive/My Drive/kaggle/Learning_mutual_information/data/santander-train.csv',
    nrows=20000,
)
data.head()
y = data['TARGET']
x = data.drop('TARGET', axis=1)
print(x.shape, y.shape)
# stratify=y keeps the (imbalanced) TARGET ratio equal in both splits.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0, stratify=y
)
# Zero-variance filter: columns that are constant carry no information.
# https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html
constant_filter = VarianceThreshold(threshold=0)
constant_filter.fit(x_train)
# Inverted support mask: True for each column the filter REMOVED.
constant_list = [not kept for kept in constant_filter.get_support()]
x_train_filter = constant_filter.transform(x_train)
x_test_filter = constant_filter.transform(x_test)
x_train_filter.shape, x_test_filter.shape, x_train.shape
# Quasi-constant filter: also drop features whose variance is below 1%.
quasi_constant_filter = VarianceThreshold(threshold=0.01)
quasi_constant_filter.fit(x_train_filter)
# NOTE: transform() returns plain numpy arrays, not DataFrames.
x_train_quasi_filter = quasi_constant_filter.transform(x_train_filter)
x_test_quasi_filter = quasi_constant_filter.transform(x_test_filter)
x_train_quasi_filter.shape, x_test_quasi_filter.shape
# Duplicate-feature removal: transpose so identical COLUMNS become identical
# ROWS (which DataFrame.duplicated can detect), then transpose back.
x_train_T = pd.DataFrame(x_train_quasi_filter.T)
x_test_T = pd.DataFrame(x_test_quasi_filter.T)
duplicated_features = x_train_T.duplicated()
# Keep the first occurrence of each distinct feature row.
features_to_keep = [not dup for dup in duplicated_features]
x_train_unique = x_train_T[features_to_keep].T
x_test_unique = x_test_T[features_to_keep].T
x_train_unique.shape, x_train.shape
# Pairwise correlation of the surviving features, visualized as a heatmap.
corrmat = x_train_unique.corr()
plt.figure(figsize=(12,8))
sns.heatmap(corrmat)
def get_correlation(data, threshold):
    """Return the set of column names correlated too strongly with an earlier column.

    Walks the lower triangle of the correlation matrix (j < i, so each
    unordered pair is visited exactly once); whenever |corr(col_i, col_j)|
    exceeds ``threshold`` the *later* column (col_i) is flagged, meaning the
    first column of each correlated pair is the one kept.

    Parameters
    ----------
    data : pd.DataFrame
        Numeric feature matrix.
    threshold : float
        Absolute-correlation cutoff (e.g. 0.85).

    Returns
    -------
    set
        Column labels to drop.
    """
    corr_col = set()
    corrmat = data.corr()
    for i in range(len(corrmat.columns)):
        for j in range(i):
            if abs(corrmat.iloc[i, j]) > threshold:
                corr_col.add(corrmat.columns[i])
    return corr_col
# Drop one member of every feature pair whose |correlation| exceeds 0.85.
corr_features = get_correlation(x_train_unique, 0.85)
corr_features
list(corr_features)[:10]
len(corr_features)
x_train_uncorr = x_train_unique.drop(labels=corr_features, axis=1)
x_test_uncorr = x_test_unique.drop(labels=corr_features, axis=1)
x_train_uncorr.shape, x_test_uncorr.shape
def run_randomForest(x_train, x_test, y_train, y_test):
    """Fit a 100-tree random forest and print + return its test-set accuracy.

    Parameters
    ----------
    x_train, x_test : array-like
        Train / test feature matrices.
    y_train, y_test : array-like
        Train / test labels.

    Returns
    -------
    float
        Accuracy on the test split (also printed). Returning the score is
        backward compatible: existing callers simply ignore it.
    """
    clf = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=-1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print('Accuracy on test set: ')
    acc = accuracy_score(y_test, y_pred)
    print(acc)
    return acc
# IPython cell magic: time the model on the correlation-filtered feature set.
%%time
run_randomForest(x_train_uncorr, x_test_uncorr, y_train, y_test)
# Baseline: same model on the full, unfiltered feature set for comparison.
%%time
run_randomForest(x_train, x_test, y_train, y_test)
corrmat
# Flatten the correlation matrix into (feature_i, feature_j) -> |corr| pairs.
corrdata = corrmat.abs().stack()
corrdata
corrdata = corrdata.sort_values(ascending=False)
corrdata
# Keep only strong correlations, excluding the trivial self-correlations of 1.0.
corrdata = corrdata[(corrdata > 0.85) & (corrdata < 1)]
corrdata
corrdata = pd.DataFrame(corrdata).reset_index()
corrdata.columns = ['features1', 'feature2', 'corr_value']
corrdata
# Group mutually correlated features: each group is one "anchor" feature plus
# every feature correlated with it above the threshold.
# (Original loop body had its indentation stripped; reconstructed here.)
grouped_feature_list = []
correlated_groups_list = []
for feature in corrdata.features1.unique():
    if feature not in grouped_feature_list:
        # All pair rows where this feature is the left-hand member.
        correlated_block = corrdata[corrdata.features1 == feature]
        # Mark the anchor and its partners as handled so none of them
        # starts a new group of its own.
        grouped_feature_list = grouped_feature_list + list(correlated_block.feature2.unique()) + [feature]
        correlated_groups_list.append(correlated_block)
len(correlated_groups_list)
# 56 groups in total for this sample.
# Inspect the correlation pairs inside each group.
for group in correlated_groups_list:
    print(group)
# Within each correlated group, keep only the single most important feature,
# ranked by random-forest feature importance.
# (Original loop body had its indentation stripped; reconstructed here.)
important_features = []
for group in correlated_groups_list:
    # Every feature appearing in this group, on either side of the pair.
    features = list(group.features1.unique()) + list(group.feature2.unique())
    # Measure per-feature importance with a random forest fit on just this group.
    rf = RandomForestClassifier(n_estimators=100, random_state=0)
    rf.fit(x_train_unique[features], y_train)
    importance = pd.concat([pd.Series(features), pd.Series(rf.feature_importances_)], axis=1)
    importance.columns = ['features', 'importance']
    importance.sort_values(by='importance', ascending=False, inplace=True)
    # Top row = the group's most important feature.
    feat = importance.iloc[0]
    important_features.append(feat)
important_features
important_features = pd.DataFrame(important_features)
important_features.reset_index(inplace=True, drop=True)
important_features
# Final keep/discard lists: retain the most important member of each
# correlated group, discard the remaining correlated features.
features_to_consider = set(important_features['features'])
features_to_discard = list(set(corr_features) - features_to_consider)
x_train_grouped_uncorr = x_train_unique.drop(labels=features_to_discard, axis=1)
x_train_grouped_uncorr.shape
x_test_grouped_uncorr = x_test_unique.drop(labels=features_to_discard, axis=1)
x_test_grouped_uncorr.shape
# IPython cell magic: time the importance-aware correlation filtering result.
%%time
run_randomForest(x_train_grouped_uncorr, x_test_grouped_uncorr, y_train, y_test)
# Baseline: the full, unfiltered feature set.
%%time
run_randomForest(x_train, x_test, y_train, y_test)
# Comparison: the naive (keep-first) correlation filtering from earlier.
%%time
run_randomForest(x_train_uncorr, x_test_uncorr, y_train, y_test)