The content for this site available on GitHub. If you want to launch the notebooks interactively click on the binder stamp below.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
< EDA Iris | Contents | EDA Wine Quality Prediction binary >
EDA: Wine Cultivars¶
Wine dataset - chemical analysis to determine the origin of wines.
In [1]:
%reload_ext autoreload
%autoreload 2
In [2]:
from sklearn.datasets import load_wine
from sklearn.decomposition import FastICA
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from helpers.base_imports import *
Create new EDA¶
In [3]:
eda = EDA(name="winec")
eda
Loading 'edas.csv' Creating experiment: 'winec'
Out[3]:
EDA: winec Columns: Index(['description', 'n features', 'n samples', 'f/n ratio', 'noise', 'stats', 'class balance', 'outliers', 'skewness', 'correlations', 'DR potential'], dtype='object') Datasets: Index([], dtype='object', name='dataset')
Get raw dataset from remote source¶
In [4]:
# fetch dataset
data = load_wine(as_frame=True)
data.frame.head(5)
Out[4]:
alcohol | malic_acid | ash | alcalinity_of_ash | magnesium | total_phenols | flavanoids | nonflavanoid_phenols | proanthocyanins | color_intensity | hue | od280/od315_of_diluted_wines | proline | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 14.23 | 1.71 | 2.43 | 15.6 | 127.0 | 2.80 | 3.06 | 0.28 | 2.29 | 5.64 | 1.04 | 3.92 | 1065.0 | 0 |
1 | 13.20 | 1.78 | 2.14 | 11.2 | 100.0 | 2.65 | 2.76 | 0.26 | 1.28 | 4.38 | 1.05 | 3.40 | 1050.0 | 0 |
2 | 13.16 | 2.36 | 2.67 | 18.6 | 101.0 | 2.80 | 3.24 | 0.30 | 2.81 | 5.68 | 1.03 | 3.17 | 1185.0 | 0 |
3 | 14.37 | 1.95 | 2.50 | 16.8 | 113.0 | 3.85 | 3.49 | 0.24 | 2.18 | 7.80 | 0.86 | 3.45 | 1480.0 | 0 |
4 | 13.24 | 2.59 | 2.87 | 21.0 | 118.0 | 2.80 | 2.69 | 0.39 | 1.82 | 4.32 | 1.04 | 2.93 | 735.0 | 0 |
In [5]:
list(data.target_names)
Out[5]:
[np.str_('class_0'), np.str_('class_1'), np.str_('class_2')]
In [6]:
disp_df(data.frame)
alcohol | malic_acid | ash | alcalinity_of_ash | magnesium | total_phenols | flavanoids | nonflavanoid_phenols | proanthocyanins | color_intensity | hue | od280/od315_of_diluted_wines | proline | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 14.23 | 1.71 | 2.43 | 15.6 | 127.0 | 2.80 | 3.06 | 0.28 | 2.29 | 5.640000 | 1.040 | 3.92 | 1065.0 | 0 |
1 | 13.20 | 1.78 | 2.14 | 11.2 | 100.0 | 2.65 | 2.76 | 0.26 | 1.28 | 4.380000 | 1.050 | 3.40 | 1050.0 | 0 |
2 | 13.16 | 2.36 | 2.67 | 18.6 | 101.0 | 2.80 | 3.24 | 0.30 | 2.81 | 5.680000 | 1.030 | 3.17 | 1185.0 | 0 |
3 | 14.37 | 1.95 | 2.50 | 16.8 | 113.0 | 3.85 | 3.49 | 0.24 | 2.18 | 7.800000 | 0.860 | 3.45 | 1480.0 | 0 |
4 | 13.24 | 2.59 | 2.87 | 21.0 | 118.0 | 2.80 | 2.69 | 0.39 | 1.82 | 4.320000 | 1.040 | 2.93 | 735.0 | 0 |
5 | 14.20 | 1.76 | 2.45 | 15.2 | 112.0 | 3.27 | 3.39 | 0.34 | 1.97 | 6.750000 | 1.050 | 2.85 | 1450.0 | 0 |
6 | 14.39 | 1.87 | 2.45 | 14.6 | 96.0 | 2.50 | 2.52 | 0.30 | 1.98 | 5.250000 | 1.020 | 3.58 | 1290.0 | 0 |
7 | 14.06 | 2.15 | 2.61 | 17.6 | 121.0 | 2.60 | 2.51 | 0.31 | 1.25 | 5.050000 | 1.060 | 3.58 | 1295.0 | 0 |
8 | 14.83 | 1.64 | 2.17 | 14.0 | 97.0 | 2.80 | 2.98 | 0.29 | 1.98 | 5.200000 | 1.080 | 2.85 | 1045.0 | 0 |
9 | 13.86 | 1.35 | 2.27 | 16.0 | 98.0 | 2.98 | 3.15 | 0.22 | 1.85 | 7.220000 | 1.010 | 3.55 | 1045.0 | 0 |
10 | 14.10 | 2.16 | 2.30 | 18.0 | 105.0 | 2.95 | 3.32 | 0.22 | 2.38 | 5.750000 | 1.250 | 3.17 | 1510.0 | 0 |
11 | 14.12 | 1.48 | 2.32 | 16.8 | 95.0 | 2.20 | 2.43 | 0.26 | 1.57 | 5.000000 | 1.170 | 2.82 | 1280.0 | 0 |
12 | 13.75 | 1.73 | 2.41 | 16.0 | 89.0 | 2.60 | 2.76 | 0.29 | 1.81 | 5.600000 | 1.150 | 2.90 | 1320.0 | 0 |
13 | 14.75 | 1.73 | 2.39 | 11.4 | 91.0 | 3.10 | 3.69 | 0.43 | 2.81 | 5.400000 | 1.250 | 2.73 | 1150.0 | 0 |
14 | 14.38 | 1.87 | 2.38 | 12.0 | 102.0 | 3.30 | 3.64 | 0.29 | 2.96 | 7.500000 | 1.200 | 3.00 | 1547.0 | 0 |
15 | 13.63 | 1.81 | 2.70 | 17.2 | 112.0 | 2.85 | 2.91 | 0.30 | 1.46 | 7.300000 | 1.280 | 2.88 | 1310.0 | 0 |
16 | 14.30 | 1.92 | 2.72 | 20.0 | 120.0 | 2.80 | 3.14 | 0.33 | 1.97 | 6.200000 | 1.070 | 2.65 | 1280.0 | 0 |
17 | 13.83 | 1.57 | 2.62 | 20.0 | 115.0 | 2.95 | 3.40 | 0.40 | 1.72 | 6.600000 | 1.130 | 2.57 | 1130.0 | 0 |
18 | 14.19 | 1.59 | 2.48 | 16.5 | 108.0 | 3.30 | 3.93 | 0.32 | 1.86 | 8.700000 | 1.230 | 2.82 | 1680.0 | 0 |
19 | 13.64 | 3.10 | 2.56 | 15.2 | 116.0 | 2.70 | 3.03 | 0.17 | 1.66 | 5.100000 | 0.960 | 3.36 | 845.0 | 0 |
20 | 14.06 | 1.63 | 2.28 | 16.0 | 126.0 | 3.00 | 3.17 | 0.24 | 2.10 | 5.650000 | 1.090 | 3.71 | 780.0 | 0 |
21 | 12.93 | 3.80 | 2.65 | 18.6 | 102.0 | 2.41 | 2.41 | 0.25 | 1.98 | 4.500000 | 1.030 | 3.52 | 770.0 | 0 |
22 | 13.71 | 1.86 | 2.36 | 16.6 | 101.0 | 2.61 | 2.88 | 0.27 | 1.69 | 3.800000 | 1.110 | 4.00 | 1035.0 | 0 |
23 | 12.85 | 1.60 | 2.52 | 17.8 | 95.0 | 2.48 | 2.37 | 0.26 | 1.46 | 3.930000 | 1.090 | 3.63 | 1015.0 | 0 |
24 | 13.50 | 1.81 | 2.61 | 20.0 | 96.0 | 2.53 | 2.61 | 0.28 | 1.66 | 3.520000 | 1.120 | 3.82 | 845.0 | 0 |
25 | 13.05 | 2.05 | 3.22 | 25.0 | 124.0 | 2.63 | 2.68 | 0.47 | 1.92 | 3.580000 | 1.130 | 3.20 | 830.0 | 0 |
26 | 13.39 | 1.77 | 2.62 | 16.1 | 93.0 | 2.85 | 2.94 | 0.34 | 1.45 | 4.800000 | 0.920 | 3.22 | 1195.0 | 0 |
27 | 13.30 | 1.72 | 2.14 | 17.0 | 94.0 | 2.40 | 2.19 | 0.27 | 1.35 | 3.950000 | 1.020 | 2.77 | 1285.0 | 0 |
28 | 13.87 | 1.90 | 2.80 | 19.4 | 107.0 | 2.95 | 2.97 | 0.37 | 1.76 | 4.500000 | 1.250 | 3.40 | 915.0 | 0 |
29 | 14.02 | 1.68 | 2.21 | 16.0 | 96.0 | 2.65 | 2.33 | 0.26 | 1.98 | 4.700000 | 1.040 | 3.59 | 1035.0 | 0 |
30 | 13.73 | 1.50 | 2.70 | 22.5 | 101.0 | 3.00 | 3.25 | 0.29 | 2.38 | 5.700000 | 1.190 | 2.71 | 1285.0 | 0 |
31 | 13.58 | 1.66 | 2.36 | 19.1 | 106.0 | 2.86 | 3.19 | 0.22 | 1.95 | 6.900000 | 1.090 | 2.88 | 1515.0 | 0 |
32 | 13.68 | 1.83 | 2.36 | 17.2 | 104.0 | 2.42 | 2.69 | 0.42 | 1.97 | 3.840000 | 1.230 | 2.87 | 990.0 | 0 |
33 | 13.76 | 1.53 | 2.70 | 19.5 | 132.0 | 2.95 | 2.74 | 0.50 | 1.35 | 5.400000 | 1.250 | 3.00 | 1235.0 | 0 |
34 | 13.51 | 1.80 | 2.65 | 19.0 | 110.0 | 2.35 | 2.53 | 0.29 | 1.54 | 4.200000 | 1.100 | 2.87 | 1095.0 | 0 |
35 | 13.48 | 1.81 | 2.41 | 20.5 | 100.0 | 2.70 | 2.98 | 0.26 | 1.86 | 5.100000 | 1.040 | 3.47 | 920.0 | 0 |
36 | 13.28 | 1.64 | 2.84 | 15.5 | 110.0 | 2.60 | 2.68 | 0.34 | 1.36 | 4.600000 | 1.090 | 2.78 | 880.0 | 0 |
37 | 13.05 | 1.65 | 2.55 | 18.0 | 98.0 | 2.45 | 2.43 | 0.29 | 1.44 | 4.250000 | 1.120 | 2.51 | 1105.0 | 0 |
38 | 13.07 | 1.50 | 2.10 | 15.5 | 98.0 | 2.40 | 2.64 | 0.28 | 1.37 | 3.700000 | 1.180 | 2.69 | 1020.0 | 0 |
39 | 14.22 | 3.99 | 2.51 | 13.2 | 128.0 | 3.00 | 3.04 | 0.20 | 2.08 | 5.100000 | 0.890 | 3.53 | 760.0 | 0 |
40 | 13.56 | 1.71 | 2.31 | 16.2 | 117.0 | 3.15 | 3.29 | 0.34 | 2.34 | 6.130000 | 0.950 | 3.38 | 795.0 | 0 |
41 | 13.41 | 3.84 | 2.12 | 18.8 | 90.0 | 2.45 | 2.68 | 0.27 | 1.48 | 4.280000 | 0.910 | 3.00 | 1035.0 | 0 |
42 | 13.88 | 1.89 | 2.59 | 15.0 | 101.0 | 3.25 | 3.56 | 0.17 | 1.70 | 5.430000 | 0.880 | 3.56 | 1095.0 | 0 |
43 | 13.24 | 3.98 | 2.29 | 17.5 | 103.0 | 2.64 | 2.63 | 0.32 | 1.66 | 4.360000 | 0.820 | 3.00 | 680.0 | 0 |
44 | 13.05 | 1.77 | 2.10 | 17.0 | 107.0 | 3.00 | 3.00 | 0.28 | 2.03 | 5.040000 | 0.880 | 3.35 | 885.0 | 0 |
45 | 14.21 | 4.04 | 2.44 | 18.9 | 111.0 | 2.85 | 2.65 | 0.30 | 1.25 | 5.240000 | 0.870 | 3.33 | 1080.0 | 0 |
46 | 14.38 | 3.59 | 2.28 | 16.0 | 102.0 | 3.25 | 3.17 | 0.27 | 2.19 | 4.900000 | 1.040 | 3.44 | 1065.0 | 0 |
47 | 13.90 | 1.68 | 2.12 | 16.0 | 101.0 | 3.10 | 3.39 | 0.21 | 2.14 | 6.100000 | 0.910 | 3.33 | 985.0 | 0 |
48 | 14.10 | 2.02 | 2.40 | 18.8 | 103.0 | 2.75 | 2.92 | 0.32 | 2.38 | 6.200000 | 1.070 | 2.75 | 1060.0 | 0 |
49 | 13.94 | 1.73 | 2.27 | 17.4 | 108.0 | 2.88 | 3.54 | 0.32 | 2.08 | 8.900000 | 1.120 | 3.10 | 1260.0 | 0 |
50 | 13.05 | 1.73 | 2.04 | 12.4 | 92.0 | 2.72 | 3.27 | 0.17 | 2.91 | 7.200000 | 1.120 | 2.91 | 1150.0 | 0 |
51 | 13.83 | 1.65 | 2.60 | 17.2 | 94.0 | 2.45 | 2.99 | 0.22 | 2.29 | 5.600000 | 1.240 | 3.37 | 1265.0 | 0 |
52 | 13.82 | 1.75 | 2.42 | 14.0 | 111.0 | 3.88 | 3.74 | 0.32 | 1.87 | 7.050000 | 1.010 | 3.26 | 1190.0 | 0 |
53 | 13.77 | 1.90 | 2.68 | 17.1 | 115.0 | 3.00 | 2.79 | 0.39 | 1.68 | 6.300000 | 1.130 | 2.93 | 1375.0 | 0 |
54 | 13.74 | 1.67 | 2.25 | 16.4 | 118.0 | 2.60 | 2.90 | 0.21 | 1.62 | 5.850000 | 0.920 | 3.20 | 1060.0 | 0 |
55 | 13.56 | 1.73 | 2.46 | 20.5 | 116.0 | 2.96 | 2.78 | 0.20 | 2.45 | 6.250000 | 0.980 | 3.03 | 1120.0 | 0 |
56 | 14.22 | 1.70 | 2.30 | 16.3 | 118.0 | 3.20 | 3.00 | 0.26 | 2.03 | 6.380000 | 0.940 | 3.31 | 970.0 | 0 |
57 | 13.29 | 1.97 | 2.68 | 16.8 | 102.0 | 3.00 | 3.23 | 0.31 | 1.66 | 6.000000 | 1.070 | 2.84 | 1270.0 | 0 |
58 | 13.72 | 1.43 | 2.50 | 16.7 | 108.0 | 3.40 | 3.67 | 0.19 | 2.04 | 6.800000 | 0.890 | 2.87 | 1285.0 | 0 |
59 | 12.37 | 0.94 | 1.36 | 10.6 | 88.0 | 1.98 | 0.57 | 0.28 | 0.42 | 1.950000 | 1.050 | 1.82 | 520.0 | 1 |
60 | 12.33 | 1.10 | 2.28 | 16.0 | 101.0 | 2.05 | 1.09 | 0.63 | 0.41 | 3.270000 | 1.250 | 1.67 | 680.0 | 1 |
61 | 12.64 | 1.36 | 2.02 | 16.8 | 100.0 | 2.02 | 1.41 | 0.53 | 0.62 | 5.750000 | 0.980 | 1.59 | 450.0 | 1 |
62 | 13.67 | 1.25 | 1.92 | 18.0 | 94.0 | 2.10 | 1.79 | 0.32 | 0.73 | 3.800000 | 1.230 | 2.46 | 630.0 | 1 |
63 | 12.37 | 1.13 | 2.16 | 19.0 | 87.0 | 3.50 | 3.10 | 0.19 | 1.87 | 4.450000 | 1.220 | 2.87 | 420.0 | 1 |
64 | 12.17 | 1.45 | 2.53 | 19.0 | 104.0 | 1.89 | 1.75 | 0.45 | 1.03 | 2.950000 | 1.450 | 2.23 | 355.0 | 1 |
65 | 12.37 | 1.21 | 2.56 | 18.1 | 98.0 | 2.42 | 2.65 | 0.37 | 2.08 | 4.600000 | 1.190 | 2.30 | 678.0 | 1 |
66 | 13.11 | 1.01 | 1.70 | 15.0 | 78.0 | 2.98 | 3.18 | 0.26 | 2.28 | 5.300000 | 1.120 | 3.18 | 502.0 | 1 |
67 | 12.37 | 1.17 | 1.92 | 19.6 | 78.0 | 2.11 | 2.00 | 0.27 | 1.04 | 4.680000 | 1.120 | 3.48 | 510.0 | 1 |
68 | 13.34 | 0.94 | 2.36 | 17.0 | 110.0 | 2.53 | 1.30 | 0.55 | 0.42 | 3.170000 | 1.020 | 1.93 | 750.0 | 1 |
69 | 12.21 | 1.19 | 1.75 | 16.8 | 151.0 | 1.85 | 1.28 | 0.14 | 2.50 | 2.850000 | 1.280 | 3.07 | 718.0 | 1 |
70 | 12.29 | 1.61 | 2.21 | 20.4 | 103.0 | 1.10 | 1.02 | 0.37 | 1.46 | 3.050000 | 0.906 | 1.82 | 870.0 | 1 |
71 | 13.86 | 1.51 | 2.67 | 25.0 | 86.0 | 2.95 | 2.86 | 0.21 | 1.87 | 3.380000 | 1.360 | 3.16 | 410.0 | 1 |
72 | 13.49 | 1.66 | 2.24 | 24.0 | 87.0 | 1.88 | 1.84 | 0.27 | 1.03 | 3.740000 | 0.980 | 2.78 | 472.0 | 1 |
73 | 12.99 | 1.67 | 2.60 | 30.0 | 139.0 | 3.30 | 2.89 | 0.21 | 1.96 | 3.350000 | 1.310 | 3.50 | 985.0 | 1 |
74 | 11.96 | 1.09 | 2.30 | 21.0 | 101.0 | 3.38 | 2.14 | 0.13 | 1.65 | 3.210000 | 0.990 | 3.13 | 886.0 | 1 |
75 | 11.66 | 1.88 | 1.92 | 16.0 | 97.0 | 1.61 | 1.57 | 0.34 | 1.15 | 3.800000 | 1.230 | 2.14 | 428.0 | 1 |
76 | 13.03 | 0.90 | 1.71 | 16.0 | 86.0 | 1.95 | 2.03 | 0.24 | 1.46 | 4.600000 | 1.190 | 2.48 | 392.0 | 1 |
77 | 11.84 | 2.89 | 2.23 | 18.0 | 112.0 | 1.72 | 1.32 | 0.43 | 0.95 | 2.650000 | 0.960 | 2.52 | 500.0 | 1 |
78 | 12.33 | 0.99 | 1.95 | 14.8 | 136.0 | 1.90 | 1.85 | 0.35 | 2.76 | 3.400000 | 1.060 | 2.31 | 750.0 | 1 |
79 | 12.70 | 3.87 | 2.40 | 23.0 | 101.0 | 2.83 | 2.55 | 0.43 | 1.95 | 2.570000 | 1.190 | 3.13 | 463.0 | 1 |
80 | 12.00 | 0.92 | 2.00 | 19.0 | 86.0 | 2.42 | 2.26 | 0.30 | 1.43 | 2.500000 | 1.380 | 3.12 | 278.0 | 1 |
81 | 12.72 | 1.81 | 2.20 | 18.8 | 86.0 | 2.20 | 2.53 | 0.26 | 1.77 | 3.900000 | 1.160 | 3.14 | 714.0 | 1 |
82 | 12.08 | 1.13 | 2.51 | 24.0 | 78.0 | 2.00 | 1.58 | 0.40 | 1.40 | 2.200000 | 1.310 | 2.72 | 630.0 | 1 |
83 | 13.05 | 3.86 | 2.32 | 22.5 | 85.0 | 1.65 | 1.59 | 0.61 | 1.62 | 4.800000 | 0.840 | 2.01 | 515.0 | 1 |
84 | 11.84 | 0.89 | 2.58 | 18.0 | 94.0 | 2.20 | 2.21 | 0.22 | 2.35 | 3.050000 | 0.790 | 3.08 | 520.0 | 1 |
85 | 12.67 | 0.98 | 2.24 | 18.0 | 99.0 | 2.20 | 1.94 | 0.30 | 1.46 | 2.620000 | 1.230 | 3.16 | 450.0 | 1 |
86 | 12.16 | 1.61 | 2.31 | 22.8 | 90.0 | 1.78 | 1.69 | 0.43 | 1.56 | 2.450000 | 1.330 | 2.26 | 495.0 | 1 |
87 | 11.65 | 1.67 | 2.62 | 26.0 | 88.0 | 1.92 | 1.61 | 0.40 | 1.34 | 2.600000 | 1.360 | 3.21 | 562.0 | 1 |
88 | 11.64 | 2.06 | 2.46 | 21.6 | 84.0 | 1.95 | 1.69 | 0.48 | 1.35 | 2.800000 | 1.000 | 2.75 | 680.0 | 1 |
89 | 12.08 | 1.33 | 2.30 | 23.6 | 70.0 | 2.20 | 1.59 | 0.42 | 1.38 | 1.740000 | 1.070 | 3.21 | 625.0 | 1 |
90 | 12.08 | 1.83 | 2.32 | 18.5 | 81.0 | 1.60 | 1.50 | 0.52 | 1.64 | 2.400000 | 1.080 | 2.27 | 480.0 | 1 |
91 | 12.00 | 1.51 | 2.42 | 22.0 | 86.0 | 1.45 | 1.25 | 0.50 | 1.63 | 3.600000 | 1.050 | 2.65 | 450.0 | 1 |
92 | 12.69 | 1.53 | 2.26 | 20.7 | 80.0 | 1.38 | 1.46 | 0.58 | 1.62 | 3.050000 | 0.960 | 2.06 | 495.0 | 1 |
93 | 12.29 | 2.83 | 2.22 | 18.0 | 88.0 | 2.45 | 2.25 | 0.25 | 1.99 | 2.150000 | 1.150 | 3.30 | 290.0 | 1 |
94 | 11.62 | 1.99 | 2.28 | 18.0 | 98.0 | 3.02 | 2.26 | 0.17 | 1.35 | 3.250000 | 1.160 | 2.96 | 345.0 | 1 |
95 | 12.47 | 1.52 | 2.20 | 19.0 | 162.0 | 2.50 | 2.27 | 0.32 | 3.28 | 2.600000 | 1.160 | 2.63 | 937.0 | 1 |
96 | 11.81 | 2.12 | 2.74 | 21.5 | 134.0 | 1.60 | 0.99 | 0.14 | 1.56 | 2.500000 | 0.950 | 2.26 | 625.0 | 1 |
97 | 12.29 | 1.41 | 1.98 | 16.0 | 85.0 | 2.55 | 2.50 | 0.29 | 1.77 | 2.900000 | 1.230 | 2.74 | 428.0 | 1 |
98 | 12.37 | 1.07 | 2.10 | 18.5 | 88.0 | 3.52 | 3.75 | 0.24 | 1.95 | 4.500000 | 1.040 | 2.77 | 660.0 | 1 |
99 | 12.29 | 3.17 | 2.21 | 18.0 | 88.0 | 2.85 | 2.99 | 0.45 | 2.81 | 2.300000 | 1.420 | 2.83 | 406.0 | 1 |
100 | 12.08 | 2.08 | 1.70 | 17.5 | 97.0 | 2.23 | 2.17 | 0.26 | 1.40 | 3.300000 | 1.270 | 2.96 | 710.0 | 1 |
101 | 12.60 | 1.34 | 1.90 | 18.5 | 88.0 | 1.45 | 1.36 | 0.29 | 1.35 | 2.450000 | 1.040 | 2.77 | 562.0 | 1 |
102 | 12.34 | 2.45 | 2.46 | 21.0 | 98.0 | 2.56 | 2.11 | 0.34 | 1.31 | 2.800000 | 0.800 | 3.38 | 438.0 | 1 |
103 | 11.82 | 1.72 | 1.88 | 19.5 | 86.0 | 2.50 | 1.64 | 0.37 | 1.42 | 2.060000 | 0.940 | 2.44 | 415.0 | 1 |
104 | 12.51 | 1.73 | 1.98 | 20.5 | 85.0 | 2.20 | 1.92 | 0.32 | 1.48 | 2.940000 | 1.040 | 3.57 | 672.0 | 1 |
105 | 12.42 | 2.55 | 2.27 | 22.0 | 90.0 | 1.68 | 1.84 | 0.66 | 1.42 | 2.700000 | 0.860 | 3.30 | 315.0 | 1 |
106 | 12.25 | 1.73 | 2.12 | 19.0 | 80.0 | 1.65 | 2.03 | 0.37 | 1.63 | 3.400000 | 1.000 | 3.17 | 510.0 | 1 |
107 | 12.72 | 1.75 | 2.28 | 22.5 | 84.0 | 1.38 | 1.76 | 0.48 | 1.63 | 3.300000 | 0.880 | 2.42 | 488.0 | 1 |
108 | 12.22 | 1.29 | 1.94 | 19.0 | 92.0 | 2.36 | 2.04 | 0.39 | 2.08 | 2.700000 | 0.860 | 3.02 | 312.0 | 1 |
109 | 11.61 | 1.35 | 2.70 | 20.0 | 94.0 | 2.74 | 2.92 | 0.29 | 2.49 | 2.650000 | 0.960 | 3.26 | 680.0 | 1 |
110 | 11.46 | 3.74 | 1.82 | 19.5 | 107.0 | 3.18 | 2.58 | 0.24 | 3.58 | 2.900000 | 0.750 | 2.81 | 562.0 | 1 |
111 | 12.52 | 2.43 | 2.17 | 21.0 | 88.0 | 2.55 | 2.27 | 0.26 | 1.22 | 2.000000 | 0.900 | 2.78 | 325.0 | 1 |
112 | 11.76 | 2.68 | 2.92 | 20.0 | 103.0 | 1.75 | 2.03 | 0.60 | 1.05 | 3.800000 | 1.230 | 2.50 | 607.0 | 1 |
113 | 11.41 | 0.74 | 2.50 | 21.0 | 88.0 | 2.48 | 2.01 | 0.42 | 1.44 | 3.080000 | 1.100 | 2.31 | 434.0 | 1 |
114 | 12.08 | 1.39 | 2.50 | 22.5 | 84.0 | 2.56 | 2.29 | 0.43 | 1.04 | 2.900000 | 0.930 | 3.19 | 385.0 | 1 |
115 | 11.03 | 1.51 | 2.20 | 21.5 | 85.0 | 2.46 | 2.17 | 0.52 | 2.01 | 1.900000 | 1.710 | 2.87 | 407.0 | 1 |
116 | 11.82 | 1.47 | 1.99 | 20.8 | 86.0 | 1.98 | 1.60 | 0.30 | 1.53 | 1.950000 | 0.950 | 3.33 | 495.0 | 1 |
117 | 12.42 | 1.61 | 2.19 | 22.5 | 108.0 | 2.00 | 2.09 | 0.34 | 1.61 | 2.060000 | 1.060 | 2.96 | 345.0 | 1 |
118 | 12.77 | 3.43 | 1.98 | 16.0 | 80.0 | 1.63 | 1.25 | 0.43 | 0.83 | 3.400000 | 0.700 | 2.12 | 372.0 | 1 |
119 | 12.00 | 3.43 | 2.00 | 19.0 | 87.0 | 2.00 | 1.64 | 0.37 | 1.87 | 1.280000 | 0.930 | 3.05 | 564.0 | 1 |
120 | 11.45 | 2.40 | 2.42 | 20.0 | 96.0 | 2.90 | 2.79 | 0.32 | 1.83 | 3.250000 | 0.800 | 3.39 | 625.0 | 1 |
121 | 11.56 | 2.05 | 3.23 | 28.5 | 119.0 | 3.18 | 5.08 | 0.47 | 1.87 | 6.000000 | 0.930 | 3.69 | 465.0 | 1 |
122 | 12.42 | 4.43 | 2.73 | 26.5 | 102.0 | 2.20 | 2.13 | 0.43 | 1.71 | 2.080000 | 0.920 | 3.12 | 365.0 | 1 |
123 | 13.05 | 5.80 | 2.13 | 21.5 | 86.0 | 2.62 | 2.65 | 0.30 | 2.01 | 2.600000 | 0.730 | 3.10 | 380.0 | 1 |
124 | 11.87 | 4.31 | 2.39 | 21.0 | 82.0 | 2.86 | 3.03 | 0.21 | 2.91 | 2.800000 | 0.750 | 3.64 | 380.0 | 1 |
125 | 12.07 | 2.16 | 2.17 | 21.0 | 85.0 | 2.60 | 2.65 | 0.37 | 1.35 | 2.760000 | 0.860 | 3.28 | 378.0 | 1 |
126 | 12.43 | 1.53 | 2.29 | 21.5 | 86.0 | 2.74 | 3.15 | 0.39 | 1.77 | 3.940000 | 0.690 | 2.84 | 352.0 | 1 |
127 | 11.79 | 2.13 | 2.78 | 28.5 | 92.0 | 2.13 | 2.24 | 0.58 | 1.76 | 3.000000 | 0.970 | 2.44 | 466.0 | 1 |
128 | 12.37 | 1.63 | 2.30 | 24.5 | 88.0 | 2.22 | 2.45 | 0.40 | 1.90 | 2.120000 | 0.890 | 2.78 | 342.0 | 1 |
129 | 12.04 | 4.30 | 2.38 | 22.0 | 80.0 | 2.10 | 1.75 | 0.42 | 1.35 | 2.600000 | 0.790 | 2.57 | 580.0 | 1 |
130 | 12.86 | 1.35 | 2.32 | 18.0 | 122.0 | 1.51 | 1.25 | 0.21 | 0.94 | 4.100000 | 0.760 | 1.29 | 630.0 | 2 |
131 | 12.88 | 2.99 | 2.40 | 20.0 | 104.0 | 1.30 | 1.22 | 0.24 | 0.83 | 5.400000 | 0.740 | 1.42 | 530.0 | 2 |
132 | 12.81 | 2.31 | 2.40 | 24.0 | 98.0 | 1.15 | 1.09 | 0.27 | 0.83 | 5.700000 | 0.660 | 1.36 | 560.0 | 2 |
133 | 12.70 | 3.55 | 2.36 | 21.5 | 106.0 | 1.70 | 1.20 | 0.17 | 0.84 | 5.000000 | 0.780 | 1.29 | 600.0 | 2 |
134 | 12.51 | 1.24 | 2.25 | 17.5 | 85.0 | 2.00 | 0.58 | 0.60 | 1.25 | 5.450000 | 0.750 | 1.51 | 650.0 | 2 |
135 | 12.60 | 2.46 | 2.20 | 18.5 | 94.0 | 1.62 | 0.66 | 0.63 | 0.94 | 7.100000 | 0.730 | 1.58 | 695.0 | 2 |
136 | 12.25 | 4.72 | 2.54 | 21.0 | 89.0 | 1.38 | 0.47 | 0.53 | 0.80 | 3.850000 | 0.750 | 1.27 | 720.0 | 2 |
137 | 12.53 | 5.51 | 2.64 | 25.0 | 96.0 | 1.79 | 0.60 | 0.63 | 1.10 | 5.000000 | 0.820 | 1.69 | 515.0 | 2 |
138 | 13.49 | 3.59 | 2.19 | 19.5 | 88.0 | 1.62 | 0.48 | 0.58 | 0.88 | 5.700000 | 0.810 | 1.82 | 580.0 | 2 |
139 | 12.84 | 2.96 | 2.61 | 24.0 | 101.0 | 2.32 | 0.60 | 0.53 | 0.81 | 4.920000 | 0.890 | 2.15 | 590.0 | 2 |
140 | 12.93 | 2.81 | 2.70 | 21.0 | 96.0 | 1.54 | 0.50 | 0.53 | 0.75 | 4.600000 | 0.770 | 2.31 | 600.0 | 2 |
141 | 13.36 | 2.56 | 2.35 | 20.0 | 89.0 | 1.40 | 0.50 | 0.37 | 0.64 | 5.600000 | 0.700 | 2.47 | 780.0 | 2 |
142 | 13.52 | 3.17 | 2.72 | 23.5 | 97.0 | 1.55 | 0.52 | 0.50 | 0.55 | 4.350000 | 0.890 | 2.06 | 520.0 | 2 |
143 | 13.62 | 4.95 | 2.35 | 20.0 | 92.0 | 2.00 | 0.80 | 0.47 | 1.02 | 4.400000 | 0.910 | 2.05 | 550.0 | 2 |
144 | 12.25 | 3.88 | 2.20 | 18.5 | 112.0 | 1.38 | 0.78 | 0.29 | 1.14 | 8.210000 | 0.650 | 2.00 | 855.0 | 2 |
145 | 13.16 | 3.57 | 2.15 | 21.0 | 102.0 | 1.50 | 0.55 | 0.43 | 1.30 | 4.000000 | 0.600 | 1.68 | 830.0 | 2 |
146 | 13.88 | 5.04 | 2.23 | 20.0 | 80.0 | 0.98 | 0.34 | 0.40 | 0.68 | 4.900000 | 0.580 | 1.33 | 415.0 | 2 |
147 | 12.87 | 4.61 | 2.48 | 21.5 | 86.0 | 1.70 | 0.65 | 0.47 | 0.86 | 7.650000 | 0.540 | 1.86 | 625.0 | 2 |
148 | 13.32 | 3.24 | 2.38 | 21.5 | 92.0 | 1.93 | 0.76 | 0.45 | 1.25 | 8.420000 | 0.550 | 1.62 | 650.0 | 2 |
149 | 13.08 | 3.90 | 2.36 | 21.5 | 113.0 | 1.41 | 1.39 | 0.34 | 1.14 | 9.400000 | 0.570 | 1.33 | 550.0 | 2 |
150 | 13.50 | 3.12 | 2.62 | 24.0 | 123.0 | 1.40 | 1.57 | 0.22 | 1.25 | 8.600000 | 0.590 | 1.30 | 500.0 | 2 |
151 | 12.79 | 2.67 | 2.48 | 22.0 | 112.0 | 1.48 | 1.36 | 0.24 | 1.26 | 10.800000 | 0.480 | 1.47 | 480.0 | 2 |
152 | 13.11 | 1.90 | 2.75 | 25.5 | 116.0 | 2.20 | 1.28 | 0.26 | 1.56 | 7.100000 | 0.610 | 1.33 | 425.0 | 2 |
153 | 13.23 | 3.30 | 2.28 | 18.5 | 98.0 | 1.80 | 0.83 | 0.61 | 1.87 | 10.520000 | 0.560 | 1.51 | 675.0 | 2 |
154 | 12.58 | 1.29 | 2.10 | 20.0 | 103.0 | 1.48 | 0.58 | 0.53 | 1.40 | 7.600000 | 0.580 | 1.55 | 640.0 | 2 |
155 | 13.17 | 5.19 | 2.32 | 22.0 | 93.0 | 1.74 | 0.63 | 0.61 | 1.55 | 7.900000 | 0.600 | 1.48 | 725.0 | 2 |
156 | 13.84 | 4.12 | 2.38 | 19.5 | 89.0 | 1.80 | 0.83 | 0.48 | 1.56 | 9.010000 | 0.570 | 1.64 | 480.0 | 2 |
157 | 12.45 | 3.03 | 2.64 | 27.0 | 97.0 | 1.90 | 0.58 | 0.63 | 1.14 | 7.500000 | 0.670 | 1.73 | 880.0 | 2 |
158 | 14.34 | 1.68 | 2.70 | 25.0 | 98.0 | 2.80 | 1.31 | 0.53 | 2.70 | 13.000000 | 0.570 | 1.96 | 660.0 | 2 |
159 | 13.48 | 1.67 | 2.64 | 22.5 | 89.0 | 2.60 | 1.10 | 0.52 | 2.29 | 11.750000 | 0.570 | 1.78 | 620.0 | 2 |
160 | 12.36 | 3.83 | 2.38 | 21.0 | 88.0 | 2.30 | 0.92 | 0.50 | 1.04 | 7.650000 | 0.560 | 1.58 | 520.0 | 2 |
161 | 13.69 | 3.26 | 2.54 | 20.0 | 107.0 | 1.83 | 0.56 | 0.50 | 0.80 | 5.880000 | 0.960 | 1.82 | 680.0 | 2 |
162 | 12.85 | 3.27 | 2.58 | 22.0 | 106.0 | 1.65 | 0.60 | 0.60 | 0.96 | 5.580000 | 0.870 | 2.11 | 570.0 | 2 |
163 | 12.96 | 3.45 | 2.35 | 18.5 | 106.0 | 1.39 | 0.70 | 0.40 | 0.94 | 5.280000 | 0.680 | 1.75 | 675.0 | 2 |
164 | 13.78 | 2.76 | 2.30 | 22.0 | 90.0 | 1.35 | 0.68 | 0.41 | 1.03 | 9.580000 | 0.700 | 1.68 | 615.0 | 2 |
165 | 13.73 | 4.36 | 2.26 | 22.5 | 88.0 | 1.28 | 0.47 | 0.52 | 1.15 | 6.620000 | 0.780 | 1.75 | 520.0 | 2 |
166 | 13.45 | 3.70 | 2.60 | 23.0 | 111.0 | 1.70 | 0.92 | 0.43 | 1.46 | 10.680000 | 0.850 | 1.56 | 695.0 | 2 |
167 | 12.82 | 3.37 | 2.30 | 19.5 | 88.0 | 1.48 | 0.66 | 0.40 | 0.97 | 10.260000 | 0.720 | 1.75 | 685.0 | 2 |
168 | 13.58 | 2.58 | 2.69 | 24.5 | 105.0 | 1.55 | 0.84 | 0.39 | 1.54 | 8.660000 | 0.740 | 1.80 | 750.0 | 2 |
169 | 13.40 | 4.60 | 2.86 | 25.0 | 112.0 | 1.98 | 0.96 | 0.27 | 1.11 | 8.500000 | 0.670 | 1.92 | 630.0 | 2 |
170 | 12.20 | 3.03 | 2.32 | 19.0 | 96.0 | 1.25 | 0.49 | 0.40 | 0.73 | 5.500000 | 0.660 | 1.83 | 510.0 | 2 |
171 | 12.77 | 2.39 | 2.28 | 19.5 | 86.0 | 1.39 | 0.51 | 0.48 | 0.64 | 9.899999 | 0.570 | 1.63 | 470.0 | 2 |
172 | 14.16 | 2.51 | 2.48 | 20.0 | 91.0 | 1.68 | 0.70 | 0.44 | 1.24 | 9.700000 | 0.620 | 1.71 | 660.0 | 2 |
173 | 13.71 | 5.65 | 2.45 | 20.5 | 95.0 | 1.68 | 0.61 | 0.52 | 1.06 | 7.700000 | 0.640 | 1.74 | 740.0 | 2 |
174 | 13.40 | 3.91 | 2.48 | 23.0 | 102.0 | 1.80 | 0.75 | 0.43 | 1.41 | 7.300000 | 0.700 | 1.56 | 750.0 | 2 |
175 | 13.27 | 4.28 | 2.26 | 20.0 | 120.0 | 1.59 | 0.69 | 0.43 | 1.35 | 10.200000 | 0.590 | 1.56 | 835.0 | 2 |
176 | 13.17 | 2.59 | 2.37 | 20.0 | 120.0 | 1.65 | 0.68 | 0.53 | 1.46 | 9.300000 | 0.600 | 1.62 | 840.0 | 2 |
177 | 14.13 | 4.10 | 2.74 | 24.5 | 96.0 | 2.05 | 0.76 | 0.56 | 1.35 | 9.200000 | 0.610 | 1.60 | 560.0 | 2 |
In [7]:
X = data.frame.drop(columns="target")
y = data.frame.target
X.shape, y.shape
Out[7]:
((178, 13), (178,))
In [9]:
eda.update_param(
"description", "Classify wines into 3 classes based on chemical analysis"
)
eda.update_param("n features", X.shape[1])
eda.update_param("n samples", X.shape[0])
eda.update_param("f/n ratio", len(data.target_names) / X.shape[0])
In [10]:
eda.summary_df
Out[10]:
description | n features | n samples | f/n ratio | noise | stats | class balance | outliers | skewness | correlations | DR potential | |
---|---|---|---|---|---|---|---|---|---|---|---|
dataset | |||||||||||
winec | Classify wines into 3 classes based on chemica... | 13 | 178 | 0.016854 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Noise¶
In [11]:
# check for missing values
X.isna().sum().sum()
Out[11]:
np.int64(0)
In [12]:
eda.update_param("noise", "None, no missing vals")
Stats¶
In [13]:
skewness = X.skew()
summary_stats = X.describe().T
summary_stats["skewness"] = skewness
disp_df(summary_stats[["min", "max", "mean", "std", "skewness", "25%", "50%", "75%"]])
min | max | mean | std | skewness | 25% | 50% | 75% | |
---|---|---|---|---|---|---|---|---|
alcohol | 11.03 | 14.83 | 13.000618 | 0.811827 | -0.051482 | 12.3625 | 13.050 | 13.6775 |
malic_acid | 0.74 | 5.80 | 2.336348 | 1.117146 | 1.039651 | 1.6025 | 1.865 | 3.0825 |
ash | 1.36 | 3.23 | 2.366517 | 0.274344 | -0.176699 | 2.2100 | 2.360 | 2.5575 |
alcalinity_of_ash | 10.60 | 30.00 | 19.494944 | 3.339564 | 0.213047 | 17.2000 | 19.500 | 21.5000 |
magnesium | 70.00 | 162.00 | 99.741573 | 14.282484 | 1.098191 | 88.0000 | 98.000 | 107.0000 |
total_phenols | 0.98 | 3.88 | 2.295112 | 0.625851 | 0.086639 | 1.7425 | 2.355 | 2.8000 |
flavanoids | 0.34 | 5.08 | 2.029270 | 0.998859 | 0.025344 | 1.2050 | 2.135 | 2.8750 |
nonflavanoid_phenols | 0.13 | 0.66 | 0.361854 | 0.124453 | 0.450151 | 0.2700 | 0.340 | 0.4375 |
proanthocyanins | 0.41 | 3.58 | 1.590899 | 0.572359 | 0.517137 | 1.2500 | 1.555 | 1.9500 |
color_intensity | 1.28 | 13.00 | 5.058090 | 2.318286 | 0.868585 | 3.2200 | 4.690 | 6.2000 |
hue | 0.48 | 1.71 | 0.957449 | 0.228572 | 0.021091 | 0.7825 | 0.965 | 1.1200 |
od280/od315_of_diluted_wines | 1.27 | 4.00 | 2.611685 | 0.709990 | -0.307285 | 1.9375 | 2.780 | 3.1700 |
proline | 278.00 | 1680.00 | 746.893258 | 314.907474 | 0.767822 | 500.5000 | 673.500 | 985.0000 |
In [14]:
fig, ax = plot_feature_statistics(X, X.columns, line=False)
fig.savefig(f"{FIGS_DIR}/{eda.name}_feature-statistics.png")
In [15]:
eda.update_param("skewness", "some skewness")
eda.update_param("stats", "fine")
eda.update_param("outliers", "some outliers in ~7 features")
In [16]:
# class distribution of whole dataset
ax = sns.countplot(x=data.target_names[y])
plt.title(f"Target Class Distribution ({eda.name})")
plt.xlabel("Class")
plt.ylabel("Count")
# Annotate each bar with the count
for p in ax.patches:
height = p.get_height()
ax.annotate(
f"{height}",
(p.get_x() + p.get_width() / 2.0, height),
ha="center",
va="center",
xytext=(0, 5),
textcoords="offset points",
)
plt.savefig(f"{FIGS_DIR}/{eda.name}_target-class-distribution.png")
plt.show()
In [17]:
eda.update_param("class balance", "~Imbalanced (59, 71, 48)")
Feature Correlations¶
In [18]:
df = data.frame.copy()
df["target"] = data.target_names[y]
# df.head(5)
sns.pairplot(data=df, hue="target", palette="bright")
plt.savefig(f"{FIGS_DIR}/{eda.name}_pairplot.png")
In [19]:
# Create a heatmap of the correlation matrix
plt.figure(figsize=(12, 8))
sns.heatmap(X.corr(), annot=True, cmap="coolwarm", fmt=".2f")
plt.title(f"Correlation Matrix ({eda.name})")
plt.savefig(f"{FIGS_DIR}/{eda.name}_correlation-matrix.png")
plt.show()
In [20]:
eda.update_param("correlations", "a couple medium-strong correlations, but not much")
Dimensionality Reduction Potential¶
In [21]:
# PCA - number of components to explain 95% variance
pca_pipe = Pipeline(
[
("scaler", StandardScaler()),
("pca", PCA()),
]
)
pca_pipe.fit(X)
Out[21]:
Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA())])
StandardScaler()
PCA()
In [22]:
explained_variance_ratio = pca_pipe.named_steps["pca"].explained_variance_ratio_
cumulative_explained_variance = np.cumsum(explained_variance_ratio)
plt.figure(figsize=(8, 6))
plt.plot(cumulative_explained_variance, marker="o", linestyle="--")
plt.xlabel("Number of Principal Components")
plt.ylabel("Cumulative Explained Variance")
plt.title(f"PCA - Cumulative Explained Variance ({eda.name})")
plt.axhline(y=0.95, color="r", linestyle="--") # Threshold for 95% explained variance
plt.show()
# Number of components to explain 95% variance
num_components_95 = np.argmax(cumulative_explained_variance >= 0.95) + 1
print(f"Number of components to explain 95% of the variance: {num_components_95}")
Number of components to explain 95% of the variance: 10
In [23]:
# ICA - number of independent components
ica_pipe = Pipeline(
[
("scaler", StandardScaler()),
("ica", FastICA()),
]
)
components = ica_pipe.fit_transform(X)
# Number of independent components
num_independent_components = components.shape[1]
print(f"Number of independent components found: {num_independent_components}")
Number of independent components found: 13
/Users/yarik/vc_projects/ML/ml-prep/.venv/lib/python3.12/site-packages/sklearn/decomposition/_fastica.py:128: ConvergenceWarning: FastICA did not converge. Consider increasing tolerance or the maximum number of iterations. warnings.warn(
In [24]:
eda.update_param(
"DR potential",
"PCA: 95% variance explained with 10 components\nICA: 13 independent components",
)
Save EDA results¶
In [25]:
eda.save()
Loading 'edas.csv' Saving winec to results/edas.csv
Create and save a shuffled 80/20 train/test split¶
< EDA Iris | Contents | EDA Wine Quality Prediction binary >