-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathfill-missing-values
42 lines (27 loc) · 1.01 KB
/
fill-missing-values
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Fill missing values with each column's mode, mean, or median.
# mode() may return several rows when values tie; row 0 holds one mode
# per column.
df_mode = df.mode().iloc[0]
# numeric_only=True restricts the statistic to numeric columns. Without it,
# pandas >= 2.0 raises TypeError when the frame has non-numeric columns.
# Columns that get no mean/median are simply left unfilled by fillna().
df_mean = df.mean(numeric_only=True)
df_median = df.median(numeric_only=True)
# fillna() with a Series fills each column with the value stored under that
# column's label.
df_fill_mode = df.fillna(df_mode)
df_fill_mean = df.fillna(df_mean)
df_fill_median = df.fillna(df_median)
# Drop every column that contains at least one missing value.
df_drop_na_col = df.dropna(axis=1)
# Impute null values column by column with a custom transformer (DataFrameImputer)
from sklearn.base import TransformerMixin
class DataFrameImputer(TransformerMixin):
    """Impute missing values in a DataFrame using one fill value per column.

    Columns of dtype object are imputed with the most frequent value in the
    column. Columns of any other dtype are imputed with the column's mean
    (the default), median, or most frequent value, as chosen by ``strategy``.
    """

    _STRATEGIES = ("mean", "median", "mode")

    # Class-level default so instances created without running __init__
    # (or subclasses that override it) still behave as the mean imputer.
    strategy = "mean"

    def __init__(self, strategy="mean"):
        """Create the imputer.

        Args:
            strategy: Statistic used for non-object columns; one of
                ``"mean"``, ``"median"`` or ``"mode"``.

        Raises:
            ValueError: If ``strategy`` is not one of the supported names.
        """
        if strategy not in self._STRATEGIES:
            raise ValueError(
                "strategy must be one of %r, got %r"
                % (self._STRATEGIES, strategy)
            )
        self.strategy = strategy

    @staticmethod
    def _most_frequent(column):
        """Return the most frequent non-null value, or NaN if there is none."""
        counts = column.value_counts()
        # An all-null column has no counts; NaN as fill value leaves it as is.
        return counts.index[0] if len(counts) else np.nan

    def _fill_value(self, column):
        """Return the scalar used to fill missing entries of ``column``."""
        if column.dtype == np.dtype('O') or self.strategy == "mode":
            return self._most_frequent(column)
        if self.strategy == "median":
            return column.median()
        return column.mean()

    def fit(self, X, y=None):
        """Compute one fill value per column of ``X`` and store it in ``self.fill``.

        Args:
            X: DataFrame to learn the fill values from.
            y: Ignored; accepted for scikit-learn API compatibility.

        Returns:
            The fitted imputer itself.
        """
        # Every entry must be a scalar: a Series-valued entry would make
        # fillna() align it by row label and leave most gaps unfilled.
        self.fill = pd.Series(
            [self._fill_value(X[c]) for c in X],
            index=X.columns,
        )
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with missing values replaced per column.

        Args:
            X: DataFrame to impute; columns are matched to the fitted fill
                values by label.
            y: Ignored; accepted for scikit-learn API compatibility.

        Returns:
            A new DataFrame with the fill values applied.
        """
        return X.fillna(self.fill)
# ----------- example usage -----------
# Fit the imputer on df2 and fill its missing values in one step.
df_filtered3 = DataFrameImputer().fit_transform(df2)
# Count the nulls left in each column. This is a bare expression, so its
# value is only displayed when run in an interactive session or notebook.
df_filtered3.isnull().sum()
# -------------------------------------