-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcategorical.py
72 lines (66 loc) · 1.87 KB
/
categorical.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pandas as pd
from sklearn.preprocessing import LabelEncoder
def show_categories(file: pd.DataFrame) -> None:
    """Print every categorical column of *file* with its distinct-value count.

    A column is listed when its dtype is ``object`` or a pandas string dtype.
    Each row shows the column label left-justified to 16 characters, followed
    by the number of distinct non-null values. A blank ``print("\\n")`` closes
    the listing.

    Args:
        file: The DataFrame to summarise. It is not modified.
    """
    print("Categorical Column Unique Values ")
    for name, series in file.items():
        dtype = series.dtype
        if dtype == 'object' or pd.api.types.is_string_dtype(dtype):
            # nunique() is the same figure describe() reports under "unique",
            # without relying on integer-position access into a label-indexed
            # Series (describe()[1]), which pandas has deprecated.
            # str() keeps non-string column labels from crashing the padding.
            print(f'{str(name).ljust(16)} {series.nunique()}')
    print("\n")
def one_hot(file: pd.DataFrame) -> None:
    """Interactively one-hot encode columns of *file* in place.

    First lists every categorical column (dtype ``object`` or a pandas string
    dtype) with its distinct-value count. It then repeatedly asks for a column
    name. The chosen column is dropped and replaced by the indicator columns
    from ``pd.get_dummies``. Entering ``-1`` at the column prompt, or ``n`` at
    the follow-up prompt, ends the session.

    Args:
        file: The DataFrame to encode. It is mutated in place.

    Returns:
        None.
    """
    print("Categorical Column Unique Values ")
    for name, series in file.items():
        dtype = series.dtype
        if dtype == 'object' or pd.api.types.is_string_dtype(dtype):
            # nunique() matches describe()'s "unique" entry without the
            # deprecated integer-position lookup describe()[1].
            print(f'{str(name).ljust(16)} {series.nunique()}')

    while True:
        col = input("\nWhich column would you like to one - hot encode? (Press -1 to go back) ")
        if col == '-1':
            return
        if col not in file:
            print("Invalid column")
            continue

        dummies = pd.get_dummies(file[col])
        file.drop(col, axis=1, inplace=True)
        for category in dummies:
            # A category value may equal the label of a column that is still in
            # the frame (e.g. two yes/no columns encoded one after the other).
            # Writing under the bare name would silently overwrite that column,
            # so colliding indicators get a "<column>_<category>" label.
            target = category if category not in file else f'{col}_{category}'
            file[target] = dummies[category]
        print("Encoding Done......")

        while True:
            answer = input("Are there more columns to be encoded? (y / n) ").lower()
            if answer == 'y':
                break
            if answer == 'n':
                return
            print("Invalid character")
def label_encode(file: pd.DataFrame) -> None:
    """Interactively label-encode columns of *file* in place.

    First lists every categorical column (dtype ``object`` or a pandas string
    dtype) with its distinct-value count. It then repeatedly asks for a column
    name and overwrites that column with the output of
    ``LabelEncoder.fit_transform``. Entering ``-1`` at the column prompt, or
    ``n`` at the follow-up prompt, ends the session.

    Args:
        file: The DataFrame to encode. It is mutated in place.

    Returns:
        None.
    """
    print("Categorical Column Unique Values ")
    for name, series in file.items():
        dtype = series.dtype
        if dtype == 'object' or pd.api.types.is_string_dtype(dtype):
            # nunique() matches describe()'s "unique" entry without the
            # deprecated integer-position lookup describe()[1].
            print(f'{str(name).ljust(16)} {series.nunique()}')

    while True:
        col = input("\nWhich column would you like to label encode? (Press -1 to go back) ")
        if col == '-1':
            return
        if col not in file:
            print("Invalid column")
            continue

        # fit_transform refits from scratch on every call, so a fresh encoder
        # per column is equivalent to reusing one, and none is built when the
        # user backs out straight away.
        file[col] = LabelEncoder().fit_transform(file[col])
        print("Encoding Done......")

        while True:
            answer = input("Are there more columns to be encoded? (y / n) ").lower()
            if answer == 'y':
                break
            if answer == 'n':
                return
            print("Invalid character")
def show_dataset(file: pd.DataFrame) -> None:
    """Ask how many rows to display and print that many from the top of *file*.

    The prompt repeats until the user enters either ``-1`` (return without
    printing) or a positive integer ``n``, in which case ``file.head(n)`` is
    printed and the function returns.

    Args:
        file: The DataFrame to preview. It is not modified.

    Returns:
        None.
    """
    while True:
        raw = input("\nHow many rows (>0) to print? (Press -1 to go back) ")
        try:
            n = int(raw)
        except ValueError:
            # Non-numeric input used to propagate a ValueError out of the
            # function; re-prompt instead, like the sibling functions do.
            print("Invalid number")
            continue
        if n == -1:
            return
        if n <= 0:
            # head() reads a negative n as "all but the last |n| rows" and
            # n == 0 prints an empty frame; neither matches the ">0" prompt.
            print("Invalid number")
            continue
        print(file.head(n))
        return