-
Notifications
You must be signed in to change notification settings - Fork 1
/
ecommerce
44 lines (32 loc) · 1.24 KB
/
ecommerce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Import the libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
import pylab
import scipy.stats as stats
# Load the e-commerce customer dataset into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the script will
# raise FileNotFoundError on any other machine unless the CSV lives here.
df = pd.read_csv("/Users/tikkybamiro/Documents/linear practice/ecommerce.csv")

# A bare expression is only echoed inside a notebook/REPL. Print explicitly so
# the preview and the summary statistics are visible when run as a script.
print(df.head())
print(df.describe())
# Linear regression model predicting yearly spend from the four numeric
# customer features selected below.
lm = LinearRegression()
X = df[['Avg. Session Length', 'Time on App', 'Time on Website', 'Length of Membership']]
Y = df['Yearly Amount Spent']

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
lm.fit(X_train, Y_train)

# One fitted coefficient per feature, indexed by the feature's column name.
cdf = pd.DataFrame(lm.coef_, X.columns, columns=['Coefficients'])
# A bare `cdf` expression displays nothing outside a notebook/REPL, so print it.
print(cdf)
# Score the held-out rows and plot predicted against actual yearly spend.
predictions = lm.predict(X_test)
sns.scatterplot(x=predictions, y=Y_test)
plt.xlabel("Predictions")

# Error metrics on the test split; the RMSE is derived from the MSE computed
# once here rather than recomputing it.
mae = mean_absolute_error(Y_test, predictions)
mse = mean_squared_error(Y_test, predictions)
print("Mean Absolute Error: ", mae)
print("Mean Squared Error: ", mse)
print("RMSE: ", math.sqrt(mse))
# Residuals: actual minus predicted yearly spend on the test split.
residuals = Y_test - predictions

# Histogram (with KDE overlay) of the residuals. displot is a figure-level
# function: it creates its own figure, which then becomes the current one.
sns.displot(residuals, bins=20, kde=True)
plt.ylabel("Frequency")
plt.xlabel("Residuals")

# probplot draws on whatever axes are current. Without a fresh figure it would
# plot the Q-Q points on top of the histogram above and overwrite its title
# and axis labels, so open a new figure for the normal probability plot.
plt.figure()
stats.probplot(residuals, dist="norm", plot=plt)

# Display every figure created by the script.
plt.show()