Skip to content

Commit

Permalink
Refactored Code Structure, Added Error Handling, Increased Flexibility
Browse files Browse the repository at this point in the history
- Refactored Code Structure: The initial script was a single block of code that was refactored into separate functions, each with a specific task. This makes the code easier to read, understand, and maintain. The functions include 'prepare_data', 'create_model', 'compile_and_train_model', 'evaluate_model', 'predict_new_sample', 'cross_validation', and 'main'.

- Added Error Handling: While the initial script assumed that everything would work perfectly, the refactored script includes some basic error checking. For example, it checks whether the target column exists in the DataFrame.

- Increased Flexibility: The initial script was hard-coded to work with a specific DataFrame and target column. In the refactored script, these are parameters that can be passed to the main function, making it easier to work with different datasets or target columns.

- Added Scaler Saving: The refactored script includes a line to save the 'StandardScaler' object using 'joblib'. This is important because the same scaler must be used to transform any new data in the future.
  • Loading branch information
AusBoone authored Jul 26, 2023
1 parent 6c594b7 commit b9691fb
Showing 1 changed file with 136 additions and 61 deletions.
197 changes: 136 additions & 61 deletions Neural-Network.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Import necessary libraries
import os
import joblib
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
Expand All @@ -7,65 +12,135 @@
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
import numpy as np

# Assume you have a DataFrame `df` with a binary target column 'target'
X = df.drop('target', axis=1)
y = df['target']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features to have mean=0 and variance=1
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the model
model = Sequential()
model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu', kernel_regularizer=l2(0.01)))
model.add(BatchNormalization()) # Add Batch Normalization layer
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(BatchNormalization()) # Add Batch Normalization layer
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
optimizer = Adam(learning_rate=0.001) # Define optimizer with learning rate
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Print the model summary
print(model.summary())

# Visualize the model
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)
reduce_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)

# Train the model for a given number of epochs
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2, callbacks=[early_stopping, model_checkpoint, reduce_lr_on_plateau])

# Load the best model
model = load_model('best_model.h5')

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')

# Use the model to predict the class of a new sample (you'll need to replace this with your own data)
new_sample = np.array([0]*X_train_scaled.shape[1]) # This is just a placeholder
new_sample_scaled = scaler.transform(new_sample.reshape(1, -1))
prediction = model.predict(new_sample_scaled)
print(f'Prediction for the new sample: {prediction}')

# Cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
for train_index, val_index in kfold.split(X_train_scaled):
X_train_fold, X_val_fold = X_train_scaled[train_index], X_train_scaled[val_index]
y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]
model.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, validation_data=(X_val_fold, y_val_fold), callbacks=[early_stopping, model_checkpoint, reduce_lr_on_plateau])
# Example DataFrame
df = pd.DataFrame(np.random.rand(1000, 20), columns=[f'feature_{i}' for i in range(20)])
df['target'] = np.random.randint(2, size=1000)

def prepare_data(df, target_column, test_size=0.2, random_state=42):
"""
Prepare data for model training and evaluation.
Splits data into training and testing sets, and scales features.
"""
X = df.drop(target_column, axis=1)
y = df[target_column]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

# Standardize the features to have mean=0 and variance=1
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

return X_train_scaled, X_test_scaled, y_train, y_test, scaler


def create_model(input_dim):
"""
Create a Sequential model and add layers to it.
"""
model = Sequential()
model.add(Dense(64, input_dim=input_dim, activation='relu', kernel_regularizer=l2(0.01)))
model.add(BatchNormalization()) # Add Batch Normalization layer
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(BatchNormalization()) # Add Batch Normalization layer
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

return model


def compile_and_train_model(model, X_train, y_train, learning_rate=0.001, epochs=10, batch_size=32, validation_split=0.2):
"""
Compile and train the model.
"""
optimizer = Adam(learning_rate=learning_rate) # Define optimizer with learning rate
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Print the model summary
print(model.summary())

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)
reduce_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)

# Train the model for a given number of epochs
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split, callbacks=[early_stopping, model_checkpoint, reduce_lr_on_plateau])

# Load the best model
model = load_model('best_model.h5')

return model


def evaluate_model(model, X_test, y_test):
"""
Evaluate the model on the test set.
"""
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')


def predict_new_sample(model, new_sample, scaler):
"""
Use the model to predict the class of a new sample.
"""
new_sample_scaled = scaler.transform(new_sample.reshape(1, -1))
prediction = model.predict(new_sample_scaled)
print(f'Prediction for the new sample: {prediction}')


def cross_validation(X_train, y_train, n_splits=5, random_state=42):
"""
Perform KFold cross-validation.
"""
# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)
reduce_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)

# Define the K-Fold cross-validator. The number of folds is set by n_splits.
# shuffle=True means the data will be shuffled before being split into folds.
# random_state is set for reproducibility.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

# kfold.split(X_train) generates indices to split data into training and validation set.
for train_index, val_index in kfold.split(X_train):
# Use the indices to split the data into training and validation sets for both features and target
X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]

# Create a new instance of the model for the current fold
model = create_model(X_train_fold.shape[1])

# Compile and train the model on the training data for this fold
model = compile_and_train_model(model, X_train_fold, y_train_fold)

# Evaluate the trained model on the validation data for this fold
evaluate_model(model, X_val_fold, y_val_fold)


def main(df, target_column):
"""
Main function to run the script.
"""
X_train, X_test, y_train, y_test, scaler = prepare_data(df, target_column)
model = create_model(X_train.shape[1])
model = compile_and_train_model(model, X_train, y_train)
evaluate_model(model, X_test, y_test)

# Save the scaler
joblib.dump(scaler, 'scaler.pkl')

# Example new sample
new_sample = np.random.rand(X_train.shape[1])
predict_new_sample(model, new_sample, scaler)

cross_validation(X_train, y_train)

# Run the main function
main(df, 'target')

0 comments on commit b9691fb

Please sign in to comment.