diff --git a/Fake-News-Classification/Dataset/README.md b/Fake-News-Classification/Dataset/README.md new file mode 100644 index 000000000..d22ba02d2 --- /dev/null +++ b/Fake-News-Classification/Dataset/README.md @@ -0,0 +1,97 @@ +# Fake News Classification using DL + +## PROJECT TITLE + +Fake News Detection using Deep Learning + +## GOAL + +To identify whether the given news is fake or not. + +## DATASET + +The link for the dataset used in this project: https://www.kaggle.com/competitions/fake-news/data?select=train.csv + + +## DESCRIPTION + +This project aims to identify whether the given news is fake or not by extracting the meaning and semantics of the given news. + +## WHAT I HAVE DONE + +1. Data collection: Downloaded the dataset from the link given above. +2. Data preprocessing: Preprocessed the news by combining the title and text to create a new feature, then applied processing steps such as tokenizing and vectorizing before passing the data to model training. +3. Model selection: The first model is a self-designed network with an Embedding layer followed by a Global Average Pooling layer, two Dense layers, and an output layer. The second model has an Embedding layer followed by a SimpleRNN layer and a Dense output layer. +4. Comparative analysis: Compared the accuracy scores of the models. 
+ +## MODELS SUMMARY + +Model-1: "sequential" +_________________________________________________________________ + Layer (type) Output Shape Param # +================================================================= + embedding (Embedding) (None, 12140, 182) 30222010 + + global_average_pooling1d ( (None, 182) 0 + GlobalAveragePooling1D) + + dense (Dense) (None, 96) 17568 + + dense_1 (Dense) (None, 24) 2328 + + dense_2 (Dense) (None, 1) 25 + +================================================================= +Total params: 30241931 (115.36 MB) +Trainable params: 30241931 (115.36 MB) +Non-trainable params: 0 (0.00 Byte) + +Model-2: "sequential_3" +_________________________________________________________________ + Layer (type) Output Shape Param # +================================================================= + embedding_3 (Embedding) (None, 12140, 100) 16605500 + + simple_rnn (SimpleRNN) (None, 10) 1110 + + dense_5 (Dense) (None, 1) 11 + +================================================================= +Total params: 16606621 (63.35 MB) +Trainable params: 16606621 (63.35 MB) +Non-trainable params: 0 (0.00 Byte) + +## LIBRARIES NEEDED + +The following libraries are required to run this project: + +- nltk +- pandas +- matplotlib +- tensorflow +- keras +- scikit-learn + +## EVALUATION METRICS + +The evaluation metrics I used to assess the models: + +- Accuracy +- Loss + +These are shown using a confusion matrix in the Images folder. + +## RESULTS +Results on the validation dataset: +For Model-1: +Accuracy: 96.11% +Loss: 0.1350 + +For Model-2: +Accuracy: 85.03% +Loss: 0.1439 + +## CONCLUSION +Based on the results, we can draw the following conclusions: + +1. Model-1 showed a high validation accuracy of 96.11% and a loss of 0.1350. Model-1 thus worked fairly well, identifying 2874 fake articles out of a total of 3044. The first model performed better. The second model had good training accuracy but lower test accuracy, hinting at overfitting. A likely reason is that, for fake news, it is important to 
capture overall sentiment better than individual word sentiment. diff --git a/Fake-News-Classification/Images/Dataset.png b/Fake-News-Classification/Images/Dataset.png new file mode 100644 index 000000000..1e317542d Binary files /dev/null and b/Fake-News-Classification/Images/Dataset.png differ diff --git a/Fake-News-Classification/Images/EDA.png b/Fake-News-Classification/Images/EDA.png new file mode 100644 index 000000000..17401792b Binary files /dev/null and b/Fake-News-Classification/Images/EDA.png differ diff --git a/Fake-News-Classification/Images/EDA1.png b/Fake-News-Classification/Images/EDA1.png new file mode 100644 index 000000000..ddf9bc5f0 Binary files /dev/null and b/Fake-News-Classification/Images/EDA1.png differ diff --git a/Fake-News-Classification/Images/metrics.png b/Fake-News-Classification/Images/metrics.png new file mode 100644 index 000000000..1dc7d3150 Binary files /dev/null and b/Fake-News-Classification/Images/metrics.png differ diff --git a/Fake-News-Classification/Images/model.png b/Fake-News-Classification/Images/model.png new file mode 100644 index 000000000..a8cd84df6 Binary files /dev/null and b/Fake-News-Classification/Images/model.png differ diff --git a/Fake-News-Classification/Images/model2.png b/Fake-News-Classification/Images/model2.png new file mode 100644 index 000000000..b95d80c07 Binary files /dev/null and b/Fake-News-Classification/Images/model2.png differ diff --git a/Fake-News-Classification/Images/model2metrics.png b/Fake-News-Classification/Images/model2metrics.png new file mode 100644 index 000000000..e7631795b Binary files /dev/null and b/Fake-News-Classification/Images/model2metrics.png differ diff --git a/Fake-News-Classification/Model/PridictionModel.ipynb b/Fake-News-Classification/Model/PridictionModel.ipynb new file mode 100644 index 000000000..6b37ebbe1 --- /dev/null +++ b/Fake-News-Classification/Model/PridictionModel.ipynb @@ -0,0 +1,2513 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + 
"metadata": { + "execution": { + "iopub.execute_input": "2021-05-25T06:50:29.636394Z", + "iopub.status.busy": "2021-05-25T06:50:29.636041Z", + "iopub.status.idle": "2021-05-25T06:50:29.643277Z", + "shell.execute_reply": "2021-05-25T06:50:29.642127Z", + "shell.execute_reply.started": "2021-05-25T06:50:29.636365Z" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sklearn\n", + "import itertools\n", + "import numpy as np\n", + "import seaborn as sb\n", + "import re\n", + "import nltk\n", + "import pickle\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.metrics import confusion_matrix\n", + "from matplotlib import pyplot as plt\n", + "from sklearn.linear_model import PassiveAggressiveClassifier,LogisticRegression\n", + "from nltk.stem import WordNetLemmatizer\n", + "from nltk.corpus import stopwords" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2021-05-25T06:50:29.656569Z", + "iopub.status.busy": "2021-05-25T06:50:29.656203Z", + "iopub.status.idle": "2021-05-25T06:50:32.048864Z", + "shell.execute_reply": "2021-05-25T06:50:32.047882Z", + "shell.execute_reply.started": "2021-05-25T06:50:29.65654Z" + } + }, + "outputs": [], + "source": [ + "train_df = pd.read_csv('train.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2021-05-25T06:50:32.05136Z", + "iopub.status.busy": "2021-05-25T06:50:32.051032Z", + "iopub.status.idle": "2021-05-25T06:50:32.089516Z", + "shell.execute_reply": "2021-05-25T06:50:32.088399Z", + "shell.execute_reply.started": "2021-05-25T06:50:32.051329Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | id | \n", + "title | \n", + "author | \n", + "text | \n", + "label | \n", + "
---|---|---|---|---|---|
0 | \n", + "0 | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "Darrell Lucus | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "1 | \n", + "
1 | \n", + "1 | \n", + "FLYNN: Hillary Clinton, Big Woman on Campus - ... | \n", + "Daniel J. Flynn | \n", + "Ever get the feeling your life circles the rou... | \n", + "0 | \n", + "
2 | \n", + "2 | \n", + "Why the Truth Might Get You Fired | \n", + "Consortiumnews.com | \n", + "Why the Truth Might Get You Fired October 29, ... | \n", + "1 | \n", + "
3 | \n", + "3 | \n", + "15 Civilians Killed In Single US Airstrike Hav... | \n", + "Jessica Purkiss | \n", + "Videos 15 Civilians Killed In Single US Airstr... | \n", + "1 | \n", + "
4 | \n", + "4 | \n", + "Iranian woman jailed for fictional unpublished... | \n", + "Howard Portnoy | \n", + "Print \\nAn Iranian woman has been sentenced to... | \n", + "1 | \n", + "
5 | \n", + "5 | \n", + "Jackie Mason: Hollywood Would Love Trump if He... | \n", + "Daniel Nussbaum | \n", + "In these trying times, Jackie Mason is the Voi... | \n", + "0 | \n", + "
6 | \n", + "6 | \n", + "Life: Life Of Luxury: Elton John’s 6 Favorite ... | \n", + "NaN | \n", + "Ever wonder how Britain’s most iconic pop pian... | \n", + "1 | \n", + "
7 | \n", + "7 | \n", + "Benoît Hamon Wins French Socialist Party’s Pre... | \n", + "Alissa J. Rubin | \n", + "PARIS — France chose an idealistic, traditi... | \n", + "0 | \n", + "
8 | \n", + "8 | \n", + "Excerpts From a Draft Script for Donald Trump’... | \n", + "NaN | \n", + "Donald J. Trump is scheduled to make a highly ... | \n", + "0 | \n", + "
9 | \n", + "9 | \n", + "A Back-Channel Plan for Ukraine and Russia, Co... | \n", + "Megan Twohey and Scott Shane | \n", + "A week before Michael T. Flynn resigned as nat... | \n", + "0 | \n", + "
10 | \n", + "10 | \n", + "Obama’s Organizing for Action Partners with So... | \n", + "Aaron Klein | \n", + "Organizing for Action, the activist group that... | \n", + "0 | \n", + "
11 | \n", + "11 | \n", + "BBC Comedy Sketch \"Real Housewives of ISIS\" Ca... | \n", + "Chris Tomlinson | \n", + "The BBC produced spoof on the “Real Housewives... | \n", + "0 | \n", + "
12 | \n", + "12 | \n", + "Russian Researchers Discover Secret Nazi Milit... | \n", + "Amando Flavio | \n", + "The mystery surrounding The Third Reich and Na... | \n", + "1 | \n", + "
13 | \n", + "13 | \n", + "US Officials See No Link Between Trump and Russia | \n", + "Jason Ditz | \n", + "Clinton Campaign Demands FBI Affirm Trump's Ru... | \n", + "1 | \n", + "
14 | \n", + "14 | \n", + "Re: Yes, There Are Paid Government Trolls On S... | \n", + "AnotherAnnie | \n", + "Yes, There Are Paid Government Trolls On Socia... | \n", + "1 | \n", + "
\n", + " | title | \n", + "text | \n", + "label | \n", + "
---|---|---|---|
0 | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "1 | \n", + "
1 | \n", + "FLYNN: Hillary Clinton, Big Woman on Campus - ... | \n", + "Ever get the feeling your life circles the rou... | \n", + "0 | \n", + "
2 | \n", + "Why the Truth Might Get You Fired | \n", + "Why the Truth Might Get You Fired October 29, ... | \n", + "1 | \n", + "
3 | \n", + "15 Civilians Killed In Single US Airstrike Hav... | \n", + "Videos 15 Civilians Killed In Single US Airstr... | \n", + "1 | \n", + "
4 | \n", + "Iranian woman jailed for fictional unpublished... | \n", + "Print \\nAn Iranian woman has been sentenced to... | \n", + "1 | \n", + "
\n", + " | title | \n", + "text | \n", + "
---|---|---|
0 | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "
1 | \n", + "FLYNN: Hillary Clinton, Big Woman on Campus - ... | \n", + "Ever get the feeling your life circles the rou... | \n", + "
2 | \n", + "Why the Truth Might Get You Fired | \n", + "Why the Truth Might Get You Fired October 29, ... | \n", + "
3 | \n", + "15 Civilians Killed In Single US Airstrike Hav... | \n", + "Videos 15 Civilians Killed In Single US Airstr... | \n", + "
4 | \n", + "Iranian woman jailed for fictional unpublished... | \n", + "Print \\nAn Iranian woman has been sentenced to... | \n", + "
5 | \n", + "Jackie Mason: Hollywood Would Love Trump if He... | \n", + "In these trying times, Jackie Mason is the Voi... | \n", + "
6 | \n", + "Life: Life Of Luxury: Elton John’s 6 Favorite ... | \n", + "Ever wonder how Britain’s most iconic pop pian... | \n", + "
7 | \n", + "Benoît Hamon Wins French Socialist Party’s Pre... | \n", + "PARIS — France chose an idealistic, traditi... | \n", + "
8 | \n", + "Excerpts From a Draft Script for Donald Trump’... | \n", + "Donald J. Trump is scheduled to make a highly ... | \n", + "
9 | \n", + "A Back-Channel Plan for Ukraine and Russia, Co... | \n", + "A week before Michael T. Flynn resigned as nat... | \n", + "
\n", + " | title | \n", + "text | \n", + "
---|---|---|
0 | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "
1 | \n", + "FLYNN: Hillary Clinton, Big Woman on Campus - ... | \n", + "Ever get the feeling your life circles the rou... | \n", + "
2 | \n", + "Why the Truth Might Get You Fired | \n", + "Why the Truth Might Get You Fired October 29, ... | \n", + "
3 | \n", + "15 Civilians Killed In Single US Airstrike Hav... | \n", + "Videos 15 Civilians Killed In Single US Airstr... | \n", + "
4 | \n", + "Iranian woman jailed for fictional unpublished... | \n", + "Print \\nAn Iranian woman has been sentenced to... | \n", + "
5 | \n", + "Jackie Mason: Hollywood Would Love Trump if He... | \n", + "In these trying times, Jackie Mason is the Voi... | \n", + "
6 | \n", + "Life: Life Of Luxury: Elton John’s 6 Favorite ... | \n", + "Ever wonder how Britain’s most iconic pop pian... | \n", + "
7 | \n", + "Benoît Hamon Wins French Socialist Party’s Pre... | \n", + "PARIS — France chose an idealistic, traditi... | \n", + "
8 | \n", + "Excerpts From a Draft Script for Donald Trump’... | \n", + "Donald J. Trump is scheduled to make a highly ... | \n", + "
9 | \n", + "A Back-Channel Plan for Ukraine and Russia, Co... | \n", + "A week before Michael T. Flynn resigned as nat... | \n", + "
10 | \n", + "Obama’s Organizing for Action Partners with So... | \n", + "Organizing for Action, the activist group that... | \n", + "
11 | \n", + "BBC Comedy Sketch \"Real Housewives of ISIS\" Ca... | \n", + "The BBC produced spoof on the “Real Housewives... | \n", + "
12 | \n", + "Russian Researchers Discover Secret Nazi Milit... | \n", + "The mystery surrounding The Third Reich and Na... | \n", + "
13 | \n", + "US Officials See No Link Between Trump and Russia | \n", + "Clinton Campaign Demands FBI Affirm Trump's Ru... | \n", + "
14 | \n", + "Re: Yes, There Are Paid Government Trolls On S... | \n", + "Yes, There Are Paid Government Trolls On Socia... | \n", + "
15 | \n", + "In Major League Soccer, Argentines Find a Home... | \n", + "Guillermo Barros Schelotto was not the first A... | \n", + "
16 | \n", + "Wells Fargo Chief Abruptly Steps Down - The Ne... | \n", + "The scandal engulfing Wells Fargo toppled its ... | \n", + "
17 | \n", + "Anonymous Donor Pays $2.5 Million To Release E... | \n", + "A Caddo Nation tribal leader has just been fre... | \n", + "
18 | \n", + "FBI Closes In On Hillary! | \n", + "FBI Closes In On Hillary! Posted on Home » Hea... | \n", + "
19 | \n", + "Chuck Todd: ’BuzzFeed Did Donald Trump a Polit... | \n", + "Wednesday after Donald Trump’s press confere... | \n", + "
\n", + " | title | \n", + "text | \n", + "new_text | \n", + "
---|---|---|---|
0 | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "
1 | \n", + "FLYNN: Hillary Clinton, Big Woman on Campus - ... | \n", + "Ever get the feeling your life circles the rou... | \n", + "FLYNN: Hillary Clinton, Big Woman on Campus - ... | \n", + "
2 | \n", + "Why the Truth Might Get You Fired | \n", + "Why the Truth Might Get You Fired October 29, ... | \n", + "Why the Truth Might Get You Fired Why the Trut... | \n", + "
3 | \n", + "15 Civilians Killed In Single US Airstrike Hav... | \n", + "Videos 15 Civilians Killed In Single US Airstr... | \n", + "15 Civilians Killed In Single US Airstrike Hav... | \n", + "
4 | \n", + "Iranian woman jailed for fictional unpublished... | \n", + "Print \\nAn Iranian woman has been sentenced to... | \n", + "Iranian woman jailed for fictional unpublished... | \n", + "
5 | \n", + "Jackie Mason: Hollywood Would Love Trump if He... | \n", + "In these trying times, Jackie Mason is the Voi... | \n", + "Jackie Mason: Hollywood Would Love Trump if He... | \n", + "
6 | \n", + "Life: Life Of Luxury: Elton John’s 6 Favorite ... | \n", + "Ever wonder how Britain’s most iconic pop pian... | \n", + "Life: Life Of Luxury: Elton John’s 6 Favorite ... | \n", + "
7 | \n", + "Benoît Hamon Wins French Socialist Party’s Pre... | \n", + "PARIS — France chose an idealistic, traditi... | \n", + "Benoît Hamon Wins French Socialist Party’s Pre... | \n", + "
8 | \n", + "Excerpts From a Draft Script for Donald Trump’... | \n", + "Donald J. Trump is scheduled to make a highly ... | \n", + "Excerpts From a Draft Script for Donald Trump’... | \n", + "
9 | \n", + "A Back-Channel Plan for Ukraine and Russia, Co... | \n", + "A week before Michael T. Flynn resigned as nat... | \n", + "A Back-Channel Plan for Ukraine and Russia, Co... | \n", + "
10 | \n", + "Obama’s Organizing for Action Partners with So... | \n", + "Organizing for Action, the activist group that... | \n", + "Obama’s Organizing for Action Partners with So... | \n", + "
11 | \n", + "BBC Comedy Sketch \"Real Housewives of ISIS\" Ca... | \n", + "The BBC produced spoof on the “Real Housewives... | \n", + "BBC Comedy Sketch \"Real Housewives of ISIS\" Ca... | \n", + "
12 | \n", + "Russian Researchers Discover Secret Nazi Milit... | \n", + "The mystery surrounding The Third Reich and Na... | \n", + "Russian Researchers Discover Secret Nazi Milit... | \n", + "
13 | \n", + "US Officials See No Link Between Trump and Russia | \n", + "Clinton Campaign Demands FBI Affirm Trump's Ru... | \n", + "US Officials See No Link Between Trump and Rus... | \n", + "
14 | \n", + "Re: Yes, There Are Paid Government Trolls On S... | \n", + "Yes, There Are Paid Government Trolls On Socia... | \n", + "Re: Yes, There Are Paid Government Trolls On S... | \n", + "
15 | \n", + "In Major League Soccer, Argentines Find a Home... | \n", + "Guillermo Barros Schelotto was not the first A... | \n", + "In Major League Soccer, Argentines Find a Home... | \n", + "
16 | \n", + "Wells Fargo Chief Abruptly Steps Down - The Ne... | \n", + "The scandal engulfing Wells Fargo toppled its ... | \n", + "Wells Fargo Chief Abruptly Steps Down - The Ne... | \n", + "
17 | \n", + "Anonymous Donor Pays $2.5 Million To Release E... | \n", + "A Caddo Nation tribal leader has just been fre... | \n", + "Anonymous Donor Pays $2.5 Million To Release E... | \n", + "
18 | \n", + "FBI Closes In On Hillary! | \n", + "FBI Closes In On Hillary! Posted on Home » Hea... | \n", + "FBI Closes In On Hillary! FBI Closes In On Hil... | \n", + "
19 | \n", + "Chuck Todd: ’BuzzFeed Did Donald Trump a Polit... | \n", + "Wednesday after Donald Trump’s press confere... | \n", + "Chuck Todd: ’BuzzFeed Did Donald Trump a Polit... | \n", + "
\n", + " | new_text | \n", + "
---|---|
0 | \n", + "House Dem Aide: We Didn’t Even See Comey’s Let... | \n", + "
1 | \n", + "FLYNN: Hillary Clinton, Big Woman on Campus - ... | \n", + "
2 | \n", + "Why the Truth Might Get You Fired Why the Trut... | \n", + "
3 | \n", + "15 Civilians Killed In Single US Airstrike Hav... | \n", + "
4 | \n", + "Iranian woman jailed for fictional unpublished... | \n", + "