Add unit tests for sentiment.py (#25)
- Add `add_sentiment_to_dataframe` method to SentimentAnalysis class (usage sketch below)

- Remove unused 'get_sentiment_multiple' method, which assumed an outdated comment data format
- Add fixture for creating a comment data frame
- Add tests for sentiment.py

- Modify workflow to install requirements.txt in src/ directory

- Install required `nltk` packages
- Suppress noisy output from `nltk.download()`
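Taken together, these changes mean callers no longer manage sentiment columns or NLTK downloads themselves. A minimal sketch of the intended call pattern (the `logger` and `comments_df` wiring is assumed from the astro.py diff below, not spelled out by the commit itself):

# Sketch only: names mirror the diffs in this commit; logger/comments_df setup is assumed.
from src.data_collection.sentiment import SentimentAnalysis

sa = SentimentAnalysis(logger)              # __init__ now downloads the required NLTK data quietly
sa.add_sentiment_to_dataframe(comments_df)  # fills the 'PSentiment' and 'NSentiment' columns in place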
AustinCullar authored Sep 26, 2024
1 parent f2a7d91 commit 034e291
Showing 5 changed files with 86 additions and 31 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python-app.yml
@@ -26,8 +26,8 @@ jobs:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
-        pip install flake8 pytest google-api-python-client pandas python-dotenv nltk
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install flake8 pytest
+        if [ -f src/requirements.txt ]; then pip install -r src/requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
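The workflow now installs only flake8 and pytest directly; the remaining dependencies (google-api-python-client, pandas, python-dotenv, nltk) are presumably pinned in the src/requirements.txt file that the install step now points at.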
14 changes: 3 additions & 11 deletions src/astro.py
@@ -60,18 +60,10 @@ def main():
    youtube = YouTubeDataAPI(logger, api_key)
    comments_df = youtube.get_comments(video_id)

-    if not comments_df.empty:
-        comments_df['PSentiment'] = ''
-        comments_df['NSentiment'] = ''
+    sa = SentimentAnalysis(logger)
+    sa.add_sentiment_to_dataframe(comments_df)

-        sa = SentimentAnalysis(logger)
-
-        for index, row in comments_df.iterrows():
-            sentiment = sa.get_sentiment(row['comment'])
-            comments_df.loc[index, 'PSentiment'] = sentiment[0]
-            comments_df.loc[index, 'NSentiment'] = sentiment[1]
-
-    # Database logic
+    # Commit dataframe to database
    db = AstroDB(logger, db_file)
    db.create_database()
    db.insert_comment_dataframe(video_id, comments_df)
38 changes: 22 additions & 16 deletions src/data_collection/sentiment.py
@@ -12,13 +12,33 @@


class SentimentAnalysis:
    logger = None

    def __init__(self, logger):

        self.logger = logger.get_logger()
+        self.nltk_init()

-    def get_sentiment(self, comment: str) -> ():
+    def nltk_init(self):
+        required_nltk_packages = [
+            'punkt_tab',
+            'averaged_perceptron_tagger_eng',
+            'wordnet',
+            'sentiwordnet']
+
+        for pkg in required_nltk_packages:
+            nltk.download(pkg, quiet=True, raise_on_error=True)
+
+    def add_sentiment_to_dataframe(self, df):
+        if not df.empty:
+            df['PSentiment'] = ''
+            df['NSentiment'] = ''
+
+            for index, row in df.iterrows():
+                sentiment = self.get_sentiment(row['comment'])
+                df.loc[index, 'PSentiment'] = sentiment[0]
+                df.loc[index, 'NSentiment'] = sentiment[1]
+
+    def get_sentiment(self, comment: str) -> ():
        token_comment = nltk.word_tokenize(comment)
        pos_tag_comment = nltk.pos_tag(token_comment)

@@ -57,17 +77,3 @@ def get_sentiment(self, comment: str) -> ():
            return (positive_sentiment, negative_sentiment, objectivity)

        return (positive_sentiment, negative_sentiment, objectivity)
-
-    def get_sentiment_multiple(self, comments: list) -> list:
-
-        comments_with_sentiment = []
-
-        for comment in comments:
-            # comment_sentiment: (user, comment, timestamp)
-            comment_sentiment = self.get_sentiment(comment[1])
-
-            comment_with_sentiment = comment + comment_sentiment
-
-            comments_with_sentiment.append(comment_with_sentiment)
-
-        return comments_with_sentiment
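Most of the body of get_sentiment is collapsed in this diff; only the tokenization, POS tagging, and the (positive, negative, objectivity) return tuple are visible. For orientation, here is a rough sketch of a SentiWordNet-based scorer consistent with those visible pieces — the tag mapping, synset selection, and averaging below are assumptions, not the repository's actual code:

# Illustrative sketch only, not the code from this commit: score a comment with
# SentiWordNet by averaging the first synset's scores for each taggable token.
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn


def penn_to_wordnet(tag):
    # Map Penn Treebank tags from nltk.pos_tag() to WordNet POS constants.
    if tag.startswith('J'):
        return wn.ADJ
    if tag.startswith('N'):
        return wn.NOUN
    if tag.startswith('R'):
        return wn.ADV
    if tag.startswith('V'):
        return wn.VERB
    return None


def sentiwordnet_scores(comment: str) -> tuple:
    positive = negative = 0.0
    scored_tokens = 0

    for token, tag in nltk.pos_tag(nltk.word_tokenize(comment)):
        wn_tag = penn_to_wordnet(tag)
        if wn_tag is None:
            continue
        synsets = list(swn.senti_synsets(token, wn_tag))
        if not synsets:
            continue
        positive += synsets[0].pos_score()
        negative += synsets[0].neg_score()
        scored_tokens += 1

    if scored_tokens == 0:
        # Nothing scorable (e.g. the empty string used in the tests): treat as fully objective.
        return (0.0, 0.0, 1.0)

    positive /= scored_tokens
    negative /= scored_tokens
    return (positive, negative, 1.0 - positive - negative)

However the real implementation aggregates its scores, each returned component stays within [0, 1], which is the property the verify_sentiment helper in the new tests asserts.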
16 changes: 14 additions & 2 deletions src/tests/conftest.py
@@ -2,10 +2,22 @@
import json
import googleapiclient

+import pandas as pd
from unittest.mock import MagicMock
from src.log import Logger


+@pytest.fixture(scope='class')
+def comment_dataframe():
+    df = pd.DataFrame(columns=['comment', 'user', 'date'])
+
+    df.loc[0] = ['hello there', '@user1', '2022-10-23T19:05:89Z']
+    df.loc[1] = ['this is terrible', '@user2', '2023-10-23T20:05:89Z']
+    df.loc[2] = ['this is awesome!', '@user3', '2021-8-23T20:11:90Z']
+
+    return df
+
+
@pytest.fixture(scope='class')
def api_comment_response():
    response = {}
@@ -16,12 +16,12 @@ def api_comment_response():
    return response


-@pytest.fixture
+@pytest.fixture(scope='class')
def logger():
    return Logger('debug')


-@pytest.fixture
+@pytest.fixture(scope='class')
def mock_google_http_request(api_comment_response):
    mock = googleapiclient.http.HttpRequest
    mock.execute = MagicMock(return_value=api_comment_response)
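With scope='class', pytest creates each of these fixtures once per requesting test class rather than once per test function, so all tests in a class share the same fixture instance.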
45 changes: 45 additions & 0 deletions src/tests/test_sentiment.py
@@ -0,0 +1,45 @@
"""
Tests for the SentimentAnalysis class.
"""
import pytest

# Astro modules
from src.data_collection.sentiment import SentimentAnalysis

positive_string = 'This is amazing!'
negative_string = 'This is terrible!'
neutral_string = 'The color is blue'
nonsense_string = 'asdf gra asdg vrs sdg'
empty_string = ''


def verify_sentiment(sentiment):
assert sentiment <= 1.0 and sentiment >= 0.0


class TestSentimentAnalysis:

def test_add_sentiment_to_dataframe(self, logger, comment_dataframe):
sa = SentimentAnalysis(logger)

sa.add_sentiment_to_dataframe(comment_dataframe)

for index, row in comment_dataframe.iterrows():
verify_sentiment(row['PSentiment'])
verify_sentiment(row['NSentiment'])

@pytest.mark.parametrize('text',
[positive_string,
negative_string,
neutral_string,
nonsense_string,
empty_string])
def test_get_sentiment(self, logger, text):
sa = SentimentAnalysis(logger)

sentiment = sa.get_sentiment(text)

# verify that some sentiment data is returned
verify_sentiment(sentiment[0])
verify_sentiment(sentiment[1])
verify_sentiment(sentiment[2])
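Given the `from src.data_collection.sentiment import SentimentAnalysis` import path, these tests are presumably run from the repository root, e.g. with `python -m pytest src/tests/test_sentiment.py`; the class-scoped `logger` and `comment_dataframe` fixtures from conftest.py are then built once for the whole TestSentimentAnalysis class.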
