Add unit tests for sentiment.py (#25)
- Add `add_sentiment_to_dataframe` method to SentimentAnalysis class (usage sketch below)

- Remove unused 'get_sentiment_multiple' method, which assumed an outdated comment data format
- Add fixture for creating a comment data frame
- Add tests for sentiment.py

- Modify workflow to install requirements.txt in src/ directory

- Install required `nltk` packages
- Suppress noisy output from `nltk.download()`
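Taken together, these changes mean callers no longer manage sentiment columns or NLTK downloads themselves. A minimal sketch of the intended call pattern (the `logger` and `comments_df` wiring is assumed from the astro.py diff below, not spelled out by the commit itself):

# Sketch only: names mirror the diffs in this commit; logger/comments_df setup is assumed.
from src.data_collection.sentiment import SentimentAnalysis

sa = SentimentAnalysis(logger)              # __init__ now downloads the required NLTK data quietly
sa.add_sentiment_to_dataframe(comments_df)  # fills the 'PSentiment' and 'NSentiment' columns in place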
AustinCullar authored Sep 26, 2024
1 parent f2a7d91 commit 034e291
Showing 5 changed files with 86 additions and 31 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python-app.yml
@@ -26,8 +26,8 @@ jobs:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
-        pip install flake8 pytest google-api-python-client pandas python-dotenv nltk
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install flake8 pytest
+        if [ -f src/requirements.txt ]; then pip install -r src/requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
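The workflow now installs only flake8 and pytest directly; the remaining dependencies (google-api-python-client, pandas, python-dotenv, nltk) are presumably pinned in the src/requirements.txt file that the install step now points at.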
14 changes: 3 additions & 11 deletions src/astro.py
@@ -60,18 +60,10 @@ def main():
    youtube = YouTubeDataAPI(logger, api_key)
    comments_df = youtube.get_comments(video_id)

-    if not comments_df.empty:
-        comments_df['PSentiment'] = ''
-        comments_df['NSentiment'] = ''
+    sa = SentimentAnalysis(logger)
+    sa.add_sentiment_to_dataframe(comments_df)

-        sa = SentimentAnalysis(logger)
-
-        for index, row in comments_df.iterrows():
-            sentiment = sa.get_sentiment(row['comment'])
-            comments_df.loc[index, 'PSentiment'] = sentiment[0]
-            comments_df.loc[index, 'NSentiment'] = sentiment[1]
-
-    # Database logic
+    # Commit dataframe to database
    db = AstroDB(logger, db_file)
    db.create_database()
    db.insert_comment_dataframe(video_id, comments_df)
38 changes: 22 additions & 16 deletions src/data_collection/sentiment.py
@@ -12,13 +12,33 @@


class SentimentAnalysis:
    logger = None

    def __init__(self, logger):

        self.logger = logger.get_logger()
+        self.nltk_init()

-    def get_sentiment(self, comment: str) -> ():
+    def nltk_init(self):
+        required_nltk_packages = [
+            'punkt_tab',
+            'averaged_perceptron_tagger_eng',
+            'wordnet',
+            'sentiwordnet']
+
+        for pkg in required_nltk_packages:
+            nltk.download(pkg, quiet=True, raise_on_error=True)
+
+    def add_sentiment_to_dataframe(self, df):
+        if not df.empty:
+            df['PSentiment'] = ''
+            df['NSentiment'] = ''
+
+            for index, row in df.iterrows():
+                sentiment = self.get_sentiment(row['comment'])
+                df.loc[index, 'PSentiment'] = sentiment[0]
+                df.loc[index, 'NSentiment'] = sentiment[1]
+
+    def get_sentiment(self, comment: str) -> ():
        token_comment = nltk.word_tokenize(comment)
        pos_tag_comment = nltk.pos_tag(token_comment)

@@ -57,17 +77,3 @@ def get_sentiment(self, comment: str) -> ():
            return (positive_sentiment, negative_sentiment, objectivity)

        return (positive_sentiment, negative_sentiment, objectivity)
-
-    def get_sentiment_multiple(self, comments: list) -> list:
-
-        comments_with_sentiment = []
-
-        for comment in comments:
-            # comment_sentiment: (user, comment, timestamp)
-            comment_sentiment = self.get_sentiment(comment[1])
-
-            comment_with_sentiment = comment + comment_sentiment
-
-            comments_with_sentiment.append(comment_with_sentiment)
-
-        return comments_with_sentiment
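Most of the body of get_sentiment is collapsed in this diff; only the tokenization, POS tagging, and the (positive, negative, objectivity) return tuple are visible. For orientation, here is a rough sketch of a SentiWordNet-based scorer consistent with those visible pieces — the tag mapping, synset selection, and averaging below are assumptions, not the repository's actual code:

# Illustrative sketch only, not the code from this commit: score a comment with
# SentiWordNet by averaging the first synset's scores for each taggable token.
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn


def penn_to_wordnet(tag):
    # Map Penn Treebank tags from nltk.pos_tag() to WordNet POS constants.
    if tag.startswith('J'):
        return wn.ADJ
    if tag.startswith('N'):
        return wn.NOUN
    if tag.startswith('R'):
        return wn.ADV
    if tag.startswith('V'):
        return wn.VERB
    return None


def sentiwordnet_scores(comment: str) -> tuple:
    positive = negative = 0.0
    scored_tokens = 0

    for token, tag in nltk.pos_tag(nltk.word_tokenize(comment)):
        wn_tag = penn_to_wordnet(tag)
        if wn_tag is None:
            continue
        synsets = list(swn.senti_synsets(token, wn_tag))
        if not synsets:
            continue
        positive += synsets[0].pos_score()
        negative += synsets[0].neg_score()
        scored_tokens += 1

    if scored_tokens == 0:
        # Nothing scorable (e.g. the empty string used in the tests): treat as fully objective.
        return (0.0, 0.0, 1.0)

    positive /= scored_tokens
    negative /= scored_tokens
    return (positive, negative, 1.0 - positive - negative)

However the real implementation aggregates its scores, each returned component stays within [0, 1], which is the property the verify_sentiment helper in the new tests asserts.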
16 changes: 14 additions & 2 deletions src/tests/conftest.py
@@ -2,10 +2,22 @@
import json
import googleapiclient

+import pandas as pd
from unittest.mock import MagicMock
from src.log import Logger


+@pytest.fixture(scope='class')
+def comment_dataframe():
+    df = pd.DataFrame(columns=['comment', 'user', 'date'])
+
+    df.loc[0] = ['hello there', '@user1', '2022-10-23T19:05:89Z']
+    df.loc[1] = ['this is terrible', '@user2', '2023-10-23T20:05:89Z']
+    df.loc[2] = ['this is awesome!', '@user3', '2021-8-23T20:11:90Z']
+
+    return df
+
+
@pytest.fixture(scope='class')
def api_comment_response():
    response = {}
@@ -16,12 +16,12 @@ def api_comment_response():
    return response


-@pytest.fixture
+@pytest.fixture(scope='class')
def logger():
    return Logger('debug')


-@pytest.fixture
+@pytest.fixture(scope='class')
def mock_google_http_request(api_comment_response):
    mock = googleapiclient.http.HttpRequest
    mock.execute = MagicMock(return_value=api_comment_response)
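With scope='class', pytest creates each of these fixtures once per requesting test class rather than once per test function, so all tests in a class share the same fixture instance.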
45 changes: 45 additions & 0 deletions src/tests/test_sentiment.py
@@ -0,0 +1,45 @@
"""
Tests for the SentimentAnalysis class.
"""
import pytest

# Astro modules
from src.data_collection.sentiment import SentimentAnalysis

positive_string = 'This is amazing!'
negative_string = 'This is terrible!'
neutral_string = 'The color is blue'
nonsense_string = 'asdf gra asdg vrs sdg'
empty_string = ''


def verify_sentiment(sentiment):
assert sentiment <= 1.0 and sentiment >= 0.0


class TestSentimentAnalysis:

def test_add_sentiment_to_dataframe(self, logger, comment_dataframe):
sa = SentimentAnalysis(logger)

sa.add_sentiment_to_dataframe(comment_dataframe)

for index, row in comment_dataframe.iterrows():
verify_sentiment(row['PSentiment'])
verify_sentiment(row['NSentiment'])

@pytest.mark.parametrize('text',
[positive_string,
negative_string,
neutral_string,
nonsense_string,
empty_string])
def test_get_sentiment(self, logger, text):
sa = SentimentAnalysis(logger)

sentiment = sa.get_sentiment(text)

# verify that some sentiment data is returned
verify_sentiment(sentiment[0])
verify_sentiment(sentiment[1])
verify_sentiment(sentiment[2])
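Given the `from src.data_collection.sentiment import SentimentAnalysis` import path, these tests are presumably run from the repository root, e.g. with `python -m pytest src/tests/test_sentiment.py`; the class-scoped `logger` and `comment_dataframe` fixtures from conftest.py are then built once for the whole TestSentimentAnalysis class.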
