From 7d4c4290fae5ac1993500a434594af76688ceb1c Mon Sep 17 00:00:00 2001
From: Olga Ivanova <ivanovaos.09@gmail.com>
Date: Tue, 10 Sep 2024 17:32:57 +0200
Subject: [PATCH] improve test coverage, use non-interactive matplotlib backend
 under GitHub Actions

---
 tests/test_rnaseq.py | 193 ++++++++++++++++++++++++++++++++++++++-----
 tests/test_visual.py |  70 ----------------
 2 files changed, 171 insertions(+), 92 deletions(-)
 delete mode 100644 tests/test_visual.py

diff --git a/tests/test_rnaseq.py b/tests/test_rnaseq.py
index ab9ff69..82e0ef7 100644
--- a/tests/test_rnaseq.py
+++ b/tests/test_rnaseq.py
@@ -2,6 +2,7 @@
 import pandas as pd
 import numpy as np
 from unittest.mock import patch
+import matplotlib
 import matplotlib.pyplot as plt
 from networkcommons.visual import (plot_density,
                                    build_volcano_plot,
@@ -9,6 +10,9 @@
                                    plot_pca,
                                    plot_heatmap_with_tree)
 
+# Set the matplotlib backend to 'Agg' for headless environments (like GitHub Actions)
+matplotlib.use('Agg')
+
 
 @pytest.fixture
 def example_dataframe():
@@ -32,35 +36,57 @@ def metadata_dataframe():
     return pd.DataFrame(metadata)
 
 
-def test_plot_density():
-    """Test the plot_density function with valid data."""
+### TESTING PLOT_DENSITY ###
 
-    # Create a sample dataframe with enough data points
+def test_plot_density_valid_data():
+    """Test plot_density with valid data."""
     example_dataframe = pd.DataFrame({
         'idx': ['gene_1', 'gene_2', 'gene_3'],
         'sample_1': [10, 15, 5],
         'sample_2': [20, 18, 9],
         'sample_3': [12, 22, 8],
-        'sample_4': [14, 19, 7]  # Adding more samples to ensure enough data points
+        'sample_4': [14, 19, 7]
     })
 
-    # Create metadata for grouping
     metadata_dataframe = pd.DataFrame({
         'sample_ID': ['sample_1', 'sample_2', 'sample_3', 'sample_4'],
         'group': ['control', 'treated', 'control', 'treated']
     })
 
-    gene_ids = ['gene_1', 'gene_2']  # Make sure this has genes present in the dataframe
+    gene_ids = ['gene_1', 'gene_2']
 
-    # Mock plt.show to avoid blocking during the test
     with patch('matplotlib.pyplot.show'):
         plot_density(example_dataframe, gene_ids, metadata_dataframe)
 
-    # Assert if the plot was created by checking the number of axes
-    assert len(plt.gcf().get_axes()) == 2  # Should have 2 subplots for 2 genes
+    assert len(plt.gcf().get_axes()) == 2
+
+
+def test_plot_density_missing_data():
+    """Test plot_density with missing values in the data."""
+    example_dataframe = pd.DataFrame({
+        'idx': ['gene_1', 'gene_2', 'gene_3'],
+        'sample_1': [10, np.nan, 5],
+        'sample_2': [20, 18, 9],
+        'sample_3': [12, 22, np.nan],
+        'sample_4': [14, 19, 7]
+    })
+
+    metadata_dataframe = pd.DataFrame({
+        'sample_ID': ['sample_1', 'sample_2', 'sample_3', 'sample_4'],
+        'group': ['control', 'treated', 'control', 'treated']
+    })
+
+    gene_ids = ['gene_1', 'gene_2']
 
-def test_build_volcano_plot():
-    """Test the build_volcano_plot function."""
+    with patch('matplotlib.pyplot.show'):
+        # Missing values are expected to make plotting fail with this specific ValueError
+        with pytest.raises(ValueError, match="`dataset` input should have multiple elements"):
+            plot_density(example_dataframe, gene_ids, metadata_dataframe)
+
+### TESTING BUILD_VOLCANO_PLOT ###
+
+def test_build_volcano_plot_valid_data():
+    """Test the build_volcano_plot function with valid data."""
     data = pd.DataFrame({
         'log2FoldChange': [1.5, -2.0, 0.5, -0.3],
         'pvalue': [0.01, 0.04, 0.20, 0.05]
@@ -69,12 +95,39 @@ def test_build_volcano_plot():
     with patch('matplotlib.pyplot.show'):
         build_volcano_plot(data)
 
-    # Assert if the plot was created
     assert len(plt.gcf().get_axes()) == 1  # Should have one main axis for the volcano plot
 
 
-def test_build_ma_plot():
-    """Test the build_ma_plot function."""
+def test_build_volcano_plot_empty_data():
+    """Test build_volcano_plot with an empty dataframe."""
+    data = pd.DataFrame({
+        'log2FoldChange': [],
+        'pvalue': []
+    })
+
+    with patch('matplotlib.pyplot.show'):
+        build_volcano_plot(data)
+
+    assert len(plt.gcf().get_axes()) == 1  # The plot should still exist even if empty
+
+
+def test_build_volcano_plot_edge_cases():
+    """Test build_volcano_plot with edge cases (e.g., very high/low p-values)."""
+    data = pd.DataFrame({
+        'log2FoldChange': [1.5, -2.0, 0.5, -0.3],
+        'pvalue': [1e-300, 1e-10, 1, 0.9999]  # Extremely small p-values and p-values at or near 1
+    })
+
+    with patch('matplotlib.pyplot.show'):
+        build_volcano_plot(data)
+
+    assert len(plt.gcf().get_axes()) == 1
+
+
+### TESTING BUILD_MA_PLOT ###
+
+def test_build_ma_plot_valid_data():
+    """Test build_ma_plot with valid data."""
     data = pd.DataFrame({
         'log2FoldChange': [1.5, -2.0, 0.5, -0.3],
         'meanExpression': [10, 15, 20, 25]
@@ -83,21 +136,71 @@ def test_build_ma_plot():
     with patch('matplotlib.pyplot.show'):
         build_ma_plot(data, log2fc='log2FoldChange', mean_exp='meanExpression')
 
-    # Assert if the plot was created
     assert len(plt.gcf().get_axes()) == 1  # Should have one main axis for the MA plot
 
 
-def test_plot_pca(example_dataframe, metadata_dataframe):
+def test_build_ma_plot_empty_data():
+    """Test build_ma_plot with an empty dataframe."""
+    data = pd.DataFrame({
+        'log2FoldChange': [],
+        'meanExpression': []
+    })
+
+    with patch('matplotlib.pyplot.show'):
+        build_ma_plot(data, log2fc='log2FoldChange', mean_exp='meanExpression')
+
+    assert len(plt.gcf().get_axes()) == 1  # Plot should still exist
+
+
+### TESTING PLOT_PCA ###
+
+def test_plot_pca_valid_data(example_dataframe, metadata_dataframe):
     """Test the plot_pca function."""
     with patch('matplotlib.pyplot.show'):
         pca_df = plot_pca(example_dataframe, metadata_dataframe)
 
-    # Assert that the returned dataframe has the correct shape
     assert pca_df.shape[1] == 3  # Expecting PCA1, PCA2, and 'group' columns
 
 
-def test_build_heatmap_with_tree():
-    """Test the build_heatmap_with_tree function."""
+def test_plot_pca_empty_data():
+    """Test plot_pca with a dataframe that has no numeric columns."""
+    empty_dataframe = pd.DataFrame({
+        'idx': ['gene_1', 'gene_2', 'gene_3']
+    })
+
+    metadata = pd.DataFrame({
+        'sample_ID': [],
+        'group': []
+    })
+
+    with patch('matplotlib.pyplot.show'):
+        with pytest.raises(ValueError, match="The dataframe contains no numeric columns suitable for PCA."):
+            plot_pca(empty_dataframe, metadata)
+
+
+def test_plot_pca_missing_data():
+    """Test plot_pca with missing data."""
+    example_dataframe = pd.DataFrame({
+        'idx': ['gene_1', 'gene_2', 'gene_3'],
+        'sample_1': [10, np.nan, 5],
+        'sample_2': [20, 18, np.nan],
+        'sample_3': [np.nan, 22, 8]
+    })
+
+    metadata = pd.DataFrame({
+        'sample_ID': ['sample_1', 'sample_2', 'sample_3'],
+        'group': ['control', 'treated', 'control']
+    })
+
+    with patch('matplotlib.pyplot.show'):
+        with pytest.raises(ValueError, match="The dataframe contains no numeric columns suitable for PCA."):
+            plot_pca(example_dataframe, metadata)
+
+
+### TESTING PLOT_HEATMAP_WITH_TREE ###
+
+def test_plot_heatmap_with_tree_valid_data():
+    """Test the plot_heatmap_with_tree function with valid data."""
     data = pd.DataFrame({
         'gene_1': [2.3, -1.1, 0.4],
         'gene_2': [1.2, 0.5, -0.7],
@@ -117,6 +220,52 @@ def test_build_heatmap_with_tree():
             render=False
         )
 
-    # Assert if the figure was created and contains an axes object
-    assert isinstance(fig, plt.Figure)  # Check if the returned object is a matplotlib Figure
-    assert len(fig.get_axes()) > 0  # Assert that axes were created in the figure
\ No newline at end of file
+    assert isinstance(fig, plt.Figure)
+    assert len(fig.get_axes()) > 0  # Check that axes exist in the figure
+
+
+def test_plot_heatmap_with_tree_empty_data():
+    """Test the plot_heatmap_with_tree function with an empty dataframe."""
+    empty_data = pd.DataFrame({
+        'gene_1': [],
+        'gene_2': [],
+        'gene_3': []
+    }, index=[])
+
+    with patch('matplotlib.pyplot.show'):
+        with pytest.raises(ValueError,
+                           match="The number of observations cannot be determined on an empty distance matrix."):
+            plot_heatmap_with_tree(
+                empty_data,
+                clustering_method='ward',
+                metric='euclidean',
+                title='Empty Heatmap',
+                xlabel='Samples',
+                ylabel='Genes',
+                cmap='viridis',
+                save=False,
+                render=False
+            )
+
+
+def test_plot_heatmap_with_tree_missing_data():
+    """Test the plot_heatmap_with_tree function with missing data (NaN values)."""
+    data_with_nan = pd.DataFrame({
+        'gene_1': [2.3, np.nan, 0.4],
+        'gene_2': [1.2, 0.5, -0.7],
+        'gene_3': [3.1, 0.9, np.nan]
+    }, index=['condition_1', 'condition_2', 'condition_3'])
+
+    with patch('matplotlib.pyplot.show'):
+        with pytest.raises(ValueError, match="The condensed distance matrix must contain only finite values."):
+            plot_heatmap_with_tree(
+                data_with_nan,
+                clustering_method='ward',
+                metric='euclidean',
+                title='Heatmap with Missing Data',
+                xlabel='Samples',
+                ylabel='Genes',
+                cmap='viridis',
+                save=False,
+                render=False
+            )
\ No newline at end of file
diff --git a/tests/test_visual.py b/tests/test_visual.py
deleted file mode 100644
index 833938c..0000000
--- a/tests/test_visual.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import pandas as pd
-import numpy as np
-from sklearn.decomposition import PCA
-import matplotlib.pyplot as plt
-import seaborn as sns
-import networkcommons.visual as visual
-from unittest.mock import patch
-import pytest
-
-
-def test_pca_with_metadata_df():
-    df = pd.DataFrame({
-        'idx': ['A', 'B', 'C'],
-        'feature1': [1, 2, 3],
-        'feature2': [4, 5, 6],
-        'feature3': [7, 8, 9]
-    })
-    metadata_df = pd.DataFrame({
-        'sample': ['A', 'B', 'C'],
-        'group': ['control', 'treated', 'control']
-    })
-    result_df = visual.plot_pca(df, metadata_df)
-    assert isinstance(result_df, pd.DataFrame)
-    assert 'PCA1' in result_df.columns
-    assert 'PCA2' in result_df.columns
-    assert 'group' in result_df.columns
-
-
-def test_pca_with_metadata_array():
-    df = pd.DataFrame({
-        'idx': ['A', 'B', 'C'],
-        'feature1': [1, 2, 3],
-        'feature2': [4, 5, 6],
-        'feature3': [7, 8, 9]
-    })
-    metadata_arr = np.array(['control', 'treated', 'control'])
-    result_df = visual.plot_pca(df, metadata_arr)
-    assert isinstance(result_df, pd.DataFrame)
-    assert 'PCA1' in result_df.columns
-    assert 'PCA2' in result_df.columns
-    assert 'group' in result_df.columns
-
-
-def test_pca_no_numeric_columns():
-    df = pd.DataFrame({'idx': ['A', 'B', 'C']})
-    metadata_df = pd.DataFrame({
-        'sample': ['A', 'B', 'C'],
-        'group': ['control', 'treated', 'control']
-    })
-    try:
-        visual.plot_pca(df, metadata_df)
-    except ValueError as e:
-        assert str(e) == "The dataframe contains no numeric columns suitable for PCA."
-
-
-def test_pca_zero_std_columns():
-    df_with_zero_std = pd.DataFrame({
-        'idx': ['feature1', 'feature2', 'feature3'],
-        'A': [1, 1, 1],
-        'B': [1, 5, 6],
-        'C': [1, 8, 9]
-    })
-    metadata_df = pd.DataFrame({
-        'sample': ['A', 'B', 'C'],
-        'group': ['control', 'treated', 'control']
-    })
-    with patch('builtins.print') as mocked_print:
-        result_df = visual.plot_pca(df_with_zero_std, metadata_df)
-        print(mocked_print.mock_calls)  # Print the captured print calls for debugging
-        mocked_print.assert_any_call("Warning: The following columns have zero standard deviation and will be dropped: ['feature1']")