From 7d4c4290fae5ac1993500a434594af76688ceb1c Mon Sep 17 00:00:00 2001 From: Olga Ivanova Date: Tue, 10 Sep 2024 17:32:57 +0200 Subject: [PATCH] improve test coverage, blocked matplotlib def render under github action --- tests/test_rnaseq.py | 193 ++++++++++++++++++++++++++++++++++++++----- tests/test_visual.py | 70 ---------------- 2 files changed, 171 insertions(+), 92 deletions(-) delete mode 100644 tests/test_visual.py diff --git a/tests/test_rnaseq.py b/tests/test_rnaseq.py index ab9ff69..82e0ef7 100644 --- a/tests/test_rnaseq.py +++ b/tests/test_rnaseq.py @@ -2,6 +2,7 @@ import pandas as pd import numpy as np from unittest.mock import patch +import matplotlib import matplotlib.pyplot as plt from networkcommons.visual import (plot_density, build_volcano_plot, @@ -9,6 +10,9 @@ plot_pca, plot_heatmap_with_tree) +# Set the matplotlib backend to 'Agg' for headless environments (like GitHub Actions) +matplotlib.use('Agg') + @pytest.fixture def example_dataframe(): @@ -32,35 +36,57 @@ def metadata_dataframe(): return pd.DataFrame(metadata) -def test_plot_density(): - """Test the plot_density function with valid data.""" +### TESTING PLOT_DENSITY ### - # Create a sample dataframe with enough data points +def test_plot_density_valid_data(): + """Test plot_density with valid data.""" example_dataframe = pd.DataFrame({ 'idx': ['gene_1', 'gene_2', 'gene_3'], 'sample_1': [10, 15, 5], 'sample_2': [20, 18, 9], 'sample_3': [12, 22, 8], - 'sample_4': [14, 19, 7] # Adding more samples to ensure enough data points + 'sample_4': [14, 19, 7] }) - # Create metadata for grouping metadata_dataframe = pd.DataFrame({ 'sample_ID': ['sample_1', 'sample_2', 'sample_3', 'sample_4'], 'group': ['control', 'treated', 'control', 'treated'] }) - gene_ids = ['gene_1', 'gene_2'] # Make sure this has genes present in the dataframe + gene_ids = ['gene_1', 'gene_2'] - # Mock plt.show to avoid blocking during the test with patch('matplotlib.pyplot.show'): plot_density(example_dataframe, gene_ids, metadata_dataframe) - # Assert if the plot was created by checking the number of axes - assert len(plt.gcf().get_axes()) == 2 # Should have 2 subplots for 2 genes + assert len(plt.gcf().get_axes()) == 2 + + +def test_plot_density_missing_data(): + """Test plot_density with missing values in the data.""" + example_dataframe = pd.DataFrame({ + 'idx': ['gene_1', 'gene_2', 'gene_3'], + 'sample_1': [10, np.nan, 5], + 'sample_2': [20, 18, 9], + 'sample_3': [12, 22, np.nan], + 'sample_4': [14, 19, 7] + }) + + metadata_dataframe = pd.DataFrame({ + 'sample_ID': ['sample_1', 'sample_2', 'sample_3', 'sample_4'], + 'group': ['control', 'treated', 'control', 'treated'] + }) + + gene_ids = ['gene_1', 'gene_2'] -def test_build_volcano_plot(): - """Test the build_volcano_plot function.""" + with patch('matplotlib.pyplot.show'): + # Since missing data is present, we catch any errors that occur when plotting + with pytest.raises(ValueError, match="`dataset` input should have multiple elements"): + plot_density(example_dataframe, gene_ids, metadata_dataframe) + +### TESTING BUILD_VOLCANO_PLOT ### + +def test_build_volcano_plot_valid_data(): + """Test the build_volcano_plot function with valid data.""" data = pd.DataFrame({ 'log2FoldChange': [1.5, -2.0, 0.5, -0.3], 'pvalue': [0.01, 0.04, 0.20, 0.05] @@ -69,12 +95,39 @@ def test_build_volcano_plot(): with patch('matplotlib.pyplot.show'): build_volcano_plot(data) - # Assert if the plot was created assert len(plt.gcf().get_axes()) == 1 # Should have one main axis for the volcano plot -def test_build_ma_plot(): - """Test the build_ma_plot function.""" +def test_build_volcano_plot_empty_data(): + """Test build_volcano_plot with an empty dataframe.""" + data = pd.DataFrame({ + 'log2FoldChange': [], + 'pvalue': [] + }) + + with patch('matplotlib.pyplot.show'): + build_volcano_plot(data) + + assert len(plt.gcf().get_axes()) == 1 # The plot should still exist even if empty + + +def test_build_volcano_plot_edge_cases(): + """Test build_volcano_plot with edge cases (e.g., very high/low p-values).""" + data = pd.DataFrame({ + 'log2FoldChange': [1.5, -2.0, 0.5, -0.3], + 'pvalue': [1e-300, 1e-10, 1, 0.9999] # Extremely high and low p-values + }) + + with patch('matplotlib.pyplot.show'): + build_volcano_plot(data) + + assert len(plt.gcf().get_axes()) == 1 + + +### TESTING BUILD_MA_PLOT ### + +def test_build_ma_plot_valid_data(): + """Test build_ma_plot with valid data.""" data = pd.DataFrame({ 'log2FoldChange': [1.5, -2.0, 0.5, -0.3], 'meanExpression': [10, 15, 20, 25] @@ -83,21 +136,71 @@ def test_build_ma_plot(): with patch('matplotlib.pyplot.show'): build_ma_plot(data, log2fc='log2FoldChange', mean_exp='meanExpression') - # Assert if the plot was created assert len(plt.gcf().get_axes()) == 1 # Should have one main axis for the MA plot -def test_plot_pca(example_dataframe, metadata_dataframe): +def test_build_ma_plot_empty_data(): + """Test build_ma_plot with an empty dataframe.""" + data = pd.DataFrame({ + 'log2FoldChange': [], + 'meanExpression': [] + }) + + with patch('matplotlib.pyplot.show'): + build_ma_plot(data, log2fc='log2FoldChange', mean_exp='meanExpression') + + assert len(plt.gcf().get_axes()) == 1 # Plot should still exist + + +### TESTING PLOT_PCA ### + +def test_plot_pca_valid_data(example_dataframe, metadata_dataframe): """Test the plot_pca function.""" with patch('matplotlib.pyplot.show'): pca_df = plot_pca(example_dataframe, metadata_dataframe) - # Assert that the returned dataframe has the correct shape assert pca_df.shape[1] == 3 # Expecting PCA1, PCA2, and 'group' columns -def test_build_heatmap_with_tree(): - """Test the build_heatmap_with_tree function.""" +def test_plot_pca_empty_data(): + """Test plot_pca with empty dataframe.""" + empty_dataframe = pd.DataFrame({ + 'idx': ['gene_1', 'gene_2', 'gene_3'] + }) + + metadata = pd.DataFrame({ + 'sample_ID': [], + 'group': [] + }) + + with patch('matplotlib.pyplot.show'): + with pytest.raises(ValueError, match="The dataframe contains no numeric columns suitable for PCA."): + plot_pca(empty_dataframe, metadata) + + +def test_plot_pca_missing_data(): + """Test plot_pca with missing data.""" + example_dataframe = pd.DataFrame({ + 'idx': ['gene_1', 'gene_2', 'gene_3'], + 'sample_1': [10, np.nan, 5], + 'sample_2': [20, 18, np.nan], + 'sample_3': [np.nan, 22, 8] + }) + + metadata = pd.DataFrame({ + 'sample_ID': ['sample_1', 'sample_2', 'sample_3'], + 'group': ['control', 'treated', 'control'] + }) + + with patch('matplotlib.pyplot.show'): + with pytest.raises(ValueError, match="The dataframe contains no numeric columns suitable for PCA."): + plot_pca(example_dataframe, metadata) + + +### TESTING PLOT_HEATMAP_WITH_TREE ### + +def test_plot_heatmap_with_tree_valid_data(): + """Test the plot_heatmap_with_tree function with valid data.""" data = pd.DataFrame({ 'gene_1': [2.3, -1.1, 0.4], 'gene_2': [1.2, 0.5, -0.7], @@ -117,6 +220,52 @@ def test_build_heatmap_with_tree(): render=False ) - # Assert if the figure was created and contains an axes object - assert isinstance(fig, plt.Figure) # Check if the returned object is a matplotlib Figure - assert len(fig.get_axes()) > 0 # Assert that axes were created in the figure \ No newline at end of file + assert isinstance(fig, plt.Figure) + assert len(fig.get_axes()) > 0 # Check that axes exist in the figure + + +def test_plot_heatmap_with_tree_empty_data(): + """Test the plot_heatmap_with_tree function with empty dataframe.""" + empty_data = pd.DataFrame({ + 'gene_1': [], + 'gene_2': [], + 'gene_3': [] + }, index=[]) + + with patch('matplotlib.pyplot.show'): + with pytest.raises(ValueError, + match="The number of observations cannot be determined on an empty distance matrix."): + plot_heatmap_with_tree( + empty_data, + clustering_method='ward', + metric='euclidean', + title='Empty Heatmap', + xlabel='Samples', + ylabel='Genes', + cmap='viridis', + save=False, + render=False + ) + + +def test_plot_heatmap_with_tree_missing_data(): + """Test the plot_heatmap_with_tree function with missing data (NaN values).""" + data_with_nan = pd.DataFrame({ + 'gene_1': [2.3, np.nan, 0.4], + 'gene_2': [1.2, 0.5, -0.7], + 'gene_3': [3.1, 0.9, np.nan] + }, index=['condition_1', 'condition_2', 'condition_3']) + + with patch('matplotlib.pyplot.show'): + with pytest.raises(ValueError, match="The condensed distance matrix must contain only finite values."): + plot_heatmap_with_tree( + data_with_nan, + clustering_method='ward', + metric='euclidean', + title='Heatmap with Missing Data', + xlabel='Samples', + ylabel='Genes', + cmap='viridis', + save=False, + render=False + ) \ No newline at end of file diff --git a/tests/test_visual.py b/tests/test_visual.py deleted file mode 100644 index 833938c..0000000 --- a/tests/test_visual.py +++ /dev/null @@ -1,70 +0,0 @@ -import pandas as pd -import numpy as np -from sklearn.decomposition import PCA -import matplotlib.pyplot as plt -import seaborn as sns -import networkcommons.visual as visual -from unittest.mock import patch -import pytest - - -def test_pca_with_metadata_df(): - df = pd.DataFrame({ - 'idx': ['A', 'B', 'C'], - 'feature1': [1, 2, 3], - 'feature2': [4, 5, 6], - 'feature3': [7, 8, 9] - }) - metadata_df = pd.DataFrame({ - 'sample': ['A', 'B', 'C'], - 'group': ['control', 'treated', 'control'] - }) - result_df = visual.plot_pca(df, metadata_df) - assert isinstance(result_df, pd.DataFrame) - assert 'PCA1' in result_df.columns - assert 'PCA2' in result_df.columns - assert 'group' in result_df.columns - - -def test_pca_with_metadata_array(): - df = pd.DataFrame({ - 'idx': ['A', 'B', 'C'], - 'feature1': [1, 2, 3], - 'feature2': [4, 5, 6], - 'feature3': [7, 8, 9] - }) - metadata_arr = np.array(['control', 'treated', 'control']) - result_df = visual.plot_pca(df, metadata_arr) - assert isinstance(result_df, pd.DataFrame) - assert 'PCA1' in result_df.columns - assert 'PCA2' in result_df.columns - assert 'group' in result_df.columns - - -def test_pca_no_numeric_columns(): - df = pd.DataFrame({'idx': ['A', 'B', 'C']}) - metadata_df = pd.DataFrame({ - 'sample': ['A', 'B', 'C'], - 'group': ['control', 'treated', 'control'] - }) - try: - visual.plot_pca(df, metadata_df) - except ValueError as e: - assert str(e) == "The dataframe contains no numeric columns suitable for PCA." - - -def test_pca_zero_std_columns(): - df_with_zero_std = pd.DataFrame({ - 'idx': ['feature1', 'feature2', 'feature3'], - 'A': [1, 1, 1], - 'B': [1, 5, 6], - 'C': [1, 8, 9] - }) - metadata_df = pd.DataFrame({ - 'sample': ['A', 'B', 'C'], - 'group': ['control', 'treated', 'control'] - }) - with patch('builtins.print') as mocked_print: - result_df = visual.plot_pca(df_with_zero_std, metadata_df) - print(mocked_print.mock_calls) # Print the captured print calls for debugging - mocked_print.assert_any_call("Warning: The following columns have zero standard deviation and will be dropped: ['feature1']")