From f0eca7fbe8184e0cc3dac944f2adfb780b1199ed Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 1 Oct 2024 18:13:08 +0200 Subject: [PATCH] Add worksheet coloring --- .github/workflows/dtypes_benchmark.yml | 4 ++-- .github/workflows/integration.yml | 2 +- .github/workflows/minimum.yml | 2 +- .github/workflows/unit.yml | 2 +- tests/_external/gdrive_utils.py | 27 ++++++++++++++++++++++-- tests/benchmark/utils.py | 29 +++++++++++++++----------- 6 files changed, 47 insertions(+), 19 deletions(-) diff --git a/.github/workflows/dtypes_benchmark.yml b/.github/workflows/dtypes_benchmark.yml index 18355116c..1fa986691 100644 --- a/.github/workflows/dtypes_benchmark.yml +++ b/.github/workflows/dtypes_benchmark.yml @@ -2,8 +2,6 @@ name: Data Types Benchmark on: push: - branches: - - main jobs: run_dtypes_benchmark: @@ -39,6 +37,8 @@ jobs: run: | invoke benchmark-dtypes + continue-on-error: true + # Upload the json files as artifacts - name: Upload artifacts uses: actions/upload-artifact@v3 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 8ab8aecb0..f02bf417d 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: [ '3.8', '3.12'] os: [ubuntu-latest, windows-latest] include: - os: macos-latest diff --git a/.github/workflows/minimum.yml b/.github/workflows/minimum.yml index de032a13b..479a22aca 100644 --- a/.github/workflows/minimum.yml +++ b/.github/workflows/minimum.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: [ '3.8', '3.12'] os: [ubuntu-latest, windows-latest] include: - os: macos-latest diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index 3b4217b1b..c53640243 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: [ '3.8', '3.12'] os: [ubuntu-latest, windows-latest] include: - os: macos-latest diff --git a/tests/_external/gdrive_utils.py b/tests/_external/gdrive_utils.py index 332787b77..5d7b6290b 100644 --- a/tests/_external/gdrive_utils.py +++ b/tests/_external/gdrive_utils.py @@ -101,7 +101,23 @@ def _set_column_width(writer, results, sheet_name): writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width + 2) -def save_to_gdrive(output_folder, results, output_filename=None): +def _set_color_fields(worksheet, data, marked_data, writer, color_code): + for _, row in marked_data.iterrows(): + dtype = row['dtype'] + sdtype = row['sdtype'] + method = row['method'] + + format_code = writer.book.add_format({'bg_color': color_code}) + + for data_row in range(len(data)): + if data.loc[data_row, 'dtype'] == dtype and data.loc[data_row, 'sdtype'] == sdtype: + method_col = data.columns.get_loc(method) + worksheet.write( + data_row + 1, method_col, bool(data.loc[data_row, method]), format_code + ) + + +def save_to_gdrive(output_folder, results, output_filename=None, mark_results=None): """Save a ``DataFrame`` to google drive folder as ``xlsx`` (spreadsheet). Given the output folder id (google drive folder id), store the given ``results`` as @@ -117,6 +133,8 @@ def save_to_gdrive(output_folder, results, output_filename=None): output_filename (str, optional): String representing the filename to be used for the results spreadsheet. If None, uses to the current date and commit as the name. Defaults to None. + mark_results (dict, optional): + A dict mapping to mark the results. Returns: str: @@ -126,11 +144,16 @@ def save_to_gdrive(output_folder, results, output_filename=None): output_filename = _generate_filename() output = io.BytesIO() - with pd.ExcelWriter(output, engine='xlsxwriter') as writer: # pylint: disable=E0110 for sheet_name, data in results.items(): data.to_excel(writer, sheet_name=sheet_name, index=False) _set_column_width(writer, data, sheet_name) + if mark_results: + for color_code, marked_results in mark_results.items(): + marked_data = marked_results[marked_results['python_version'] == sheet_name] + if not marked_data.empty: + worksheet = writer.sheets[sheet_name] + _set_color_fields(worksheet, data, marked_data, writer, color_code) file_config = {'title': output_filename, 'parents': [{'id': output_folder}]} drive = _get_drive_client() diff --git a/tests/benchmark/utils.py b/tests/benchmark/utils.py index 082ff58d1..cc2e1ab5e 100644 --- a/tests/benchmark/utils.py +++ b/tests/benchmark/utils.py @@ -14,7 +14,7 @@ from tests._external.gdrive_utils import get_latest_file, read_excel, save_to_gdrive from tests._external.slack_utils import post_slack_message -GDRIVE_OUTPUT_FOLDER = '16SkTOyQ3xkJDPJbyZCusb168JwreW5bm' +GDRIVE_OUTPUT_FOLDER = '1tjre6vNnbAv6jyfsF8N8EZfDX7Rx2HCT' PYTHON_VERSION = f'{sys.version_info.major}.{sys.version_info.minor}' TEMPRESULTS = Path(f'results/{sys.version_info.major}.{sys.version_info.minor}.json') @@ -62,20 +62,20 @@ def _get_output_filename(): def compare_previous_result_with_current(): """Compare the previous result with the current and post a message on slack.""" + + new_supported_dtypes = [] + unsupported_dtypes = [] + previously_unseen_dtypes = [] for result in Path('results/').rglob('*.json'): python_version = result.stem current_results = _load_temp_results(result) csv_output = Path(f'results/{python_version}.csv') current_results.to_csv(csv_output, index=False) - new_supported_dtypes = [] - unsupported_dtypes = [] - previously_unseen_dtypes = [] - for index, row in current_results.iterrows(): dtype = row['dtype'] sdtype = row['sdtype'] - for col in current_results.columns[1:]: + for col in current_results.columns[2:]: current_value = row[col] stored_value, previously_seen = get_previous_dtype_result( dtype, @@ -135,7 +135,6 @@ def save_results_to_json(results, filename=None): Defaults to `None`. """ filename = filename or TEMPRESULTS - if os.path.exists(filename): with open(filename, 'r') as file: try: @@ -150,10 +149,14 @@ def save_results_to_json(results, filename=None): def calculate_support_percentage(df): """Calculate the percentage of supported features (True) for each dtype in a DataFrame.""" - feature_columns = df.drop(columns=['dtype']) + feature_columns = df.drop(columns=['dtype', 'sdtype']) # Calculate percentage of TRUE values for each row (dtype) percentage_support = feature_columns.mean(axis=1) * 100 - return pd.DataFrame({'dtype': df['dtype'], 'percentage_supported': percentage_support}) + return pd.DataFrame({ + 'dtype': df['dtype'], + 'sdtype': df['sdtype'], + 'percentage_supported': percentage_support, + }) def compare_and_store_results_in_gdrive(): @@ -164,13 +167,16 @@ def compare_and_store_results_in_gdrive(): sorted_results = {} slack_messages = [] + mark_results = {} for key, value in comparison_results.items(): if not value.empty: sorted_results[key] = value if key == 'unsupported_dtypes': slack_messages.append(':fire: New unsupported DTypes!') + mark_results['#EB9999'] = value elif key == 'new_supported_dtypes': slack_messages.append(':party_blob: New DTypes supported!') + mark_results['#B7D7A8'] = value if len(slack_messages) == 0: slack_messages.append(':dealwithit: No new changes to the DTypes in SDV.') @@ -178,13 +184,12 @@ def compare_and_store_results_in_gdrive(): for key, value in results.items(): sorted_results[key] = value - file_id = save_to_gdrive(GDRIVE_OUTPUT_FOLDER, sorted_results) - + file_id = save_to_gdrive(GDRIVE_OUTPUT_FOLDER, sorted_results, mark_results=mark_results) slack_messages.append( f'See ' ) slack_message = '\n'.join(slack_messages) - post_slack_message('sdv-alerts', slack_message) + post_slack_message('sdv-alerts-debug', slack_message) if __name__ == '__main__':