Skip to content

Commit

Permalink
WIP: Test worksheet coloring
Browse files Browse the repository at this point in the history
  • Loading branch information
pvk-developer committed Oct 1, 2024
1 parent 6d133f2 commit 41b0735
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 13 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/dtypes_benchmark.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
name: Data Types Benchmark

on:
push:
branches:
- main
workflow_dispatch:

jobs:
run_dtypes_benchmark:
Expand Down Expand Up @@ -39,6 +37,8 @@ jobs:
run: |
invoke benchmark-dtypes
continue-on-error: true

# Upload the json files as artifacts
- name: Upload artifacts
uses: actions/upload-artifact@v3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12']
python-version: [ '3.8', '3.12']
os: [ubuntu-latest, windows-latest]
include:
- os: macos-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/minimum.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12']
python-version: [ '3.8', '3.12']
os: [ubuntu-latest, windows-latest]
include:
- os: macos-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12']
python-version: [ '3.8', '3.12']
os: [ubuntu-latest, windows-latest]
include:
- os: macos-latest
Expand Down
33 changes: 32 additions & 1 deletion tests/_external/gdrive_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,29 @@ def _set_column_width(writer, results, sheet_name):
writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width + 2)


def save_to_gdrive(output_folder, results, output_filename=None):
def _set_color_fields(worksheet, data, marked_data, writer, color_code):
    """Highlight the cells of ``data`` that are referenced by ``marked_data``.

    For every row of ``marked_data``, the rows of ``data`` with the same ``dtype`` and
    ``sdtype`` are located and three cells are rewritten with a background color: the
    ``dtype`` cell, the ``sdtype`` cell and the cell under the column named by the marked
    row's ``method`` value.

    Args:
        worksheet:
            Worksheet object exposing ``write(row, col, value, format)``.
        data (pandas.DataFrame):
            The data written to the worksheet. Must contain ``dtype`` and ``sdtype``
            columns plus one column per method name referenced in ``marked_data``.
        marked_data (pandas.DataFrame):
            Rows to highlight, with ``dtype``, ``sdtype`` and ``method`` columns.
        writer:
            Writer object whose ``book.add_format`` builds the cell format.
        color_code (str):
            Background color passed as ``bg_color`` to ``add_format``.

    Raises:
        KeyError:
            If a marked row matches rows in ``data`` but its ``method`` is not a
            column of ``data``.
    """
    if marked_data.empty:
        return

    # The format and the fixed column positions do not depend on the row being marked.
    cell_format = writer.book.add_format({'bg_color': color_code})
    dtype_col = data.columns.get_loc('dtype')
    sdtype_col = data.columns.get_loc('sdtype')

    for _, marked_row in marked_data.iterrows():
        matches = (data['dtype'] == marked_row['dtype']) & (
            data['sdtype'] == marked_row['sdtype']
        )
        # Positional offsets, so the lookup works regardless of the index labels of ``data``.
        positions = matches.to_numpy().nonzero()[0]
        if not positions.size:
            continue

        # The cell to highlight lives in the column named by the marked method,
        # not in a column literally called 'method'.
        method_col = data.columns.get_loc(marked_row['method'])

        for position in positions:
            # Offset by one: presumably row 0 of the sheet holds the header row.
            sheet_row = int(position) + 1
            for column in (dtype_col, sdtype_col, method_col):
                worksheet.write(sheet_row, column, data.iat[position, column], cell_format)


def save_to_gdrive(output_folder, results, output_filename=None, mark_results=None):
"""Save a ``DataFrame`` to google drive folder as ``xlsx`` (spreadsheet).
Given the output folder id (google drive folder id), store the given ``results`` as
Expand All @@ -117,6 +139,8 @@ def save_to_gdrive(output_folder, results, output_filename=None):
output_filename (str, optional):
String representing the filename to be used for the results spreadsheet. If None,
uses the current date and commit as the name. Defaults to None.
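mark_results (dict, optional):
A dict mapping a background color code (e.g. ``'#EB9999'``) to the ``DataFrame`` of results whose cells should be highlighted in that color. Defaults to None.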
Returns:
str:
Expand All @@ -131,6 +155,13 @@ def save_to_gdrive(output_folder, results, output_filename=None):
for sheet_name, data in results.items():
data.to_excel(writer, sheet_name=sheet_name, index=False)
_set_column_width(writer, data, sheet_name)
if mark_results:
for color_code, marked_results in mark_results.items():
marked_data = mark_results[mark_results['python_version'] == sheet_name]
if not marked_data.empty:
worksheet = writer.sheets[sheet_name]
# Apply the marking using the extracted method
_set_color_fields(worksheet, data, marked_data, writer, color_code)

file_config = {'title': output_filename, 'parents': [{'id': output_folder}]}
drive = _get_drive_client()
Expand Down
19 changes: 13 additions & 6 deletions tests/benchmark/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from tests._external.gdrive_utils import get_latest_file, read_excel, save_to_gdrive
from tests._external.slack_utils import post_slack_message

GDRIVE_OUTPUT_FOLDER = '16SkTOyQ3xkJDPJbyZCusb168JwreW5bm'
GDRIVE_OUTPUT_FOLDER = '1tjre6vNnbAv6jyfsF8N8EZfDX7Rx2HCT'
PYTHON_VERSION = f'{sys.version_info.major}.{sys.version_info.minor}'
TEMPRESULTS = Path(f'results/{sys.version_info.major}.{sys.version_info.minor}.json')

Expand Down Expand Up @@ -135,7 +135,6 @@ def save_results_to_json(results, filename=None):
Defaults to `None`.
"""
filename = filename or TEMPRESULTS

if os.path.exists(filename):
with open(filename, 'r') as file:
try:
Expand All @@ -150,10 +149,14 @@ def save_results_to_json(results, filename=None):

def calculate_support_percentage(df):
    """Calculate the percentage of supported features (True) for each dtype in a DataFrame.

    Args:
        df (pandas.DataFrame):
            One row per ``dtype``/``sdtype`` pair. Every column other than ``dtype`` and
            ``sdtype`` is treated as a feature column and averaged.

    Returns:
        pandas.DataFrame:
            A frame with the columns ``dtype``, ``sdtype`` and ``percentage_supported``,
            where the latter is the row-wise mean of the feature columns times 100.
    """
    # Both identifier columns are excluded so only feature columns enter the mean.
    feature_columns = df.drop(columns=['dtype', 'sdtype'])
    # Calculate percentage of TRUE values for each row (dtype)
    percentage_support = feature_columns.mean(axis=1) * 100
    return pd.DataFrame({
        'dtype': df['dtype'],
        'sdtype': df['sdtype'],
        'percentage_supported': percentage_support,
    })


def compare_and_store_results_in_gdrive():
Expand All @@ -164,13 +167,18 @@ def compare_and_store_results_in_gdrive():
sorted_results = {}

slack_messages = []
mark_results = {}
for key, value in comparison_results.items():
if not value.empty:
sorted_results[key] = value
if key == 'unsupported_dtypes':
slack_messages.append(':fire: New unsupported DTypes!')
# Red Color
mark_results['#EB9999'] = value
elif key == 'new_supported_dtypes':
slack_messages.append(':party_blob: New DTypes supported!')
# Green Color
mark_results['#B7D7A8'] = value

if len(slack_messages) == 0:
slack_messages.append(':dealwithit: No new changes to the DTypes in SDV.')
Expand All @@ -179,12 +187,11 @@ def compare_and_store_results_in_gdrive():
sorted_results[key] = value

file_id = save_to_gdrive(GDRIVE_OUTPUT_FOLDER, sorted_results)

slack_messages.append(
f'See <https://docs.google.com/spreadsheets/d/{file_id}|dtypes summary and details>'
)
slack_message = '\n'.join(slack_messages)
post_slack_message('sdv-alerts', slack_message)
post_slack_message('sdv-alerts-debug', slack_message)


if __name__ == '__main__':
Expand Down

0 comments on commit 41b0735

Please sign in to comment.