Skip to content

Commit

Permalink
Add worksheet coloring
Browse files Browse the repository at this point in the history
  • Loading branch information
pvk-developer committed Oct 3, 2024
1 parent c72c87f commit f0eca7f
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 19 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/dtypes_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ name: Data Types Benchmark

on:
push:
branches:
- main

jobs:
run_dtypes_benchmark:
Expand Down Expand Up @@ -39,6 +37,8 @@ jobs:
run: |
invoke benchmark-dtypes
continue-on-error: true

# Upload the json files as artifacts
- name: Upload artifacts
uses: actions/upload-artifact@v3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12']
python-version: [ '3.8', '3.12']
os: [ubuntu-latest, windows-latest]
include:
- os: macos-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/minimum.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12']
python-version: [ '3.8', '3.12']
os: [ubuntu-latest, windows-latest]
include:
- os: macos-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12']
python-version: [ '3.8', '3.12']
os: [ubuntu-latest, windows-latest]
include:
- os: macos-latest
Expand Down
27 changes: 25 additions & 2 deletions tests/_external/gdrive_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,23 @@ def _set_column_width(writer, results, sheet_name):
writer.sheets[sheet_name].set_column(col_idx, col_idx, column_width + 2)


def save_to_gdrive(output_folder, results, output_filename=None):
def _set_color_fields(worksheet, data, marked_data, writer, color_code):
for _, row in marked_data.iterrows():
dtype = row['dtype']
sdtype = row['sdtype']
method = row['method']

format_code = writer.book.add_format({'bg_color': color_code})

for data_row in range(len(data)):
if data.loc[data_row, 'dtype'] == dtype and data.loc[data_row, 'sdtype'] == sdtype:
method_col = data.columns.get_loc(method)
worksheet.write(
data_row + 1, method_col, bool(data.loc[data_row, method]), format_code
)


def save_to_gdrive(output_folder, results, output_filename=None, mark_results=None):
"""Save a ``DataFrame`` to google drive folder as ``xlsx`` (spreadsheet).
Given the output folder id (google drive folder id), store the given ``results`` as
Expand All @@ -117,6 +133,8 @@ def save_to_gdrive(output_folder, results, output_filename=None):
output_filename (str, optional):
String representing the filename to be used for the results spreadsheet. If None,
uses the current date and commit as the name. Defaults to None.
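mark_results (dict, optional):
A dict mapping a background color code (e.g. ``'#EB9999'``) to a ``DataFrame`` of results
whose cells should be highlighted with that color. Rows are matched to a sheet through
their ``python_version`` column. If None or empty, nothing is highlighted. Defaults to None.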
Returns:
str:
Expand All @@ -126,11 +144,16 @@ def save_to_gdrive(output_folder, results, output_filename=None):
output_filename = _generate_filename()

output = io.BytesIO()

with pd.ExcelWriter(output, engine='xlsxwriter') as writer: # pylint: disable=E0110
for sheet_name, data in results.items():
data.to_excel(writer, sheet_name=sheet_name, index=False)
_set_column_width(writer, data, sheet_name)
if mark_results:
for color_code, marked_results in mark_results.items():
marked_data = marked_results[marked_results['python_version'] == sheet_name]
if not marked_data.empty:
worksheet = writer.sheets[sheet_name]
_set_color_fields(worksheet, data, marked_data, writer, color_code)

file_config = {'title': output_filename, 'parents': [{'id': output_folder}]}
drive = _get_drive_client()
Expand Down
29 changes: 17 additions & 12 deletions tests/benchmark/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from tests._external.gdrive_utils import get_latest_file, read_excel, save_to_gdrive
from tests._external.slack_utils import post_slack_message

GDRIVE_OUTPUT_FOLDER = '16SkTOyQ3xkJDPJbyZCusb168JwreW5bm'
GDRIVE_OUTPUT_FOLDER = '1tjre6vNnbAv6jyfsF8N8EZfDX7Rx2HCT'
PYTHON_VERSION = f'{sys.version_info.major}.{sys.version_info.minor}'
TEMPRESULTS = Path(f'results/{sys.version_info.major}.{sys.version_info.minor}.json')

Expand Down Expand Up @@ -62,20 +62,20 @@ def _get_output_filename():

def compare_previous_result_with_current():
"""Compare the previous result with the current and post a message on slack."""

new_supported_dtypes = []
unsupported_dtypes = []
previously_unseen_dtypes = []
for result in Path('results/').rglob('*.json'):
python_version = result.stem
current_results = _load_temp_results(result)
csv_output = Path(f'results/{python_version}.csv')
current_results.to_csv(csv_output, index=False)

new_supported_dtypes = []
unsupported_dtypes = []
previously_unseen_dtypes = []

for index, row in current_results.iterrows():
dtype = row['dtype']
sdtype = row['sdtype']
for col in current_results.columns[1:]:
for col in current_results.columns[2:]:
current_value = row[col]
stored_value, previously_seen = get_previous_dtype_result(
dtype,
Expand Down Expand Up @@ -135,7 +135,6 @@ def save_results_to_json(results, filename=None):
Defaults to `None`.
"""
filename = filename or TEMPRESULTS

if os.path.exists(filename):
with open(filename, 'r') as file:
try:
Expand All @@ -150,10 +149,14 @@ def save_results_to_json(results, filename=None):

def calculate_support_percentage(df):
"""Calculate the percentage of supported features (True) for each dtype in a DataFrame."""
feature_columns = df.drop(columns=['dtype'])
feature_columns = df.drop(columns=['dtype', 'sdtype'])
# Calculate percentage of TRUE values for each row (dtype)
percentage_support = feature_columns.mean(axis=1) * 100
return pd.DataFrame({'dtype': df['dtype'], 'percentage_supported': percentage_support})
return pd.DataFrame({
'dtype': df['dtype'],
'sdtype': df['sdtype'],
'percentage_supported': percentage_support,
})


def compare_and_store_results_in_gdrive():
Expand All @@ -164,27 +167,29 @@ def compare_and_store_results_in_gdrive():
sorted_results = {}

slack_messages = []
mark_results = {}
for key, value in comparison_results.items():
if not value.empty:
sorted_results[key] = value
if key == 'unsupported_dtypes':
slack_messages.append(':fire: New unsupported DTypes!')
mark_results['#EB9999'] = value
elif key == 'new_supported_dtypes':
slack_messages.append(':party_blob: New DTypes supported!')
mark_results['#B7D7A8'] = value

if len(slack_messages) == 0:
slack_messages.append(':dealwithit: No new changes to the DTypes in SDV.')

for key, value in results.items():
sorted_results[key] = value

file_id = save_to_gdrive(GDRIVE_OUTPUT_FOLDER, sorted_results)

file_id = save_to_gdrive(GDRIVE_OUTPUT_FOLDER, sorted_results, mark_results=mark_results)
slack_messages.append(
f'See <https://docs.google.com/spreadsheets/d/{file_id}|dtypes summary and details>'
)
slack_message = '\n'.join(slack_messages)
post_slack_message('sdv-alerts', slack_message)
post_slack_message('sdv-alerts-debug', slack_message)


if __name__ == '__main__':
Expand Down

0 comments on commit f0eca7f

Please sign in to comment.