Add new target application (#50)

Use plotly; pin shiny versions
posit-dev · Mar 20, 2024 · cc06464 · cc06464
1 parent aeb824b
commit cc06464
Show file tree

Hide file tree

Showing 5 changed files with 1,209 additions and 0 deletions.
diff --git a/apps/target-app/app.py b/apps/target-app/app.py
@@ -0,0 +1,130 @@
+from shiny.express import ui, input, render
+from shiny import render_plot, req, reactive
+import pandas as pd
+from pathlib import Path
+from plots import (
+    plot_score_distribution,
+    plot_auc_curve,
+    plot_precision_recall_curve,
+    plot_api_response,
+)
+import faicons as fa
+import io
+from shinywidgets import render_plotly
+
+file_path = Path(__file__).parent / "simulated-data.csv"
+
+
+@reactive.file_reader(file_path, interval_secs=0.2)
+def df():
+    out = pd.read_csv(file_path)
+    out["date"] = pd.to_datetime(out["date"], errors="coerce")
+    return out
+
+
+with ui.sidebar():
+    ui.input_select(
+        "account",
+        "Account",
+        choices=[
+            "Berge & Berge",
+            "Fritsch & Fritsch",
+            "Hintz & Hintz",
+            "Mosciski and Sons",
+            "Wolff Ltd",
+        ],
+    )
+    with ui.panel_conditional("input.tabs !== 'Training Dashboard'"):
+        ui.input_date_range(
+            "dates",
+            "Dates",
+            start="2023-01-01",
+            end="2023-04-01",
+        )
+        ui.input_numeric("sample", "Sample Size", value=10000, step=5000)
+
+
+@reactive.Calc
+def sampled_data() -> pd.DataFrame:
+    start_date, end_date = input.dates()
+    start_date = pd.to_datetime(start_date)
+    end_date = pd.to_datetime(end_date)
+    df_value = df()
+    out = df_value[
+        (df_value["date"] > start_date) & (df_value["date"] <= end_date)
+    ].sample(n=input.sample(), replace=True)
+    return out
+
+
+@reactive.Calc()
+def filtered_data() -> pd.DataFrame:
+    sample_df = sampled_data()
+    sample_df = sample_df.loc[sample_df["account"] == input.account()]
+    return sample_df.reset_index(drop=True)
+
+
+with ui.navset_bar(id="tabs", title="Monitoring"):
+    with ui.nav_panel("Training Dashboard"):
+        with ui.layout_columns():
+            with ui.card():
+                ui.card_header("Model Metrics")
+
+                @render_plotly
+                def metric():
+                    df_value = df()
+                    df_filtered = df_value[df_value["account"] == input.account()]
+                    if input.metric() == "ROC Curve":
+                        return plot_auc_curve(
+                            df_filtered, "is_electronics", "training_score"
+                        )
+                    else:
+                        return plot_precision_recall_curve(
+                            df_filtered, "is_electronics", "training_score"
+                        )
+
+                ui.input_select(
+                    "metric",
+                    "Metric",
+                    choices=["ROC Curve", "Precision-Recall"],
+                )
+            with ui.card():
+                ui.card_header("Training Scores")
+
+                @render_plotly
+                def score_dist():
+                    df_value = df()
+                    df_filtered = df_value[df_value["account"] == input.account()]
+                    return plot_score_distribution(df_filtered)
+
+        with ui.card(full_screen=True):
+            with ui.card_header():
+                "Data"
+            with ui.popover(title="Download"):
+                fa.icon_svg("download")
+
+                @render.download()
+                def download_data(filename="scores_data.csv"):
+                    with io.BytesIO() as buf:
+                        filtered_data().to_csv(buf, index=False)
+                        buf.seek(0)
+                        yield buf.getvalue()
+
+            @render.data_frame
+            def data_output():
+                return filtered_data().drop(columns=["text"])
+
+    with ui.nav_panel("Model Monitoring"):
+        with ui.layout_columns():
+            with ui.card():
+                ui.card_header("API Response Time")
+
+                @render_plotly
+                def api_response():
+                    return plot_api_response(filtered_data())
+
+            with ui.card():
+                ui.card_header("Production Scores")
+
+                @render_plotly
+                def prod_score_dist():
+                    return plot_score_distribution(filtered_data())
diff --git a/apps/target-app/plots.py b/apps/target-app/plots.py
@@ -0,0 +1,61 @@
+import plotly.express as px
+from pandas import DataFrame
+import pandas as pd
+from sklearn.metrics import roc_curve, auc, precision_recall_curve
+import numpy as np
+
+import plotly.io as pio
+
+# Set the default plotly theme to resemble ggplot's theme_light
+pio.templates.default = "plotly_white"
+
+
+def plot_score_distribution(df: DataFrame):
+    fig = px.histogram(df, x="training_score", nbins=50, title="Model scores")
+    fig.update_layout(xaxis_title="Score", yaxis_title="Density")
+    return fig
+
+
+def plot_auc_curve(df: DataFrame, true_col: str, pred_col: str):
+    fpr, tpr, _ = roc_curve(df[true_col], df[pred_col])
+    roc_auc = auc(fpr, tpr)
+
+    roc_df = DataFrame({"False Positive Rate": fpr, "True Positive Rate": tpr})
+
+    fig = px.line(
+        roc_df,
+        x="False Positive Rate",
+        y="True Positive Rate",
+        title=f"Receiver Operating Characteristic (ROC) - AUC: {roc_auc.round(2)}",
+        labels={
+            "False Positive Rate": "False Positive Rate",
+            "True Positive Rate": "True Positive Rate",
+        },
+    )
+    fig.add_shape(type="line", line=dict(dash="dash"), x0=0, x1=1, y0=0, y1=1)
+    return fig
+
+
+def plot_precision_recall_curve(df: DataFrame, true_col: str, pred_col: str):
+    precision, recall, _ = precision_recall_curve(df[true_col], df[pred_col])
+
+    pr_df = DataFrame({"Recall": recall, "Precision": precision})
+
+    fig = px.line(
+        pr_df,
+        x="Recall",
+        y="Precision",
+        title="Precision-Recall Curve",
+        labels={"Recall": "Recall", "Precision": "Precision"},
+    )
+    return fig
+
+
+def plot_api_response(df):
+    account = df["account"].unique()
+
+    data = np.random.lognormal(0, 1 / len(account), 10000)
+    df = pd.DataFrame({"Value": data})
+    fig = px.histogram(df, x="Value", nbins=50, title="API response time")
+    fig.update_layout(xaxis_title="Seconds", yaxis_title="Density")
+    return fig
diff --git a/apps/target-app/requirements.txt b/apps/target-app/requirements.txt
@@ -0,0 +1,6 @@
+shiny==0.8.1
+pandas
+plotly
+shinywidgets==0.3.1
+scikit-learn
+faicons
diff --git a/apps/target-app/rsconnect-python/target-app.json b/apps/target-app/rsconnect-python/target-app.json
@@ -0,0 +1,11 @@
+{
+    "https://api.shinyapps.io": {
+        "server_url": "https://api.shinyapps.io",
+        "filename": "/Users/gordon/Documents/presentations/shiny-python-workshop-2023/apps/target-app",
+        "app_url": "https://gordonposit.shinyapps.io/target-app/",
+        "app_id": 11534265,
+        "app_guid": null,
+        "title": "target-app",
+        "app_mode": "python-shiny"
+    }
+}