From 8f761ed6b7c3d93f6b0bd486ffc6d7c8bef9183d Mon Sep 17 00:00:00 2001 From: Virginia Partridge Date: Wed, 1 May 2024 14:45:31 -0400 Subject: [PATCH] Add button to export all clusters as CSV download (#25) * Initial download button, but still bugs * Button to download clusters is only triggered on click, not loads * Changelog notes updated * Added note about Dash version update in changelog * App server not run in debug mode --- CHANGELOG.md | 13 ++++++++++++- app.py | 36 +++++++++++++++++++++++++++++++++--- setup.cfg | 10 +++------- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e489516..d0257a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [2.1.0] ### Changed - Added support for python 3.10 in build and tests -- Made dependency verions less restrictive, except when necessary to avoid deprecations (sklearn, numpy) +- Made dependency versions less restrictive, except when necessary to avoid deprecations (sklearn, numpy) - Unit tests updated to handle sklearn deprecations - Updated prototype cluster browser to display 2023 data +- Upgraded Dash dependency version to >=2.4.0 for the cluster prototype browser app ### Fixed - Upgraded DVC version from 2.10.0 to 3.33.1 to avoid https://github.com/iterative/dvc-objects/issues/241 @@ -20,11 +21,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for processing Reddit comments from manually downloaded archives - Data and models for Reddit comments in 2023 tracked in DVC - Instructions and support for running the prototype cluster browser with gunicorn +- Added button to download all subreddit cluster assignments in prototype cluster browser ### Removed - Removed Unity documentation - Removed argparse from app.py so that it can be served with gunicorn +## [2.1.0] +### Changed +- Update visualizations for WebScience 2024 paper + +### 
Added +- Added citation information in Readme +- Trigger Zenodo DOI assignment for repository + + ## [2.0.0] ### Changed - Removed prefilled anti-immigrant subreddits selected in subreddit clustering app dropdown. Now the dropdown is initially empty. diff --git a/app.py b/app.py index 11dc5a3..294c6a2 100644 --- a/app.py +++ b/app.py @@ -101,6 +101,13 @@ ] ) +DOWNLOAD_CLUSTER_CSV = dash.html.Div( + [ + dbc.Button("Download Clusters CSV", id="cluster_csv_button", n_clicks=0), + dash.dcc.Download(id="download_cluster_csv"), + ] +) + # First section of page, define KMeans paramters, train model button and metrics values and explanation KMEANS_PARAM_SECTION = [ @@ -138,7 +145,7 @@ ] ), dash.html.Br(), - dbc.Button("Train clustering model", id="clustering_button"), + dbc.Button("Train clustering model", id="clustering_button",), ] ), dash.html.Br(), @@ -149,7 +156,10 @@ dash.dcc.Loading( id="loading-metrics", type="default", - children=[dash.html.Article(id="cluster-metrics")], + children=[ + dash.html.Article(id="cluster-metrics"), + DOWNLOAD_CLUSTER_CSV, + ], ), dash.html.Br(), ] @@ -267,7 +277,7 @@ dash.html.Br(), SUBREDDIT_FILTERING_SECTION, dash.html.Br(), - # Stores the dataframe with cluster assingments and the name of the cluster model (for exporting labels) + # Stores the dataframe with cluster assignments and the name of the cluster model (for exporting labels) dash.dcc.Store(id="cluster-assignment"), # Stores the list of subbreddits available in the c2v model, for user to select in drop down dash.dcc.Store(id="subreddits"), @@ -391,6 +401,7 @@ def load_vector_model(selected_month): dash.State("random-seed", "value"), dash.Input("month-dropdown", "value"), dash.Input("tsne-df", "data"), + running=[(dash.Output("clustering_button", "disabled"), True, False)] ) def train_clusters(n_clicks, n_clusters, random_seed, c2v_identifier, tsne_json_data): """Trains kmeans cluster with given number of clusters and random seed. 
@@ -625,6 +636,25 @@ def get_display_table( export_format="csv", ) +@app.callback( + dash.Output("download_cluster_csv", "data"), + dash.Input("cluster_csv_button", "n_clicks"), + dash.Input("cluster-assignment", "data"), + prevent_initial_call=True +) +def download_cluster_csv(n_clicks, cluster_json): + trigger = dash.ctx.triggered_id + logger.info("Cluster download triggered by '%s'", trigger) + if trigger == "cluster_csv_button": + logger.info("Cluster download button clicked times: %s", n_clicks) + model_name = cluster_json["name"] + cluster_df = iv.unjsonify_stored_df(cluster_json["clusters"], [model_name]) + cluster_df[CLUSTER_ASSIGNMENT_DISPLAY_NAME] = cluster_df[model_name] + csv_name = f"{model_name}.csv" + logger.info("Downloading clustering data to %s", csv_name) + return dash.dcc.send_data_frame(cluster_df.to_csv, csv_name, index=False) + else: + raise dash.exceptions.PreventUpdate if __name__ == "__main__": print("Starting IHOP subreddit visualization application") diff --git a/setup.cfg b/setup.cfg index e8b4858..531c595 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,13 +25,9 @@ install_requires = [options.extras_require] app = - dash==2.3.1 - dash-bootstrap-components==1.1.0 - dash-core-components==2.0.0 - dash-daq==0.5.0 - dash-html-components==2.0.0 - dash-renderer==1.9.0 - dash-table==5.0.0 + dash>=2.4.0 + dash_bootstrap_components + dash_daq gunicorn matplotlib==3.5.0 plotly==5.6.0