From 32bf54975527632409a918f58af5cde67d432b40 Mon Sep 17 00:00:00 2001 From: Christian Dalager Date: Wed, 9 Oct 2024 07:59:37 +0200 Subject: [PATCH 1/3] Ignore testsheets and *.xlsx.png plots --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index f93b3ce..0985d4f 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,5 @@ cython_debug/ #.idea/ venv/Lib/site-packages venv +*.xlsx +*.xlsx.png From 9ea869d3f1221f419a3dcd3bade7c8d8f0e10f08 Mon Sep 17 00:00:00 2001 From: Christian Dalager Date: Wed, 9 Oct 2024 08:09:06 +0200 Subject: [PATCH 2/3] Readme --- README.md | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 15ad9ac..f78cd27 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ Tool to analyze and visualize dependencies between cells in Excel spreadsheets i Will generate a graph of the dependencies between cells in an Excel spreadsheet. Data extracted with `openpyxl` (), the graph is generated with the `networkx` library () and is visualized using `matplotlib`. -This is a simple tool and maybe even naïve in its approach - it was hacked together in two evenings and would benefit from some refactoring and more features. It is meant as a starting point for further development.
## Definitions @@ -39,10 +38,6 @@ graph TD ``` -The way the graph is built is by iterating over all cells in the spreadsheet and extracting the references in the formula of each cell. The references are then added as edges in the graph. - -A cell within a range is considered a dependency of the range itself, but not of the other cells in the range. - ## Installation from pypi package PyPi project: [graphedexcel](https://pypi.org/project/graphedexcel/) @@ -66,13 +61,11 @@ pip install -e . python -m graphedexcel [--verbose] [--no-visualize] [--keep-direction] [--open-image] ``` -Depending on the size of the spreadsheet you might want to adjust the plot configuration in the code to to make the graph more readable (remove labels, decrease widths and sizes etc) - -In [graph_visualizer.py](src/graph_visualizer.py) you will find three configuration for small, medium and large graphs. You can adjust the configuration to your needs. +Depending on the size of the spreadsheet you might want to adjust the plot configuration in the code to make the graph more readable (remove labels, decrease widths and sizes etc.) - you can find the configuration in [graph_visualizer.py](src/graphedexcel/graph_visualizer.py) with settings for small, medium and large graphs. You can adjust the configuration to your needs - but this only works if you run from source. ### Arguments -`--verbose` will dump formula cell contents during (more quiet) +`--verbose` will dump formula cell contents during the run (more noisy) `--no-visualize` will skip the visualization step and only print the summary (faster) @@ -82,7 +75,7 @@ In [graph_visualizer.py](src/graph_visualizer.py) you will find three configurat ## Sample output -The following is the output of running the script on the provided `docs/Book1.xlsx` file. +The following is the output of running the script on the sample `docs/Book1.xlsx` file. 
```bash === Dependency Graph Summary === @@ -114,14 +107,16 @@ Graph visualization saved to images/.\Book1.xlsx.png ## Sample plot -More in `/images` folder. +More in `docs/images` folder. ![Sample graph](docs/images/simplified_1.xlsx5.png) ## Tests +Just run pytest in the root folder. + ```bash -pytest test_cell_reference_extraction.py +pytest ``` ## Contribute @@ -136,4 +131,3 @@ You can help with the following, that I have thought of so far: - Improve the visualization and the ease of configuration - Add more examples - Add more documentation -- Package the script for easier installation and use with PyPi From ba5449ce125741f21ef0342fd271b413614847c8 Mon Sep 17 00:00:00 2001 From: Christian Dalager Date: Wed, 9 Oct 2024 08:21:42 +0200 Subject: [PATCH 3/3] Adding scipy to dependencies --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7a373ec..bf668f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,12 @@ classifiers = [ "Operating System :: OS Independent", ] -dependencies = ["networkx>=3.3", "openpyxl>=3.1", "matplotlib>=3.9"] +dependencies = [ + "networkx>=3.3", + "openpyxl>=3.1", + "matplotlib>=3.9", + "scipy>=1.14", +] [project.optional-dependencies] test = ["black==21.9b0", "pytest==8.3"]