Merge pull request #17 from dalager/general-improvements

General improvements
dalager · Oct 10, 2024 · a9687fb · a9687fb
2 parents b8762dd + 9d65405
commit a9687fb
Show file tree

Hide file tree

Showing 14 changed files with 473 additions and 116 deletions.
diff --git a/.gitignore b/.gitignore
@@ -163,4 +163,6 @@ cython_debug/
 venv/Lib/site-packages
 venv
 *.xlsx
-*.xlsx.png
+*.png
+combined_project.py
+plotsettings.json
diff --git a/README.md b/README.md
@@ -58,20 +58,37 @@ pip install -e .
 ## Usage
 
 ```bash
-python -m graphedexcel <path_to_excel_file> [--verbose] [--no-visualize] [--keep-direction] [--open-image]
+python -m graphedexcel <path_to_excel_file>
 ```
 
-Depending on the size of the spreadsheet you might want to adjust the plot configuration in the code to to make the graph more readable (remove labels, decrease widths and sizes etc) - you can find the configuration in [graph_visualizer.py](src/graphedexcel/graph_visualizer.py) with settings for small, medium and large graphs. You can adjust the configuration to your needs - but this only working if you run from source.
+### Parameters from `--help`
 
-### Arguments
-
-`--verbose` will dump formula cell contents during (more noisy)
-
-`--no-visualize` will skip the visualization step and only print the summary (faster)
-
-`--keep-direction` will keep the direction of the graph as it is in the excel file, otherwise it will be simplified to an undirected graph (slower)
-
-`--open-image` will open the generated image in the default image viewer (only on Windows)
+```
+usage: graphedexcel [-h] [--remove-unconnected] [--as-directed-graph] [--no-visualize]
+                    [--layout {spring,circular,kamada_kawai,shell,spectral}] [--config CONFIG]
+                    [--output-path OUTPUT_PATH] [--open-image]
+                    path_to_excel
+
+Process an Excel file to build and visualize dependency graphs.
+
+positional arguments:
+  path_to_excel         Path to the Excel file to process.
+
+options:
+  -h, --help            show this help message and exit
+  --remove-unconnected, -r
+                        Remove unconnected nodes from the dependency graph.
+  --as-directed-graph, -d
+                        Treat the dependency graph as directed.
+  --no-visualize, -n    Skip the visualization of the dependency graph.
+  --layout,-l {spring,circular,kamada_kawai,shell,spectral}
+                        Layout algorithm for graph visualization (default: spring).
+  --config CONFIG, -c CONFIG
+                        Path to the configuration file for visualization. See README for details.
+  --output-path OUTPUT_PATH, -o OUTPUT_PATH
+                        Specify the output path for the generated graph image.
+  --open-image          Open the generated image after visualization.
+```
 
 ## Sample output
 
@@ -136,7 +153,7 @@ base_graph_settings = {
 
 # Sized-based settings for small, medium, and large graphs
 small_graph_settings = {
-    "with_labels": False, 
+    "with_labels": False,
     "alpha": 0.8}
 
 medium_graph_settings = {
@@ -174,7 +191,7 @@ To override these settings, create a JSON file (e.g., graph_settings.json) with
 To use the custom configuration, pass the path to the JSON file as an argument to the script:
 
 ```bash
-python -m graphedexcel <path_to_excel_file> --config <path to grap_settings.json>
+python -m graphedexcel myexcel.xlsx --config graph_settings.json
 ```
 
 This will render the graph using the custom settings defined in the JSON file.
@@ -186,16 +203,3 @@ Just run pytest in the root folder.
 ```bash
 pytest
 ```
-
-## Contribute
-
-Feel free to contribute by opening an issue or a pull request.
-
-You can help with the following, that I have thought of so far:
-
-- Add more tests
-- Improve the code
-- Add more features
-- Improve the visualization and the ease of configuration
-- Add more examples
-- Add more documentation
diff --git a/Packaging_notes.md → docs/Packaging_notes.md b/Packaging_notes.md → docs/Packaging_notes.md
@@ -1,22 +1,23 @@
 # Packaging notes
 
+Notes on packaging and distributing the package.
+
 ## Test PyPi
 
 ```bash
 rimraf .\dist\; python -m build; python -m twine upload --repository pypi dist/* --verbose
-```
 
 python -m build
 
 python -m twine upload --repository testpypi dist/\* --verbose
 
-````
+```
 
 ## Installation
 
 ```bash
 pip install -i https://test.pypi.org/simple/ graphedexcel
-````
+```
 
 ## installation from local dist
 

diff --git a/docs/ai_chat_context_creator.py b/docs/ai_chat_context_creator.py
@@ -0,0 +1,32 @@
+def combine_python_files(file_list, output_file):
+    """
+    Combine multiple Python files into a single file, wrapping each one in
+    comments that mark where the original file starts and ends.
+
+    Files are read and written as UTF-8 so the result does not depend on the
+    platform's locale encoding.
+
+    Parameters:
+    - file_list: List of Python file names to combine, in output order.
+    - output_file: Name of the output file; it is opened in "w" mode, so any
+      existing content is replaced.
+    """
+    with open(output_file, "w", encoding="utf-8") as outfile:
+        for fname in file_list:
+            # Add a comment indicating the start of a file
+            outfile.write(f"# --- Start of {fname} ---\n\n")
+            with open(fname, "r", encoding="utf-8") as infile:
+                outfile.write(infile.read())
+                outfile.write("\n")
+            # Add a comment indicating the end of a file
+            outfile.write(f"# --- End of {fname} ---\n\n")
+    print(f"All files have been combined into {output_file}")
+
+
+if __name__ == "__main__":
+    # Replace these with your actual file names
+    # The paths are relative, so they resolve against the directory the
+    # script is started from.
+    python_files = [
+        "src/graphedexcel/__main__.py",
+        "src/graphedexcel/graphbuilder.py",
+        "src/graphedexcel/graph_visualizer.py",
+        "src/graphedexcel/graph_summarizer.py",
+        "src/graphedexcel/excel_parser.py",
+    ]
+    # Name of the single combined file written by combine_python_files()
+    output_filename = "combined_project.py"
+    combine_python_files(python_files, output_filename)
diff --git a/src/graphedexcel/__init__.py b/src/graphedexcel/__init__.py
@@ -1,3 +1 @@
-import sys
-from .graphbuilder import extract_formulas_and_build_dependencies
-
+# package graphedexcel
diff --git a/src/graphedexcel/__main__.py b/src/graphedexcel/__main__.py
@@ -1,36 +1,146 @@
 import os
 import sys
-from .graphbuilder import extract_formulas_and_build_dependencies
+import argparse
+import logging
+from .graphbuilder import build_graph_and_stats
 from .graph_summarizer import print_summary
 from .graph_visualizer import visualize_dependency_graph
+# Relative import, like the sibling imports above, so it also resolves when
+# the package is installed and no top-level "src" package exists.
+# NOTE(review): the name is unused here; presumably imported for its
+# import-time logging setup - confirm against logger_config.py.
+from . import logger_config  # noqa
 
-if __name__ == "__main__":
-    if len(sys.argv) > 1:
-        path_to_excel = sys.argv[1]
-    else:
-        print("Please provide the path to the Excel file as an argument.")
-        sys.exit(1)
+logger = logging.getLogger("graphedexcel.main")
+
+
+def parse_arguments():
+    """
+    Build the command-line parser for graphedexcel and parse the arguments.
+
+    Defines one positional argument (path_to_excel) and the optional flags
+    --remove-unconnected, --as-directed-graph, --no-visualize, --layout,
+    --config, --output-path and --open-image.
+
+    Returns:
+        argparse.Namespace: The result of parser.parse_args().
+    """
+    parser = argparse.ArgumentParser(
+        prog="graphedexcel",
+        description="Process an Excel file to build and visualize dependency graphs.",
+    )
+
+    # Positional argument for the path to the Excel file
+    parser.add_argument(
+        "path_to_excel", type=str, help="Path to the Excel file to process."
+    )
+
+    # Optional flags with shorthand aliases
+    parser.add_argument(
+        "--remove-unconnected",
+        "-r",
+        action="store_true",
+        help="Remove unconnected nodes from the dependency graph.",
+    )
+
+    parser.add_argument(
+        "--as-directed-graph",
+        "-d",
+        action="store_true",
+        help="Treat the dependency graph as directed.",
+    )
+
+    parser.add_argument(
+        "--no-visualize",
+        "-n",
+        action="store_true",
+        help="Skip the visualization of the dependency graph.",
+    )
+
+    # Only the listed layout names are accepted; "spring" is used when the
+    # option is omitted.
+    parser.add_argument(
+        "--layout",
+        "-l",
+        type=str,
+        default="spring",
+        choices=["spring", "circular", "kamada_kawai", "shell", "spectral"],
+        help="Layout algorithm for graph visualization (default: spring).",
+    )
+
+    # No default is given, so args.config is None when the option is omitted.
+    parser.add_argument(
+        "--config",
+        "-c",
+        type=str,
+        help="Path to the configuration file for visualization. See README for details.",
+    )
+
+    parser.add_argument(
+        "--output-path",
+        "-o",
+        type=str,
+        default=None,
+        help="Specify the output path for the generated graph image.",
+    )
+
+    # Unlike the other options, this flag has no short alias.
+    parser.add_argument(
+        "--open-image",
+        action="store_true",
+        help="Open the generated image after visualization.",
+    )
+
+    return parser.parse_args()
 
-    # does the file exist?
+
+def main():
+    args = parse_arguments()
+
+    path_to_excel = args.path_to_excel
+
+    # Check if the file exists
     if not os.path.exists(path_to_excel):
-        print(f"File not found: {path_to_excel}")
+        logger.error(f"File not found: {path_to_excel}")
         sys.exit(1)
 
-    # Extract formulas and build the dependency graph
-    dependency_graph, functions = extract_formulas_and_build_dependencies(path_to_excel)
+    # Build the dependency graph and gather statistics
+    dependency_graph, function_stats = build_graph_and_stats(
+        path_to_excel,
+        remove_unconnected=args.remove_unconnected,
+        as_directed=args.as_directed_graph,
+    )
+
+    # Print summary of the dependency graph
+    print_summary(dependency_graph, function_stats)
+
+    if args.no_visualize:
+        logger.info("Skipping visualization as per the '--no-visualize' flag.")
+        sys.exit(0)
+
+    logger.info("Visualizing the graph of dependencies. (This might take a while...)")
+
+    # Determine layout
+    layout = args.layout
 
-    print_summary(dependency_graph, functions)
+    # Configuration path
+    config_path = args.config
 
-    if "--no-visualize" not in sys.argv:
-        print(
-            "\033[1;30;40m\nVisualizing the graph of dependencies.\nThis might take a while...\033[0;37;40m\n"  # noqa
-        )
+    # Determine output filename
+    if args.output_path:
+        filename = args.output_path
+    else:
+        # Create a default filename based on the Excel file name
+        base_name = os.path.splitext(os.path.basename(path_to_excel))[0]
+        filename = f"{base_name}_dependency_graph.png"
+
+    # Visualize the dependency graph
+    visualize_dependency_graph(dependency_graph, filename, config_path, layout)
+
+    logger.info(f"Dependency graph image saved to {filename}.")
+
+    # Open the image file if requested
+    if args.open_image:
+        try:
+            os.startfile(filename)  # Note: os.startfile is Windows-specific
+        except AttributeError:
+            # For macOS and Linux, use 'open' and 'xdg-open' respectively
+            import subprocess
+            import platform
 
-        # if commandline argument --config is provided with a path to a JSON file, pass that path to the visualizer
+            if platform.system() == "Darwin":  # macOS
+                subprocess.call(["open", filename])
+            elif platform.system() == "Linux":
+                subprocess.call(["xdg-open", filename])
+            else:
+                logger.warning("Unable to open the image automatically on this OS.")
 
-        if "--config" in sys.argv:
-            config_index = sys.argv.index("--config")
-            config_path = sys.argv[config_index + 1]
-            visualize_dependency_graph(dependency_graph, path_to_excel, config_path)
-        else:
-            visualize_dependency_graph(dependency_graph, path_to_excel)
+
+if __name__ == "__main__":
+    # Top-level boundary: log any unexpected failure and exit with status 1.
+    try:
+        main()
+    except Exception as e:
+        # The message needs a %s placeholder for the extra argument;
+        # without one the logging call fails while formatting the record.
+        # logger.exception() also appends the traceback.
+        logger.exception("An unexpected error occurred: %s", e)
+        sys.exit(1)
diff --git a/src/graphedexcel/excel_parser.py b/src/graphedexcel/excel_parser.py
@@ -1,10 +1,14 @@
 from openpyxl.utils import get_column_letter, range_boundaries
 import re
 from typing import List, Tuple, Dict
+import logging
+
+logger = logging.getLogger(__name__)
 
 # Regex to detect cell references like A1, B2, or ranges like A1:B2
 CELL_REF_REGEX = r"('?[A-Za-z0-9_\-\[\] ]+'?![A-Z]{1,3}[0-9]+(:[A-Z]{1,3}[0-9]+)?)|([A-Z]{1,3}[0-9]+(:[A-Z]{1,3}[0-9]+)?)"  # noqa
 
+
 def extract_references(formula: str) -> Tuple[List[str], List[str], Dict[str, str]]:
     """
     Extract all referenced cells and ranges from a formula using regular expressions.
@@ -15,7 +19,7 @@ def extract_references(formula: str) -> Tuple[List[str], List[str], Dict[str, st
 
     Returns:
         Tuple[List[str], List[str], Dict[str, str]]: A tuple containing lists of direct references,
-                                                     range references, and a dictionary of dependencies.
+        range references, and a dictionary of dependencies.
     """
     formula = formula.replace("$", "")
     matches = re.findall(CELL_REF_REGEX, formula)
@@ -42,6 +46,7 @@ def extract_references(formula: str) -> Tuple[List[str], List[str], Dict[str, st
 
     return direct_references, range_references, dependencies
 
+
 def expand_range(range_reference: str) -> List[str]:
     """
     Expand a range reference (e.g., 'A1:A3') into a list of individual cell references.
@@ -71,5 +76,3 @@ def expand_range(range_reference: str) -> List[str]:
                 expanded_cells.append(cell_ref)
 
     return expanded_cells
-
-
diff --git a/src/graphedexcel/graph_summarizer.py b/src/graphedexcel/graph_summarizer.py
@@ -1,16 +1,20 @@
 from collections import Counter
+import networkx as nx
 
 
-def print_summary(graph, functionsdict):
+def print_summary(graph: nx.Graph, functionsdict: dict[str, int]) -> None:
     """
-    Summarize a networkx DiGraph representing a dependency graph and print the most used functions in the formulas.
+    Summarize a networkx DiGraph representing a dependency
+    graph and print the most used functions in the formulas.
     """
     strpadsize = 28
     numpadsize = 5
 
+    print()
     print_basic_info(graph, strpadsize, numpadsize)
     print_highest_degree_nodes(graph, strpadsize, numpadsize)
     print_most_used_functions(functionsdict, strpadsize, numpadsize)
+    print()
 
 
 def print_basic_info(graph, strpadsize, numpadsize):