From 6607402eebe9f16c95528110f9087bc2198bd52a Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 16:59:44 -0500 Subject: [PATCH 01/11] add folders to src --- {{cookiecutter.repo_name}}/src/libs/__init__.py | 1 + {{cookiecutter.repo_name}}/src/libs/dir.txt | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 {{cookiecutter.repo_name}}/src/libs/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/dir.txt diff --git a/{{cookiecutter.repo_name}}/src/libs/__init__.py b/{{cookiecutter.repo_name}}/src/libs/__init__.py new file mode 100644 index 0000000..eb41cf4 --- /dev/null +++ b/{{cookiecutter.repo_name}}/src/libs/__init__.py @@ -0,0 +1 @@ +"""Source code of your project""" diff --git a/{{cookiecutter.repo_name}}/src/libs/dir.txt b/{{cookiecutter.repo_name}}/src/libs/dir.txt new file mode 100644 index 0000000..0929e97 --- /dev/null +++ b/{{cookiecutter.repo_name}}/src/libs/dir.txt @@ -0,0 +1,15 @@ +./transform_pipes +./metrics +./feat_imputation +./feat_encoding +./model_evaluation +./feat_scaling +./feat_new_features +./feat_selection +./model +./data_store +./cleaning +./feat_strings +./model_selection +./feat_preprocess_strings +./validation From dacaab33a3f3691e7da9a5fb602544b542e46533 Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 17:47:09 -0500 Subject: [PATCH 02/11] =?UTF-8?q?=F0=9F=8E=A8=20add=20folders=20for=20src?= =?UTF-8?q?=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- {{cookiecutter.repo_name}}/src/libs/cleaning/__init__.py | 1 + {{cookiecutter.repo_name}}/src/libs/data_store/__init__.py | 1 + {{cookiecutter.repo_name}}/src/libs/feat_encoding/__init__.py | 1 + {{cookiecutter.repo_name}}/src/libs/feat_imputation/__init__.py | 0 .../src/libs/feat_new_features/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/feat_pipelines/__init__.py | 0 .../src/libs/feat_preprocess_strings/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/feat_scaling/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/feat_selection/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/feat_strings/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/metrics/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/model/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/model_evaluation/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/model_selection/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/reports/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/validation/__init__.py | 0 16 files changed, 3 insertions(+) create mode 100644 {{cookiecutter.repo_name}}/src/libs/cleaning/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/data_store/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/feat_encoding/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/feat_imputation/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/feat_new_features/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/feat_pipelines/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/feat_preprocess_strings/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/feat_scaling/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/feat_selection/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/feat_strings/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/metrics/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/model/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/model_evaluation/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/model_selection/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/reports/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/libs/validation/__init__.py diff --git a/{{cookiecutter.repo_name}}/src/libs/cleaning/__init__.py b/{{cookiecutter.repo_name}}/src/libs/cleaning/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/{{cookiecutter.repo_name}}/src/libs/cleaning/__init__.py @@ -0,0 +1 @@ + diff --git a/{{cookiecutter.repo_name}}/src/libs/data_store/__init__.py b/{{cookiecutter.repo_name}}/src/libs/data_store/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/{{cookiecutter.repo_name}}/src/libs/data_store/__init__.py @@ -0,0 +1 @@ + diff --git a/{{cookiecutter.repo_name}}/src/libs/feat_encoding/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_encoding/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/{{cookiecutter.repo_name}}/src/libs/feat_encoding/__init__.py @@ -0,0 +1 @@ + diff --git a/{{cookiecutter.repo_name}}/src/libs/feat_imputation/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_imputation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/feat_new_features/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_new_features/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/feat_pipelines/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_pipelines/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/feat_preprocess_strings/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_preprocess_strings/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/feat_scaling/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_scaling/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/feat_selection/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_selection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/feat_strings/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_strings/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/metrics/__init__.py b/{{cookiecutter.repo_name}}/src/libs/metrics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/model/__init__.py b/{{cookiecutter.repo_name}}/src/libs/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/model_evaluation/__init__.py b/{{cookiecutter.repo_name}}/src/libs/model_evaluation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/model_selection/__init__.py b/{{cookiecutter.repo_name}}/src/libs/model_selection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/reports/__init__.py b/{{cookiecutter.repo_name}}/src/libs/reports/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/libs/validation/__init__.py b/{{cookiecutter.repo_name}}/src/libs/validation/__init__.py new file mode 100644 index 0000000..e69de29 From 52dd9d64019cc65bb9ed6759ad52bb3b4a039421 Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 18:09:08 -0500 Subject: [PATCH 03/11] =?UTF-8?q?=F0=9F=8E=A8=20add=20pipelines=20folders?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/libs/{cleaning => data_etl}/__init__.py | 0 .../src/libs/{data_store => feat_cleaning}/__init__.py | 0 {{cookiecutter.repo_name}}/src/pipelines/__init__.py | 0 {{cookiecutter.repo_name}}/src/pipelines/data_etl/__init__.py | 0 .../src/pipelines/feature_engineering/__init__.py | 0 .../src/pipelines/model_evaluation/__init__.py | 0 .../src/pipelines/model_prediction/__init__.py | 0 {{cookiecutter.repo_name}}/src/pipelines/model_train/__init__.py | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename {{cookiecutter.repo_name}}/src/libs/{cleaning => data_etl}/__init__.py (100%) rename {{cookiecutter.repo_name}}/src/libs/{data_store => feat_cleaning}/__init__.py (100%) create mode 100644 {{cookiecutter.repo_name}}/src/pipelines/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/pipelines/data_etl/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/pipelines/feature_engineering/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/pipelines/model_evaluation/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/pipelines/model_prediction/__init__.py create mode 100644 {{cookiecutter.repo_name}}/src/pipelines/model_train/__init__.py diff --git a/{{cookiecutter.repo_name}}/src/libs/cleaning/__init__.py b/{{cookiecutter.repo_name}}/src/libs/data_etl/__init__.py similarity index 100% rename from {{cookiecutter.repo_name}}/src/libs/cleaning/__init__.py rename to {{cookiecutter.repo_name}}/src/libs/data_etl/__init__.py diff --git a/{{cookiecutter.repo_name}}/src/libs/data_store/__init__.py b/{{cookiecutter.repo_name}}/src/libs/feat_cleaning/__init__.py similarity index 100% rename from {{cookiecutter.repo_name}}/src/libs/data_store/__init__.py rename to {{cookiecutter.repo_name}}/src/libs/feat_cleaning/__init__.py diff --git a/{{cookiecutter.repo_name}}/src/pipelines/__init__.py b/{{cookiecutter.repo_name}}/src/pipelines/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/pipelines/data_etl/__init__.py b/{{cookiecutter.repo_name}}/src/pipelines/data_etl/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/pipelines/feature_engineering/__init__.py b/{{cookiecutter.repo_name}}/src/pipelines/feature_engineering/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/pipelines/model_evaluation/__init__.py b/{{cookiecutter.repo_name}}/src/pipelines/model_evaluation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/pipelines/model_prediction/__init__.py b/{{cookiecutter.repo_name}}/src/pipelines/model_prediction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/{{cookiecutter.repo_name}}/src/pipelines/model_train/__init__.py b/{{cookiecutter.repo_name}}/src/pipelines/model_train/__init__.py new file mode 100644 index 0000000..e69de29 From 1ddf388f90a7eacd341cc88472795f6a731604e3 Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 18:26:40 -0500 Subject: [PATCH 04/11] =?UTF-8?q?=F0=9F=94=A5=20remove=20bandit=20configur?= =?UTF-8?q?ation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .code_quality/bandit.yaml | 1 - 1 file changed, 1 deletion(-) delete mode 120000 .code_quality/bandit.yaml diff --git a/.code_quality/bandit.yaml b/.code_quality/bandit.yaml deleted file mode 120000 index d35efac..0000000 --- a/.code_quality/bandit.yaml +++ /dev/null @@ -1 +0,0 @@ -../{{cookiecutter.repo_name}}/.code_quality/bandit.yaml \ No newline at end of file From 9f5b51e4c37c3d3f7a4af2bb84e3b9c05c951343 Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 18:30:00 -0500 Subject: [PATCH 05/11] =?UTF-8?q?=F0=9F=94=A5=20remove=20unused=20file?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- {{cookiecutter.repo_name}}/src/libs/dir.txt | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 {{cookiecutter.repo_name}}/src/libs/dir.txt diff --git a/{{cookiecutter.repo_name}}/src/libs/dir.txt b/{{cookiecutter.repo_name}}/src/libs/dir.txt deleted file mode 100644 index 0929e97..0000000 --- a/{{cookiecutter.repo_name}}/src/libs/dir.txt +++ /dev/null @@ -1,15 +0,0 @@ -./transform_pipes -./metrics -./feat_imputation -./feat_encoding -./model_evaluation -./feat_scaling -./feat_new_features -./feat_selection -./model -./data_store -./cleaning -./feat_strings -./model_selection -./feat_preprocess_strings -./validation From 3e10616c8415ee5c764deaef4bdb39a71aa75ca1 Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 18:54:01 -0500 Subject: [PATCH 06/11] =?UTF-8?q?=F0=9F=8D=AA=F0=9F=94=A5=20remove=20bandi?= =?UTF-8?q?t=20configuration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../.code_quality/bandit.yaml | 404 ------------------ 1 file changed, 404 deletions(-) delete mode 100644 {{cookiecutter.repo_name}}/.code_quality/bandit.yaml diff --git a/{{cookiecutter.repo_name}}/.code_quality/bandit.yaml b/{{cookiecutter.repo_name}}/.code_quality/bandit.yaml deleted file mode 100644 index 5e38b72..0000000 --- a/{{cookiecutter.repo_name}}/.code_quality/bandit.yaml +++ /dev/null @@ -1,404 +0,0 @@ - -### Bandit config file: - -### This config may optionally select a subset of tests to run or skip by -### filling out the 'tests' and 'skips' lists given below. If no tests are -### specified for inclusion then it is assumed all tests are desired. The skips -### set will remove specific tests from the include set. This can be controlled -### using the -t/-s CLI options. Note that the same test ID should not appear -### in both 'tests' and 'skips', this would be nonsensical and is detected by -### Bandit at runtime. - -# Available tests: -# B101 : assert_used -# B102 : exec_used -# B103 : set_bad_file_permissions -# B104 : hardcoded_bind_all_interfaces -# B105 : hardcoded_password_string -# B106 : hardcoded_password_funcarg -# B107 : hardcoded_password_default -# B108 : hardcoded_tmp_directory -# B110 : try_except_pass -# B112 : try_except_continue -# B201 : flask_debug_true -# B301 : pickle -# B302 : marshal -# B303 : md5 -# B304 : ciphers -# B305 : cipher_modes -# B306 : mktemp_q -# B307 : eval -# B308 : mark_safe -# B309 : httpsconnection -# B310 : urllib_urlopen -# B311 : random -# B312 : telnetlib -# B313 : xml_bad_cElementTree -# B314 : xml_bad_ElementTree -# B315 : xml_bad_expatreader -# B316 : xml_bad_expatbuilder -# B317 : xml_bad_sax -# B318 : xml_bad_minidom -# B319 : xml_bad_pulldom -# B320 : xml_bad_etree -# B321 : ftplib -# B322 : input -# B323 : unverified_context -# B324 : hashlib_new_insecure_functions -# B325 : tempnam -# B401 : import_telnetlib -# B402 : import_ftplib -# B403 : import_pickle -# B404 : import_subprocess -# B405 : import_xml_etree -# B406 : import_xml_sax -# B407 : import_xml_expat -# B408 : import_xml_minidom -# B409 : import_xml_pulldom -# B410 : import_lxml -# B411 : import_xmlrpclib -# B412 : import_httpoxy -# B413 : import_pycrypto -# B501 : request_with_no_cert_validation -# B502 : ssl_with_bad_version -# B503 : ssl_with_bad_defaults -# B504 : ssl_with_no_version -# B505 : weak_cryptographic_key -# B506 : yaml_load -# B507 : ssh_no_host_key_verification -# B601 : paramiko_calls -# B602 : subprocess_popen_with_shell_equals_true -# B603 : subprocess_without_shell_equals_true -# B604 : any_other_function_with_shell_equals_true -# B605 : start_process_with_a_shell -# B606 : start_process_with_no_shell -# B607 : start_process_with_partial_path -# B608 : hardcoded_sql_expressions -# B609 : linux_commands_wildcard_injection -# B610 : django_extra_used -# B611 : django_rawsql_used -# B701 : jinja2_autoescape_false -# B702 : use_of_mako_templates -# B703 : django_mark_safe - -exclude_dirs: - - 'venv' - - 'env' - - 'build' - - 'dist' - - 'migrations' - -# (optional) list included test IDs here, eg '[B101, B406]': -tests: - -# (optional) list skipped test IDs here, eg '[B101, B406]': -skips: - -### (optional) plugin settings - some test plugins require configuration data -### that may be given here, per-plugin. All bandit test plugins have a built in -### set of sensible defaults and these will be used if no configuration is -### provided. It is not necessary to provide settings for every (or any) plugin -### if the defaults are acceptable. -assert_used: - skips: ['*test_*.py'] - -any_other_function_with_shell_equals_true: - no_shell: - - os.execl - - os.execle - - os.execlp - - os.execlpe - - os.execv - - os.execve - - os.execvp - - os.execvpe - - os.spawnl - - os.spawnle - - os.spawnlp - - os.spawnlpe - - os.spawnv - - os.spawnve - - os.spawnvp - - os.spawnvpe - - os.startfile - shell: - - os.system - - os.popen - - os.popen2 - - os.popen3 - - os.popen4 - - popen2.popen2 - - popen2.popen3 - - popen2.popen4 - - popen2.Popen3 - - popen2.Popen4 - - commands.getoutput - - commands.getstatusoutput - subprocess: - - subprocess.Popen - - subprocess.call - - subprocess.check_call - - subprocess.check_output - - subprocess.run -hardcoded_tmp_directory: - tmp_dirs: - - /tmp - - /var/tmp - - /dev/shm -linux_commands_wildcard_injection: - no_shell: - - os.execl - - os.execle - - os.execlp - - os.execlpe - - os.execv - - os.execve - - os.execvp - - os.execvpe - - os.spawnl - - os.spawnle - - os.spawnlp - - os.spawnlpe - - os.spawnv - - os.spawnve - - os.spawnvp - - os.spawnvpe - - os.startfile - shell: - - os.system - - os.popen - - os.popen2 - - os.popen3 - - os.popen4 - - popen2.popen2 - - popen2.popen3 - - popen2.popen4 - - popen2.Popen3 - - popen2.Popen4 - - commands.getoutput - - commands.getstatusoutput - subprocess: - - subprocess.Popen - - subprocess.call - - subprocess.check_call - - subprocess.check_output - - subprocess.run -ssl_with_bad_defaults: - bad_protocol_versions: - - PROTOCOL_SSLv2 - - SSLv2_METHOD - - SSLv23_METHOD - - PROTOCOL_SSLv3 - - PROTOCOL_TLSv1 - - SSLv3_METHOD - - TLSv1_METHOD -ssl_with_bad_version: - bad_protocol_versions: - - PROTOCOL_SSLv2 - - SSLv2_METHOD - - SSLv23_METHOD - - PROTOCOL_SSLv3 - - PROTOCOL_TLSv1 - - SSLv3_METHOD - - TLSv1_METHOD -start_process_with_a_shell: - no_shell: - - os.execl - - os.execle - - os.execlp - - os.execlpe - - os.execv - - os.execve - - os.execvp - - os.execvpe - - os.spawnl - - os.spawnle - - os.spawnlp - - os.spawnlpe - - os.spawnv - - os.spawnve - - os.spawnvp - - os.spawnvpe - - os.startfile - shell: - - os.system - - os.popen - - os.popen2 - - os.popen3 - - os.popen4 - - popen2.popen2 - - popen2.popen3 - - popen2.popen4 - - popen2.Popen3 - - popen2.Popen4 - - commands.getoutput - - commands.getstatusoutput - subprocess: - - subprocess.Popen - - subprocess.call - - subprocess.check_call - - subprocess.check_output - - subprocess.run -start_process_with_no_shell: - no_shell: - - os.execl - - os.execle - - os.execlp - - os.execlpe - - os.execv - - os.execve - - os.execvp - - os.execvpe - - os.spawnl - - os.spawnle - - os.spawnlp - - os.spawnlpe - - os.spawnv - - os.spawnve - - os.spawnvp - - os.spawnvpe - - os.startfile - shell: - - os.system - - os.popen - - os.popen2 - - os.popen3 - - os.popen4 - - popen2.popen2 - - popen2.popen3 - - popen2.popen4 - - popen2.Popen3 - - popen2.Popen4 - - commands.getoutput - - commands.getstatusoutput - subprocess: - - subprocess.Popen - - subprocess.call - - subprocess.check_call - - subprocess.check_output - - subprocess.run -start_process_with_partial_path: - no_shell: - - os.execl - - os.execle - - os.execlp - - os.execlpe - - os.execv - - os.execve - - os.execvp - - os.execvpe - - os.spawnl - - os.spawnle - - os.spawnlp - - os.spawnlpe - - os.spawnv - - os.spawnve - - os.spawnvp - - os.spawnvpe - - os.startfile - shell: - - os.system - - os.popen - - os.popen2 - - os.popen3 - - os.popen4 - - popen2.popen2 - - popen2.popen3 - - popen2.popen4 - - popen2.Popen3 - - popen2.Popen4 - - commands.getoutput - - commands.getstatusoutput - subprocess: - - subprocess.Popen - - subprocess.call - - subprocess.check_call - - subprocess.check_output - - subprocess.run -subprocess_popen_with_shell_equals_true: - no_shell: - - os.execl - - os.execle - - os.execlp - - os.execlpe - - os.execv - - os.execve - - os.execvp - - os.execvpe - - os.spawnl - - os.spawnle - - os.spawnlp - - os.spawnlpe - - os.spawnv - - os.spawnve - - os.spawnvp - - os.spawnvpe - - os.startfile - shell: - - os.system - - os.popen - - os.popen2 - - os.popen3 - - os.popen4 - - popen2.popen2 - - popen2.popen3 - - popen2.popen4 - - popen2.Popen3 - - popen2.Popen4 - - commands.getoutput - - commands.getstatusoutput - subprocess: - - subprocess.Popen - - subprocess.call - - subprocess.check_call - - subprocess.check_output - - subprocess.run -subprocess_without_shell_equals_true: - no_shell: - - os.execl - - os.execle - - os.execlp - - os.execlpe - - os.execv - - os.execve - - os.execvp - - os.execvpe - - os.spawnl - - os.spawnle - - os.spawnlp - - os.spawnlpe - - os.spawnv - - os.spawnve - - os.spawnvp - - os.spawnvpe - - os.startfile - shell: - - os.system - - os.popen - - os.popen2 - - os.popen3 - - os.popen4 - - popen2.popen2 - - popen2.popen3 - - popen2.popen4 - - popen2.Popen3 - - popen2.Popen4 - - commands.getoutput - - commands.getstatusoutput - subprocess: - - subprocess.Popen - - subprocess.call - - subprocess.check_call - - subprocess.check_output - - subprocess.run -try_except_continue: - check_typed_exception: false -try_except_pass: - check_typed_exception: false -weak_cryptographic_key: - weak_key_size_dsa_high: 1024 - weak_key_size_dsa_medium: 2048 - weak_key_size_ec_high: 160 - weak_key_size_ec_medium: 224 - weak_key_size_rsa_high: 1024 - weak_key_size_rsa_medium: 2048 From 84f5e14d00a74be0d473e85c810b11da5cd47630 Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 19:01:33 -0500 Subject: [PATCH 07/11] =?UTF-8?q?=F0=9F=94=A7=20add=20make=20command=20to?= =?UTF-8?q?=20watch=20tree=20structure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 642a390..09d0449 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,10 @@ docs_view: ## Build and serve the documentation docs-test: ## Test if documentation can be built without warnings or errors @poetry run mkdocs build -s +view-tree: ## View the project tree + @echo "🚀 Viewing project tree..." + @tree -a {{cookiecutter.repo_name}} -I '__init__.py|.gitkeep' + ####----Tests----#### test: ## Test the code with pytest and coverage From ceccff5f1ebe4fb63e023ea864079df89d2d9de4 Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 19:46:54 -0500 Subject: [PATCH 08/11] =?UTF-8?q?=F0=9F=8D=AA=F0=9F=8E=A8=20add=20data=20v?= =?UTF-8?q?alidation=20and=20model=20validation=20folder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/libs/{validation => data_validation}/__init__.py | 0 {{cookiecutter.repo_name}}/src/libs/model_validation/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {{cookiecutter.repo_name}}/src/libs/{validation => data_validation}/__init__.py (100%) create mode 100644 {{cookiecutter.repo_name}}/src/libs/model_validation/__init__.py diff --git a/{{cookiecutter.repo_name}}/src/libs/validation/__init__.py b/{{cookiecutter.repo_name}}/src/libs/data_validation/__init__.py similarity index 100% rename from {{cookiecutter.repo_name}}/src/libs/validation/__init__.py rename to {{cookiecutter.repo_name}}/src/libs/data_validation/__init__.py diff --git a/{{cookiecutter.repo_name}}/src/libs/model_validation/__init__.py b/{{cookiecutter.repo_name}}/src/libs/model_validation/__init__.py new file mode 100644 index 0000000..e69de29 From 9d353c4b3ead42ab8b1b7b99dc5d2f8e74ca7deb Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 19:47:16 -0500 Subject: [PATCH 09/11] =?UTF-8?q?=F0=9F=93=9D=20update=20data=20folder=20s?= =?UTF-8?q?tructure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a63e418..5ac298b 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,6 @@ Folder structure for data science projects [why?](https://towardsdatascience.co . ├── codecov.yml # configuration for codecov ├── .code_quality -│   ├── bandit.yaml # bandit configuration │   ├── mypy.ini # mypy configuration │   └── ruff.toml # ruff configuration ├── data @@ -127,12 +126,13 @@ Folder structure for data science projects [why?](https://towardsdatascience.co │   │   └── action.yml # github action to setup python environment │   ├── dependabot.md # github action to update dependencies │   ├── pull_request_template.md # template for pull requests -│   └── workflows -│   ├── docs.yml # github action to build documentation (mkdocs) -│   ├── pre-commit_autoupdate.yml # github action update pre-commit hooks -│   └── test.yml +│   └── workflows # github actions workflows +│   ├── ci.yml # run continuous integration (tests, pre-commit, etc.) +│   ├── dependency_review.yml # review dependencies +│   ├── docs.yml # build documentation (mkdocs) +│   └── pre-commit_autoupdate.yml # update pre-commit hooks ├── .gitignore # files to ignore in git -├── Makefile # useful commands to setup environment, +├── Makefile # useful commands to setup environment, run tests, etc. ├── models # store final models ├── notebooks │   ├── 1-data # data extraction and cleaning @@ -149,9 +149,34 @@ Folder structure for data science projects [why?](https://towardsdatascience.co ├── pyproject.toml # dependencies for poetry ├── README.md # description of your project ├── src # source code for use in this project +│ ├── libs # custom python scripts +│ │ ├── data_etl # data extraction, transformation, and loading +│ │ ├── data_validation # data validation +│ │ ├── feat_cleaning # feature engineering data cleaning +│ │ ├── feat_encoding # feature engineering encoding +│ │ ├── feat_imputation # feature engineering imputation +│ │ ├── feat_new_features # feature engineering new features +│ │ ├── feat_pipelines # feature engineering pipelines +│ │ ├── feat_preprocess_strings # feature engineering pre process strings +│ │ ├── feat_scaling # feature engineering scaling data +│ │ ├── feat_selection # feature engineering feature selection +│ │ ├── feat_strings # feature engineering strings +│ │ ├── metrics # evaluation metrics +│ │ ├── model # model training and prediction +│ │ ├── model_evaluation # model evaluation +│ │ ├── model_selection # model selection +│ │ ├── model_validation # model validation +│ │ └── reports # reports +│ ├── pipelines +│ │ ├── data_etl # data extraction, transformation, and loading +│ │ ├── feature_engineering # prepare data for modeling +│ │ ├── model_evaluation # evaluate model performance +│ │ ├── model_prediction # model predictions +│ │ └── model_train # train models ├── tests # test code for your project └── .vscode # vscode configuration ├── extensions.json # list of recommended extensions + ├── launch.json # vscode launch configuration └── settings.json # vscode settings ``` From e23273a52b238627dc5fb59656b050b5cdea4a5d Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 20:09:31 -0500 Subject: [PATCH 10/11] =?UTF-8?q?=F0=9F=8E=A8=20=F0=9F=94=A7=20format=20ma?= =?UTF-8?q?kefile=20command=20with=20=5F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 09d0449..25911e4 100644 --- a/Makefile +++ b/Makefile @@ -28,10 +28,10 @@ docs_view: ## Build and serve the documentation @echo "🚀 Viewing documentation..." @poetry run mkdocs serve -docs-test: ## Test if documentation can be built without warnings or errors +docs_test: ## Test if documentation can be built without warnings or errors @poetry run mkdocs build -s -view-tree: ## View the project tree +view_tree: ## View the project tree @echo "🚀 Viewing project tree..." @tree -a {{cookiecutter.repo_name}} -I '__init__.py|.gitkeep' From a13581b22df82ec0dad5876f4152e0a0e4a66010 Mon Sep 17 00:00:00 2001 From: "Jose R. Zapata" Date: Wed, 27 Mar 2024 20:48:20 -0500 Subject: [PATCH 11/11] =?UTF-8?q?=F0=9F=8D=AA=F0=9F=93=9D=20update=20docum?= =?UTF-8?q?entation=20with=20directory=20and=20data=20structure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/data_schema.md | 1 + docs/directory_hierarchy.md | 1 + docs/index.md | 3 ++- mkdocs.yml | 8 ++++++-- {{cookiecutter.repo_name}}/data/README.md | 18 +++++++++--------- 5 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 docs/data_schema.md create mode 100644 docs/directory_hierarchy.md diff --git a/docs/data_schema.md b/docs/data_schema.md new file mode 100644 index 0000000..b6209db --- /dev/null +++ b/docs/data_schema.md @@ -0,0 +1 @@ +--8<-- "{{cookiecutter.repo_name}}/data/README.md" diff --git a/docs/directory_hierarchy.md b/docs/directory_hierarchy.md new file mode 100644 index 0000000..1110ce7 --- /dev/null +++ b/docs/directory_hierarchy.md @@ -0,0 +1 @@ +--8<-- "README.md:104:182" diff --git a/docs/index.md b/docs/index.md index 612c7a5..7f5d635 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1 +1,2 @@ ---8<-- "README.md" +--8<-- "README.md::20" +--8<-- "README.md:52" diff --git a/mkdocs.yml b/mkdocs.yml index 5635c5b..1959008 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -19,12 +19,15 @@ theme: nav: - 📖 Introduction: 'index.md' + - 🗃️ Project structure: + - 🗂️ Directory Hierarchy: 'directory_hierarchy.md' + - 🗄️ Data Schema: 'data_schema.md' - 🔑 Setup Tokens: 'setup_tokens.md' - - ⚙️ Pre-commit configuration: 'pre-commit.md' + - ⚙️ Pre-commit configuration: 'pre-commit.md' - 🚀 Github Actions: - Pre-commit_autoupdate: 'github_actions/gh_action_pre-commit-autoupdate.md' - 💻 VSCode configuration: 'vscode.md' - - 🏠 Local Dev setup: 'local_setup.md' + - 🛠️ Local Dev setup: 'local_setup.md' markdown_extensions: - pymdownx.snippets: @@ -39,3 +42,4 @@ markdown_extensions: pygments_lang_class: true - pymdownx.inlinehilite - pymdownx.superfences + - tables diff --git a/{{cookiecutter.repo_name}}/data/README.md b/{{cookiecutter.repo_name}}/data/README.md index 63ba392..af0635c 100644 --- a/{{cookiecutter.repo_name}}/data/README.md +++ b/{{cookiecutter.repo_name}}/data/README.md @@ -4,16 +4,16 @@ layered data-engineering convention ![layeded data engineering](https://docs.kedro.org/en/0.18.6/_images/data_engineering_convention.png) -| ****Folder in data**** | ****Description**** | +| `Folder in data` | `Description` | | ---------------------- | --- | -| ****raw**** | initial start of the pipeline, containing the sourced data model(s) that should never be changed, it forms your single source of truth to work from. these data models are typically un-typed in most cases e.g. csv, but this will vary from case to case | -| ****intermediate**** | optional data model(s), which are introduced to type your raw data model(s), e.g. converting string based values into their current typed representation | -| ****primary**** | domain specific data model(s) containing cleansed, transformed and wrangled data from either raw or intermediate, which forms your layer that you input into your feature engineering | -| ****feature**** | analytics specific data model(s) containing a set of features defined against the primary data, which are grouped by feature area of analysis and stored against a common dimension | -| ****model input**** | analytics specific data model(s) containing all feature data against a common dimension and in the case of live projects against an analytics run date to ensure that you track the historical changes of the features over time | -| ****models**** | stored, serialised pre-trained machine learning models | -| ****model output**** | analytics specific data model(s) containing the results generated by the model based on the model input data | -| ****reporting**** | reporting data model(s) that are used to combine a set of primary, feature, model input and model output data used to drive the dashboard and the views constructed. it encapsulates and removes the need to define any blending or joining of data, improve performance and replacement of presentation layer without having to redefine the data models | +| `raw` | initial start of the pipeline, containing the sourced data model(s) that should never be changed, it forms your single source of truth to work from. these data models are typically un-typed in most cases e.g. csv, but this will vary from case to case | +| `intermediate` | optional data model(s), which are introduced to type your raw data model(s), e.g. converting string based values into their current typed representation | +| `primary` | domain specific data model(s) containing cleansed, transformed and wrangled data from either raw or intermediate, which forms your layer that you input into your feature engineering | +| `feature` | analytics specific data model(s) containing a set of features defined against the primary data, which are grouped by feature area of analysis and stored against a common dimension | +| `model input` | analytics specific data model(s) containing all feature data against a common dimension and in the case of live projects against an analytics run date to ensure that you track the historical changes of the features over time | +| `models` | stored, serialised pre-trained machine learning models | +| `model output` | analytics specific data model(s) containing the results generated by the model based on the model input data | +| `reporting` | reporting data model(s) that are used to combine a set of primary, feature, model input and model output data used to drive the dashboard and the views constructed. it encapsulates and removes the need to define any blending or joining of data, improve performance and replacement of presentation layer without having to redefine the data models | ## References