From cfc94656704c9f07e12c65adba63710701d89158 Mon Sep 17 00:00:00 2001 From: carlvoller <27472988+carlvoller@users.noreply.github.com> Date: Tue, 10 Sep 2024 17:20:31 +0800 Subject: [PATCH] Release 0.5.0, added ExcelPostgresBuilder, cleaned up project --- .github/workflows/release.yml | 4 + Cargo.lock | 65 +++++---- Cargo.toml | 36 ++--- LICENSE | 19 +++ README.md | 18 ++- crates/excel-rs-csv/Cargo.toml | 11 ++ crates/excel-rs-csv/src/lib.rs | 39 +++++ crates/excel-rs-postgres/Cargo.toml | 22 +++ crates/excel-rs-postgres/src/client.rs | 59 ++++++++ crates/excel-rs-postgres/src/lib.rs | 56 +++++++ .../excel-rs-postgres/src/sql_impl.rs | 105 +------------- {src => crates/excel-rs-postgres/src}/ssl.rs | 0 crates/excel-rs-xlsx/Cargo.toml | 16 ++ .../excel-rs-xlsx/src}/format.rs | 0 crates/excel-rs-xlsx/src/lib.rs | 17 +++ .../excel-rs-xlsx/src}/sheet.rs | 9 +- .../excel-rs-xlsx/src}/workbook.rs | 0 py-excel-rs/Cargo.toml | 20 +++ .../py_excel_rs/__init__.py | 1 + .../py_excel_rs/df_to_xlsx.py | 10 +- py-excel-rs/py_excel_rs/postgres.py | 91 ++++++++++++ pyproject.toml => py-excel-rs/pyproject.toml | 3 +- py-excel-rs/src/lib.rs | 95 ++++++++++++ py-excel-rs/src/postgres.rs | 137 ++++++++++++++++++ src/lib.rs | 107 -------------- src/main.rs | 30 ---- src/xlsx/mod.rs | 6 - 27 files changed, 672 insertions(+), 304 deletions(-) create mode 100644 LICENSE create mode 100644 crates/excel-rs-csv/Cargo.toml create mode 100644 crates/excel-rs-csv/src/lib.rs create mode 100644 crates/excel-rs-postgres/Cargo.toml create mode 100644 crates/excel-rs-postgres/src/client.rs create mode 100644 crates/excel-rs-postgres/src/lib.rs rename src/export_to_xlsx.rs => crates/excel-rs-postgres/src/sql_impl.rs (54%) rename {src => crates/excel-rs-postgres/src}/ssl.rs (100%) create mode 100644 crates/excel-rs-xlsx/Cargo.toml rename {src/xlsx => crates/excel-rs-xlsx/src}/format.rs (100%) create mode 100644 crates/excel-rs-xlsx/src/lib.rs rename {src/xlsx => crates/excel-rs-xlsx/src}/sheet.rs (94%) rename {src/xlsx => crates/excel-rs-xlsx/src}/workbook.rs (100%) create mode 100644 py-excel-rs/Cargo.toml rename {python => py-excel-rs}/py_excel_rs/__init__.py (54%) rename {python => py-excel-rs}/py_excel_rs/df_to_xlsx.py (54%) create mode 100644 py-excel-rs/py_excel_rs/postgres.py rename pyproject.toml => py-excel-rs/pyproject.toml (94%) create mode 100644 py-excel-rs/src/lib.rs create mode 100644 py-excel-rs/src/postgres.rs delete mode 100644 src/lib.rs delete mode 100644 src/main.rs delete mode 100644 src/xlsx/mod.rs diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0c507aa..54f2423 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -59,6 +59,7 @@ jobs: python3 -m ensurepip fi manylinux: manylinux_2_28 + working-directory: py-excel-rs - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -90,6 +91,7 @@ jobs: args: --release --out dist --find-interpreter sccache: 'true' manylinux: musllinux_1_2 + working-directory: py-excel-rs - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -143,6 +145,7 @@ jobs: target: ${{ matrix.platform.target }} args: --release --out dist --find-interpreter sccache: 'true' + working-directory: py-excel-rs - name: Upload wheels uses: actions/upload-artifact@v4 with: @@ -158,6 +161,7 @@ jobs: with: command: sdist args: --out dist + working-directory: py-excel-rs - name: Upload sdist uses: actions/upload-artifact@v4 with: diff --git a/Cargo.lock b/Cargo.lock index d19ce1c..d2aaf95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,19 +4,13 @@ version = 3 [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" dependencies = [ "gimli", ] -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - [[package]] name = "adler2" version = "2.0.0" @@ -98,17 +92,17 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.73" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", - "miniz_oxide 0.7.4", + "miniz_oxide", "object", "rustc-demangle", + "windows-targets", ] [[package]] @@ -385,13 +379,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "excel-rs" -version = "0.4.2" +name = "excel-rs-csv" +version = "0.5.0" +dependencies = [ + "csv", +] + +[[package]] +name = "excel-rs-postgres" +version = "0.5.0" dependencies = [ "anyhow", "chrono", - "csv", - "numpy", + "excel-rs-xlsx", "postgres", "postgres-protocol", "postgres_money", @@ -399,6 +399,13 @@ dependencies = [ "rust_decimal", "rustls", "tokio-postgres-rustls", +] + +[[package]] +name = "excel-rs-xlsx" +version = "0.5.0" +dependencies = [ + "anyhow", "zip", ] @@ -416,7 +423,7 @@ checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ "crc32fast", "libz-ng-sys", - "miniz_oxide 0.8.0", + "miniz_oxide", ] [[package]] @@ -502,9 +509,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" [[package]] name = "hashbrown" @@ -669,15 +676,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "miniz_oxide" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" -dependencies = [ - "adler", -] - [[package]] name = "miniz_oxide" version = "0.8.0" @@ -982,6 +980,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "py-excel-rs" +version = "0.5.0" +dependencies = [ + "excel-rs-csv", + "excel-rs-postgres", + "excel-rs-xlsx", + "numpy", + "pyo3", +] + [[package]] name = "pyo3" version = "0.21.2" diff --git a/Cargo.toml b/Cargo.toml index a9d3410..f55c4e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,19 @@ -[package] -name = "excel-rs" -version = "0.4.2" +[workspace] +members = ["crates/*", "py-excel-rs"] +resolver = "2" + +[workspace.package] +version = "0.5.0" +authors = ["Carl Voller"] edition = "2021" +homepage = "https://github.com/carlvoller/excel-rs" +license = "MIT" +repository = "https://github.com/carlvoller/excel-rs" + +[workspace.dependencies] +excel-rs-xlsx = { version = "0.5.0", path = "crates/excel-rs-xlsx", default-features = false } +excel-rs-csv = { version = "0.5.0", path = "crates/excel-rs-csv", default-features = false } +excel-rs-postgres = { version = "0.5.0", path = "crates/excel-rs-postgres", default-features = false } [profile.release] opt-level = 3 @@ -9,21 +21,3 @@ lto = "fat" debug = true overflow-checks = false debug-assertions = false - -[dependencies] -csv = "1" -pyo3 = { version = "0.21", features = ["extension-module"] } -anyhow = "1.0.86" -zip = { version = "2.2.0", default-features = false, features = ["deflate-flate2", "deflate-zlib-ng"] } -numpy = "0.21" -postgres = "0.19.8" -chrono = "0.4.38" -postgres-protocol = "0.6.7" -rust_decimal = { version = "1.36.0", features = ["db-postgres"] } -postgres_money = { version = "0.4.0", features = ["sql"] } -tokio-postgres-rustls = "0.12.0" -rustls = { version = "0.23.12", default-features = false, features = ["ring"] } - -[lib] -name = "excel_rs" -crate-type = ["cdylib"] \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7a178fb --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright 2024 Carl Ian Voller + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the “Software”), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 66d7f79..cf4289f 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,22 @@ with open('report.xlsx', 'wb') as f: f.write(xlsx) ``` +### Build Postgres Query to Excel: +```python +from py_excel_rs import ExcelPostgresBuilder, OrderBy + +conn_string = "dbname=* user=* password=* host=*" +builder = ExcelPostgresBuilder(conn_str=conn_string, table_name="my_schema.my_table") + +xlsx = builder.select_all() + .exclude(["Unwanted_Column1", "Unwanted_Column2"]) + .orderBy("Usernames", OrderBy.ASCENDING) + .execute() + +with open('report.xlsx', 'wb') as f: + f.write(xlsx) +``` + ## Rust TODO: Add rust documentation @@ -60,7 +76,7 @@ Tests were conducted on an Macbook Pro M1 Max with 64GB of RAM ### Python -#### py-excel-rs (2.186s) +#### py-excel-rs (2.89s) ``` $ time python test-py-excel-rs.py python3 test-py-excel-rs.py 2.00s user 0.18s system 99% cpu 2.186 total diff --git a/crates/excel-rs-csv/Cargo.toml b/crates/excel-rs-csv/Cargo.toml new file mode 100644 index 0000000..499df4d --- /dev/null +++ b/crates/excel-rs-csv/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "excel-rs-csv" +version.workspace = true +authors.workspace = true +edition.workspace = true +homepage.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +csv = "1" \ No newline at end of file diff --git a/crates/excel-rs-csv/src/lib.rs b/crates/excel-rs-csv/src/lib.rs new file mode 100644 index 0000000..b978ec8 --- /dev/null +++ b/crates/excel-rs-csv/src/lib.rs @@ -0,0 +1,39 @@ +use std::io::Read; + +use csv::{ByteRecord, Reader}; + +pub fn bytes_to_csv(bytes: V) -> Reader { + csv::ReaderBuilder::new().from_reader(bytes) +} + +pub fn get_headers(reader: &mut Reader) -> Option<&ByteRecord> { + match reader.byte_headers() { + Ok(record) => Some(record), + Err(_) => None, + } +} + +pub fn get_next_record(reader: &mut Reader) -> Option { + let mut record = csv::ByteRecord::new(); + match reader.read_byte_record(&mut record) { + Ok(status) => { + if status { + Some(record) + } else { + None + } + } + Err(_) => None, + } +} + +// #[cfg(test)] +// mod tests { +// use super::*; + +// #[test] +// fn it_works() { +// let result = add(2, 2); +// assert_eq!(result, 4); +// } +// } diff --git a/crates/excel-rs-postgres/Cargo.toml b/crates/excel-rs-postgres/Cargo.toml new file mode 100644 index 0000000..670912b --- /dev/null +++ b/crates/excel-rs-postgres/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "excel-rs-postgres" +version.workspace = true +authors.workspace = true +edition.workspace = true +homepage.workspace = true +license.workspace = true +repository.workspace = true +readme = "../../README.md" +description = "excel-rs postgres" + +[dependencies] +anyhow = "1.0.86" +chrono = "0.4.38" +excel-rs-xlsx = { workspace = true } +pyo3 = { version = "0.21", features = ["extension-module"] } +postgres = "0.19.8" +postgres-protocol = "0.6.7" +rust_decimal = { version = "1.36.0", features = ["db-postgres"] } +postgres_money = { version = "0.4.0", features = ["sql"] } +tokio-postgres-rustls = "0.12.0" +rustls = { version = "0.23.12", default-features = false, features = ["ring"] } diff --git a/crates/excel-rs-postgres/src/client.rs b/crates/excel-rs-postgres/src/client.rs new file mode 100644 index 0000000..44da554 --- /dev/null +++ b/crates/excel-rs-postgres/src/client.rs @@ -0,0 +1,59 @@ +use anyhow::Result; +use postgres::{Client, NoTls, RowIter}; +use rustls::ClientConfig; +use tokio_postgres_rustls::MakeRustlsConnect; + +use crate::ssl::SkipServerVerification; + +pub struct PostgresClient { + client: Client, +} + +impl PostgresClient { + pub fn new(conn_string: &str) -> PostgresClient { + let config = ClientConfig::builder() + .with_root_certificates(rustls::RootCertStore::empty()) + .with_no_client_auth(); + + let tls = MakeRustlsConnect::new(config); + + let client = match Client::connect(conn_string, tls) { + Ok(c) => c, + Err(_) => { + // Attempt SSL with Skipped Verification + let mut config = ClientConfig::builder() + .with_root_certificates(rustls::RootCertStore::empty()) + .with_no_client_auth(); + + config + .dangerous() + .set_certificate_verifier(SkipServerVerification::new()); + + let tls = MakeRustlsConnect::new(config); + + match Client::connect(&conn_string, tls) { + Ok(c) => c, + Err(_) => { + // Attempt no SSL + match Client::connect(&conn_string, NoTls) { + Ok(c) => c, + Err(e) => panic!("Couldn't connec to server: {e}"), + } + } + } + } + }; + + PostgresClient { client } + } + + pub fn make_query(&mut self, query: &str, params: Vec) -> Result> { + let iter: RowIter<'_> = self.client.query_raw(query, params)?; + Ok(iter) + } + + pub fn close(self) -> Result<()> { + self.client.close()?; + Ok(()) + } +} diff --git a/crates/excel-rs-postgres/src/lib.rs b/crates/excel-rs-postgres/src/lib.rs new file mode 100644 index 0000000..c7cb036 --- /dev/null +++ b/crates/excel-rs-postgres/src/lib.rs @@ -0,0 +1,56 @@ +mod client; +mod sql_impl; +mod ssl; + +use std::io::Cursor; + +use anyhow::Result; +pub use client::PostgresClient; +use excel_rs_xlsx::WorkBook; +pub use postgres::fallible_iterator::FallibleIterator; +use postgres::RowIter; +pub use sql_impl::{ExcelBytes, ExcelBytesBorrowed}; + +pub fn postgres_to_xlsx<'a>(mut iter: RowIter<'a>) -> Result> { + let output_buffer = vec![]; + let mut workbook = WorkBook::new(Cursor::new(output_buffer)); + let mut worksheet = workbook.get_worksheet(String::from("Sheet 1")); + + let headers = iter.next().ok().unwrap().unwrap(); + let len = headers.len(); + + // TODO: Add if len == 0 check + + // Write headers + let mut row_vec: Vec<&[u8]> = vec![&[]; len]; + + for col in 0..len { + let column = headers.columns().get(col).unwrap(); + row_vec[col] = column.name().as_bytes(); + } + + worksheet.write_row(row_vec)?; + + while let Some(row) = iter.next()? { + let mut row_vec: Vec> = vec![Box::from([]); len]; + + for col in 0..len { + if let Ok(bytes) = row.try_get::(col) { + row_vec[col] = Box::from(bytes.0); + } else if let Ok(bytes) = row.try_get::(col) { + let asdasd = bytes.0; + row_vec[col] = asdasd + } + } + + let new_vec: Vec<&[u8]> = row_vec.iter().map(|x| x.as_ref()).collect(); + + worksheet.write_row(new_vec)?; + } + + worksheet.close()?; + + let final_buffer = workbook.finish()?; + + Ok(final_buffer.into_inner()) +} diff --git a/src/export_to_xlsx.rs b/crates/excel-rs-postgres/src/sql_impl.rs similarity index 54% rename from src/export_to_xlsx.rs rename to crates/excel-rs-postgres/src/sql_impl.rs index a677ae1..479b3d1 100644 --- a/src/export_to_xlsx.rs +++ b/crates/excel-rs-postgres/src/sql_impl.rs @@ -1,20 +1,13 @@ use std::error::Error; -use std::io::Cursor; -use super::xlsx::WorkBook as NewWorkBook; - -use anyhow::Result; use chrono::DateTime; -use numpy::ndarray::Array2; use postgres::types::{FromSql, Type}; -use postgres::RowIter; -use postgres::{fallible_iterator::FallibleIterator, Client}; use postgres_money::Money; use postgres_protocol::types; use rust_decimal::Decimal; -struct ExcelBytesBorrowed<'a>(&'a [u8]); -struct ExcelBytes(Box<[u8]>); +pub struct ExcelBytesBorrowed<'a>(pub &'a [u8]); +pub struct ExcelBytes(pub Box<[u8]>); // Int8, Money, Timestamp, VarChar, Text, Numeric impl<'a> FromSql<'a> for ExcelBytesBorrowed<'a> { @@ -115,97 +108,3 @@ impl<'a> FromSql<'a> for ExcelBytes { } } } - -pub fn export_to_custom_xlsx(x: &[u8]) -> Result> { - let output_buffer = vec![]; - let mut workbook = NewWorkBook::new(Cursor::new(output_buffer)); - let mut worksheet = workbook.get_worksheet(String::from("Sheet 1")); - - let mut reader = csv::ReaderBuilder::new().from_reader(x); - - let headers = reader.byte_headers()?; - worksheet.write_row(1, headers.iter().to_owned().collect())?; - - let mut record = csv::ByteRecord::new(); - let mut row = 2; - while reader.read_byte_record(&mut record)? { - let row_data = record.iter().to_owned().collect(); - worksheet.write_row(row, row_data)?; - row += 1; - } - - worksheet.close()?; - - let final_buffer = workbook.finish()?; - - Ok(final_buffer.into_inner()) -} - -pub fn export_ndarray_to_custom_xlsx(x: Array2) -> Result> { - let output_buffer = vec![]; - let mut workbook = NewWorkBook::new(Cursor::new(output_buffer)); - let mut worksheet = workbook.get_worksheet(String::from("Sheet 1")); - - let mut row_num = 1; - for row in x.rows() { - let bytes = row.map(|x| x.as_bytes()).to_vec(); - worksheet.write_row(row_num, bytes)?; - - row_num += 1; - } - - worksheet.close()?; - - let final_buffer = workbook.finish()?; - - Ok(final_buffer.into_inner()) -} - -pub fn export_pg_client_to_custom_xlsx<'a>(query: &str, client: &'a mut Client) -> Result> { - let params: Vec = vec![]; - let mut iter: RowIter<'a> = client.query_raw(query, params)?; - - let output_buffer = vec![]; - let mut workbook = NewWorkBook::new(Cursor::new(output_buffer)); - let mut worksheet = workbook.get_worksheet(String::from("Sheet 1")); - - let mut row_num = 1; - let headers = iter.next().ok().unwrap().unwrap(); - let len = headers.len(); - - // TODO: Add if len == 0 check - - // Write headers - let mut row_vec: Vec<&[u8]> = vec![&[]; len]; - - for col in 0..len { - let column = headers.columns().get(col).unwrap(); - row_vec[col] = column.name().as_bytes(); - } - - worksheet.write_row(1, row_vec)?; - - - while let Some(row) = iter.next()? { - row_num += 1; - let mut row_vec: Vec> = vec![Box::from([]); len]; - - for col in 0..len { - if let Ok(bytes) = row.try_get::(col) { - row_vec[col] = Box::from(bytes.0); - } else if let Ok(bytes) = row.try_get::(col) { - let asdasd = bytes.0; - row_vec[col] = asdasd - } - } - - let new_vec: Vec<&[u8]> = row_vec.iter().map(|x| x.as_ref()).collect(); - - worksheet.write_row(row_num, new_vec)?; - } - worksheet.close()?; - - let final_buffer = workbook.finish()?; - - Ok(final_buffer.into_inner()) -} diff --git a/src/ssl.rs b/crates/excel-rs-postgres/src/ssl.rs similarity index 100% rename from src/ssl.rs rename to crates/excel-rs-postgres/src/ssl.rs diff --git a/crates/excel-rs-xlsx/Cargo.toml b/crates/excel-rs-xlsx/Cargo.toml new file mode 100644 index 0000000..98c2af3 --- /dev/null +++ b/crates/excel-rs-xlsx/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "excel-rs-xlsx" +version = { workspace = true } +edition = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +repository = { workspace = true } +readme = "../../README.md" +description = "xlsx processor for excel-rs" + +[dependencies] +zip = { version = "2.2.0", default-features = false, features = [ + "deflate-flate2", + "deflate-zlib-ng", +] } +anyhow = "1.0.86" diff --git a/src/xlsx/format.rs b/crates/excel-rs-xlsx/src/format.rs similarity index 100% rename from src/xlsx/format.rs rename to crates/excel-rs-xlsx/src/format.rs diff --git a/crates/excel-rs-xlsx/src/lib.rs b/crates/excel-rs-xlsx/src/lib.rs new file mode 100644 index 0000000..a60e9c0 --- /dev/null +++ b/crates/excel-rs-xlsx/src/lib.rs @@ -0,0 +1,17 @@ + +mod format; +pub mod workbook; +pub mod sheet; + +pub use workbook::WorkBook; + +// TODO: Implement Tests +// #[cfg(test)] +// mod tests { +// use super::*; + +// #[test] +// fn it_works() { +// assert_eq!(1, 1); +// } +// } diff --git a/src/xlsx/sheet.rs b/crates/excel-rs-xlsx/src/sheet.rs similarity index 94% rename from src/xlsx/sheet.rs rename to crates/excel-rs-xlsx/src/sheet.rs index 8f900f2..112b5e9 100644 --- a/src/xlsx/sheet.rs +++ b/crates/excel-rs-xlsx/src/sheet.rs @@ -11,7 +11,8 @@ pub struct Sheet<'a, W: Write + Seek> { pub _name: String, // pub id: u16, // pub is_closed: bool, - col_num_to_letter: Vec> + col_num_to_letter: Vec>, + current_row_num: u32 } @@ -36,14 +37,16 @@ impl<'a, W: Write + Seek> Sheet<'a, W> { _name: name, // is_closed: false, col_num_to_letter: Vec::with_capacity(64), + current_row_num: 0 } } - pub fn write_row(&mut self, row_num: u32, data: Vec<&[u8]>) -> Result<()> { + // TOOD: Use ShortVec over Vec for cell ID + pub fn write_row(&mut self, data: Vec<&[u8]>) -> Result<()> { let mut final_vec = Vec::with_capacity(512 * data.len()); // TODO: Proper Error Handling - let (row_in_chars_arr, digits) = self.num_to_bytes(row_num); + let (row_in_chars_arr, digits) = self.num_to_bytes(self.current_row_num); final_vec.write(b" bytes: - return _excel_rs.export_to_xlsx(buf) + return _excel_rs.csv_to_xlsx(buf) def df_to_xlsx(df: pd.DataFrame) -> bytes: py_list = np.vstack((df.keys().to_numpy(), df.to_numpy(dtype='object'))) return _excel_rs.py_2d_to_xlsx(py_list) -def pg_to_xlsx(query: str, conn_string: str, disable_strict_ssl=False) -> bytes: - return _excel_rs.pg_query_to_xlsx(query, conn_string, disable_strict_ssl) \ No newline at end of file +def pg_to_xlsx(query: str, conn_string: str) -> bytes: + + client = _excel_rs.PyPostgresClient.new(conn_string) + xlsx = client.get_xlsx_from_query(query) + client.close() + return xlsx \ No newline at end of file diff --git a/py-excel-rs/py_excel_rs/postgres.py b/py-excel-rs/py_excel_rs/postgres.py new file mode 100644 index 0000000..2ba31eb --- /dev/null +++ b/py-excel-rs/py_excel_rs/postgres.py @@ -0,0 +1,91 @@ +from enum import Enum +from typing import Optional + +from py_excel_rs import _excel_rs + + +class OrderBy(Enum): + ASCENDING = "ASC" + DESCENDING = "DESC" + + +class ExcelPostgresBuilder: + _conn_str: str + _selected: str + _excluded: Optional[list[str]] + _table_name: str + _order_by: Optional[OrderBy] + _order_by_col: Optional[str] + _consumed: bool + + def __init__(self, conn_str: str, table_name: str): + if ( + not conn_str + or not table_name + or not isinstance(conn_str, str) + or not isinstance(table_name, str) + ): + raise ValueError("missing or invalid type for conn_str or table_name") + + self._consumed = False + self._conn_str = conn_str + self._table_name = table_name + self._excluded = None + self._order_by = None + self._order_by_col = None + + def select_all(self): + if self._consumed: + raise RuntimeError("Cannot modify PostgresBuilder after execute()") + self._selected = "*" + return self + + def select(self, columns: list[str]): + if self._consumed: + raise RuntimeError("Cannot modify PostgresBuilder after execute()") + self._selected = ", ".join(columns) + return self + + def exclude(self, columns: Optional[list[str]]): + if self._consumed: + raise RuntimeError("Cannot modify PostgresBuilder after execute()") + self._excluded = columns + return self + + def order_by(self, col: Optional[str], order: Optional[OrderBy]): + if self._consumed: + raise RuntimeError("Cannot modify PostgresBuilder after execute()") + self._order_by = order + self._order_by_col = col + return self + + def execute(self): + if self._consumed: + raise RuntimeError("Cannot execute PostgresBuilder after execute()") + + if self._selected is None: + raise ValueError( + "PostgresBuilder requires select_all() or select() to be ran once before execute()" + ) + + client = _excel_rs.PyPostgresClient.new(self._conn_str) + (schema_name, table_name) = self._table_name.split(".") + if not table_name: + table_name = schema_name + schema_name = "" + + columns = [f"\"{x}\"" for x in client.get_columns(table_name, schema_name, self._excluded)] + if self._selected == "*": + query = f"SELECT {', '.join(columns)} FROM {self._table_name}" + else: + parsed = [f"'{x}'" for x in self._selected] + query = f"SELECT {', '.join(parsed)} FROM {self._table_name}" + + if self._order_by is not None and self._order_by_col is not None: + query += f" ORDER BY \"{self._order_by_col}\" {self._order_by.value}" + + + xlsx = client.get_xlsx_from_query(query) + client.close() + return xlsx + diff --git a/pyproject.toml b/py-excel-rs/pyproject.toml similarity index 94% rename from pyproject.toml rename to py-excel-rs/pyproject.toml index 7b70f2a..a205e35 100644 --- a/pyproject.toml +++ b/py-excel-rs/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "py-excel-rs" -version = "0.4.2" +version = "0.5.0" description = "Some performant utility functions to convert common data structures to XLSX" dependencies = ["pandas", "numpy"] requires-python = ">=3.7" @@ -20,5 +20,4 @@ classifiers = [ ] [tool.maturin] -python-source = "python" module-name = "py_excel_rs._excel_rs" \ No newline at end of file diff --git a/py-excel-rs/src/lib.rs b/py-excel-rs/src/lib.rs new file mode 100644 index 0000000..0eceff1 --- /dev/null +++ b/py-excel-rs/src/lib.rs @@ -0,0 +1,95 @@ +mod postgres; + +use std::io::Cursor; + +use excel_rs_csv::{bytes_to_csv, get_headers, get_next_record}; +use excel_rs_xlsx::WorkBook; +use numpy::PyReadonlyArray2; +use postgres::PyPostgresClient; +use pyo3::{prelude::*, types::PyBytes}; + +#[pymodule] +fn _excel_rs<'py>(m: &Bound<'py, PyModule>) -> PyResult<()> { + #[pyfn(m)] + #[pyo3(name = "csv_to_xlsx")] + fn csv_to_xlsx<'py>(py: Python<'py>, buf: Bound<'py, PyBytes>) -> Bound<'py, PyBytes> { + let x = buf.as_bytes(); + + let output_buffer = vec![]; + let mut workbook = WorkBook::new(Cursor::new(output_buffer)); + let mut worksheet = workbook.get_worksheet(String::from("Sheet 1")); + + let mut reader = bytes_to_csv(x); + let headers = get_headers(&mut reader); + + if headers.is_some() { + let headers_to_bytes = headers.unwrap().iter().to_owned().collect(); + if let Err(e) = worksheet.write_row(headers_to_bytes) { + panic!("{e}"); + } + } + + while let Some(record) = get_next_record(&mut reader) { + let row_data = record.iter().to_owned().collect(); + if let Err(e) = worksheet.write_row(row_data) { + panic!("{e}"); + } + } + + if let Err(e) = worksheet.close() { + panic!("{e}"); + } + + let final_buffer = workbook.finish().ok().unwrap(); + + PyBytes::new_bound(py, &final_buffer.into_inner()) + } + + #[pyfn(m)] + #[pyo3(name = "py_2d_to_xlsx")] + fn py_2d_to_xlsx<'py>( + py: Python<'py>, + list: PyReadonlyArray2<'py, PyObject>, + ) -> Bound<'py, PyBytes> { + let ndarray = list.as_array(); + + let ndarray_str = ndarray.mapv(|x| { + if let Ok(inner_str) = x.extract::(py) { + inner_str + } else { + if let Ok(inner_num) = x.extract::(py) { + if inner_num.is_nan() { + String::from("") + } else { + inner_num.to_string() + } + } else { + String::from("") + } + } + }); + + let output_buffer = vec![]; + let mut workbook = WorkBook::new(Cursor::new(output_buffer)); + let mut worksheet = workbook.get_worksheet(String::from("Sheet 1")); + + for row in ndarray_str.rows() { + let bytes = row.map(|x| x.as_bytes()).to_vec(); + if let Err(e) = worksheet.write_row(bytes) { + panic!("{e}"); + } + } + + if let Err(e) = worksheet.close() { + panic!("{e}"); + } + + let final_buffer = workbook.finish().ok().unwrap(); + + PyBytes::new_bound(py, &final_buffer.into_inner()) + } + + m.add_class::()?; + + Ok(()) +} diff --git a/py-excel-rs/src/postgres.rs b/py-excel-rs/src/postgres.rs new file mode 100644 index 0000000..40ec887 --- /dev/null +++ b/py-excel-rs/src/postgres.rs @@ -0,0 +1,137 @@ +use std::{borrow::Cow, io::Cursor}; + +use excel_rs_postgres::{ExcelBytes, ExcelBytesBorrowed, FallibleIterator, PostgresClient}; +use excel_rs_xlsx::WorkBook; +use pyo3::{pyclass, pymethods, PyResult}; + +#[pyclass] +pub struct PyPostgresClient { + client: Option, +} + +#[pymethods] +impl PyPostgresClient { + #[staticmethod] + pub fn new(conn_string: &str) -> PyPostgresClient { + PyPostgresClient { + client: Some(PostgresClient::new(conn_string)), + } + } + + pub fn get_columns( + &mut self, + table_name: &str, + schema_name: &str, + excluded: Vec, + ) -> PyResult> { + let mut query = + String::from("SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '"); + + query.push_str(table_name); + + if schema_name != "" { + query.push_str("' AND TABLE_SCHEMA = '"); + query.push_str(schema_name); + } + + if excluded.len() > 0 { + query.push_str("' AND COLUMN_NAME NOT IN ("); + query.push_str( + &excluded + .iter() + .map(|x| format!("'{x}'")) // Add quotes + .collect::>() + .join(", "), // Add commas + ); + query.push_str(")"); + } else { + query.push_str("'"); + } + + let res = match &mut self.client { + Some(client) => client.make_query(&query, vec![]), + None => panic!("Client not set up"), + }; + + let iter = match res { + Ok(iter) => iter, + Err(e) => panic!("{e}"), + }; + + let cols: Vec = match iter.map(|row| Ok(row.get::(0))).collect() { + Ok(s) => s, + Err(e) => panic!("{e}"), + }; + + Ok(cols) + } + + pub fn get_xlsx_from_query(&mut self, query: &str) -> PyResult> { + let res = match &mut self.client { + Some(client) => client.make_query(&query, vec![]), + None => panic!("Client not set up"), + }; + + let mut iter = match res { + Ok(iter) => iter, + Err(e) => panic!("{e}"), + }; + + let output_buffer = vec![]; + let mut workbook = WorkBook::new(Cursor::new(output_buffer)); + let mut worksheet = workbook.get_worksheet(String::from("Sheet 1")); + + let headers = iter.next().ok().unwrap().unwrap(); + let len = headers.len(); + + // Write headers + let mut row_vec: Vec<&[u8]> = vec![&[]; len]; + + for col in 0..len { + let column = headers.columns().get(col).unwrap(); + let name = column.name(); + row_vec[col] = name.as_bytes(); + } + + if let Err(e) = worksheet.write_row(row_vec) { + panic!("{e}"); + } + + while let Ok(Some(row)) = iter.next() { + let mut row_vec: Vec> = vec![Box::from([]); len]; + + for col in 0..len { + if let Ok(bytes) = row.try_get::(col) { + row_vec[col] = Box::from(bytes.0); + } else if let Ok(bytes) = row.try_get::(col) { + let asdasd = bytes.0; + row_vec[col] = asdasd + } + } + + let new_vec: Vec<&[u8]> = row_vec.iter().map(|x| x.as_ref()).collect(); + + if let Err(e) = worksheet.write_row(new_vec) { + panic!("{e}"); + } + } + + if let Err(e) = worksheet.close() { + panic!("{e}"); + } + + let final_buffer = workbook.finish().ok().unwrap().into_inner(); + + Ok(Cow::from(final_buffer)) + } + + pub fn close(&mut self) -> PyResult<()> { + let client = Option::take(&mut self.client); + + if let Some(client) = client { + client.close().ok(); + } + + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index b9fd02c..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,107 +0,0 @@ -mod export_to_xlsx; -mod ssl; -mod xlsx; - -use export_to_xlsx::{ - export_ndarray_to_custom_xlsx, export_pg_client_to_custom_xlsx, export_to_custom_xlsx, -}; -use numpy::PyReadonlyArray2; -use postgres::Client; -use pyo3::{ - prelude::*, - types::{PyBool, PyBytes, PyString}, -}; -use rustls::ClientConfig; -use ssl::SkipServerVerification; -use tokio_postgres_rustls::MakeRustlsConnect; - -#[pymodule] -fn _excel_rs<'py>(m: &Bound<'py, PyModule>) -> PyResult<()> { - #[pyfn(m)] - #[pyo3(name = "export_to_xlsx")] - fn export_to_xlsx<'py>(py: Python<'py>, buf: Bound<'py, PyBytes>) -> Bound<'py, PyBytes> { - let x = buf.as_bytes(); - let xlsx_bytes = match export_to_custom_xlsx(x) { - Ok(b) => b, - Err(e) => panic!("{e}"), - }; - PyBytes::new_bound(py, &xlsx_bytes) - } - - #[pyfn(m)] - #[pyo3(name = "py_2d_to_xlsx")] - fn py_2d_to_xlsx<'py>( - py: Python<'py>, - list: PyReadonlyArray2<'py, PyObject>, - ) -> Bound<'py, PyBytes> { - let ndarray = list.as_array(); - - let ndarray_str = ndarray.mapv(|x| { - if let Ok(inner_str) = x.extract::(py) { - inner_str - } else { - if let Ok(inner_num) = x.extract::(py) { - if inner_num.is_nan() { - String::from("") - } else { - inner_num.to_string() - } - } else { - String::from("") - } - } - }); - - let xlsx_bytes = match export_ndarray_to_custom_xlsx(ndarray_str) { - Ok(b) => b, - Err(e) => panic!("{e}"), - }; - - PyBytes::new_bound(py, &xlsx_bytes) - } - - #[pyfn(m)] - #[pyo3(name = "pg_query_to_xlsx")] - fn pg_query_to_xlsx<'py>( - py: Python<'py>, - py_query: Bound<'py, PyString>, - py_conn_string: Bound<'py, PyString>, - disable_strict_ssl: Bound<'py, PyBool>, - ) -> Bound<'py, PyBytes> { - let conn_string: String = match py_conn_string.extract() { - Ok(s) => s, - Err(e) => panic!("{e}"), - }; - - let query: String = match py_query.extract() { - Ok(s) => s, - Err(e) => panic!("{e}"), - }; - - let mut config = ClientConfig::builder() - .with_root_certificates(rustls::RootCertStore::empty()) - .with_no_client_auth(); - - if disable_strict_ssl.is_true() { - config - .dangerous() - .set_certificate_verifier(SkipServerVerification::new()) - } - - let tls = MakeRustlsConnect::new(config); - - let mut client = match Client::connect(&conn_string, tls) { - Ok(c) => c, - Err(e) => panic!("{e}"), - }; - - let xlsx_bytes = match export_pg_client_to_custom_xlsx(&query, &mut client) { - Ok(b) => b, - Err(e) => panic!("{e}"), - }; - - PyBytes::new_bound(py, &xlsx_bytes) - } - - Ok(()) -} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 32c0097..0000000 --- a/src/main.rs +++ /dev/null @@ -1,30 +0,0 @@ -mod export_to_xlsx; -mod xlsx; - -use std::fs::File; -use std::{io::prelude::*, time::Instant}; - -use anyhow::Result; -pub use export_to_xlsx::export_to_custom_xlsx; - -fn convert_csv_to_xlsx(filename: &str) -> Result<()> { - let mut f = File::open(filename)?; - let mut buffer: Vec = Vec::new(); - - f.read_to_end(&mut buffer)?; - - let xlsx = export_to_custom_xlsx(&buffer)?; - - f = File::create("final.xlsx")?; - f.write(&xlsx)?; - - Ok(()) -} -fn main() { - let now = Instant::now(); - match convert_csv_to_xlsx("original.csv") { - Ok(_) => (), - Err(e) => panic!("{e}"), - } - println!("[convert_csv_to_xlsx] Took: {:.2?}", now.elapsed()); -} diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs deleted file mode 100644 index 584b798..0000000 --- a/src/xlsx/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ - -mod format; -pub mod workbook; -pub mod sheet; - -pub use workbook::WorkBook; \ No newline at end of file