Skip to content

Commit

Permalink
Switch csv generation to QUOTE_MINIMAL
Browse files: browse the repository at this point in the history
  • Loading branch information
dogversioning committed Jun 24, 2024
1 parent b9c757a commit f7732b8
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 5 deletions.
4 changes: 1 addition & 3 deletions src/handlers/site_upload/powerset_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,6 @@ def generate_csv_from_parquet(bucket_name: str, bucket_root: str, subbucket_path
last_valid_df = last_valid_df.apply(
lambda x: x.strip() if isinstance(x, str) else x
).replace('""', numpy.nan)
- # Here we are removing internal commas from fields so we get a valid unquoted CSV
- last_valid_df = last_valid_df.replace(to_replace=",", value="", regex=True)
awswrangler.s3.to_csv(
last_valid_df,
(
Expand All @@ -238,7 +236,7 @@ def generate_csv_from_parquet(bucket_name: str, bucket_root: str, subbucket_path
)
),
index=False,
- quoting=csv.QUOTE_NONE,
+ quoting=csv.QUOTE_MINIMAL,
)


Expand Down
4 changes: 2 additions & 2 deletions tests/site_upload/test_powerset_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ def test_parquet_to_csv(mock_bucket):
)
assert list(df["race"].dropna().unique()) == [
"White",
- "Black or African American",
+ "Black, or African American",
"Asian",
- "American Indian or Alaska Native",
+ "American Indian, or Alaska Native",
]

0 comments on commit f7732b8

Please sign in to comment.