Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Append Transformation Edge Cases #595

Merged
merged 3 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import com.conveyal.datatools.manager.models.TableTransformResult;
import com.conveyal.datatools.manager.models.TransformType;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -49,20 +51,40 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);

final File tempFile = File.createTempFile(tableName + "-temp", ".txt");
final File tempFileWithStrippedNewlines = File.createTempFile(tableName + "-temp-no-newlines", ".txt");
Files.copy(targetTxtFilePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);

// Append CSV data into the target file in the temporary copy of file
try (OutputStream os = new FileOutputStream(tempFile, true)) {
// Append a newline in case our data doesn't include one
// Having an extra newline is not a problem!
os.write(newLineStream.readAllBytes());
os.write(inputStream.readAllBytes());
os.flush();

} catch (Exception e) {
status.fail("Failed to write to target file", e);
}


// Re-write file without extra line breaks
try (
OutputStream noNewlineOs = new FileOutputStream(tempFileWithStrippedNewlines, false);
FileReader fr = new FileReader(tempFile);
BufferedReader br = new BufferedReader(fr);
) {
String line;
while ((line = br.readLine()) != null) {
if (line.matches("\n") || line.isEmpty()) {
continue;
}

noNewlineOs.write(line.getBytes());
noNewlineOs.write("\n".getBytes());
}
noNewlineOs.flush();
}

// Copy modified file into zip
Files.copy(tempFile.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
Files.copy(tempFileWithStrippedNewlines.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);

final int NEW_LINE_CHARACTER_CODE = 10;
int lineCount = (int) csvData.chars().filter(c -> c == NEW_LINE_CHARACTER_CODE).count();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,71 @@ void canAppendToStops() throws SQLException, IOException {
1
);
}


/**
 * Checks that stop rows can be appended to stops.txt when the CSV data to append
 * starts with a newline character.
 */
@Test
void canAppendToStopsWithLeadingNewlineInData() throws SQLException, IOException {
    sourceVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-and-trailing-newlines")
    );

    // Register an append-to-stops transformation on the feed source and persist it.
    FeedTransformation appendStopsTransformation =
        AppendToFileTransformation.create(generateStopRowWithLeadingNewline(), "stops");
    FeedTransformRules appendStopsRules = new FeedTransformRules(appendStopsTransformation);
    feedSource.transformRules.add(appendStopsRules);
    Persistence.feedSources.replace(feedSource.id, feedSource);

    // Create the new target version from the second fixture folder.
    // NOTE(review): the original comment said this folder has no stop_attributes.txt file - confirm.
    targetVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-dates")
    );

    LOG.info("Checking assertions.");

    // Rows expected in stops.txt after the append: 5 existing rows plus the 3 appended ones.
    final int expectedStopRowCount = 5 + 3;
    assertEquals(
        expectedStopRowCount,
        targetVersion.feedLoadResult.stops.rowCount,
        "stops.txt row count should equal input csv data # of rows + 3 extra rows"
    );

    // Exactly one record with the appended stop id "new" should exist in the database.
    final String appendedStopCountQuery = String.format(
        "SELECT count(*) FROM %s.stops WHERE stop_id = '%s'",
        targetVersion.namespace,
        "new"
    );
    assertThatSqlCountQueryYieldsExpectedCount(appendedStopCountQuery, 1);
}
/**
 * Checks that stop rows can be appended to stops.txt when the CSV data to append
 * ends with a newline character.
 */
@Test
void canAppendToStopsWithTrailingNewlineInData() throws SQLException, IOException {
    sourceVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-and-trailing-newlines")
    );

    // Register an append-to-stops transformation on the feed source and persist it.
    FeedTransformation appendStopsTransformation =
        AppendToFileTransformation.create(generateStopRowWithTrailingNewline(), "stops");
    FeedTransformRules appendStopsRules = new FeedTransformRules(appendStopsTransformation);
    feedSource.transformRules.add(appendStopsRules);
    Persistence.feedSources.replace(feedSource.id, feedSource);

    // Create the new target version from the second fixture folder.
    // NOTE(review): the original comment said this folder has no stop_attributes.txt file - confirm.
    targetVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-dates")
    );

    LOG.info("Checking assertions.");

    // Rows expected in stops.txt after the append: 5 existing rows plus the 3 appended ones.
    final int expectedStopRowCount = 5 + 3;
    assertEquals(
        expectedStopRowCount,
        targetVersion.feedLoadResult.stops.rowCount,
        "stops.txt row count should equal input csv data # of rows + 3 extra rows"
    );

    // Exactly one record with the appended stop id "new" should exist in the database.
    final String appendedStopCountQuery = String.format(
        "SELECT count(*) FROM %s.stops WHERE stop_id = '%s'",
        targetVersion.namespace,
        "new"
    );
    assertThatSqlCountQueryYieldsExpectedCount(appendedStopCountQuery, 1);
}

@Test
void canReplaceFeedInfo() throws SQLException, IOException {
// Generate random UUID for feedId, which gets placed into the csv data.
Expand Down Expand Up @@ -311,6 +376,16 @@ private static String generateStopRow() {
"\nnew2,new2,appended stop,,37,-122,,,0,123,," +
"\nnew,new,appended stop,,37.06668,-122.07781,,,0,123,,";
}
/**
 * Builds three stop rows of CSV data (stop ids new3, new2 and new) in which every row,
 * including the first, is preceded by a newline character.
 */
private static String generateStopRowWithLeadingNewline() {
    // The empty first element makes the joined text start with the "\n" delimiter.
    return String.join(
        "\n",
        "",
        "new3,new3,appended stop,,37,-122,,,0,123,,",
        "new2,new2,appended stop,,37,-122,,,0,123,,",
        "new,new,appended stop,,37.06668,-122.07781,,,0,123,,"
    );
}
/**
 * Builds three stop rows of CSV data (stop ids new3, new2 and new) separated by newline
 * characters, with one additional newline after the last row.
 */
private static String generateStopRowWithTrailingNewline() {
    // The empty last element makes the joined text end with the "\n" delimiter.
    return String.join(
        "\n",
        "new3,new3,appended stop,,37,-122,,,0,123,,",
        "new2,new2,appended stop,,37,-122,,,0,123,,",
        "new,new,appended stop,,37.06668,-122.07781,,,0,123,,",
        ""
    );
}

private static String generateCustomCsvData() {
return "custom_column1,custom_column2,custom_column3"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url
1,Fake Transit,,,,,America/Los_Angeles,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
common_id,1,1,1,1,1,1,1,20170918,20170920
only_calendar_id,1,1,1,1,1,1,1,20170921,20170922
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version
fake_transit,Conveyal,http://www.conveyal.com,en,1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url
1,1,1,Route 1,,3,,7CE6E7,FFFFFF,
1,2,2,Route 2,,3,,7CE6E7,FFFFFF,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
stop_id,accessibility_id,cardinal_direction,relative_position,stop_city
4u6g,0,SE,FS,Scotts Valley
johv,0,SE,FS,Scotts Valley
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
only-calendar-trip1,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000,
only-calendar-trip1,07:01:00,07:01:00,johv,2,,0,0,341.4491961,
only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000,
only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961,
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,,
johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,,
123,,Parent Station,,37.0666,-122.0777,,,1,,,

1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,,
1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id
1,only-calendar-trip1,,,0,,,0,0,common_id
2,only-calendar-trip2,,,0,,,0,0,common_id
Loading