Skip to content

Commit

Permalink
stream reading features
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelkirk committed Sep 2, 2022
1 parent 0d92bd2 commit 8be5030
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 84 deletions.
5 changes: 3 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@
geojson::ser::to_feature_collection_string(&my_structs).unwrap();
```
* PR: <https://github.com/georust/geojson/pull/199>
* Added `geojson::{FeatureReader, FeatureWriter}` to stream the reading/writing of your custom struct to and from GeoJSON.
* PR: <https://github.com/georust/geojson/pull/199>
* Added `geojson::{FeatureReader, FeatureWriter}` to stream the reading/writing of your custom struct to and from GeoJSON, greatly reducing the memory required to process a FeatureCollection.
* PR: <https://github.com/georust/geojson/pull/199>
* PR: <https://github.com/georust/geojson/pull/205>
* PR: <https://github.com/georust/geojson/pull/206>
* Added IntoIter implementation for FeatureCollection.
* <https://github.com/georust/geojson/pull/196>
* Added `geojson::Result<T>`.
Expand Down
82 changes: 11 additions & 71 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,21 +150,17 @@ pub fn deserialize_feature_collection<'de, T>(
where
T: Deserialize<'de>,
{
let mut deserializer = serde_json::Deserializer::from_reader(feature_collection_reader);

// PERF: rather than deserializing the entirety of the `features:` array into memory here, it'd
// be nice to stream the features. However, I ran into difficulty while trying to return any
// borrowed reference from the visitor methods (e.g. MapAccess)
let visitor = FeatureCollectionVisitor::new();
let objects = deserializer.deserialize_map(visitor)?;

Ok(objects.into_iter().map(|feature_value| {
let deserializer = feature_value.into_deserializer();
let visitor = FeatureVisitor::new();
let record: T = deserializer.deserialize_map(visitor)?;

Ok(record)
}))
#[allow(deprecated)]
let iter = crate::FeatureIterator::new(feature_collection_reader).map(
|feature_value: Result<JsonValue>| {
let deserializer = feature_value?.into_deserializer();
let visitor = FeatureVisitor::new();
let record: T = deserializer.deserialize_map(visitor)?;

Ok(record)
},
);
Ok(iter)
}

/// Build a `Vec` of structs from a GeoJson `&str`.
Expand Down Expand Up @@ -295,62 +291,6 @@ where
Ok(deserializer.deserialize_map(visitor)?)
}

/// Stateless serde visitor used to walk the top-level FeatureCollection object.
struct FeatureCollectionVisitor;

impl FeatureCollectionVisitor {
    /// Builds the visitor. It is a unit struct, so there is nothing to configure.
    fn new() -> Self {
        FeatureCollectionVisitor
    }
}

impl<'de> serde::de::Visitor<'de> for FeatureCollectionVisitor {
type Value = Vec<JsonValue>;

fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
write!(formatter, "a valid GeoJSON Feature object")
}

fn visit_map<A>(self, mut map_access: A) -> std::result::Result<Self::Value, A::Error>
where
A: serde::de::MapAccess<'de>,
{
let mut has_feature_collection_type = false;
let mut features = None;
while let Some((key, value)) = map_access.next_entry::<String, JsonValue>()? {
if key == "type" {
if value == JsonValue::String("FeatureCollection".to_string()) {
has_feature_collection_type = true;
} else {
return Err(Error::custom("invalid type for feature collection"));
}
} else if key == "features" {
if let JsonValue::Array(value) = value {
if features.is_some() {
return Err(Error::custom(
"Encountered more than one list of `features`",
));
}
features = Some(value);
} else {
return Err(Error::custom("`features` had unexpected value"));
}
} else {
log::warn!("foreign members are not handled by FeatureCollection deserializer")
}
}

if let Some(features) = features {
if has_feature_collection_type {
Ok(features)
} else {
Err(Error::custom("No `type` field was found"))
}
} else {
Err(Error::custom("No `features` field was found"))
}
}
}

/// Visitor handed to `deserialize_map` when turning a single feature value into
/// a caller-chosen type `D`.
///
/// It stores no runtime data; the only field is a marker tying the visitor to `D`.
// NOTE(review): the `Visitor` impl lives outside this excerpt — presumably its
// `Value` is `D`; confirm against the impl.
struct FeatureVisitor<D> {
// Marker for the output type `D`; no `D` value is actually held.
_marker: PhantomData<D>,
}
Expand Down
26 changes: 15 additions & 11 deletions src/feature_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

use crate::{Feature, Result};

use serde::Deserialize;
use std::io;
use std::marker::PhantomData;

Expand All @@ -32,10 +33,11 @@ use std::marker::PhantomData;
/// Based on example code found at <https://github.com/serde-rs/serde/issues/903#issuecomment-297488118>.
///
/// [GeoJSON Format Specification § 3.3](https://datatracker.ietf.org/doc/html/rfc7946#section-3.3)
pub struct FeatureIterator<R> {
pub struct FeatureIterator<'de, R, D = Feature> {
reader: R,
state: State,
marker: PhantomData<Feature>,
output: PhantomData<D>,
lifetime: PhantomData<&'de ()>,
}

#[derive(Debug, Copy, Clone)]
Expand All @@ -45,17 +47,18 @@ enum State {
AfterFeatures,
}

impl<R> FeatureIterator<R> {
impl<'de, R, D> FeatureIterator<'de, R, D> {
pub fn new(reader: R) -> Self {
FeatureIterator {
reader,
state: State::BeforeFeatures,
marker: PhantomData,
output: PhantomData,
lifetime: PhantomData,
}
}
}

impl<R> FeatureIterator<R>
impl<'de, R, D> FeatureIterator<'de, R, D>
where
R: io::Read,
{
Expand Down Expand Up @@ -98,11 +101,12 @@ where
}
}

impl<R> Iterator for FeatureIterator<R>
impl<'de, R, D> Iterator for FeatureIterator<'de, R, D>
where
R: io::Read,
D: Deserialize<'de>,
{
type Item = Result<Feature>;
type Item = Result<D>;

fn next(&mut self) -> Option<Self::Item> {
match self.seek_to_next_feature() {
Expand All @@ -124,9 +128,9 @@ where

#[cfg(test)]
mod tests {
use crate::FeatureIterator;
use crate::Geometry;
use crate::Value;
use super::*;
use crate::{Geometry, Value};

use std::io::BufReader;

fn fc() -> &'static str {
Expand Down Expand Up @@ -179,7 +183,7 @@ mod tests {

#[test]
fn stream_read_test() {
let mut fi = FeatureIterator::new(BufReader::new(fc().as_bytes()));
let mut fi = FeatureIterator::<_, Feature>::new(BufReader::new(fc().as_bytes()));
assert_eq!(
Geometry {
bbox: None,
Expand Down

0 comments on commit 8be5030

Please sign in to comment.