diff --git a/README.md b/README.md index db64d531..b57efe0e 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,9 @@ session = boto3.Session( url = 's3://clp-example-s3-bucket/example.clp.zst' # Using `smart_open.open` to stream the encoded CLP IR: -with smart_open.open(url, "rb", transport_params={'client': session.client('s3')}) as istream: +with smart_open.open( + url, mode="rb", compression="disable", transport_params={"client": session.client("s3")} +) as istream: with ClpIrStreamReader(istream, allow_incomplete_stream=True) as clp_reader: for log_event in clp_reader: # Print the log message with its timestamp properly formatted. @@ -211,7 +213,11 @@ with smart_open.open(url, "rb", transport_params={'client': session.client('s3') ``` Note: -When `allow_incomplete_stream` is set to False (default), the reader will raise +- Setting `compression="disable"` is necessary so that `smart_open` doesn't +transparently decompress the IR file (based on its `.zst` extension) before +streaming it to `ClpIrStreamReader`; `ClpIrStreamReader` expects the input +stream to be Zstandard-compressed. +- When `allow_incomplete_stream` is set to `False` (default), the reader will raise `clp_ffi_py.ir.IncompleteStreamError` if the stream is incomplete (it doesn't end with the byte sequence indicating the stream's end). In practice, this can occur if you're reading a stream that is still being written or wasn't properly