Merge pull request #1120 from Unidata/issue1115

don't return masked arrays for vlens (issue #1115)
Unidata · Jun 20, 2021 · 3376418
2 parents 18010f9 + 639b099
commit 3376418
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 16 deletions.
diff --git a/Changelog b/Changelog
@@ -6,6 +6,7 @@
    compressed data to it in parallel. Added a test for this (examples/mpi_example_compressed.py).
    Issue #1108.
  * Fix OverflowError when dimension sizes become greater than 2**32-1 elements on Windows (Issue #1112).
+ * Don't return masked arrays for vlens (only for primitive and enum types - issue #1115).
 
  version 1.5.6 (tag v1.5.6rel)
 ==============================

diff --git a/src/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx
@@ -549,8 +549,9 @@ shape of fancy temp slice = (3, 3, 36, 71)
 The result will be a numpy scalar array.
 
 By default, netcdf4-python returns numpy masked arrays with values equal to the
-`missing_value` or `_FillValue` variable attributes masked.  The
-`Dataset.set_auto_mask` `Dataset` and `Variable` methods
+`missing_value` or `_FillValue` variable attributes masked for primitive and
+enum data types.
+The `Dataset.set_auto_mask` `Dataset` and `Variable` methods
 can be used to disable this feature so that
 numpy arrays are always returned, with the missing values included. Prior to
 version 1.4.0 the default behavior was to only return masked arrays when the
@@ -3017,7 +3018,8 @@ after calling this function will follow the default behaviour.
 **`set_auto_mask(self, True_or_False)`**
 
 Call `Variable.set_auto_mask` for all variables contained in this `Dataset` or
-`Group`, as well as for all variables in all its subgroups.
+`Group`, as well as for all variables in all its subgroups. Only affects
+Variables with primitive or enum types (not compound or vlen Variables).
 
 **`True_or_False`**: Boolean determining if automatic conversion to masked arrays
 shall be applied for all variables.
@@ -3536,7 +3538,8 @@ Default is `True`, can be reset using `Variable.set_auto_scale` and
 **`mask`**: If True, data is automatically converted to/from masked
 arrays when missing values or fill values are present. Default is `True`, can be
 reset using `Variable.set_auto_mask` and `Variable.set_auto_maskandscale`
-methods.
+methods. Only relevant for Variables with primitive or enum types (ignored
+for compound and vlen Variables).
 
 **`chartostring`**: If True, data is automatically converted to/from character
 arrays to string arrays when the `_Encoding` variable attribute is set.
@@ -4438,9 +4441,7 @@ rename a `Variable` attribute named `oldname` to `newname`."""
                 msg = 'invalid scale_factor or add_offset attribute, no unpacking done...'
                 warnings.warn(msg)
 
-        if self.mask and\
-           (self._isprimitive or self._isenum or\
-           (self._isvlen and self.dtype != str)):
+        if self.mask and (self._isprimitive or self._isenum):\
             data = self._toma(data)
         else:
             # if attribute _Unsigned is True, and variable has signed integer

diff --git a/test/tst_vlen.py b/test/tst_vlen.py
@@ -204,13 +204,8 @@ def setUp(self):
         n = 0
         for nlen in ilen:
             data = np.random.uniform(low=0.0, high=1.0, size=nlen)
-            if n==99:
-                # mark last value as missing
-                mask = np.zeros(data.shape,dtype=bool)
-                mask[-1] = True
-                data = np.ma.masked_array(data, mask=mask)
-                self.data = data
             v[n] = data
+            if n==99: self.data = data
             n += 1
         nc.close()
     def tearDown(self):
@@ -219,16 +214,14 @@ def tearDown(self):
     def runTest(self):
         """testing packing float vlens as scaled integers (issue #1003)."""
         nc = Dataset(self.file)
-        # see if data is masked
         data = nc['vl'][-1]
-        assert(data[-1] is np.ma.masked)
         # check max error of compression
         err = np.abs(data - self.data)
         assert(err.max() < nc['vl'].scale_factor)
         # turn off auto-scaling
         nc.set_auto_maskandscale(False)
         data = nc['vl'][-1]
-        assert(data[-1] == 255)
+        assert(data[-1] == np.around(self.data[-1]/nc['vl'].scale_factor))
         nc.close()
 
 if __name__ == '__main__':