New Upstream Snapshot - keras-preprocessing
Ready changes
Summary
Merged new upstream version: 1.1.2+git20220217.1.3e38006+ds (was: 1.1.0+ds).
Resulting package
Built on 2023-01-20T00:19 (took 5m11s)
The resulting binary packages can be installed (if you have the apt repository enabled) by running one of:
apt install -t fresh-snapshots python3-keras-preprocessing
Diff
diff --git a/PKG-INFO b/PKG-INFO
index 98768ed..21676ca 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,39 +1,39 @@
-Metadata-Version: 1.1
+Metadata-Version: 2.1
Name: Keras_Preprocessing
-Version: 1.1.0
+Version: 1.1.2
Summary: Easy data preprocessing and data augmentation for deep learning models
Home-page: https://github.com/keras-team/keras-preprocessing
+Download-URL: https://github.com/keras-team/keras-preprocessing/tarball/1.1.2
Author: Keras Team
-Author-email: UNKNOWN
License: MIT
-Download-URL: https://github.com/keras-team/keras-preprocessing/tarball/1.1.0
-Description:
- Keras Preprocessing is the data preprocessing
- and data augmentation module of the Keras deep learning library.
- It provides utilities for working with image data, text data,
- and sequence data.
-
- Read the documentation at: https://keras.io/
-
- Keras Preprocessing may be imported directly
- from an up-to-date installation of Keras:
-
- ```
- from keras import preprocessing
- ```
-
- Keras Preprocessing is compatible with Python 2.7-3.6
- and is distributed under the MIT license.
-
-Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Education
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Provides-Extra: image
+Provides-Extra: pep8
+Provides-Extra: tests
+License-File: LICENSE
+
+
+Keras Preprocessing is the data preprocessing
+and data augmentation module of the Keras deep learning library.
+It provides utilities for working with image data, text data,
+and sequence data.
+
+Read the documentation at: https://keras.io/
+
+Keras Preprocessing may be imported directly
+from an up-to-date installation of Keras:
+
+```
+from keras import preprocessing
+```
+
+Keras Preprocessing is compatible with Python 3.6
+and is distributed under the MIT license.
diff --git a/README.md b/README.md
index ff44843..75c2166 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,10 @@
# Keras Preprocessing
-[![Build Status](https://travis-ci.org/keras-team/keras-preprocessing.svg?branch=master)](https://travis-ci.org/keras-team/keras-preprocessing)
-
-Keras Preprocessing is the data preprocessing
-and data augmentation module of the Keras deep learning library.
-It provides utilities for working with image data, text data,
-and sequence data.
-
-Read the documentation at: https://keras.io/
-
-Keras Preprocessing may be imported directly
-from an up-to-date installation of Keras:
-
-```
-from keras import preprocessing
-```
-
-Keras Preprocessing is compatible with Python 2.7-3.6
-and is distributed under the MIT license.
+⚠️ This GitHub repository is now deprecated -- all Keras Preprocessing symbols have
+moved into the core Keras [repository](https://github.com/keras-team/keras)
+and the TensorFlow [`pip` package](https://www.tensorflow.org/install). All code
+changes and discussion should move to the Keras repository.
+
+For users looking for a place to start preprocessing data, consult the
+[preprocessing layers guide](https://keras.io/guides/preprocessing_layers/)
+and refer to the [data loading utilities API](https://keras.io/api/data_loading/).
diff --git a/debian/changelog b/debian/changelog
index 967802a..716425f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+keras-preprocessing (1.1.2+git20220217.1.3e38006+ds-1) UNRELEASED; urgency=low
+
+ * New upstream snapshot.
+
+ -- Debian Janitor <janitor@jelmer.uk> Fri, 20 Jan 2023 00:15:26 -0000
+
keras-preprocessing (1.1.0+ds-1) unstable; urgency=medium
[ Stephen Sinclair ]
diff --git a/keras_preprocessing/__init__.py b/keras_preprocessing/__init__.py
index 31fd376..bbdea3f 100644
--- a/keras_preprocessing/__init__.py
+++ b/keras_preprocessing/__init__.py
@@ -1,8 +1,5 @@
"""Enables dynamic setting of underlying Keras module.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
_KERAS_BACKEND = None
_KERAS_UTILS = None
@@ -40,4 +37,4 @@ def get_keras_submodule(name):
return _KERAS_UTILS
-__version__ = '1.1.0'
+__version__ = '1.1.2'
diff --git a/keras_preprocessing/image/__init__.py b/keras_preprocessing/image/__init__.py
index 23200ca..fa74542 100644
--- a/keras_preprocessing/image/__init__.py
+++ b/keras_preprocessing/image/__init__.py
@@ -1,6 +1,5 @@
"""Enables dynamic setting of underlying Keras module.
"""
-from __future__ import absolute_import
# flake8: noqa:F401
from .affine_transformations import *
from .dataframe_iterator import DataFrameIterator
diff --git a/keras_preprocessing/image/affine_transformations.py b/keras_preprocessing/image/affine_transformations.py
index 9ee40cb..c2103d8 100644
--- a/keras_preprocessing/image/affine_transformations.py
+++ b/keras_preprocessing/image/affine_transformations.py
@@ -1,13 +1,8 @@
"""Utilities for performing affine transformations on image data.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
import numpy as np
-from .utils import (array_to_img,
- img_to_array)
+from .utils import array_to_img, img_to_array
try:
import scipy
@@ -17,8 +12,8 @@ except ImportError:
scipy = None
try:
- from PIL import ImageEnhance
from PIL import Image as pil_image
+ from PIL import ImageEnhance
except ImportError:
pil_image = None
ImageEnhance = None
@@ -53,8 +48,13 @@ def random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0,
Rotated Numpy image tensor.
"""
theta = np.random.uniform(-rg, rg)
- x = apply_affine_transform(x, theta=theta, channel_axis=channel_axis,
- fill_mode=fill_mode, cval=cval,
+ x = apply_affine_transform(x,
+ theta=theta,
+ row_axis=row_axis,
+ col_axis=col_axis,
+ channel_axis=channel_axis,
+ fill_mode=fill_mode,
+ cval=cval,
order=interpolation_order)
return x
@@ -84,8 +84,14 @@ def random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0,
h, w = x.shape[row_axis], x.shape[col_axis]
tx = np.random.uniform(-hrg, hrg) * h
ty = np.random.uniform(-wrg, wrg) * w
- x = apply_affine_transform(x, tx=tx, ty=ty, channel_axis=channel_axis,
- fill_mode=fill_mode, cval=cval,
+ x = apply_affine_transform(x,
+ tx=tx,
+ ty=ty,
+ row_axis=row_axis,
+ col_axis=col_axis,
+ channel_axis=channel_axis,
+ fill_mode=fill_mode,
+ cval=cval,
order=interpolation_order)
return x
@@ -112,8 +118,13 @@ def random_shear(x, intensity, row_axis=1, col_axis=2, channel_axis=0,
Sheared Numpy image tensor.
"""
shear = np.random.uniform(-intensity, intensity)
- x = apply_affine_transform(x, shear=shear, channel_axis=channel_axis,
- fill_mode=fill_mode, cval=cval,
+ x = apply_affine_transform(x,
+ shear=shear,
+ row_axis=row_axis,
+ col_axis=col_axis,
+ channel_axis=channel_axis,
+ fill_mode=fill_mode,
+ cval=cval,
order=interpolation_order)
return x
@@ -150,8 +161,14 @@ def random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0,
zx, zy = 1, 1
else:
zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
- x = apply_affine_transform(x, zx=zx, zy=zy, channel_axis=channel_axis,
- fill_mode=fill_mode, cval=cval,
+ x = apply_affine_transform(x,
+ zx=zx,
+ zy=zy,
+ row_axis=row_axis,
+ col_axis=col_axis,
+ channel_axis=channel_axis,
+ fill_mode=fill_mode,
+ cval=cval,
order=interpolation_order)
return x
@@ -195,37 +212,45 @@ def random_channel_shift(x, intensity_range, channel_axis=0):
return apply_channel_shift(x, intensity, channel_axis=channel_axis)
-def apply_brightness_shift(x, brightness):
+def apply_brightness_shift(x, brightness, scale=True):
"""Performs a brightness shift.
# Arguments
x: Input tensor. Must be 3D.
brightness: Float. The new brightness value.
- channel_axis: Index of axis for channels in the input tensor.
+ scale: Whether to rescale the image such that minimum and maximum values
+ are 0 and 255 respectively.
+ Default: True.
# Returns
Numpy image tensor.
# Raises
- ValueError if `brightness_range` isn't a tuple.
+ ImportError: if PIL is not available.
"""
if ImageEnhance is None:
raise ImportError('Using brightness shifts requires PIL. '
'Install PIL or Pillow.')
- x = array_to_img(x)
+ x_min, x_max = np.min(x), np.max(x)
+ local_scale = (x_min < 0) or (x_max > 255)
+ x = array_to_img(x, scale=local_scale or scale)
x = imgenhancer_Brightness = ImageEnhance.Brightness(x)
x = imgenhancer_Brightness.enhance(brightness)
x = img_to_array(x)
+ if not scale and local_scale:
+ x = x / 255 * (x_max - x_min) + x_min
return x
-def random_brightness(x, brightness_range):
+def random_brightness(x, brightness_range, scale=True):
"""Performs a random brightness shift.
# Arguments
x: Input tensor. Must be 3D.
brightness_range: Tuple of floats; brightness range.
- channel_axis: Index of axis for channels in the input tensor.
+ scale: Whether to rescale the image such that minimum and maximum values
+ are 0 and 255 respectively.
+ Default: True.
# Returns
Numpy image tensor.
@@ -239,12 +264,12 @@ def random_brightness(x, brightness_range):
'Received: %s' % (brightness_range,))
u = np.random.uniform(brightness_range[0], brightness_range[1])
- return apply_brightness_shift(x, u)
+ return apply_brightness_shift(x, u, scale)
def transform_matrix_offset_center(matrix, x, y):
- o_x = float(x) / 2 + 0.5
- o_y = float(y) / 2 + 0.5
+ o_x = float(x) / 2 - 0.5
+ o_y = float(y) / 2 - 0.5
offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
@@ -252,20 +277,22 @@ def transform_matrix_offset_center(matrix, x, y):
def apply_affine_transform(x, theta=0, tx=0, ty=0, shear=0, zx=1, zy=1,
- row_axis=0, col_axis=1, channel_axis=2,
+ row_axis=1, col_axis=2, channel_axis=0,
fill_mode='nearest', cval=0., order=1):
"""Applies an affine transformation specified by the parameters given.
# Arguments
- x: 2D numpy array, single image.
+ x: 3D numpy array - a 2D image with one or more channels.
theta: Rotation angle in degrees.
tx: Width shift.
ty: Height shift.
shear: Shear angle in degrees.
zx: Zoom in x direction.
zy: Zoom in y direction
- row_axis: Index of axis for rows in the input image.
- col_axis: Index of axis for columns in the input image.
+ row_axis: Index of axis for rows (aka Y axis) in the input image.
+ Direction: top to bottom.
+ col_axis: Index of axis for columns (aka X axis) in the input image.
+ Direction: left to right.
channel_axis: Index of axis for channels in the input image.
fill_mode: Points outside the boundaries of the input
are filled according to the given mode
@@ -280,6 +307,26 @@ def apply_affine_transform(x, theta=0, tx=0, ty=0, shear=0, zx=1, zy=1,
if scipy is None:
raise ImportError('Image transformations require SciPy. '
'Install SciPy.')
+
+ # Input sanity checks:
+ # 1. x must be a 2D image with one or more channels (i.e., a 3D tensor)
+ # 2. channels must be either first or last dimension
+ if np.unique([row_axis, col_axis, channel_axis]).size != 3:
+ raise ValueError("'row_axis', 'col_axis', and 'channel_axis'"
+ " must be distinct")
+
+ # TODO: shall we support negative indices?
+ valid_indices = set([0, 1, 2])
+ actual_indices = set([row_axis, col_axis, channel_axis])
+ if actual_indices != valid_indices:
+ raise ValueError(
+ f"Invalid axis' indices: {actual_indices - valid_indices}")
+
+ if x.ndim != 3:
+ raise ValueError("Input arrays must be multi-channel 2D images.")
+ if channel_axis not in [0, 2]:
+ raise ValueError("Channels are allowed and the first and last dimensions.")
+
transform_matrix = None
if theta != 0:
theta = np.deg2rad(theta)
@@ -321,6 +368,19 @@ def apply_affine_transform(x, theta=0, tx=0, ty=0, shear=0, zx=1, zy=1,
transform_matrix = transform_matrix_offset_center(
transform_matrix, h, w)
x = np.rollaxis(x, channel_axis, 0)
+
+ # Matrix construction assumes that coordinates are x, y (in that order).
+ # However, regular numpy arrays use y,x (aka i,j) indexing.
+ # Possible solution is:
+ # 1. Swap the x and y axes.
+ # 2. Apply transform.
+ # 3. Swap the x and y axes again to restore image-like data ordering.
+ # Mathematically, it is equivalent to the following transformation:
+ # M' = PMP, where P is the permutation matrix, M is the original
+ # transformation matrix.
+ if col_axis > row_axis:
+ transform_matrix[:, [0, 1]] = transform_matrix[:, [1, 0]]
+ transform_matrix[[0, 1]] = transform_matrix[[1, 0]]
final_affine_matrix = transform_matrix[:2, :2]
final_offset = transform_matrix[:2, 2]
diff --git a/keras_preprocessing/image/dataframe_iterator.py b/keras_preprocessing/image/dataframe_iterator.py
index 5412df2..a1c1cbf 100644
--- a/keras_preprocessing/image/dataframe_iterator.py
+++ b/keras_preprocessing/image/dataframe_iterator.py
@@ -1,11 +1,8 @@
"""Utilities for real-time data augmentation on image data.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
import os
import warnings
+from collections import OrderedDict
import numpy as np
@@ -80,6 +77,9 @@ class DataFrameIterator(BatchFromFilesMixin, Iterator):
If PIL version 1.1.3 or newer is installed, "lanczos" is also
supported. If PIL version 3.4.0 or newer is installed, "box" and
"hamming" are also supported. By default, "nearest" is used.
+ keep_aspect_ratio: Boolean, whether to resize images to a target size
+ without aspect ratio distortion. The image is cropped in the center
+ with target aspect ratio before resizing.
dtype: Dtype to use for the generated arrays.
validate_filenames: Boolean, whether to validate image filenames in
`x_col`. If `True`, invalid images will be ignored. Disabling this option
@@ -89,6 +89,15 @@ class DataFrameIterator(BatchFromFilesMixin, Iterator):
'binary', 'categorical', 'input', 'multi_output', 'raw', 'sparse', None
}
+ def __new__(cls, *args, **kwargs):
+ try:
+ from tensorflow.keras.utils import Sequence as TFSequence
+ if TFSequence not in cls.__bases__:
+ cls.__bases__ = cls.__bases__ + (TFSequence,)
+ except ImportError:
+ pass
+ return super(DataFrameIterator, cls).__new__(cls)
+
def __init__(self,
dataframe,
directory=None,
@@ -109,6 +118,7 @@ class DataFrameIterator(BatchFromFilesMixin, Iterator):
save_format='png',
subset=None,
interpolation='nearest',
+ keep_aspect_ratio=False,
dtype='float32',
validate_filenames=True):
@@ -120,7 +130,8 @@ class DataFrameIterator(BatchFromFilesMixin, Iterator):
save_prefix,
save_format,
subset,
- interpolation)
+ interpolation,
+ keep_aspect_ratio)
df = dataframe.copy()
self.directory = directory or ''
self.class_mode = class_mode
@@ -240,7 +251,8 @@ class DataFrameIterator(BatchFromFilesMixin, Iterator):
)
if classes:
- classes = set(classes) # sort and prepare for membership lookup
+ # prepare for membership lookup
+ classes = list(OrderedDict.fromkeys(classes).keys())
df[y_col] = df[y_col].apply(lambda x: remove_classes(x, classes))
else:
classes = set()
@@ -249,7 +261,8 @@ class DataFrameIterator(BatchFromFilesMixin, Iterator):
classes.update(v)
else:
classes.add(v)
- return df.dropna(subset=[y_col]), sorted(classes)
+ classes = sorted(classes)
+ return df.dropna(subset=[y_col]), classes
def _filter_valid_filepaths(self, df, x_col):
"""Keep only dataframe rows with valid filenames
diff --git a/keras_preprocessing/image/directory_iterator.py b/keras_preprocessing/image/directory_iterator.py
index 3f75d83..0911eae 100644
--- a/keras_preprocessing/image/directory_iterator.py
+++ b/keras_preprocessing/image/directory_iterator.py
@@ -1,12 +1,7 @@
"""Utilities for real-time data augmentation on image data.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
import multiprocessing.pool
-from six.moves import range
+import os
import numpy as np
@@ -51,7 +46,7 @@ class DirectoryIterator(BatchFromFilesMixin, Iterator):
images (if `save_to_dir` is set).
save_format: Format to use for saving sample images
(if `save_to_dir` is set).
- follow_links: boolean,follow symbolic links to subdirectories
+ follow_links: Boolean, follow symbolic links to subdirectories
subset: Subset of data (`"training"` or `"validation"`) if
validation_split is set in ImageDataGenerator.
interpolation: Interpolation method used to resample the image if the
@@ -60,10 +55,22 @@ class DirectoryIterator(BatchFromFilesMixin, Iterator):
If PIL version 1.1.3 or newer is installed, "lanczos" is also
supported. If PIL version 3.4.0 or newer is installed, "box" and
"hamming" are also supported. By default, "nearest" is used.
+ keep_aspect_ratio: Boolean, whether to resize images to a target size
+ without aspect ratio distortion. The image is cropped in the center
+ with target aspect ratio before resizing.
dtype: Dtype to use for generated arrays.
"""
allowed_class_modes = {'categorical', 'binary', 'sparse', 'input', None}
+ def __new__(cls, *args, **kwargs):
+ try:
+ from tensorflow.keras.utils import Sequence as TFSequence
+ if TFSequence not in cls.__bases__:
+ cls.__bases__ = cls.__bases__ + (TFSequence,)
+ except ImportError:
+ pass
+ return super(DirectoryIterator, cls).__new__(cls)
+
def __init__(self,
directory,
image_data_generator,
@@ -81,6 +88,7 @@ class DirectoryIterator(BatchFromFilesMixin, Iterator):
follow_links=False,
subset=None,
interpolation='nearest',
+ keep_aspect_ratio=False,
dtype='float32'):
super(DirectoryIterator, self).set_processing_attrs(image_data_generator,
target_size,
@@ -90,7 +98,8 @@ class DirectoryIterator(BatchFromFilesMixin, Iterator):
save_prefix,
save_format,
subset,
- interpolation)
+ interpolation,
+ keep_aspect_ratio)
self.directory = directory
self.classes = classes
if class_mode not in self.allowed_class_modes:
diff --git a/keras_preprocessing/image/image_data_generator.py b/keras_preprocessing/image/image_data_generator.py
index 5c926eb..e3c2938 100644
--- a/keras_preprocessing/image/image_data_generator.py
+++ b/keras_preprocessing/image/image_data_generator.py
@@ -1,29 +1,15 @@
"""Utilities for real-time data augmentation on image data.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
import warnings
-from six.moves import range
import numpy as np
-try:
- import scipy
- # scipy.linalg cannot be accessed until explicitly imported
- from scipy import linalg
- # scipy.ndimage cannot be accessed until explicitly imported
-except ImportError:
- scipy = None
-
+from .affine_transformations import (apply_affine_transform,
+ apply_brightness_shift,
+ apply_channel_shift, flip_axis)
from .dataframe_iterator import DataFrameIterator
from .directory_iterator import DirectoryIterator
from .numpy_array_iterator import NumpyArrayIterator
-from .affine_transformations import (apply_affine_transform,
- apply_brightness_shift,
- apply_channel_shift,
- flip_axis)
class ImageDataGenerator(object):
@@ -49,7 +35,7 @@ class ImageDataGenerator(object):
are integers `[-1, 0, +1]`,
same as with `width_shift_range=[-1, 0, +1]`,
while with `width_shift_range=1.0` possible values are floats
- in the interval [-1.0, +1.0).
+ in the interval `[-1.0, +1.0)`.
height_shift_range: Float, 1-D array-like or int
- float: fraction of total height, if < 1, or pixels if >= 1.
- 1-D array-like: random elements from the array.
@@ -59,7 +45,7 @@ class ImageDataGenerator(object):
are integers `[-1, 0, +1]`,
same as with `height_shift_range=[-1, 0, +1]`,
while with `height_shift_range=1.0` possible values are floats
- in the interval [-1.0, +1.0).
+ in the interval `[-1.0, +1.0)`.
brightness_range: Tuple or list of two floats. Range for picking
a brightness shift value from.
shear_range: Float. Shear Intensity
@@ -87,8 +73,8 @@ class ImageDataGenerator(object):
preprocessing_function: function that will be applied on each input.
The function will run after the image is resized and augmented.
The function should take one argument:
- one image (Numpy tensor with rank 3),
- and should output a Numpy tensor with the same shape.
+ one image (NumPy tensor with rank 3),
+ and should output a NumPy tensor with the same shape.
data_format: Image data format,
either "channels_first" or "channels_last".
"channels_last" mode means that the images should have shape
@@ -320,11 +306,12 @@ class ImageDataGenerator(object):
self.mean = None
self.std = None
- self.principal_components = None
+ self.zca_whitening_matrix = None
- if np.isscalar(zoom_range):
+ if isinstance(zoom_range, (float, int)):
self.zoom_range = [1 - zoom_range, 1 + zoom_range]
- elif len(zoom_range) == 2:
+ elif (len(zoom_range) == 2 and
+ all(isinstance(val, (float, int)) for val in zoom_range)):
self.zoom_range = [zoom_range[0], zoom_range[1]]
else:
raise ValueError('`zoom_range` should be a float or '
@@ -374,14 +361,15 @@ class ImageDataGenerator(object):
save_to_dir=None,
save_prefix='',
save_format='png',
+ ignore_class_split=False,
subset=None):
"""Takes data & label arrays, generates batches of augmented data.
# Arguments
- x: Input data. Numpy array of rank 4 or a tuple.
+ x: Input data. NumPy array of rank 4 or a tuple.
If tuple, the first element
should contain the images and the second element
- another numpy array or a list of numpy arrays
+ another NumPy array or a list of NumPy arrays
that gets passed to the output
without any modifications.
Can be used to feed the model miscellaneous data
@@ -404,18 +392,21 @@ class ImageDataGenerator(object):
(only relevant if `save_to_dir` is set).
save_format: one of "png", "jpeg"
(only relevant if `save_to_dir` is set). Default: "png".
+ ignore_class_split: Boolean (default: False), ignore difference
+ in number of classes in labels across train and validation
+ split (useful for non-classification tasks)
subset: Subset of data (`"training"` or `"validation"`) if
`validation_split` is set in `ImageDataGenerator`.
# Returns
An `Iterator` yielding tuples of `(x, y)`
- where `x` is a numpy array of image data
+ where `x` is a NumPy array of image data
(in the case of a single image input) or a list
- of numpy arrays (in the case with
- additional inputs) and `y` is a numpy array
+ of NumPy arrays (in the case with
+ additional inputs) and `y` is a NumPy array
of corresponding labels. If 'sample_weight' is not None,
the yielded tuples are of the form `(x, y, sample_weight)`.
- If `y` is None, only the numpy array `x` is returned.
+ If `y` is None, only the NumPy array `x` is returned.
"""
return NumpyArrayIterator(
x,
@@ -429,7 +420,9 @@ class ImageDataGenerator(object):
save_to_dir=save_to_dir,
save_prefix=save_prefix,
save_format=save_format,
- subset=subset
+ ignore_class_split=ignore_class_split,
+ subset=subset,
+ dtype=self.dtype
)
def flow_from_directory(self,
@@ -446,7 +439,8 @@ class ImageDataGenerator(object):
save_format='png',
follow_links=False,
subset=None,
- interpolation='nearest'):
+ interpolation='nearest',
+ keep_aspect_ratio=False):
"""Takes the path to a directory & generates batches of augmented data.
# Arguments
@@ -514,17 +508,21 @@ class ImageDataGenerator(object):
supported. If PIL version 3.4.0 or newer is installed,
`"box"` and `"hamming"` are also supported.
By default, `"nearest"` is used.
+ keep_aspect_ratio: Boolean, whether to resize images to a target
+ size without aspect ratio distortion. The image is cropped in
+ the center with target aspect ratio before resizing.
# Returns
A `DirectoryIterator` yielding tuples of `(x, y)`
- where `x` is a numpy array containing a batch
+ where `x` is a NumPy array containing a batch
of images with shape `(batch_size, *target_size, channels)`
- and `y` is a numpy array of corresponding labels.
+ and `y` is a NumPy array of corresponding labels.
"""
return DirectoryIterator(
directory,
self,
target_size=target_size,
+ keep_aspect_ratio=keep_aspect_ratio,
color_mode=color_mode,
classes=classes,
class_mode=class_mode,
@@ -537,7 +535,8 @@ class ImageDataGenerator(object):
save_format=save_format,
follow_links=follow_links,
subset=subset,
- interpolation=interpolation
+ interpolation=interpolation,
+ dtype=self.dtype
)
def flow_from_dataframe(self,
@@ -600,14 +599,14 @@ class ImageDataGenerator(object):
class_mode: one of "binary", "categorical", "input", "multi_output",
"raw", "sparse" or None. Default: "categorical".
Mode for yielding the targets:
- - `"binary"`: 1D numpy array of binary labels,
- - `"categorical"`: 2D numpy array of one-hot encoded labels.
+ - `"binary"`: 1D NumPy array of binary labels,
+ - `"categorical"`: 2D NumPy array of one-hot encoded labels.
Supports multi-label output.
- `"input"`: images identical to input images (mainly used to
work with autoencoders),
- `"multi_output"`: list with the values of the different columns,
- - `"raw"`: numpy array of values in `y_col` column(s),
- - `"sparse"`: 1D numpy array of integer labels,
+ - `"raw"`: NumPy array of values in `y_col` column(s),
+ - `"sparse"`: 1D NumPy array of integer labels,
- `None`, no targets are returned (the generator will only yield
batches of image data, which is useful to use in
`model.predict_generator()`).
@@ -639,9 +638,9 @@ class ImageDataGenerator(object):
# Returns
A `DataFrameIterator` yielding tuples of `(x, y)`
- where `x` is a numpy array containing a batch
+ where `x` is a NumPy array containing a batch
of images with shape `(batch_size, *target_size, channels)`
- and `y` is a numpy array of corresponding labels.
+ and `y` is a NumPy array of corresponding labels.
"""
if 'has_ext' in kwargs:
warnings.warn('has_ext is deprecated, filenames in the dataframe have '
@@ -680,19 +679,20 @@ class ImageDataGenerator(object):
save_format=save_format,
subset=subset,
interpolation=interpolation,
- validate_filenames=validate_filenames
+ validate_filenames=validate_filenames,
+ dtype=self.dtype
)
def standardize(self, x):
"""Applies the normalization configuration in-place to a batch of inputs.
`x` is changed in-place since the function is mainly used internally
- to standarize images and feed them to your network. If a copy of `x`
+ to standardize images and feed them to your network. If a copy of `x`
would be created instead it would have a significant performance cost.
If you want to apply this method without changing the input in-place
you can call the method creating a copy before:
- standarize(np.copy(x))
+ standardize(np.copy(x))
# Arguments
x: Batch of inputs to be normalized.
@@ -727,10 +727,10 @@ class ImageDataGenerator(object):
'been fit on any training data. Fit it '
'first by calling `.fit(numpy_data)`.')
if self.zca_whitening:
- if self.principal_components is not None:
- flatx = np.reshape(x, (-1, np.prod(x.shape[-3:])))
- whitex = np.dot(flatx, self.principal_components)
- x = np.reshape(whitex, x.shape)
+ if self.zca_whitening_matrix is not None:
+ flat_x = x.reshape(-1, np.prod(x.shape[-3:]))
+ white_x = flat_x @ self.zca_whitening_matrix
+ x = np.reshape(white_x, x.shape)
else:
warnings.warn('This ImageDataGenerator specifies '
'`zca_whitening`, but it hasn\'t '
@@ -845,7 +845,7 @@ class ImageDataGenerator(object):
- `'zy'`: Float. Zoom in the y direction.
- `'flip_horizontal'`: Boolean. Horizontal flip.
- `'flip_vertical'`: Boolean. Vertical flip.
- - `'channel_shift_intencity'`: Float. Channel shift intensity.
+ - `'channel_shift_intensity'`: Float. Channel shift intensity.
- `'brightness'`: Float. Brightness shift intensity.
# Returns
@@ -881,7 +881,7 @@ class ImageDataGenerator(object):
x = flip_axis(x, img_row_axis)
if transform_parameters.get('brightness') is not None:
- x = apply_brightness_shift(x, transform_parameters['brightness'])
+ x = apply_brightness_shift(x, transform_parameters['brightness'], False)
return x
@@ -910,6 +910,9 @@ class ImageDataGenerator(object):
Only required if `featurewise_center` or
`featurewise_std_normalization` or `zca_whitening` are set to True.
+ When `rescale` is set to a value, rescaling is applied to
+ sample data before computing the internal data stats.
+
# Arguments
x: Sample data. Should have rank 4.
In case of grayscale data,
@@ -943,6 +946,9 @@ class ImageDataGenerator(object):
np.random.seed(seed)
x = np.copy(x)
+ if self.rescale:
+ x *= self.rescale
+
if augment:
ax = np.zeros(
tuple([rounds * x.shape[0]] + list(x.shape)[1:]),
@@ -967,12 +973,9 @@ class ImageDataGenerator(object):
x /= (self.std + 1e-6)
if self.zca_whitening:
- if scipy is None:
- raise ImportError('Using zca_whitening requires SciPy. '
- 'Install SciPy.')
- flat_x = np.reshape(
- x, (x.shape[0], x.shape[1] * x.shape[2] * x.shape[3]))
- sigma = np.dot(flat_x.T, flat_x) / flat_x.shape[0]
- u, s, _ = linalg.svd(sigma)
- s_inv = 1. / np.sqrt(s[np.newaxis] + self.zca_epsilon)
- self.principal_components = (u * s_inv).dot(u.T)
+ n = len(x)
+ flat_x = np.reshape(x, (n, -1))
+
+ u, s, _ = np.linalg.svd(flat_x.T, full_matrices=False)
+ s_inv = np.sqrt(n) / (s + self.zca_epsilon)
+ self.zca_whitening_matrix = (u * s_inv).dot(u.T)
diff --git a/keras_preprocessing/image/iterator.py b/keras_preprocessing/image/iterator.py
index f5a9b6c..c62b1d3 100644
--- a/keras_preprocessing/image/iterator.py
+++ b/keras_preprocessing/image/iterator.py
@@ -1,12 +1,10 @@
"""Utilities for real-time data augmentation on image data.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
import os
import threading
+
import numpy as np
+
from keras_preprocessing import get_keras_submodule
try:
@@ -14,9 +12,7 @@ try:
except ImportError:
IteratorType = object
-from .utils import (array_to_img,
- img_to_array,
- load_img)
+from .utils import array_to_img, img_to_array, load_img
class Iterator(IteratorType):
@@ -142,7 +138,8 @@ class BatchFromFilesMixin():
save_prefix,
save_format,
subset,
- interpolation):
+ interpolation,
+ keep_aspect_ratio):
"""Sets attributes to use later for processing files into a batch.
# Arguments
@@ -171,6 +168,7 @@ class BatchFromFilesMixin():
"""
self.image_data_generator = image_data_generator
self.target_size = tuple(target_size)
+ self.keep_aspect_ratio = keep_aspect_ratio
if color_mode not in {'rgb', 'rgba', 'grayscale'}:
raise ValueError('Invalid color mode:', color_mode,
'; expected "rgb", "rgba", or "grayscale".')
@@ -227,7 +225,8 @@ class BatchFromFilesMixin():
img = load_img(filepaths[j],
color_mode=self.color_mode,
target_size=self.target_size,
- interpolation=self.interpolation)
+ interpolation=self.interpolation,
+ keep_aspect_ratio=self.keep_aspect_ratio)
x = img_to_array(img, data_format=self.data_format)
# Pillow images should be closed after `load_img`,
# but not PIL images.
diff --git a/keras_preprocessing/image/numpy_array_iterator.py b/keras_preprocessing/image/numpy_array_iterator.py
index f03434b..f90aaec 100644
--- a/keras_preprocessing/image/numpy_array_iterator.py
+++ b/keras_preprocessing/image/numpy_array_iterator.py
@@ -1,11 +1,8 @@
"""Utilities for real-time data augmentation on image data.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
import os
import warnings
+
import numpy as np
from .iterator import Iterator
@@ -39,9 +36,21 @@ class NumpyArrayIterator(Iterator):
(if `save_to_dir` is set).
subset: Subset of data (`"training"` or `"validation"`) if
validation_split is set in ImageDataGenerator.
+ ignore_class_split: Boolean (default: False), ignore difference
+ in number of classes in labels across train and validation
+ split (useful for non-classification tasks)
dtype: Dtype to use for the generated arrays.
"""
+ def __new__(cls, *args, **kwargs):
+ try:
+ from tensorflow.keras.utils import Sequence as TFSequence
+ if TFSequence not in cls.__bases__:
+ cls.__bases__ = cls.__bases__ + (TFSequence,)
+ except ImportError:
+ pass
+ return super(NumpyArrayIterator, cls).__new__(cls)
+
def __init__(self,
x,
y,
@@ -55,6 +64,7 @@ class NumpyArrayIterator(Iterator):
save_prefix='',
save_format='png',
subset=None,
+ ignore_class_split=False,
dtype='float32'):
self.dtype = dtype
if (type(x) is tuple) or (type(x) is list):
@@ -89,7 +99,7 @@ class NumpyArrayIterator(Iterator):
'; expected "training" or "validation".')
split_idx = int(len(x) * image_data_generator._validation_split)
- if (y is not None and not
+ if (y is not None and not ignore_class_split and not
np.array_equal(np.unique(y[:split_idx]),
np.unique(y[split_idx:]))):
raise ValueError('Training and validation subsets '
diff --git a/keras_preprocessing/image/utils.py b/keras_preprocessing/image/utils.py
index 39be888..91c5580 100644
--- a/keras_preprocessing/image/utils.py
+++ b/keras_preprocessing/image/utils.py
@@ -1,17 +1,15 @@
"""Utilities for real-time data augmentation on image data.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
+import io
import os
import warnings
+from pathlib import Path
import numpy as np
try:
- from PIL import ImageEnhance
from PIL import Image as pil_image
+ from PIL import ImageEnhance
except ImportError:
pil_image = None
ImageEnhance = None
@@ -76,14 +74,15 @@ def save_img(path,
def load_img(path, grayscale=False, color_mode='rgb', target_size=None,
- interpolation='nearest'):
+ interpolation='nearest', keep_aspect_ratio=False):
"""Loads an image into PIL format.
# Arguments
- path: Path to image file.
+ path: Path (string), pathlib.Path object, or io.BytesIO stream to image file.
grayscale: DEPRECATED use `color_mode="grayscale"`.
- color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb".
- The desired image format.
+ color_mode: The desired image format. One of "grayscale", "rgb", "rgba".
+ "grayscale" supports 8-bit images and 32-bit signed integer images.
+ Default: "rgb".
target_size: Either `None` (default to original size)
or tuple of ints `(img_height, img_width)`.
interpolation: Interpolation method used to resample the image if the
@@ -91,7 +90,11 @@ def load_img(path, grayscale=False, color_mode='rgb', target_size=None,
Supported methods are "nearest", "bilinear", and "bicubic".
If PIL version 1.1.3 or newer is installed, "lanczos" is also
supported. If PIL version 3.4.0 or newer is installed, "box" and
- "hamming" are also supported. By default, "nearest" is used.
+ "hamming" are also supported.
+ Default: "nearest".
+ keep_aspect_ratio: Boolean, whether to resize images to a target
+ size without aspect ratio distortion. The image is cropped in
+ the center with target aspect ratio before resizing.
# Returns
A PIL Image instance.
@@ -99,6 +102,7 @@ def load_img(path, grayscale=False, color_mode='rgb', target_size=None,
# Raises
ImportError: if PIL is not available.
ValueError: if interpolation method is not supported.
+ TypeError: if 'path' is not path-like or an io.BytesIO stream.
"""
if grayscale is True:
warnings.warn('grayscale is deprecated. Please use '
@@ -107,9 +111,21 @@ def load_img(path, grayscale=False, color_mode='rgb', target_size=None,
if pil_image is None:
raise ImportError('Could not import PIL.Image. '
'The use of `load_img` requires PIL.')
- img = pil_image.open(path)
+ if isinstance(path, io.BytesIO):
+ img = pil_image.open(path)
+ elif isinstance(path, (Path, bytes, str)):
+ if isinstance(path, Path):
+ path = str(path.resolve())
+ with open(path, 'rb') as f:
+ img = pil_image.open(io.BytesIO(f.read()))
+ else:
+ raise TypeError('path should be path-like or io.BytesIO'
+ ', not {}'.format(type(path)))
+
if color_mode == 'grayscale':
- if img.mode != 'L':
+ # if image is not already an 8-bit, 16-bit or 32-bit grayscale image
+ # convert it to an 8-bit grayscale image.
+ if img.mode not in ('L', 'I;16', 'I'):
img = img.convert('L')
elif color_mode == 'rgba':
if img.mode != 'RGBA':
@@ -129,7 +145,30 @@ def load_img(path, grayscale=False, color_mode='rgb', target_size=None,
interpolation,
", ".join(_PIL_INTERPOLATION_METHODS.keys())))
resample = _PIL_INTERPOLATION_METHODS[interpolation]
- img = img.resize(width_height_tuple, resample)
+
+ if keep_aspect_ratio:
+ width, height = img.size
+ target_width, target_height = width_height_tuple
+
+ crop_height = (width * target_height) // target_width
+ crop_width = (height * target_width) // target_height
+
+ # Set back to input height / width
+ # if crop_height / crop_width is not smaller.
+ crop_height = min(height, crop_height)
+ crop_width = min(width, crop_width)
+
+ crop_box_hstart = (height - crop_height) // 2
+ crop_box_wstart = (width - crop_width) // 2
+ crop_box_wend = crop_box_wstart + crop_width
+ crop_box_hend = crop_box_hstart + crop_height
+ crop_box = [
+ crop_box_wstart, crop_box_hstart, crop_box_wend,
+ crop_box_hend
+ ]
+ img = img.resize(width_height_tuple, resample, box=crop_box)
+ else:
+ img = img.resize(width_height_tuple, resample)
return img
@@ -202,12 +241,11 @@ def _list_valid_filenames_in_directory(directory, white_list_formats, split,
"""
dirname = os.path.basename(directory)
if split:
- num_files = len(list(
- _iter_valid_files(directory, white_list_formats, follow_links)))
+ all_files = list(_iter_valid_files(directory, white_list_formats,
+ follow_links))
+ num_files = len(all_files)
start, stop = int(split[0] * num_files), int(split[1] * num_files)
- valid_files = list(
- _iter_valid_files(
- directory, white_list_formats, follow_links))[start: stop]
+ valid_files = all_files[start: stop]
else:
valid_files = _iter_valid_files(
directory, white_list_formats, follow_links)
@@ -228,11 +266,13 @@ def array_to_img(x, data_format='channels_last', scale=True, dtype='float32'):
# Arguments
x: Input Numpy array.
- data_format: Image data format.
- either "channels_first" or "channels_last".
- scale: Whether to rescale image values
- to be within `[0, 255]`.
+ data_format: Image data format, either "channels_first" or "channels_last".
+ Default: "channels_last".
+ scale: Whether to rescale the image such that minimum and maximum values
+ are 0 and 255 respectively.
+ Default: True.
dtype: Dtype to use.
+ Default: "float32".
# Returns
A PIL Image instance.
@@ -258,7 +298,7 @@ def array_to_img(x, data_format='channels_last', scale=True, dtype='float32'):
if data_format == 'channels_first':
x = x.transpose(1, 2, 0)
if scale:
- x = x + max(-np.min(x), 0)
+ x = x - np.min(x)
x_max = np.max(x)
if x_max != 0:
x /= x_max
@@ -271,6 +311,9 @@ def array_to_img(x, data_format='channels_last', scale=True, dtype='float32'):
return pil_image.fromarray(x.astype('uint8'), 'RGB')
elif x.shape[2] == 1:
# grayscale
+ if np.max(x) > 255:
+ # 32-bit signed integer grayscale image. PIL mode "I"
+ return pil_image.fromarray(x[:, :, 0].astype('int32'), 'I')
return pil_image.fromarray(x[:, :, 0].astype('uint8'), 'L')
else:
raise ValueError('Unsupported channel number: %s' % (x.shape[2],))
diff --git a/keras_preprocessing/sequence.py b/keras_preprocessing/sequence.py
index 0e03002..74660ce 100644
--- a/keras_preprocessing/sequence.py
+++ b/keras_preprocessing/sequence.py
@@ -1,15 +1,10 @@
# -*- coding: utf-8 -*-
"""Utilities for preprocessing sequence data.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+import json
+import random
import numpy as np
-import random
-import json
-from six.moves import range
-import six
def pad_sequences(sequences, maxlen=None, dtype='int32',
@@ -23,7 +18,8 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
or the length of the longest sequence otherwise.
Sequences that are shorter than `num_timesteps`
- are padded with `value` at the end.
+ are padded with `value` at the beginning, or at the end
+ if `padding='post'`.
Sequences longer than `num_timesteps` are truncated
so that they fit the desired length.
@@ -56,9 +52,18 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
num_samples = len(sequences)
lengths = []
+ sample_shape = ()
+ flag = True
+
+ # take the sample shape from the first non empty sequence
+ # checking for consistency in the main loop below.
+
for x in sequences:
try:
lengths.append(len(x))
+ if flag and len(x):
+ sample_shape = np.asarray(x).shape[1:]
+ flag = False
except TypeError:
raise ValueError('`sequences` must be a list of iterables. '
'Found non-iterable: ' + str(x))
@@ -66,16 +71,8 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
if maxlen is None:
maxlen = np.max(lengths)
- # take the sample shape from the first non empty sequence
- # checking for consistency in the main loop below.
- sample_shape = tuple()
- for s in sequences:
- if len(s) > 0:
- sample_shape = np.asarray(s).shape[1:]
- break
-
is_dtype_str = np.issubdtype(dtype, np.str_) or np.issubdtype(dtype, np.unicode_)
- if isinstance(value, six.string_types) and dtype != object and not is_dtype_str:
+ if isinstance(value, str) and dtype != object and not is_dtype_str:
raise ValueError("`dtype` {} is not compatible with `value`'s type: {}\n"
"You should set `dtype=object` for variable length strings."
.format(dtype, type(value)))
diff --git a/keras_preprocessing/text.py b/keras_preprocessing/text.py
index 573e411..9c30dc9 100644
--- a/keras_preprocessing/text.py
+++ b/keras_preprocessing/text.py
@@ -1,26 +1,14 @@
# -*- coding: utf-8 -*-
"""Utilities for text input preprocessing.
"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import string
-import sys
+import json
import warnings
-from collections import OrderedDict
-from collections import defaultdict
+from collections import OrderedDict, defaultdict
from hashlib import md5
-import json
import numpy as np
-from six.moves import range
-from six.moves import zip
-if sys.version_info < (3,):
- maketrans = string.maketrans
-else:
- maketrans = str.maketrans
+maketrans = str.maketrans
def text_to_word_sequence(text,
@@ -42,20 +30,9 @@ def text_to_word_sequence(text,
if lower:
text = text.lower()
- if sys.version_info < (3,):
- if isinstance(text, unicode):
- translate_map = dict((ord(c), unicode(split)) for c in filters)
- text = text.translate(translate_map)
- elif len(split) == 1:
- translate_map = maketrans(filters, split * len(filters))
- text = text.translate(translate_map)
- else:
- for c in filters:
- text = text.replace(c, split)
- else:
- translate_dict = dict((c, split) for c in filters)
- translate_map = maketrans(translate_dict)
- text = text.translate(translate_map)
+ translate_dict = {c: split for c in filters}
+ translate_map = maketrans(translate_dict)
+ text = text.translate(translate_map)
seq = text.split(split)
return [i for i in seq if i]
@@ -64,7 +41,8 @@ def text_to_word_sequence(text,
def one_hot(text, n,
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True,
- split=' '):
+ split=' ',
+ analyzer=None):
"""One-hot encodes a text into a list of word indexes of size n.
This is a wrapper to the `hashing_trick` function using `hash` as the
@@ -78,6 +56,7 @@ def one_hot(text, n,
includes basic punctuation, tabs, and newlines.
lower: boolean. Whether to set the text to lowercase.
split: str. Separator for word splitting.
+ analyzer: function. Custom analyzer to split the text
# Returns
List of integers in [1, n]. Each integer encodes a word
@@ -87,14 +66,16 @@ def one_hot(text, n,
hash_function=hash,
filters=filters,
lower=lower,
- split=split)
+ split=split,
+ analyzer=analyzer)
def hashing_trick(text, n,
hash_function=None,
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True,
- split=' '):
+ split=' ',
+ analyzer=None):
"""Converts a text to a sequence of indexes in a fixed-size hashing space.
# Arguments
@@ -110,6 +91,7 @@ def hashing_trick(text, n,
includes basic punctuation, tabs, and newlines.
lower: boolean. Whether to set the text to lowercase.
split: str. Separator for word splitting.
+ analyzer: function. Custom analyzer to split the text
# Returns
A list of integer word indices (unicity non-guaranteed).
@@ -129,10 +111,14 @@ def hashing_trick(text, n,
def hash_function(w):
return int(md5(w.encode()).hexdigest(), 16)
- seq = text_to_word_sequence(text,
- filters=filters,
- lower=lower,
- split=split)
+ if analyzer is None:
+ seq = text_to_word_sequence(text,
+ filters=filters,
+ lower=lower,
+ split=split)
+ else:
+ seq = analyzer(text)
+
return [(hash_function(w) % (n - 1) + 1) for w in seq]
@@ -156,6 +142,8 @@ class Tokenizer(object):
char_level: if True, every character will be treated as a token.
oov_token: if given, it will be added to word_index and used to
replace out-of-vocabulary words during text_to_sequence calls
+ analyzer: function. Custom analyzer to split the text.
+ The default analyzer is text_to_word_sequence
By default, all punctuation is removed, turning the texts into
space-separated sequences of words
@@ -171,13 +159,14 @@ class Tokenizer(object):
split=' ',
char_level=False,
oov_token=None,
- document_count=0,
+ analyzer=None,
**kwargs):
# Legacy support
if 'nb_words' in kwargs:
warnings.warn('The `nb_words` argument in `Tokenizer` '
'has been renamed `num_words`.')
num_words = kwargs.pop('nb_words')
+ document_count = kwargs.pop('document_count', 0)
if kwargs:
raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))
@@ -191,8 +180,9 @@ class Tokenizer(object):
self.char_level = char_level
self.oov_token = oov_token
self.index_docs = defaultdict(int)
- self.word_index = dict()
- self.index_word = dict()
+ self.word_index = {}
+ self.index_word = {}
+ self.analyzer = analyzer
def fit_on_texts(self, texts):
"""Updates internal vocabulary based on a list of texts.
@@ -217,10 +207,13 @@ class Tokenizer(object):
text = text.lower()
seq = text
else:
- seq = text_to_word_sequence(text,
- self.filters,
- self.lower,
- self.split)
+ if self.analyzer is None:
+ seq = text_to_word_sequence(text,
+ filters=self.filters,
+ lower=self.lower,
+ split=self.split)
+ else:
+ seq = self.analyzer(text)
for w in seq:
if w in self.word_counts:
self.word_counts[w] += 1
@@ -241,9 +234,9 @@ class Tokenizer(object):
# note that index 0 is reserved, never assigned to an existing word
self.word_index = dict(
- list(zip(sorted_voc, list(range(1, len(sorted_voc) + 1)))))
+ zip(sorted_voc, list(range(1, len(sorted_voc) + 1))))
- self.index_word = dict((c, w) for w, c in self.word_index.items())
+ self.index_word = {c: w for w, c in self.word_index.items()}
for w, c in list(self.word_docs.items()):
self.index_docs[self.word_index[w]] = c
@@ -304,10 +297,13 @@ class Tokenizer(object):
text = text.lower()
seq = text
else:
- seq = text_to_word_sequence(text,
- self.filters,
- self.lower,
- self.split)
+ if self.analyzer is None:
+ seq = text_to_word_sequence(text,
+ filters=self.filters,
+ lower=self.lower,
+ split=self.split)
+ else:
+ seq = self.analyzer(text)
vect = []
for w in seq:
i = self.word_index.get(w)
diff --git a/setup.cfg b/setup.cfg
index 8e548af..fea2ab3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,4 @@
-[tool:pytest]
+[tool:pytest]
addopts = -v -n 2 --durations=20
norecursedirs = build
@@ -11,5 +11,4 @@ pep8ignore = * E731 \
[egg_info]
tag_build =
tag_date = 0
-tag_svn_revision = 0
diff --git a/setup.py b/setup.py
index bb8dfac..bdbb88c 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,4 @@
-import sys
-
-from setuptools import setup
-from setuptools import find_packages
+from setuptools import find_packages, setup
long_description = '''
Keras Preprocessing is the data preprocessing
@@ -18,26 +15,25 @@ from an up-to-date installation of Keras:
from keras import preprocessing
```
-Keras Preprocessing is compatible with Python 2.7-3.6
+Keras Preprocessing is compatible with Python 3.6
and is distributed under the MIT license.
'''
setup(name='Keras_Preprocessing',
- version='1.1.0',
+ version='1.1.2',
description='Easy data preprocessing and data augmentation '
'for deep learning models',
long_description=long_description,
author='Keras Team',
url='https://github.com/keras-team/keras-preprocessing',
download_url='https://github.com/keras-team/'
- 'keras-preprocessing/tarball/1.1.0',
+ 'keras-preprocessing/tarball/1.1.2',
license='MIT',
- install_requires=['numpy>=1.9.1',
- 'six>=1.9.0'],
+ install_requires=['numpy>=1.9.1'],
extras_require={
'tests': ['pandas',
- 'Pillow' if sys.version_info >= (3, 0) else 'pillow',
- 'tensorflow==1.7', # CPU version
+ 'Pillow',
+ 'tensorflow', # CPU version
'keras',
'pytest',
'pytest-xdist',
@@ -52,8 +48,6 @@ setup(name='Keras_Preprocessing',
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: MIT License',
- 'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Topic :: Software Development :: Libraries',
diff --git a/tests/image/affine_transformations_test.py b/tests/image/affine_transformations_test.py
index 2eba2e1..9a9079f 100644
--- a/tests/image/affine_transformations_test.py
+++ b/tests/image/affine_transformations_test.py
@@ -15,16 +15,135 @@ def test_random_transforms():
def test_deterministic_transform():
x = np.ones((3, 3, 3))
x_rotated = np.array([[[0., 0., 0.],
- [0., 0., 0.],
- [1., 1., 1.]],
- [[0., 0., 0.],
+ [1., 1., 1.],
+ [0., 0., 0.]],
+ [[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]],
[[0., 0., 0.],
- [0., 0., 0.],
- [1., 1., 1.]]])
- assert np.allclose(affine_transformations.apply_affine_transform(
- x, theta=45, channel_axis=2, fill_mode='constant'), x_rotated)
+ [1., 1., 1.],
+ [0., 0., 0.]]])
+ assert np.allclose(
+ affine_transformations.apply_affine_transform(x,
+ theta=45,
+ row_axis=0,
+ col_axis=1,
+ channel_axis=2,
+ fill_mode='constant'),
+ x_rotated)
+
+
+def test_matrix_center():
+ x = np.expand_dims(np.array([
+ [0, 1],
+ [0, 0],
+ ]), -1)
+ x_rotated90 = np.expand_dims(np.array([
+ [1, 0],
+ [0, 0],
+ ]), -1)
+
+ assert np.allclose(
+ affine_transformations.apply_affine_transform(x,
+ theta=90,
+ row_axis=0,
+ col_axis=1,
+ channel_axis=2),
+ x_rotated90)
+
+
+def test_translation():
+ x = np.array([
+ [0, 0, 0, 0],
+ [0, 1, 0, 0],
+ [0, 0, 0, 0],
+ ])
+ x_up = np.array([
+ [0, 1, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ ])
+ x_dn = np.array([
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 1, 0, 0],
+ ])
+ x_left = np.array([
+ [0, 0, 0, 0],
+ [1, 0, 0, 0],
+ [0, 0, 0, 0],
+ ])
+ x_right = np.array([
+ [0, 0, 0, 0],
+ [0, 0, 1, 0],
+ [0, 0, 0, 0],
+ ])
+
+ # Channels first
+ x_test = np.expand_dims(x, 0)
+
+ # Horizontal translation
+ assert np.alltrue(x_left == np.squeeze(
+ affine_transformations.apply_affine_transform(x_test, tx=1)))
+ assert np.alltrue(x_right == np.squeeze(
+ affine_transformations.apply_affine_transform(x_test, tx=-1)))
+
+ # change axes: x<->y
+ assert np.alltrue(x_left == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, ty=1, row_axis=2, col_axis=1)))
+ assert np.alltrue(x_right == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, ty=-1, row_axis=2, col_axis=1)))
+
+ # Vertical translation
+ assert np.alltrue(x_up == np.squeeze(
+ affine_transformations.apply_affine_transform(x_test, ty=1)))
+ assert np.alltrue(x_dn == np.squeeze(
+ affine_transformations.apply_affine_transform(x_test, ty=-1)))
+
+ # change axes: x<->y
+ assert np.alltrue(x_up == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, tx=1, row_axis=2, col_axis=1)))
+ assert np.alltrue(x_dn == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, tx=-1, row_axis=2, col_axis=1)))
+
+ # Channels last
+ x_test = np.expand_dims(x, -1)
+
+ # Horizontal translation
+ assert np.alltrue(x_left == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, tx=1, row_axis=0, col_axis=1, channel_axis=2)))
+ assert np.alltrue(x_right == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, tx=-1, row_axis=0, col_axis=1, channel_axis=2)))
+
+ # change axes: x<->y
+ assert np.alltrue(x_left == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, ty=1, row_axis=1, col_axis=0, channel_axis=2)))
+ assert np.alltrue(x_right == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, ty=-1, row_axis=1, col_axis=0, channel_axis=2)))
+
+ # Vertical translation
+ assert np.alltrue(x_up == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, ty=1, row_axis=0, col_axis=1, channel_axis=2)))
+ assert np.alltrue(x_dn == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, ty=-1, row_axis=0, col_axis=1, channel_axis=2)))
+
+ # change axes: x<->y
+ assert np.alltrue(x_up == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, tx=1, row_axis=1, col_axis=0, channel_axis=2)))
+ assert np.alltrue(x_dn == np.squeeze(
+ affine_transformations.apply_affine_transform(
+ x_test, tx=-1, row_axis=1, col_axis=0, channel_axis=2)))
def test_random_zoom():
@@ -46,7 +165,7 @@ def test_apply_brightness_shift_error(monkeypatch):
def test_random_brightness(monkeypatch):
monkeypatch.setattr(affine_transformations,
- 'apply_brightness_shift', lambda x, y: (x, y))
+ 'apply_brightness_shift', lambda x, y, z: (x, y))
assert (0, 3.) == affine_transformations.random_brightness(0, (3, 3))
@@ -55,6 +174,33 @@ def test_random_brightness_error():
affine_transformations.random_brightness(0, [0])
+def test_random_brightness_scale():
+ img = np.ones((1, 1, 3)) * 128
+ zeros = np.zeros((1, 1, 3))
+ must_be_128 = affine_transformations.random_brightness(img, [1, 1], False)
+ assert np.array_equal(img, must_be_128)
+ must_be_0 = affine_transformations.random_brightness(img, [1, 1], True)
+ assert np.array_equal(zeros, must_be_0)
+
+
+def test_random_brightness_scale_outside_range_positive():
+ img = np.ones((1, 1, 3)) * 1024
+ zeros = np.zeros((1, 1, 3))
+ must_be_1024 = affine_transformations.random_brightness(img, [1, 1], False)
+ assert np.array_equal(img, must_be_1024)
+ must_be_0 = affine_transformations.random_brightness(img, [1, 1], True)
+ assert np.array_equal(zeros, must_be_0)
+
+
+def test_random_brightness_scale_outside_range_negative():
+ img = np.ones((1, 1, 3)) * -1024
+ zeros = np.zeros((1, 1, 3))
+ must_be_neg_1024 = affine_transformations.random_brightness(img, [1, 1], False)
+ assert np.array_equal(img, must_be_neg_1024)
+ must_be_0 = affine_transformations.random_brightness(img, [1, 1], True)
+ assert np.array_equal(zeros, must_be_0)
+
+
def test_apply_affine_transform_error(monkeypatch):
monkeypatch.setattr(affine_transformations, 'scipy', None)
with pytest.raises(ImportError):
diff --git a/tests/image/dataframe_iterator_test.py b/tests/image/dataframe_iterator_test.py
index cc89fa1..632279a 100644
--- a/tests/image/dataframe_iterator_test.py
+++ b/tests/image/dataframe_iterator_test.py
@@ -5,11 +5,9 @@ import shutil
import numpy as np
import pandas as pd
import pytest
-
from PIL import Image
-from keras_preprocessing.image import dataframe_iterator
-from keras_preprocessing.image import image_data_generator
+from keras_preprocessing.image import dataframe_iterator, image_data_generator
@pytest.fixture(scope='module')
@@ -256,7 +254,7 @@ def test_dataframe_iterator_class_mode_categorical_multi_label(all_test_images,
assert isinstance(batch_y, np.ndarray)
assert batch_y.shape == (len(batch_x), 2)
for labels in batch_y:
- assert all(l in {0, 1} for l in labels)
+ assert all(label in {0, 1} for label in labels)
# on first 3 batches
df = pd.DataFrame({
@@ -272,7 +270,7 @@ def test_dataframe_iterator_class_mode_categorical_multi_label(all_test_images,
assert isinstance(batch_y, np.ndarray)
assert batch_y.shape == (len(batch_x), 3)
for labels in batch_y:
- assert all(l in {0, 1} for l in labels)
+ assert all(label in {0, 1} for label in labels)
assert (batch_y[0] == np.array([1, 1, 0])).all()
assert (batch_y[1] == np.array([0, 1, 0])).all()
assert (batch_y[2] == np.array([0, 0, 1])).all()
@@ -647,5 +645,47 @@ def test_dataframe_iterator_with_subdirs(all_test_images, tmpdir):
assert set(df_iterator.filenames) == set(filenames)
+def test_dataframe_iterator_classes_indices_order(all_test_images, tmpdir):
+ # save the images in the paths
+ count = 0
+ filenames = []
+ for test_images in all_test_images:
+ for im in test_images:
+ filename = 'image-{}.png'.format(count)
+ im.save(str(tmpdir / filename))
+ filenames.append(filename)
+ count += 1
+
+ # Test the class_indices without classes input
+ generator = image_data_generator.ImageDataGenerator()
+ label_opt = ['a', 'b', ['a'], ['b'], ['a', 'b'], ['b', 'a']]
+ df_f = pd.DataFrame({
+ "filename": filenames,
+ "class": ['a', 'b'] + [random.choice(label_opt) for _ in filenames[:-2]]
+ })
+ flow_forward_iter = generator.flow_from_dataframe(df_f, str(tmpdir))
+ label_rev = ['b', 'a', ['b'], ['a'], ['b', 'a'], ['a', 'b']]
+ df_r = pd.DataFrame({
+ "filename": filenames,
+ "class": ['b', 'a'] + [random.choice(label_rev) for _ in filenames[:-2]]
+ })
+ flow_backward_iter = generator.flow_from_dataframe(df_r, str(tmpdir))
+
+ # check class_indices
+ assert flow_forward_iter.class_indices == flow_backward_iter.class_indices
+
+ # Test the class_indices with classes input
+ generator_2 = image_data_generator.ImageDataGenerator()
+ df_f2 = pd.DataFrame([['data/A.jpg', 'A'], ['data/B.jpg', 'B']],
+ columns=['filename', 'class'])
+ flow_forward = generator_2.flow_from_dataframe(df_f2, classes=['A', 'B'])
+ df_b2 = pd.DataFrame([['data/A.jpg', 'A'], ['data/B.jpg', 'B']],
+ columns=['filename', 'class'])
+ flow_backward = generator_2.flow_from_dataframe(df_b2, classes=['B', 'A'])
+
+ # check class_indices
+ assert flow_forward.class_indices != flow_backward.class_indices
+
+
if __name__ == '__main__':
pytest.main([__file__])
diff --git a/tests/image/directory_iterator_test.py b/tests/image/directory_iterator_test.py
index 36230b7..5225e11 100644
--- a/tests/image/directory_iterator_test.py
+++ b/tests/image/directory_iterator_test.py
@@ -4,7 +4,6 @@ import tempfile
import numpy as np
import pytest
-
from PIL import Image
from keras_preprocessing.image import image_data_generator
@@ -16,23 +15,35 @@ def all_test_images():
rgb_images = []
rgba_images = []
gray_images = []
+ gray_images_16bit = []
+ gray_images_32bit = []
for n in range(8):
bias = np.random.rand(img_w, img_h, 1) * 64
variance = np.random.rand(img_w, img_h, 1) * (255 - 64)
+ # RGB
imarray = np.random.rand(img_w, img_h, 3) * variance + bias
im = Image.fromarray(imarray.astype('uint8')).convert('RGB')
rgb_images.append(im)
-
+ # RGBA
imarray = np.random.rand(img_w, img_h, 4) * variance + bias
im = Image.fromarray(imarray.astype('uint8')).convert('RGBA')
rgba_images.append(im)
-
+ # 8-bit grayscale
imarray = np.random.rand(img_w, img_h, 1) * variance + bias
- im = Image.fromarray(
- imarray.astype('uint8').squeeze()).convert('L')
+ im = Image.fromarray(imarray.astype('uint8').squeeze()).convert('L')
gray_images.append(im)
+ # 16-bit grayscale
+ imarray = np.array(
+ np.random.randint(-2147483648, 2147483647, (img_w, img_h))
+ )
+ im = Image.fromarray(imarray.astype('uint16'))
+ gray_images_16bit.append(im)
+ # 32-bit grayscale
+ im = Image.fromarray(imarray.astype('uint32'))
+ gray_images_32bit.append(im)
- return [rgb_images, rgba_images, gray_images]
+ return [rgb_images, rgba_images,
+ gray_images, gray_images_16bit, gray_images_32bit]
def test_directory_iterator(all_test_images, tmpdir):
@@ -101,7 +112,7 @@ def test_directory_iterator(all_test_images, tmpdir):
color_mode='rgb',
batch_size=3,
class_mode='categorical')
- assert len(dir_seq) == np.ceil(count / 3)
+ assert len(dir_seq) == np.ceil(count / 3.)
x1, y1 = dir_seq[1]
assert x1.shape == (3, 26, 26, 3)
assert y1.shape == (3, num_classes)
@@ -109,7 +120,7 @@ def test_directory_iterator(all_test_images, tmpdir):
assert (x1 == 0).all()
with pytest.raises(ValueError):
- x1, y1 = dir_seq[9]
+ x1, y1 = dir_seq[14] # there are 40 images and batch size is 3
def test_directory_iterator_class_mode_input(all_test_images, tmpdir):
@@ -140,9 +151,9 @@ def test_directory_iterator_class_mode_input(all_test_images, tmpdir):
@pytest.mark.parametrize('validation_split,num_training', [
- (0.25, 18),
- (0.50, 12),
- (0.75, 6),
+ (0.25, 30),
+ (0.50, 20),
+ (0.75, 10),
])
def test_directory_iterator_with_validation_split(all_test_images,
validation_split,
diff --git a/tests/image/image_data_generator_test.py b/tests/image/image_data_generator_test.py
index f5f9e1c..f6071fd 100644
--- a/tests/image/image_data_generator_test.py
+++ b/tests/image/image_data_generator_test.py
@@ -1,10 +1,8 @@
import numpy as np
import pytest
-
from PIL import Image
-from keras_preprocessing.image import image_data_generator
-from keras_preprocessing.image import utils
+from keras_preprocessing.image import image_data_generator, utils
@pytest.fixture(scope='module')
@@ -81,6 +79,12 @@ def test_image_data_generator_with_validation_split(all_test_images):
shuffle=False, batch_size=10,
subset='validation')
+ # test non categorical labels with validation split
+ generator.flow(images, labels,
+ shuffle=False, batch_size=10,
+ ignore_class_split=True,
+ subset='validation')
+
labels = np.concatenate([
np.zeros((int(len(images) / 4),)),
np.ones((int(len(images) / 4),)),
@@ -383,14 +387,14 @@ def test_deterministic_transform():
x[:, ::-1, :])
x = np.ones((3, 3, 3))
x_rotated = np.array([[[0., 0., 0.],
- [0., 0., 0.],
- [1., 1., 1.]],
- [[0., 0., 0.],
+ [1., 1., 1.],
+ [0., 0., 0.]],
+ [[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]],
[[0., 0., 0.],
- [0., 0., 0.],
- [1., 1., 1.]]])
+ [1., 1., 1.],
+ [0., 0., 0.]]])
assert np.allclose(generator.apply_transform(x, {'theta': 45}),
x_rotated)
@@ -442,5 +446,51 @@ def test_random_transforms():
assert transform_dict['brightness'] is None
+def test_fit_rescale(all_test_images):
+ rescale = 1. / 255
+
+ for test_images in all_test_images:
+ img_list = []
+ for im in test_images:
+ img_list.append(utils.img_to_array(im)[None, ...])
+ images = np.vstack(img_list)
+
+ # featurewise_center test
+ generator = image_data_generator.ImageDataGenerator(
+ rescale=rescale,
+ featurewise_center=True,
+ dtype='float64')
+ generator.fit(images)
+ batch = generator.flow(images, batch_size=8).next()
+ assert abs(np.mean(batch)) < 1e-6
+
+ # featurewise_std_normalization test
+ generator = image_data_generator.ImageDataGenerator(
+ rescale=rescale,
+ featurewise_center=True,
+ featurewise_std_normalization=True,
+ dtype='float64')
+ generator.fit(images)
+ batch = generator.flow(images, batch_size=8).next()
+ assert abs(np.mean(batch)) < 1e-6
+ assert abs(1 - np.std(batch)) < 1e-5
+
+ # zca_whitening test
+ generator = image_data_generator.ImageDataGenerator(
+ rescale=rescale,
+ featurewise_center=True,
+ zca_whitening=True,
+ dtype='float64')
+ generator.fit(images)
+ batch = generator.flow(images, batch_size=8).next()
+ batch = np.reshape(batch,
+ (batch.shape[0],
+ batch.shape[1] * batch.shape[2] * batch.shape[3]))
+ # Y * Y_T = n * I, where Y = W * X
+ identity = np.dot(batch, batch.T) / batch.shape[0]
+ assert ((np.abs(identity) - np.identity(identity.shape[0]))
+ < 1e-6).all()
+
+
if __name__ == '__main__':
pytest.main([__file__])
diff --git a/tests/image/numpy_array_iterator_test.py b/tests/image/numpy_array_iterator_test.py
index ff75f66..17a0301 100644
--- a/tests/image/numpy_array_iterator_test.py
+++ b/tests/image/numpy_array_iterator_test.py
@@ -1,10 +1,8 @@
import numpy as np
import pytest
-
from PIL import Image
-from keras_preprocessing.image import numpy_array_iterator
-from keras_preprocessing.image import utils
+from keras_preprocessing.image import numpy_array_iterator, utils
from keras_preprocessing.image.image_data_generator import ImageDataGenerator
diff --git a/tests/image/utils_test.py b/tests/image/utils_test.py
index 7fbac47..f70b5aa 100644
--- a/tests/image/utils_test.py
+++ b/tests/image/utils_test.py
@@ -1,4 +1,9 @@
+import io
+import resource
+from pathlib import Path
+
import numpy as np
+import PIL
import pytest
from keras_preprocessing.image import utils
@@ -20,6 +25,9 @@ def test_validate_filename(tmpdir):
def test_load_img(tmpdir):
filename_rgb = str(tmpdir / 'rgb_utils.png')
filename_rgba = str(tmpdir / 'rgba_utils.png')
+ filename_grayscale_8bit = str(tmpdir / 'grayscale_8bit_utils.png')
+ filename_grayscale_16bit = str(tmpdir / 'grayscale_16bit_utils.tiff')
+ filename_grayscale_32bit = str(tmpdir / 'grayscale_32bit_utils.tiff')
original_rgb_array = np.array(255 * np.random.rand(100, 100, 3),
dtype=np.uint8)
@@ -31,6 +39,26 @@ def test_load_img(tmpdir):
original_rgba = utils.array_to_img(original_rgba_array, scale=False)
original_rgba.save(filename_rgba)
+ original_grayscale_8bit_array = np.array(255 * np.random.rand(100, 100, 1),
+ dtype=np.uint8)
+ original_grayscale_8bit = utils.array_to_img(original_grayscale_8bit_array,
+ scale=False)
+ original_grayscale_8bit.save(filename_grayscale_8bit)
+
+ original_grayscale_16bit_array = np.array(
+ np.random.randint(-2147483648, 2147483647, (100, 100, 1)), dtype=np.int16
+ )
+ original_grayscale_16bit = utils.array_to_img(original_grayscale_16bit_array,
+ scale=False, dtype='int16')
+ original_grayscale_16bit.save(filename_grayscale_16bit)
+
+ original_grayscale_32bit_array = np.array(
+ np.random.randint(-2147483648, 2147483647, (100, 100, 1)), dtype=np.int32
+ )
+ original_grayscale_32bit = utils.array_to_img(original_grayscale_32bit_array,
+ scale=False, dtype='int32')
+ original_grayscale_32bit.save(filename_grayscale_32bit)
+
# Test that loaded image is exactly equal to original.
loaded_im = utils.load_img(filename_rgb)
@@ -48,6 +76,27 @@ def test_load_img(tmpdir):
assert loaded_im_array.shape == (original_rgb_array.shape[0],
original_rgb_array.shape[1], 1)
+ loaded_im = utils.load_img(filename_grayscale_8bit, color_mode='grayscale')
+ loaded_im_array = utils.img_to_array(loaded_im)
+ assert loaded_im_array.shape == original_grayscale_8bit_array.shape
+ assert np.all(loaded_im_array == original_grayscale_8bit_array)
+
+ loaded_im = utils.load_img(filename_grayscale_16bit, color_mode='grayscale')
+ loaded_im_array = utils.img_to_array(loaded_im, dtype='int16')
+ assert loaded_im_array.shape == original_grayscale_16bit_array.shape
+ assert np.all(loaded_im_array == original_grayscale_16bit_array)
+ # test casting int16 image to float32
+ loaded_im_array = utils.img_to_array(loaded_im)
+ assert np.allclose(loaded_im_array, original_grayscale_16bit_array)
+
+ loaded_im = utils.load_img(filename_grayscale_32bit, color_mode='grayscale')
+ loaded_im_array = utils.img_to_array(loaded_im, dtype='int32')
+ assert loaded_im_array.shape == original_grayscale_32bit_array.shape
+ assert np.all(loaded_im_array == original_grayscale_32bit_array)
+ # test casting int32 image to float32
+ loaded_im_array = utils.img_to_array(loaded_im)
+ assert np.allclose(loaded_im_array, original_grayscale_32bit_array)
+
# Test that nothing is changed when target size is equal to original.
loaded_im = utils.load_img(filename_rgb, target_size=(100, 100))
@@ -67,6 +116,24 @@ def test_load_img(tmpdir):
assert loaded_im_array.shape == (original_rgba_array.shape[0],
original_rgba_array.shape[1], 1)
+ loaded_im = utils.load_img(filename_grayscale_8bit, color_mode='grayscale',
+ target_size=(100, 100))
+ loaded_im_array = utils.img_to_array(loaded_im)
+ assert loaded_im_array.shape == original_grayscale_8bit_array.shape
+ assert np.all(loaded_im_array == original_grayscale_8bit_array)
+
+ loaded_im = utils.load_img(filename_grayscale_16bit, color_mode='grayscale',
+ target_size=(100, 100))
+ loaded_im_array = utils.img_to_array(loaded_im, dtype='int16')
+ assert loaded_im_array.shape == original_grayscale_16bit_array.shape
+ assert np.all(loaded_im_array == original_grayscale_16bit_array)
+
+ loaded_im = utils.load_img(filename_grayscale_32bit, color_mode='grayscale',
+ target_size=(100, 100))
+ loaded_im_array = utils.img_to_array(loaded_im, dtype='int32')
+ assert loaded_im_array.shape == original_grayscale_32bit_array.shape
+ assert np.all(loaded_im_array == original_grayscale_32bit_array)
+
# Test down-sampling with bilinear interpolation.
loaded_im = utils.load_img(filename_rgb, target_size=(25, 25))
@@ -83,6 +150,21 @@ def test_load_img(tmpdir):
loaded_im_array = utils.img_to_array(loaded_im)
assert loaded_im_array.shape == (25, 25, 1)
+ loaded_im = utils.load_img(filename_grayscale_8bit, color_mode='grayscale',
+ target_size=(25, 25))
+ loaded_im_array = utils.img_to_array(loaded_im)
+ assert loaded_im_array.shape == (25, 25, 1)
+
+ loaded_im = utils.load_img(filename_grayscale_16bit, color_mode='grayscale',
+ target_size=(25, 25))
+ loaded_im_array = utils.img_to_array(loaded_im, dtype='int16')
+ assert loaded_im_array.shape == (25, 25, 1)
+
+ loaded_im = utils.load_img(filename_grayscale_32bit, color_mode='grayscale',
+ target_size=(25, 25))
+ loaded_im_array = utils.img_to_array(loaded_im, dtype='int32')
+ assert loaded_im_array.shape == (25, 25, 1)
+
# Test down-sampling with nearest neighbor interpolation.
loaded_im_nearest = utils.load_img(filename_rgb, target_size=(25, 25),
@@ -98,6 +180,43 @@ def test_load_img(tmpdir):
assert loaded_im_array_nearest.shape == (25, 25, 4)
assert np.any(loaded_im_array_nearest != loaded_im_array)
+ loaded_im = utils.load_img(filename_grayscale_8bit, color_mode='grayscale',
+ target_size=(25, 25), interpolation="nearest")
+ loaded_im_array = utils.img_to_array(loaded_im)
+ assert loaded_im_array.shape == (25, 25, 1)
+
+ loaded_im = utils.load_img(filename_grayscale_16bit, color_mode='grayscale',
+ target_size=(25, 25), interpolation="nearest")
+ loaded_im_array = utils.img_to_array(loaded_im, dtype='int16')
+ assert loaded_im_array.shape == (25, 25, 1)
+
+ loaded_im = utils.load_img(filename_grayscale_32bit, color_mode='grayscale',
+ target_size=(25, 25), interpolation="nearest")
+ loaded_im_array = utils.img_to_array(loaded_im, dtype='int32')
+ assert loaded_im_array.shape == (25, 25, 1)
+
+ # Test different path type
+ with open(filename_grayscale_32bit, 'rb') as f:
+ _path = io.BytesIO(f.read()) # io.Bytesio
+ loaded_im = utils.load_img(_path, color_mode='grayscale')
+ loaded_im_array = utils.img_to_array(loaded_im, dtype=np.int32)
+ assert np.all(loaded_im_array == original_grayscale_32bit_array)
+
+ _path = filename_grayscale_32bit # str
+ loaded_im = utils.load_img(_path, color_mode='grayscale')
+ loaded_im_array = utils.img_to_array(loaded_im, dtype=np.int32)
+ assert np.all(loaded_im_array == original_grayscale_32bit_array)
+
+ _path = filename_grayscale_32bit.encode() # bytes
+ loaded_im = utils.load_img(_path, color_mode='grayscale')
+ loaded_im_array = utils.img_to_array(loaded_im, dtype=np.int32)
+ assert np.all(loaded_im_array == original_grayscale_32bit_array)
+
+ _path = Path(tmpdir / 'grayscale_32bit_utils.tiff') # Path
+ loaded_im = utils.load_img(_path, color_mode='grayscale')
+ loaded_im_array = utils.img_to_array(loaded_im, dtype=np.int32)
+ assert np.all(loaded_im_array == original_grayscale_32bit_array)
+
# Check that exception is raised if interpolation not supported.
loaded_im = utils.load_img(filename_rgb, interpolation="unsupported")
@@ -105,6 +224,28 @@ def test_load_img(tmpdir):
loaded_im = utils.load_img(filename_rgb, target_size=(25, 25),
interpolation="unsupported")
+ # Check that the aspect ratio of a square is the same
+
+ filename_red_square = str(tmpdir / 'red_square_utils.png')
+ A = np.zeros((50, 100, 3), dtype=np.uint8) # rectangle image 100x50
+ A[20:30, 45:55, 0] = 255 # red square 10x10
+ red_square_array = np.array(A)
+ red_square = utils.array_to_img(red_square_array, scale=False)
+ red_square.save(filename_red_square)
+
+ loaded_im = utils.load_img(filename_red_square, target_size=(25, 25),
+ keep_aspect_ratio=True)
+ loaded_im_array = utils.img_to_array(loaded_im)
+ assert loaded_im_array.shape == (25, 25, 3)
+
+ red_channel_arr = loaded_im_array[:, :, 0].astype(np.bool)
+ square_width = np.sum(np.sum(red_channel_arr, axis=0))
+ square_height = np.sum(np.sum(red_channel_arr, axis=1))
+ aspect_ratio_result = square_width / square_height
+
+ # original square had 1:1 ratio
+ assert aspect_ratio_result == pytest.approx(1.0)
+
def test_list_pictures(tmpdir):
filenames = ['test.png', 'test0.jpg', 'test-1.jpeg', '2test.bmp',
@@ -150,6 +291,17 @@ def test_array_to_img_and_img_to_array():
x = utils.img_to_array(img, data_format='channels_first')
assert x.shape == (1, height, width)
+ # grayscale 32-bit signed integer
+ x = np.array(
+ np.random.randint(-2147483648, 2147483647, (1, height, width)),
+ dtype=np.int32
+ )
+ img = utils.array_to_img(x, data_format='channels_first')
+ assert img.size == (width, height)
+
+ x = utils.img_to_array(img, data_format='channels_first')
+ assert x.shape == (1, height, width)
+
# Test tf data format
# Test RGB 3D
x = np.random.random((height, width, 3))
@@ -175,6 +327,28 @@ def test_array_to_img_and_img_to_array():
x = utils.img_to_array(img, data_format='channels_last')
assert x.shape == (height, width, 1)
+ # grayscale 16-bit signed integer
+ x = np.array(
+ np.random.randint(-2147483648, 2147483647, (height, width, 1)),
+ dtype=np.int16
+ )
+ img = utils.array_to_img(x, data_format='channels_last')
+ assert img.size == (width, height)
+
+ x = utils.img_to_array(img, data_format='channels_last')
+ assert x.shape == (height, width, 1)
+
+ # grayscale 32-bit signed integer
+ x = np.array(
+ np.random.randint(-2147483648, 2147483647, (height, width, 1)),
+ dtype=np.int32
+ )
+ img = utils.array_to_img(x, data_format='channels_last')
+ assert img.size == (width, height)
+
+ x = utils.img_to_array(img, data_format='channels_last')
+ assert x.shape == (height, width, 1)
+
# Test invalid use case
with pytest.raises(ValueError):
x = np.random.random((height, width)) # not 3D
@@ -201,5 +375,25 @@ def test_array_to_img_and_img_to_array():
img = utils.img_to_array(x, data_format='channels_last')
+def write_sample_image(tmpdir):
+ im = utils.array_to_img(np.random.rand(1, 1, 3))
+ path = str(tmpdir / 'sample_image.png')
+ utils.save_img(path, im)
+ return path
+
+
+def test_image_file_handlers_close(tmpdir):
+ path = write_sample_image(tmpdir)
+ max_open_files, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
+ for i in range(max_open_files+1):
+ utils.load_img(path)
+
+
+def test_load_img_returns_image(tmpdir):
+ path = write_sample_image(tmpdir)
+ im = utils.load_img(path)
+ assert isinstance(im, PIL.Image.Image)
+
+
if __name__ == '__main__':
pytest.main([__file__])
diff --git a/tests/sequence_test.py b/tests/sequence_test.py
index 36fe1d7..246ca66 100644
--- a/tests/sequence_test.py
+++ b/tests/sequence_test.py
@@ -1,9 +1,8 @@
from math import ceil
-import pytest
+
import numpy as np
-from numpy.testing import assert_allclose
-from numpy.testing import assert_equal
-from numpy.testing import assert_raises
+import pytest
+from numpy.testing import assert_allclose, assert_equal, assert_raises
from keras_preprocessing import sequence
@@ -101,8 +100,8 @@ def test_skipgrams():
categorical=True)
for couple in couples:
assert couple[0] - couple[1] <= 3
- for l in labels:
- assert len(l) == 2
+ for label in labels:
+ assert len(label) == 2
def test_remove_long_seq():
diff --git a/tests/test_documentation.py b/tests/test_documentation.py
index 5839a5b..12a113b 100644
--- a/tests/test_documentation.py
+++ b/tests/test_documentation.py
@@ -1,7 +1,6 @@
import importlib
import inspect
import re
-import sys
from itertools import compress
import pytest
@@ -163,7 +162,6 @@ def handle_module(mod):
handle_module(mem)
-@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3")
def test_doc():
for module in modules:
mod = importlib.import_module(module)
diff --git a/tests/text_test.py b/tests/text_test.py
index e39aebb..c6bf2da 100644
--- a/tests/text_test.py
+++ b/tests/text_test.py
@@ -1,10 +1,11 @@
# -*- coding: utf-8 -*-
+from collections import OrderedDict
+
import numpy as np
import pytest
+from tensorflow import keras
-import keras
from keras_preprocessing import text
-from collections import OrderedDict
def test_one_hot():
@@ -14,6 +15,13 @@ def test_one_hot():
assert np.max(encoded) <= 4
assert np.min(encoded) >= 0
+ sample_text = 'The-cat-sat-on-the-mat'
+ encoded2 = text.one_hot(sample_text, 5, analyzer=lambda t: t.lower().split('-'))
+ assert encoded == encoded2
+ assert len(encoded) == 6
+ assert np.max(encoded) <= 4
+ assert np.min(encoded) >= 0
+
def test_hashing_trick_hash():
sample_text = 'The cat sat on the mat.'
Debdiff
[The following lists of changes regard files as different if they have different names, permissions or owners.]
Files in second set of .debs but not in first
-rw-r--r-- root/root /usr/lib/python3/dist-packages/Keras_Preprocessing-1.1.2.egg-info/PKG-INFO
-rw-r--r-- root/root /usr/lib/python3/dist-packages/Keras_Preprocessing-1.1.2.egg-info/dependency_links.txt
-rw-r--r-- root/root /usr/lib/python3/dist-packages/Keras_Preprocessing-1.1.2.egg-info/requires.txt
-rw-r--r-- root/root /usr/lib/python3/dist-packages/Keras_Preprocessing-1.1.2.egg-info/top_level.txt
Files in first set of .debs but not in second
-rw-r--r-- root/root /usr/lib/python3/dist-packages/Keras_Preprocessing-1.1.0.egg-info/PKG-INFO
-rw-r--r-- root/root /usr/lib/python3/dist-packages/Keras_Preprocessing-1.1.0.egg-info/dependency_links.txt
-rw-r--r-- root/root /usr/lib/python3/dist-packages/Keras_Preprocessing-1.1.0.egg-info/requires.txt
-rw-r--r-- root/root /usr/lib/python3/dist-packages/Keras_Preprocessing-1.1.0.egg-info/top_level.txt
Control files: lines which differ (wdiff format)
Depends: python3-numpy, python3-six (>= 1.9.0), python3:any (>= 3.6~)