New Upstream Release - python-itemloaders
Ready changes
Summary
Merged new upstream version: 1.1.0 (was: 1.0.6).
Diff
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 2336baf..a9f550e 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 1.0.6
+current_version = 1.1.0
commit = True
tag = True
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000..cc6b873
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,2 @@
+# Apply black format
+627f3bd9ea5210f40dbd5697eff9351bb5af019c
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 85daa7d..fb85c8c 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -3,6 +3,15 @@ on:
- pull_request
- push
jobs:
+ pre-commit:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v3
+ with:
+ python-version: "3.11"
+ - uses: pre-commit/action@v3.0.0
+
tests:
runs-on: ubuntu-latest
strategy:
@@ -11,9 +20,9 @@ jobs:
- python-version: 3
env:
TOXENV: docs
- - python-version: 3.6
+ - python-version: 3
env:
- TOXENV: py
+ TOXENV: twinecheck
- python-version: 3.7
env:
TOXENV: py
@@ -32,6 +41,9 @@ jobs:
- python-version: '3.10'
env:
TOXENV: py
+ - python-version: '3.11'
+ env:
+ TOXENV: py
steps:
- uses: actions/checkout@v2
- name: Install system libraries
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..57fee0e
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,24 @@
+default_language_version:
+ python: python3.10
+repos:
+ - hooks:
+ - id: black
+ language_version: python3
+ repo: https://github.com/ambv/black
+ rev: 23.3.0
+ - hooks:
+ - id: isort
+ language_version: python3
+ repo: https://github.com/PyCQA/isort
+ rev: 5.12.0
+ - hooks:
+ - id: flake8
+ language_version: python3
+ additional_dependencies:
+ - flake8-bugbear
+ - flake8-comprehensions
+ - flake8-debugger
+ - flake8-docstrings
+ - flake8-string-format
+ repo: https://github.com/pycqa/flake8
+ rev: 6.0.0
diff --git a/README.rst b/README.rst
index 81751ad..a33b041 100644
--- a/README.rst
+++ b/README.rst
@@ -73,4 +73,5 @@ All contributions are welcome!
* File an `issue here <https://github.com/scrapy/itemloaders/issues>`_, if there isn't one yet
* Fork this repository
* Create a branch to work on your changes
+ * Run ``pre-commit install`` to install pre-commit hooks
* Push your local branch and submit a Pull Request
diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000..d8aa6b9
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,6 @@
+comment:
+ layout: "header, diff, tree"
+
+coverage:
+ status:
+ project: false
diff --git a/debian/changelog b/debian/changelog
index fac8dd5..661475e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+python-itemloaders (1.1.0-1) UNRELEASED; urgency=low
+
+ * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk> Fri, 02 Jun 2023 17:59:49 -0000
+
python-itemloaders (1.0.6-1) unstable; urgency=medium
* New upstream version.
diff --git a/docs/_ext/__init__.py b/docs/_ext/__init__.py
index 8b13789..e69de29 100644
--- a/docs/_ext/__init__.py
+++ b/docs/_ext/__init__.py
@@ -1 +0,0 @@
-
diff --git a/docs/_ext/github.py b/docs/_ext/github.py
index e1adcfc..00f8783 100644
--- a/docs/_ext/github.py
+++ b/docs/_ext/github.py
@@ -1,19 +1,31 @@
+from typing import Optional
+
from docutils import nodes
from docutils.parsers.rst.roles import set_classes
def setup(app):
- app.add_role('gh', github_role)
+ app.add_role("gh", github_role)
-def github_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
+def github_role(
+ name,
+ rawtext,
+ text,
+ lineno,
+ inliner,
+ options: Optional[dict] = None,
+ content: Optional[list] = None,
+):
+ options = options or {}
+ content = content or []
if text.isdigit():
- display_text = f'#{text}'
- url = f'https://github.com/scrapy/itemloaders/issues/{text}'
+ display_text = f"#{text}"
+ url = f"https://github.com/scrapy/itemloaders/issues/{text}"
else:
short_commit = text[:7]
display_text = short_commit
- url = f'https://github.com/scrapy/itemloaders/commit/{short_commit}'
+ url = f"https://github.com/scrapy/itemloaders/commit/{short_commit}"
set_classes(options)
node = nodes.reference(rawtext, display_text, refuri=url, **options)
diff --git a/docs/conf.py b/docs/conf.py
index 9713489..862dde8 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -15,6 +15,8 @@ import sys
from datetime import datetime
from os import path
+import sphinx_rtd_theme
+
# If your extensions are in another directory, add it here. If the directory
# is relative to the documentation root, use os.path.abspath to make it
# absolute, like shown here.
@@ -27,72 +29,72 @@ sys.path.insert(0, path.dirname(path.dirname(__file__)))
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
- '_ext.github',
- 'sphinx.ext.autodoc',
- 'sphinx.ext.coverage',
- 'sphinx.ext.intersphinx',
- 'sphinx.ext.viewcode',
+ "_ext.github",
+ "sphinx.ext.autodoc",
+ "sphinx.ext.coverage",
+ "sphinx.ext.intersphinx",
+ "sphinx.ext.viewcode",
]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
# The encoding of source files.
-#source_encoding = 'utf-8'
+# source_encoding = 'utf-8'
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
-project = 'itemloaders'
-copyright = '2020–{}, Zyte Group Ltd'.format(datetime.now().year)
+project = "itemloaders"
+copyright = "2020–{}, Zyte Group Ltd".format(datetime.now().year)
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
-version = ''
-release = ''
+version = ""
+release = ""
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
-language = 'en'
+language = "en"
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
-#today = ''
+# today = ''
# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
# List of documents that shouldn't be included in the build.
-#unused_docs = []
+# unused_docs = []
-exclude_patterns = ['build']
+exclude_patterns = ["build"]
# List of directories, relative to source directory, that shouldn't be searched
# for source files.
-exclude_trees = ['.build']
+exclude_trees = [".build"]
# The reST default role (used for this markup: `text`) to use for all documents.
-#default_role = None
+# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
-#show_authors = False
+# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# Options for HTML output
@@ -100,17 +102,17 @@ pygments_style = 'sphinx'
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
-#html_theme_options = {}
+# html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
# Add path to the RTD explicitly to robustify builds (otherwise might
# fail in a clean Debian build env)
-import sphinx_rtd_theme
+
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
@@ -121,19 +123,19 @@ html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
-#html_title = None
+# html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
-#html_logo = None
+# html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
-#html_favicon = None
+# html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
@@ -142,23 +144,23 @@ html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
-html_last_updated_fmt = '%b %d, %Y'
+html_last_updated_fmt = "%b %d, %Y"
# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
# If false, no module index is generated.
-#html_use_modindex = True
+# html_use_modindex = True
# If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
# If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
# If true, the reST sources are included in the HTML build as _sources/<name>.
html_copy_source = True
@@ -166,68 +168,68 @@ html_copy_source = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = ''
+# html_file_suffix = ''
# Output file base name for HTML help builder.
-htmlhelp_basename = 'itemloadersdoc'
+htmlhelp_basename = "itemloadersdoc"
# Options for LaTeX output
# ------------------------
# The paper size ('letter' or 'a4').
-#latex_paper_size = 'letter'
+# latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
-#latex_font_size = '10pt'
+# latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, document class [howto/manual]).
latex_documents = [
- ('index', 'itemloaders.tex', 'itemloaders Documentation', 'Zyte', 'manual'),
+ ("index", "itemloaders.tex", "itemloaders Documentation", "Zyte", "manual"),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
-#latex_logo = None
+# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
# Additional stuff for the LaTeX preamble.
-#latex_preamble = ''
+# latex_preamble = ''
# Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
# If false, no module index is generated.
-#latex_use_modindex = True
+# latex_use_modindex = True
# autodocs
def setup(app):
- app.connect('autodoc-skip-member', maybe_skip_member)
+ app.connect("autodoc-skip-member", maybe_skip_member)
def maybe_skip_member(app, what, name, obj, skip, options):
if not skip:
# autodocs was generating a text "alias of" for the following members
# https://github.com/sphinx-doc/sphinx/issues/4422
- return name in {'default_item_class', 'default_selector_class'}
+ return name in {"default_item_class", "default_selector_class"}
return skip
nitpicky = True
intersphinx_mapping = {
- 'parsel': ('https://parsel.readthedocs.io/en/stable/', None),
- 'python': ('https://docs.python.org/3', None),
- 'scrapy': ('https://docs.scrapy.org/en/latest/', None),
- 'w3lib': ('https://w3lib.readthedocs.io/en/latest', None),
+ "parsel": ("https://parsel.readthedocs.io/en/stable/", None),
+ "python": ("https://docs.python.org/3", None),
+ "scrapy": ("https://docs.scrapy.org/en/latest/", None),
+ "w3lib": ("https://w3lib.readthedocs.io/en/latest", None),
}
diff --git a/docs/release-notes.rst b/docs/release-notes.rst
index a62c601..df6a1be 100644
--- a/docs/release-notes.rst
+++ b/docs/release-notes.rst
@@ -5,14 +5,33 @@
Release notes
=============
+.. _release-1.1.0:
+
+itemloaders 1.1.0 (2023-04-21)
+------------------------------
+
+- Added JMESPath support (:meth:`ItemLoader.add_jmes` etc.), requiring Parsel
+ 1.8.1+ (:gh:`68`)
+
+- Added official support for Python 3.11 (:gh:`59`)
+
+- Removed official support for Python 3.6 (:gh:`61`)
+
+- Internal code cleanup (:gh:`65`, :gh:`66`)
+
+- Added ``pre-commit`` support and applied changes from ``black`` and
+ ``flake8`` (:gh:`70`).
+
+- Improved CI (:gh:`60`)
+
.. _release-1.0.6:
itemloaders 1.0.6 (2022-08-29)
------------------------------
Fixes a regression introduced in 1.0.5 that would cause the ``re`` parameter of
-:meth:`ItemLoader.add_xpath` and similar methods to be passed to lxml, which
-would trigger an exception when the value of ``re`` was a compiled pattern and
+:meth:`ItemLoader.add_xpath` and similar methods to be passed to lxml, which
+would trigger an exception when the value of ``re`` was a compiled pattern and
not a string (:gh:`56`)
.. _release-1.0.5:
diff --git a/itemloaders/__init__.py b/itemloaders/__init__.py
index 775e04d..deb80f4 100644
--- a/itemloaders/__init__.py
+++ b/itemloaders/__init__.py
@@ -19,7 +19,7 @@ def unbound_method(method):
(no need to define an unused first 'self' argument)
"""
with suppress(AttributeError):
- if '.' not in method.__qualname__:
+ if "." not in method.__qualname__:
return method.__func__
return method
@@ -36,12 +36,12 @@ class ItemLoader:
:param item: The item instance to populate using subsequent calls to
:meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css`,
- or :meth:`~ItemLoader.add_value`.
+ :meth:`~ItemLoader.add_jmes` or :meth:`~ItemLoader.add_value`.
:type item: :class:`dict` object
:param selector: The selector to extract data from, when using the
- :meth:`add_xpath` (resp. :meth:`add_css`) or :meth:`replace_xpath`
- (resp. :meth:`replace_css`) method.
+ :meth:`add_xpath` (resp. :meth:`add_css`, :meth:`add_jmes`) or :meth:`replace_xpath`
+ (resp. :meth:`replace_css`, :meth:`replace_jmes`) method.
:type selector: :class:`~parsel.selector.Selector` object
The item, selector and the remaining keyword arguments are
@@ -105,7 +105,7 @@ class ItemLoader:
if item is None:
item = self.default_item_class()
self._local_item = item
- context['item'] = item
+ context["item"] = item
self.context = context
self.parent = parent
self._local_values = {}
@@ -138,9 +138,7 @@ class ItemLoader:
"""
selector = self.selector.xpath(xpath)
context.update(selector=selector)
- subloader = self.__class__(
- item=self.item, parent=self, **context
- )
+ subloader = self.__class__(item=self.item, parent=self, **context)
return subloader
def nested_css(self, css, **context):
@@ -153,9 +151,7 @@ class ItemLoader:
"""
selector = self.selector.css(css)
context.update(selector=selector)
- subloader = self.__class__(
- item=self.item, parent=self, **context
- )
+ subloader = self.__class__(item=self.item, parent=self, **context)
return subloader
def add_value(self, field_name, value, *processors, re=None, **kw):
@@ -246,9 +242,10 @@ class ItemLoader:
try:
value = proc(value)
except Exception as e:
- raise ValueError("Error with processor %s value=%r error='%s: %s'" %
- (_proc.__class__.__name__, value,
- type(e).__name__, str(e)))
+ raise ValueError(
+ "Error with processor %s value=%r error='%s: %s'"
+ % (_proc.__class__.__name__, value, type(e).__name__, str(e))
+ ) from e
return value
def load_item(self):
@@ -276,30 +273,28 @@ class ItemLoader:
try:
return proc(value)
except Exception as e:
- raise ValueError("Error with output processor: field=%r value=%r error='%s: %s'" %
- (field_name, value, type(e).__name__, str(e)))
+ raise ValueError(
+ "Error with output processor: field=%r value=%r error='%s: %s'"
+ % (field_name, value, type(e).__name__, str(e))
+ ) from e
def get_collected_values(self, field_name):
"""Return the collected values for the given field."""
return self._values.get(field_name, [])
def get_input_processor(self, field_name):
- proc = getattr(self, '%s_in' % field_name, None)
+ proc = getattr(self, "%s_in" % field_name, None)
if not proc:
proc = self._get_item_field_attr(
- field_name,
- 'input_processor',
- self.default_input_processor
+ field_name, "input_processor", self.default_input_processor
)
return unbound_method(proc)
def get_output_processor(self, field_name):
- proc = getattr(self, '%s_out' % field_name, None)
+ proc = getattr(self, "%s_out" % field_name, None)
if not proc:
proc = self._get_item_field_attr(
- field_name,
- 'output_processor',
- self.default_output_processor
+ field_name, "output_processor", self.default_output_processor
)
return unbound_method(proc)
@@ -316,8 +311,15 @@ class ItemLoader:
except Exception as e:
raise ValueError(
"Error with input processor %s: field=%r value=%r "
- "error='%s: %s'" % (_proc.__class__.__name__, field_name,
- value, type(e).__name__, str(e)))
+ "error='%s: %s'"
+ % (
+ _proc.__class__.__name__,
+ field_name,
+ value,
+ type(e).__name__,
+ str(e),
+ )
+ ) from e
def _check_selector_method(self):
if self.selector is None:
@@ -402,14 +404,14 @@ class ItemLoader:
# HTML snippet: <p id="price">the price is $1200</p>
loader.add_css('price', 'p#price', re='the price is (.*)')
"""
- values = self._get_cssvalues(css, **kw)
+ values = self._get_cssvalues(css)
self.add_value(field_name, values, *processors, re=re, **kw)
def replace_css(self, field_name, css, *processors, re=None, **kw):
"""
Similar to :meth:`add_css` but replaces collected data instead of adding it.
"""
- values = self._get_cssvalues(css, **kw)
+ values = self._get_cssvalues(css)
self.replace_value(field_name, values, *processors, re=re, **kw)
def get_css(self, css, *processors, re=None, **kw):
@@ -432,10 +434,70 @@ class ItemLoader:
# HTML snippet: <p id="price">the price is $1200</p>
loader.get_css('p#price', TakeFirst(), re='the price is (.*)')
"""
- values = self._get_cssvalues(css, **kw)
+ values = self._get_cssvalues(css)
return self.get_value(values, *processors, re=re, **kw)
- def _get_cssvalues(self, csss, **kw):
+ def _get_cssvalues(self, csss):
self._check_selector_method()
csss = arg_to_iter(csss)
return flatten(self.selector.css(css).getall() for css in csss)
+
+ def add_jmes(self, field_name, jmes, *processors, re=None, **kw):
+ """
+ Similar to :meth:`ItemLoader.add_value` but receives a JMESPath selector
+ instead of a value, which is used to extract a list of unicode strings
+ from the selector associated with this :class:`ItemLoader`.
+
+ See :meth:`get_jmes` for ``kwargs``.
+
+ :param jmes: the JMESPath selector to extract data from
+ :type jmes: str
+
+ Examples::
+
+ # JSON snippet: {"name": "Color TV"}
+ loader.add_jmes('name', 'name')
+ # JSON snippet: {"price": "the price is $1200"}
+ loader.add_jmes('price', 'price', TakeFirst(), re='the price is (.*)')
+ """
+ values = self._get_jmesvalues(jmes)
+ self.add_value(field_name, values, *processors, re=re, **kw)
+
+ def replace_jmes(self, field_name, jmes, *processors, re=None, **kw):
+ """
+ Similar to :meth:`add_jmes` but replaces collected data instead of adding it.
+ """
+ values = self._get_jmesvalues(jmes)
+ self.replace_value(field_name, values, *processors, re=re, **kw)
+
+ def get_jmes(self, jmes, *processors, re=None, **kw):
+ """
+ Similar to :meth:`ItemLoader.get_value` but receives a JMESPath selector
+ instead of a value, which is used to extract a list of unicode strings
+ from the selector associated with this :class:`ItemLoader`.
+
+ :param jmes: the JMESPath selector to extract data from
+ :type jmes: str
+
+ :param re: a regular expression to use for extracting data from the
+ selected JMESPath
+ :type re: str or typing.Pattern
+
+ Examples::
+
+ # JSON snippet: {"name": "Color TV"}
+ loader.get_jmes('name')
+ # JSON snippet: {"price": "the price is $1200"}
+ loader.get_jmes('price', TakeFirst(), re='the price is (.*)')
+ """
+ values = self._get_jmesvalues(jmes)
+ return self.get_value(values, *processors, re=re, **kw)
+
+ def _get_jmesvalues(self, jmess):
+ self._check_selector_method()
+ jmess = arg_to_iter(jmess)
+ if not hasattr(self.selector, "jmespath"):
+ raise AttributeError(
+ "Please install parsel >= 1.8.1 to get jmespath support"
+ )
+ return flatten(self.selector.jmespath(jmes).getall() for jmes in jmess)
diff --git a/itemloaders/common.py b/itemloaders/common.py
index 4fc24c6..6c0b7fa 100644
--- a/itemloaders/common.py
+++ b/itemloaders/common.py
@@ -1,6 +1,7 @@
"""Common functions used in Item Loaders code"""
from functools import partial
+
from itemloaders.utils import get_func_args
@@ -8,7 +9,7 @@ def wrap_loader_context(function, context):
"""Wrap functions that receive loader_context to contain the context
"pre-loaded" and expose a interface that receives only one argument
"""
- if 'loader_context' in get_func_args(function):
+ if "loader_context" in get_func_args(function):
return partial(function, loader_context=context)
else:
return function
diff --git a/itemloaders/processors.py b/itemloaders/processors.py
index 4bf3440..c4aa039 100644
--- a/itemloaders/processors.py
+++ b/itemloaders/processors.py
@@ -5,8 +5,8 @@ See documentation in docs/topics/loaders.rst
"""
from collections import ChainMap
-from itemloaders.utils import arg_to_iter
from itemloaders.common import wrap_loader_context
+from itemloaders.utils import arg_to_iter
class MapCompose:
@@ -51,7 +51,7 @@ class MapCompose:
See :class:`Compose` processor for more info.
.. _`parsel selectors`: https://parsel.readthedocs.io/en/latest/parsel.html#parsel.selector.Selector.extract
- """
+ """ # noqa
def __init__(self, *functions, **default_loader_context):
self.functions = functions
@@ -70,10 +70,11 @@ class MapCompose:
try:
next_values += arg_to_iter(func(v))
except Exception as e:
- raise ValueError("Error in MapCompose with "
- "%s value=%r error='%s: %s'" %
- (str(func), value, type(e).__name__,
- str(e)))
+ raise ValueError(
+ "Error in MapCompose with "
+ "%s value=%r error='%s: %s'"
+ % (str(func), value, type(e).__name__, str(e))
+ ) from e
values = next_values
return values
@@ -109,7 +110,7 @@ class Compose:
def __init__(self, *functions, **default_loader_context):
self.functions = functions
- self.stop_on_none = default_loader_context.get('stop_on_none', True)
+ self.stop_on_none = default_loader_context.get("stop_on_none", True)
self.default_loader_context = default_loader_context
def __call__(self, value, loader_context=None):
@@ -124,9 +125,11 @@ class Compose:
try:
value = func(value)
except Exception as e:
- raise ValueError("Error in Compose with "
- "%s value=%r error='%s: %s'" %
- (str(func), value, type(e).__name__, str(e)))
+ raise ValueError(
+ "Error in Compose with "
+ "%s value=%r error='%s: %s'"
+ % (str(func), value, type(e).__name__, str(e))
+ ) from e
return value
@@ -146,7 +149,7 @@ class TakeFirst:
def __call__(self, values):
for value in values:
- if value is not None and value != '':
+ if value is not None and value != "":
return value
@@ -197,6 +200,7 @@ class SelectJmes:
def __init__(self, json_path):
self.json_path = json_path
import jmespath
+
self.compiled_path = jmespath.compile(self.json_path)
def __call__(self, value):
@@ -226,7 +230,7 @@ class Join:
'one<br>two<br>three'
"""
- def __init__(self, separator=' '):
+ def __init__(self, separator=" "):
self.separator = separator
def __call__(self, values):
diff --git a/itemloaders/utils.py b/itemloaders/utils.py
index e764926..361814f 100644
--- a/itemloaders/utils.py
+++ b/itemloaders/utils.py
@@ -7,7 +7,6 @@ from functools import partial
from itemadapter import is_item
-
_ITERABLE_SINGLE_VALUES = str, bytes
@@ -20,7 +19,7 @@ def arg_to_iter(arg):
if arg is None:
return []
elif (
- hasattr(arg, '__iter__')
+ hasattr(arg, "__iter__")
and not isinstance(arg, _ITERABLE_SINGLE_VALUES)
and not is_item(arg)
):
@@ -30,45 +29,30 @@ def arg_to_iter(arg):
def get_func_args(func, stripself=False):
- """Return the argument name list of a callable"""
- if inspect.isfunction(func):
- spec = inspect.getfullargspec(func)
- func_args = spec.args + spec.kwonlyargs
- elif inspect.isclass(func):
- return get_func_args(func.__init__, True)
- elif inspect.ismethod(func):
- return get_func_args(func.__func__, True)
- elif inspect.ismethoddescriptor(func):
- return []
- elif isinstance(func, partial):
- return [x for x in get_func_args(func.func)[len(func.args):]
- if not (func.keywords and x in func.keywords)]
- elif hasattr(func, '__call__'):
- if inspect.isroutine(func):
- return []
- elif getattr(func, '__name__', None) == '__call__':
- return []
- else:
- return get_func_args(func.__call__, True)
+ """Return the argument name list of a callable object"""
+ if not callable(func):
+ raise TypeError(f"func must be callable, got {type(func).__name__!r}")
+
+ args = []
+ try:
+ sig = inspect.signature(func)
+ except ValueError:
+ return args
+
+ if isinstance(func, partial):
+ partial_args = func.args
+ partial_kw = func.keywords
+
+ for name, param in sig.parameters.items():
+ if param.name in partial_args:
+ continue
+ if partial_kw and param.name in partial_kw:
+ continue
+ args.append(name)
else:
- raise TypeError(f'{type(func)} is not callable')
- if stripself:
- func_args.pop(0)
- return func_args
-
-
-def _getargspec_py23(func):
- """_getargspec_py23(function) -> named tuple ArgSpec(args, varargs, keywords,
- defaults)
+ for name in sig.parameters.keys():
+ args.append(name)
- Was identical to inspect.getargspec() in python2, but uses
- inspect.getfullargspec() for python3 behind the scenes to avoid
- DeprecationWarning.
-
- >>> def f(a, b=2, *ar, **kw):
- ... pass
-
- >>> _getargspec_py23(f)
- ArgSpec(args=['a', 'b'], varargs='ar', keywords='kw', defaults=(2,))
- """
- return inspect.ArgSpec(*inspect.getfullargspec(func)[:4])
+ if stripself and args and args[0] == "self":
+ args = args[1:]
+ return args
diff --git a/requirements-dev.txt b/requirements-dev.txt
deleted file mode 100644
index c17e1cf..0000000
--- a/requirements-dev.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-w3lib>=1.21.0
-parsel>=1.5.2
-jmespath>=0.9.5
-itemadapter>=0.1.0
-
-pytest==5.4.1
-flake8==3.7.9
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index ab1cdc8..6e8d795 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,4 +2,7 @@
ignore = E266, E501, W503
max-line-length = 100
select = B,C,E,F,W,T4,B9
-exclude = .git,__pycache__,.venv
\ No newline at end of file
+exclude = .git,__pycache__,.venv
+
+[isort]
+profile = black
diff --git a/setup.py b/setup.py
index 0e6a004..55361a7 100644
--- a/setup.py
+++ b/setup.py
@@ -1,48 +1,48 @@
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
-with open('README.rst') as f:
+with open("README.rst") as f:
long_description = f.read()
setup(
- name='itemloaders',
- version='1.0.6',
- url='https://github.com/scrapy/itemloaders',
+ name="itemloaders",
+ version="1.1.0",
+ url="https://github.com/scrapy/itemloaders",
project_urls={
- 'Documentation': 'https://itemloaders.readthedocs.io/',
- 'Source': 'https://github.com/scrapy/itemloaders',
+ "Documentation": "https://itemloaders.readthedocs.io/",
+ "Source": "https://github.com/scrapy/itemloaders",
},
description="Base library for scrapy's ItemLoader",
long_description=long_description,
long_description_content_type="text/x-rst",
- author='Zyte',
- author_email='opensource@zyte.com',
- license='BSD',
- packages=find_packages(exclude=('tests', 'tests.*')),
+ author="Zyte",
+ author_email="opensource@zyte.com",
+ license="BSD",
+ packages=find_packages(exclude=("tests", "tests.*")),
include_package_data=True,
zip_safe=False,
classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'Intended Audience :: Developers',
- 'License :: OSI Approved :: BSD License',
- 'Operating System :: OS Independent',
- 'Programming Language :: Python',
- 'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.6',
- 'Programming Language :: Python :: 3.7',
- 'Programming Language :: Python :: 3.8',
- 'Programming Language :: Python :: 3.9',
- 'Programming Language :: Python :: 3.10',
- 'Programming Language :: Python :: Implementation :: CPython',
- 'Programming Language :: Python :: Implementation :: PyPy',
+ "Development Status :: 5 - Production/Stable",
+ "Intended Audience :: Developers",
+ "License :: OSI Approved :: BSD License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: PyPy",
],
- python_requires='>=3.6',
+ python_requires=">=3.7",
install_requires=[
# before updating these versions, be sure they are not higher than
# scrapy's requirements
- 'w3lib>=1.17.0',
- 'parsel>=1.5.0',
- 'jmespath>=0.9.5',
- 'itemadapter>=0.1.0',
+ "w3lib>=1.17.0",
+ "parsel>=1.5.0",
+ "jmespath>=0.9.5",
+ "itemadapter>=0.1.0",
],
# extras_require=extras_require,
)
diff --git a/tests/test_base_loader.py b/tests/test_base_loader.py
index b4ed396..c0bf007 100644
--- a/tests/test_base_loader.py
+++ b/tests/test_base_loader.py
@@ -1,5 +1,5 @@
-from functools import partial
import unittest
+from functools import partial
from itemloaders import ItemLoader
from itemloaders.processors import Compose, Identity, Join, MapCompose, TakeFirst
@@ -15,27 +15,26 @@ class DefaultedItemLoader(ItemLoader):
# test processors
def processor_with_args(value, other=None, loader_context=None):
- if 'key' in loader_context:
- return loader_context['key']
+ if "key" in loader_context:
+ return loader_context["key"]
return value
class BasicItemLoaderTest(unittest.TestCase):
-
def test_load_item_using_default_loader(self):
- i = dict(summary='lala')
+ i = {"summary": "lala"}
il = ItemLoader(item=i)
- il.add_value('name', 'marta')
+ il.add_value("name", "marta")
item = il.load_item()
assert item is i
- assert item['summary'] == ['lala']
- assert item['name'] == ['marta']
+ assert item["summary"] == ["lala"]
+ assert item["name"] == ["marta"]
def test_load_item_using_custom_loader(self):
il = CustomItemLoader()
- il.add_value('name', 'marta')
+ il.add_value("name", "marta")
item = il.load_item()
- assert item['name'] == ['Marta']
+ assert item["name"] == ["Marta"]
def test_load_item_ignore_none_field_values(self):
def validate_sku(value):
@@ -48,227 +47,239 @@ class BasicItemLoaderTest(unittest.TestCase):
price_out = Compose(TakeFirst(), float)
sku_out = Compose(TakeFirst(), validate_sku)
- valid_fragment = 'SKU: 1234'
- invalid_fragment = 'SKU: not available'
- sku_re = 'SKU: (.+)'
+ valid_fragment = "SKU: 1234"
+ invalid_fragment = "SKU: not available"
+ sku_re = "SKU: (.+)"
il = MyLoader(item={})
# Should not return "sku: None".
- il.add_value('sku', [invalid_fragment], re=sku_re)
+ il.add_value("sku", [invalid_fragment], re=sku_re)
# Should not ignore empty values.
- il.add_value('name', '')
- il.add_value('price', ['0'])
- assert il.load_item() == {'name': '', 'price': 0.0}
+ il.add_value("name", "")
+ il.add_value("price", ["0"])
+ assert il.load_item() == {"name": "", "price": 0.0}
- il.replace_value('sku', [valid_fragment], re=sku_re)
- self.assertEqual(il.load_item()['sku'], '1234')
+ il.replace_value("sku", [valid_fragment], re=sku_re)
+ self.assertEqual(il.load_item()["sku"], "1234")
def test_self_referencing_loader(self):
class MyLoader(ItemLoader):
url_out = TakeFirst()
def img_url_out(self, values):
- return (self.get_output_value('url') or '') + values[0]
+ return (self.get_output_value("url") or "") + values[0]
il = MyLoader(item={})
- il.add_value('url', 'http://example.com/')
- il.add_value('img_url', '1234.png')
+ il.add_value("url", "http://example.com/")
+ il.add_value("img_url", "1234.png")
assert il.load_item() == {
- 'url': 'http://example.com/',
- 'img_url': 'http://example.com/1234.png',
+ "url": "http://example.com/",
+ "img_url": "http://example.com/1234.png",
}
il = MyLoader(item={})
- il.add_value('img_url', '1234.png')
- assert il.load_item() == {'img_url': '1234.png'}
+ il.add_value("img_url", "1234.png")
+ assert il.load_item() == {"img_url": "1234.png"}
def test_add_value(self):
il = CustomItemLoader()
- il.add_value('name', 'marta')
- assert il.get_collected_values('name') == ['Marta']
- assert il.get_output_value('name') == ['Marta']
+ il.add_value("name", "marta")
+ assert il.get_collected_values("name") == ["Marta"]
+ assert il.get_output_value("name") == ["Marta"]
- il.add_value('name', 'pepe')
- assert il.get_collected_values('name') == ['Marta', 'Pepe']
- assert il.get_output_value('name') == ['Marta', 'Pepe']
+ il.add_value("name", "pepe")
+ assert il.get_collected_values("name") == ["Marta", "Pepe"]
+ assert il.get_output_value("name") == ["Marta", "Pepe"]
# test add object value
- il.add_value('summary', {'key': 1})
- assert il.get_collected_values('summary') == [{'key': 1}]
+ il.add_value("summary", {"key": 1})
+ assert il.get_collected_values("summary") == [{"key": 1}]
- il.add_value(None, 'Jim', lambda x: {'name': x})
- assert il.get_collected_values('name') == ['Marta', 'Pepe', 'Jim']
+ il.add_value(None, "Jim", lambda x: {"name": x})
+ assert il.get_collected_values("name") == ["Marta", "Pepe", "Jim"]
def test_add_zero(self):
il = ItemLoader()
- il.add_value('name', 0)
- assert il.get_collected_values('name') == [0]
+ il.add_value("name", 0)
+ assert il.get_collected_values("name") == [0]
def test_add_none(self):
il = ItemLoader()
- il.add_value('name', None)
- assert il.get_collected_values('name') == []
+ il.add_value("name", None)
+ assert il.get_collected_values("name") == []
def test_replace_value(self):
il = CustomItemLoader()
- il.replace_value('name', 'marta')
- self.assertEqual(il.get_collected_values('name'), ['Marta'])
- self.assertEqual(il.get_output_value('name'), ['Marta'])
- il.replace_value('name', 'pepe')
- self.assertEqual(il.get_collected_values('name'), ['Pepe'])
- self.assertEqual(il.get_output_value('name'), ['Pepe'])
+ il.replace_value("name", "marta")
+ self.assertEqual(il.get_collected_values("name"), ["Marta"])
+ self.assertEqual(il.get_output_value("name"), ["Marta"])
+ il.replace_value("name", "pepe")
+ self.assertEqual(il.get_collected_values("name"), ["Pepe"])
+ self.assertEqual(il.get_output_value("name"), ["Pepe"])
- il.replace_value(None, 'Jim', lambda x: {'name': x})
- self.assertEqual(il.get_collected_values('name'), ['Jim'])
+ il.replace_value(None, "Jim", lambda x: {"name": x})
+ self.assertEqual(il.get_collected_values("name"), ["Jim"])
def test_replace_value_none(self):
il = CustomItemLoader()
- il.replace_value('name', None)
- self.assertEqual(il.get_collected_values('name'), [])
- il.replace_value('name', 'marta')
- self.assertEqual(il.get_collected_values('name'), ['Marta'])
- il.replace_value('name', None) # when replacing with `None` nothing should happen
- self.assertEqual(il.get_collected_values('name'), ['Marta'])
+ il.replace_value("name", None)
+ self.assertEqual(il.get_collected_values("name"), [])
+ il.replace_value("name", "marta")
+ self.assertEqual(il.get_collected_values("name"), ["Marta"])
+ il.replace_value(
+ "name", None
+ ) # when replacing with `None` nothing should happen
+ self.assertEqual(il.get_collected_values("name"), ["Marta"])
def test_get_value(self):
il = ItemLoader()
- self.assertEqual('FOO', il.get_value(['foo', 'bar'], TakeFirst(), str.upper))
- self.assertEqual(['foo', 'bar'], il.get_value(['name:foo', 'name:bar'], re='name:(.*)$'))
- self.assertEqual('foo', il.get_value(['name:foo', 'name:bar'], TakeFirst(), re='name:(.*)$'))
- self.assertEqual(None, il.get_value(['foo', 'bar'], TakeFirst(), re='name:(.*)$'))
+ self.assertEqual("FOO", il.get_value(["foo", "bar"], TakeFirst(), str.upper))
+ self.assertEqual(
+ ["foo", "bar"], il.get_value(["name:foo", "name:bar"], re="name:(.*)$")
+ )
+ self.assertEqual(
+ "foo", il.get_value(["name:foo", "name:bar"], TakeFirst(), re="name:(.*)$")
+ )
+ self.assertEqual(
+ None, il.get_value(["foo", "bar"], TakeFirst(), re="name:(.*)$")
+ )
self.assertEqual(None, il.get_value(None, TakeFirst()))
- il.add_value('name', ['name:foo', 'name:bar'], TakeFirst(), re='name:(.*)$')
- self.assertEqual(['foo'], il.get_collected_values('name'))
- il.replace_value('name', 'name:bar', re='name:(.*)$')
- self.assertEqual(['bar'], il.get_collected_values('name'))
+ il.add_value("name", ["name:foo", "name:bar"], TakeFirst(), re="name:(.*)$")
+ self.assertEqual(["foo"], il.get_collected_values("name"))
+ il.replace_value("name", "name:bar", re="name:(.*)$")
+ self.assertEqual(["bar"], il.get_collected_values("name"))
def test_iter_on_input_processor_input(self):
class NameFirstItemLoader(ItemLoader):
name_in = TakeFirst()
il = NameFirstItemLoader()
- il.add_value('name', 'marta')
- self.assertEqual(il.get_collected_values('name'), ['marta'])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_collected_values("name"), ["marta"])
il = NameFirstItemLoader()
- il.add_value('name', ['marta', 'jose'])
- self.assertEqual(il.get_collected_values('name'), ['marta'])
+ il.add_value("name", ["marta", "jose"])
+ self.assertEqual(il.get_collected_values("name"), ["marta"])
il = NameFirstItemLoader()
- il.replace_value('name', 'marta')
- self.assertEqual(il.get_collected_values('name'), ['marta'])
+ il.replace_value("name", "marta")
+ self.assertEqual(il.get_collected_values("name"), ["marta"])
il = NameFirstItemLoader()
- il.replace_value('name', ['marta', 'jose'])
- self.assertEqual(il.get_collected_values('name'), ['marta'])
+ il.replace_value("name", ["marta", "jose"])
+ self.assertEqual(il.get_collected_values("name"), ["marta"])
il = NameFirstItemLoader()
- il.add_value('name', 'marta')
- il.add_value('name', ['jose', 'pedro'])
- self.assertEqual(il.get_collected_values('name'), ['marta', 'jose'])
+ il.add_value("name", "marta")
+ il.add_value("name", ["jose", "pedro"])
+ self.assertEqual(il.get_collected_values("name"), ["marta", "jose"])
def test_map_compose_filter(self):
def filter_world(x):
- return None if x == 'world' else x
+ return None if x == "world" else x
proc = MapCompose(filter_world, str.upper)
- self.assertEqual(proc(['hello', 'world', 'this', 'is', 'scrapy']),
- ['HELLO', 'THIS', 'IS', 'SCRAPY'])
+ self.assertEqual(
+ proc(["hello", "world", "this", "is", "scrapy"]),
+ ["HELLO", "THIS", "IS", "SCRAPY"],
+ )
def test_map_compose_filter_multil(self):
class CustomItemLoader(ItemLoader):
name_in = MapCompose(lambda v: v.title(), lambda v: v[:-1])
il = CustomItemLoader()
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['Mart'])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["Mart"])
item = il.load_item()
- self.assertEqual(item['name'], ['Mart'])
+ self.assertEqual(item["name"], ["Mart"])
def test_default_input_processor(self):
il = DefaultedItemLoader()
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['mart'])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["mart"])
def test_inherited_default_input_processor(self):
class InheritDefaultedItemLoader(DefaultedItemLoader):
pass
il = InheritDefaultedItemLoader()
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['mart'])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["mart"])
def test_input_processor_inheritance(self):
class ChildItemLoader(CustomItemLoader):
url_in = MapCompose(lambda v: v.lower())
il = ChildItemLoader()
- il.add_value('url', 'HTTP://scrapy.ORG')
- self.assertEqual(il.get_output_value('url'), ['http://scrapy.org'])
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['Marta'])
+ il.add_value("url", "HTTP://scrapy.ORG")
+ self.assertEqual(il.get_output_value("url"), ["http://scrapy.org"])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["Marta"])
class ChildChildItemLoader(ChildItemLoader):
url_in = MapCompose(lambda v: v.upper())
summary_in = MapCompose(lambda v: v)
il = ChildChildItemLoader()
- il.add_value('url', 'http://scrapy.org')
- self.assertEqual(il.get_output_value('url'), ['HTTP://SCRAPY.ORG'])
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['Marta'])
+ il.add_value("url", "http://scrapy.org")
+ self.assertEqual(il.get_output_value("url"), ["HTTP://SCRAPY.ORG"])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["Marta"])
def test_empty_map_compose(self):
class IdentityDefaultedItemLoader(DefaultedItemLoader):
name_in = MapCompose()
il = IdentityDefaultedItemLoader()
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['marta'])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["marta"])
def test_identity_input_processor(self):
class IdentityDefaultedItemLoader(DefaultedItemLoader):
name_in = Identity()
il = IdentityDefaultedItemLoader()
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['marta'])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["marta"])
def test_extend_custom_input_processors(self):
class ChildItemLoader(CustomItemLoader):
name_in = MapCompose(CustomItemLoader.name_in, str.swapcase)
il = ChildItemLoader()
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['mARTA'])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["mARTA"])
def test_extend_default_input_processors(self):
class ChildDefaultedItemLoader(DefaultedItemLoader):
- name_in = MapCompose(DefaultedItemLoader.default_input_processor, str.swapcase)
+ name_in = MapCompose(
+ DefaultedItemLoader.default_input_processor, str.swapcase
+ )
il = ChildDefaultedItemLoader()
- il.add_value('name', 'marta')
- self.assertEqual(il.get_output_value('name'), ['MART'])
+ il.add_value("name", "marta")
+ self.assertEqual(il.get_output_value("name"), ["MART"])
def test_output_processor_using_function(self):
il = CustomItemLoader()
- il.add_value('name', ['mar', 'ta'])
- self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta'])
+ il.add_value("name", ["mar", "ta"])
+ self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"])
class TakeFirstItemLoader(CustomItemLoader):
- name_out = u" ".join
+ name_out = " ".join
il = TakeFirstItemLoader()
- il.add_value('name', ['mar', 'ta'])
- self.assertEqual(il.get_output_value('name'), 'Mar Ta')
+ il.add_value("name", ["mar", "ta"])
+ self.assertEqual(il.get_output_value("name"), "Mar Ta")
def test_output_processor_error(self):
class CustomItemLoader(ItemLoader):
name_out = MapCompose(float)
il = CustomItemLoader()
- il.add_value('name', ['$10'])
+ il.add_value("name", ["$10"])
try:
- float('$10')
+ float("$10")
except Exception as e:
expected_exc_str = str(e)
@@ -279,86 +290,86 @@ class BasicItemLoaderTest(unittest.TestCase):
exc = e
assert isinstance(exc, ValueError)
s = str(exc)
- assert 'name' in s, s
- assert '$10' in s, s
- assert 'ValueError' in s, s
+ assert "name" in s, s
+ assert "$10" in s, s
+ assert "ValueError" in s, s
assert expected_exc_str in s, s
def test_output_processor_using_classes(self):
il = CustomItemLoader()
- il.add_value('name', ['mar', 'ta'])
- self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta'])
+ il.add_value("name", ["mar", "ta"])
+ self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"])
class TakeFirstItemLoader(CustomItemLoader):
name_out = Join()
il = TakeFirstItemLoader()
- il.add_value('name', ['mar', 'ta'])
- self.assertEqual(il.get_output_value('name'), 'Mar Ta')
+ il.add_value("name", ["mar", "ta"])
+ self.assertEqual(il.get_output_value("name"), "Mar Ta")
class TakeFirstItemLoader(CustomItemLoader):
name_out = Join("<br>")
il = TakeFirstItemLoader()
- il.add_value('name', ['mar', 'ta'])
- self.assertEqual(il.get_output_value('name'), 'Mar<br>Ta')
+ il.add_value("name", ["mar", "ta"])
+ self.assertEqual(il.get_output_value("name"), "Mar<br>Ta")
def test_default_output_processor(self):
il = CustomItemLoader()
- il.add_value('name', ['mar', 'ta'])
- self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta'])
+ il.add_value("name", ["mar", "ta"])
+ self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"])
class LalaItemLoader(CustomItemLoader):
default_output_processor = Identity()
il = LalaItemLoader()
- il.add_value('name', ['mar', 'ta'])
- self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta'])
+ il.add_value("name", ["mar", "ta"])
+ self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"])
def test_loader_context_on_declaration(self):
class ChildItemLoader(CustomItemLoader):
- url_in = MapCompose(processor_with_args, key='val')
+ url_in = MapCompose(processor_with_args, key="val")
il = ChildItemLoader()
- il.add_value('url', 'text')
- self.assertEqual(il.get_output_value('url'), ['val'])
- il.replace_value('url', 'text2')
- self.assertEqual(il.get_output_value('url'), ['val'])
+ il.add_value("url", "text")
+ self.assertEqual(il.get_output_value("url"), ["val"])
+ il.replace_value("url", "text2")
+ self.assertEqual(il.get_output_value("url"), ["val"])
def test_loader_context_on_instantiation(self):
class ChildItemLoader(CustomItemLoader):
url_in = MapCompose(processor_with_args)
- il = ChildItemLoader(key='val')
- il.add_value('url', 'text')
- self.assertEqual(il.get_output_value('url'), ['val'])
- il.replace_value('url', 'text2')
- self.assertEqual(il.get_output_value('url'), ['val'])
+ il = ChildItemLoader(key="val")
+ il.add_value("url", "text")
+ self.assertEqual(il.get_output_value("url"), ["val"])
+ il.replace_value("url", "text2")
+ self.assertEqual(il.get_output_value("url"), ["val"])
def test_loader_context_on_assign(self):
class ChildItemLoader(CustomItemLoader):
url_in = MapCompose(processor_with_args)
il = ChildItemLoader()
- il.context['key'] = 'val'
- il.add_value('url', 'text')
- self.assertEqual(il.get_output_value('url'), ['val'])
- il.replace_value('url', 'text2')
- self.assertEqual(il.get_output_value('url'), ['val'])
+ il.context["key"] = "val"
+ il.add_value("url", "text")
+ self.assertEqual(il.get_output_value("url"), ["val"])
+ il.replace_value("url", "text2")
+ self.assertEqual(il.get_output_value("url"), ["val"])
def test_item_passed_to_input_processor_functions(self):
def processor(value, loader_context):
- return loader_context['item']['name']
+ return loader_context["item"]["name"]
class ChildItemLoader(CustomItemLoader):
url_in = MapCompose(processor)
- it = dict(name='marta')
+ it = {"name": "marta"}
il = ChildItemLoader(item=it)
- il.add_value('url', 'text')
- self.assertEqual(il.get_output_value('url'), ['marta'])
- il.replace_value('url', 'text2')
- self.assertEqual(il.get_output_value('url'), ['marta'])
+ il.add_value("url", "text")
+ self.assertEqual(il.get_output_value("url"), ["marta"])
+ il.replace_value("url", "text2")
+ self.assertEqual(il.get_output_value("url"), ["marta"])
# def test_add_value_on_unknown_field(self):
# il = CustomItemLoader()
@@ -369,60 +380,60 @@ class BasicItemLoaderTest(unittest.TestCase):
name_out = Compose(lambda v: v[0], lambda v: v.title(), lambda v: v[:-1])
il = CustomItemLoader()
- il.add_value('name', ['marta', 'other'])
- self.assertEqual(il.get_output_value('name'), 'Mart')
+ il.add_value("name", ["marta", "other"])
+ self.assertEqual(il.get_output_value("name"), "Mart")
item = il.load_item()
- self.assertEqual(item['name'], 'Mart')
+ self.assertEqual(item["name"], "Mart")
def test_partial_processor(self):
def join(values, sep=None, loader_context=None, ignored=None):
if sep is not None:
return sep.join(values)
- elif loader_context and 'sep' in loader_context:
- return loader_context['sep'].join(values)
+ elif loader_context and "sep" in loader_context:
+ return loader_context["sep"].join(values)
else:
- return ''.join(values)
+ return "".join(values)
class CustomItemLoader(ItemLoader):
- name_out = Compose(partial(join, sep='+'))
- url_out = Compose(partial(join, loader_context={'sep': '.'}))
- summary_out = Compose(partial(join, ignored='foo'))
+ name_out = Compose(partial(join, sep="+"))
+ url_out = Compose(partial(join, loader_context={"sep": "."}))
+ summary_out = Compose(partial(join, ignored="foo"))
il = CustomItemLoader()
- il.add_value('name', ['rabbit', 'hole'])
- il.add_value('url', ['rabbit', 'hole'])
- il.add_value('summary', ['rabbit', 'hole'])
+ il.add_value("name", ["rabbit", "hole"])
+ il.add_value("url", ["rabbit", "hole"])
+ il.add_value("summary", ["rabbit", "hole"])
item = il.load_item()
- self.assertEqual(item['name'], 'rabbit+hole')
- self.assertEqual(item['url'], 'rabbit.hole')
- self.assertEqual(item['summary'], 'rabbithole')
+ self.assertEqual(item["name"], "rabbit+hole")
+ self.assertEqual(item["url"], "rabbit.hole")
+ self.assertEqual(item["summary"], "rabbithole")
def test_error_input_processor(self):
class CustomItemLoader(ItemLoader):
name_in = MapCompose(float)
il = CustomItemLoader()
- self.assertRaises(ValueError, il.add_value, 'name',
- ['marta', 'other'])
+ self.assertRaises(ValueError, il.add_value, "name", ["marta", "other"])
def test_error_output_processor(self):
class CustomItemLoader(ItemLoader):
name_out = Compose(Join(), float)
il = CustomItemLoader()
- il.add_value('name', 'marta')
+ il.add_value("name", "marta")
with self.assertRaises(ValueError):
il.load_item()
def test_error_processor_as_argument(self):
il = CustomItemLoader()
- self.assertRaises(ValueError, il.add_value, 'name',
- ['marta', 'other'], Compose(float))
+ self.assertRaises(
+ ValueError, il.add_value, "name", ["marta", "other"], Compose(float)
+ )
def test_get_unset_value(self):
loader = ItemLoader()
self.assertEqual(loader.load_item(), {})
- self.assertEqual(loader.get_output_value('foo'), [])
+ self.assertEqual(loader.get_output_value("foo"), [])
self.assertEqual(loader.load_item(), {})
@@ -439,28 +450,37 @@ class NoInputReprocessingFromDictTest(unittest.TestCase):
"""
Loaders initialized from loaded items must not reprocess fields (dict instances)
"""
+
def test_avoid_reprocessing_with_initial_values_single(self):
- il = NoInputReprocessingDictLoader(item=dict(title='foo'))
+ il = NoInputReprocessingDictLoader(item={"title": "foo"})
il_loaded = il.load_item()
- self.assertEqual(il_loaded, dict(title='foo'))
- self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='foo'))
+ self.assertEqual(il_loaded, {"title": "foo"})
+ self.assertEqual(
+ NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"}
+ )
def test_avoid_reprocessing_with_initial_values_list(self):
- il = NoInputReprocessingDictLoader(item=dict(title=['foo', 'bar']))
+ il = NoInputReprocessingDictLoader(item={"title": ["foo", "bar"]})
il_loaded = il.load_item()
- self.assertEqual(il_loaded, dict(title='foo'))
- self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='foo'))
+ self.assertEqual(il_loaded, {"title": "foo"})
+ self.assertEqual(
+ NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"}
+ )
def test_avoid_reprocessing_without_initial_values_single(self):
il = NoInputReprocessingDictLoader()
- il.add_value('title', 'foo')
+ il.add_value("title", "foo")
il_loaded = il.load_item()
- self.assertEqual(il_loaded, dict(title='FOO'))
- self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='FOO'))
+ self.assertEqual(il_loaded, {"title": "FOO"})
+ self.assertEqual(
+ NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"}
+ )
def test_avoid_reprocessing_without_initial_values_list(self):
il = NoInputReprocessingDictLoader()
- il.add_value('title', ['foo', 'bar'])
+ il.add_value("title", ["foo", "bar"])
il_loaded = il.load_item()
- self.assertEqual(il_loaded, dict(title='FOO'))
- self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='FOO'))
+ self.assertEqual(il_loaded, {"title": "FOO"})
+ self.assertEqual(
+ NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"}
+ )
diff --git a/tests/test_loader_initialization.py b/tests/test_loader_initialization.py
index 7e8d51a..0f63253 100644
--- a/tests/test_loader_initialization.py
+++ b/tests/test_loader_initialization.py
@@ -4,90 +4,89 @@ from itemloaders import ItemLoader
class InitializationTestMixin:
-
item_class = None
def test_keep_single_value(self):
"""Loaded item should contain values from the initial item"""
- input_item = self.item_class(name='foo')
+ input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
- self.assertEqual(dict(loaded_item), {'name': ['foo']})
+ self.assertEqual(dict(loaded_item), {"name": ["foo"]})
def test_keep_list(self):
"""Loaded item should contain values from the initial item"""
- input_item = self.item_class(name=['foo', 'bar'])
+ input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
- self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar']})
+ self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]})
def test_add_value_singlevalue_singlevalue(self):
"""Values added after initialization should be appended"""
- input_item = self.item_class(name='foo')
+ input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
- il.add_value('name', 'bar')
+ il.add_value("name", "bar")
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
- self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar']})
+ self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]})
def test_add_value_singlevalue_list(self):
"""Values added after initialization should be appended"""
- input_item = self.item_class(name='foo')
+ input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
- il.add_value('name', ['item', 'loader'])
+ il.add_value("name", ["item", "loader"])
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
- self.assertEqual(dict(loaded_item), {'name': ['foo', 'item', 'loader']})
+ self.assertEqual(dict(loaded_item), {"name": ["foo", "item", "loader"]})
def test_add_value_list_singlevalue(self):
"""Values added after initialization should be appended"""
- input_item = self.item_class(name=['foo', 'bar'])
+ input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
- il.add_value('name', 'qwerty')
+ il.add_value("name", "qwerty")
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
- self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar', 'qwerty']})
+ self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "qwerty"]})
def test_add_value_list_list(self):
"""Values added after initialization should be appended"""
- input_item = self.item_class(name=['foo', 'bar'])
+ input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
- il.add_value('name', ['item', 'loader'])
+ il.add_value("name", ["item", "loader"])
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
- self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar', 'item', 'loader']})
+ self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "item", "loader"]})
def test_get_output_value_singlevalue(self):
"""Getting output value must not remove value from item"""
- input_item = self.item_class(name='foo')
+ input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
- self.assertEqual(il.get_output_value('name'), ['foo'])
+ self.assertEqual(il.get_output_value("name"), ["foo"])
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
- self.assertEqual(loaded_item, dict({'name': ['foo']}))
+ self.assertEqual(loaded_item, {"name": ["foo"]})
def test_get_output_value_list(self):
"""Getting output value must not remove value from item"""
- input_item = self.item_class(name=['foo', 'bar'])
+ input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
- self.assertEqual(il.get_output_value('name'), ['foo', 'bar'])
+ self.assertEqual(il.get_output_value("name"), ["foo", "bar"])
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
- self.assertEqual(loaded_item, dict({'name': ['foo', 'bar']}))
+ self.assertEqual(loaded_item, {"name": ["foo", "bar"]})
def test_values_single(self):
"""Values from initial item must be added to loader._values"""
- input_item = self.item_class(name='foo')
+ input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
- self.assertEqual(il._values.get('name'), ['foo'])
+ self.assertEqual(il._values.get("name"), ["foo"])
def test_values_list(self):
"""Values from initial item must be added to loader._values"""
- input_item = self.item_class(name=['foo', 'bar'])
+ input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
- self.assertEqual(il._values.get('name'), ['foo', 'bar'])
+ self.assertEqual(il._values.get("name"), ["foo", "bar"])
class InitializationFromDictTest(InitializationTestMixin, unittest.TestCase):
diff --git a/tests/test_nested_items.py b/tests/test_nested_items.py
index 0bdfbf2..444431a 100644
--- a/tests/test_nested_items.py
+++ b/tests/test_nested_items.py
@@ -8,8 +8,8 @@ class NestedItemTest(unittest.TestCase):
def _test_item(self, item):
il = ItemLoader()
- il.add_value('item_list', item)
- self.assertEqual(il.load_item(), {'item_list': [item]})
+ il.add_value("item_list", item)
+ self.assertEqual(il.load_item(), {"item_list": [item]})
def test_attrs(self):
try:
@@ -21,7 +21,7 @@ class NestedItemTest(unittest.TestCase):
class TestItem:
foo = attr.ib()
- self._test_item(TestItem(foo='bar'))
+ self._test_item(TestItem(foo="bar"))
def test_dataclass(self):
try:
@@ -33,10 +33,10 @@ class NestedItemTest(unittest.TestCase):
class TestItem:
foo: str
- self._test_item(TestItem(foo='bar'))
+ self._test_item(TestItem(foo="bar"))
def test_dict(self):
- self._test_item({'foo': 'bar'})
+ self._test_item({"foo": "bar"})
def test_scrapy_item(self):
try:
@@ -47,4 +47,4 @@ class NestedItemTest(unittest.TestCase):
class TestItem(Item):
foo = Field()
- self._test_item(TestItem(foo='bar'))
+ self._test_item(TestItem(foo="bar"))
diff --git a/tests/test_nested_loader.py b/tests/test_nested_loader.py
index 1e193d3..58b9bec 100644
--- a/tests/test_nested_loader.py
+++ b/tests/test_nested_loader.py
@@ -6,7 +6,8 @@ from itemloaders import ItemLoader
class SubselectorLoaderTest(unittest.TestCase):
- selector = Selector(text="""
+ selector = Selector(
+ text="""
<html>
<body>
<header>
@@ -19,75 +20,91 @@ class SubselectorLoaderTest(unittest.TestCase):
</footer>
</body>
</html>
- """)
+ """
+ )
def test_nested_xpath(self):
loader = ItemLoader(selector=self.selector)
nl = loader.nested_xpath("//header")
- nl.add_xpath('name', 'div/text()')
- nl.add_css('name_div', '#id')
- nl.add_value('name_value', nl.selector.xpath('div[@id = "id"]/text()').getall())
-
- self.assertEqual(loader.get_output_value('name'), ['marta'])
- self.assertEqual(loader.get_output_value('name_div'), ['<div id="id">marta</div>'])
- self.assertEqual(loader.get_output_value('name_value'), ['marta'])
-
- self.assertEqual(loader.get_output_value('name'), nl.get_output_value('name'))
- self.assertEqual(loader.get_output_value('name_div'), nl.get_output_value('name_div'))
- self.assertEqual(loader.get_output_value('name_value'), nl.get_output_value('name_value'))
+ nl.add_xpath("name", "div/text()")
+ nl.add_css("name_div", "#id")
+ nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall())
+
+ self.assertEqual(loader.get_output_value("name"), ["marta"])
+ self.assertEqual(
+ loader.get_output_value("name_div"), ['<div id="id">marta</div>']
+ )
+ self.assertEqual(loader.get_output_value("name_value"), ["marta"])
+
+ self.assertEqual(loader.get_output_value("name"), nl.get_output_value("name"))
+ self.assertEqual(
+ loader.get_output_value("name_div"), nl.get_output_value("name_div")
+ )
+ self.assertEqual(
+ loader.get_output_value("name_value"), nl.get_output_value("name_value")
+ )
def test_nested_css(self):
loader = ItemLoader(selector=self.selector)
nl = loader.nested_css("header")
- nl.add_xpath('name', 'div/text()')
- nl.add_css('name_div', '#id')
- nl.add_value('name_value', nl.selector.xpath('div[@id = "id"]/text()').getall())
-
- self.assertEqual(loader.get_output_value('name'), ['marta'])
- self.assertEqual(loader.get_output_value('name_div'), ['<div id="id">marta</div>'])
- self.assertEqual(loader.get_output_value('name_value'), ['marta'])
-
- self.assertEqual(loader.get_output_value('name'), nl.get_output_value('name'))
- self.assertEqual(loader.get_output_value('name_div'), nl.get_output_value('name_div'))
- self.assertEqual(loader.get_output_value('name_value'), nl.get_output_value('name_value'))
+ nl.add_xpath("name", "div/text()")
+ nl.add_css("name_div", "#id")
+ nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall())
+
+ self.assertEqual(loader.get_output_value("name"), ["marta"])
+ self.assertEqual(
+ loader.get_output_value("name_div"), ['<div id="id">marta</div>']
+ )
+ self.assertEqual(loader.get_output_value("name_value"), ["marta"])
+
+ self.assertEqual(loader.get_output_value("name"), nl.get_output_value("name"))
+ self.assertEqual(
+ loader.get_output_value("name_div"), nl.get_output_value("name_div")
+ )
+ self.assertEqual(
+ loader.get_output_value("name_value"), nl.get_output_value("name_value")
+ )
def test_nested_replace(self):
loader = ItemLoader(selector=self.selector)
- nl1 = loader.nested_xpath('//footer')
- nl2 = nl1.nested_xpath('a')
+ nl1 = loader.nested_xpath("//footer")
+ nl2 = nl1.nested_xpath("a")
- loader.add_xpath('url', '//footer/a/@href')
- self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org'])
- nl1.replace_xpath('url', 'img/@src')
- self.assertEqual(loader.get_output_value('url'), ['/images/logo.png'])
- nl2.replace_xpath('url', '@href')
- self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org'])
+ loader.add_xpath("url", "//footer/a/@href")
+ self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"])
+ nl1.replace_xpath("url", "img/@src")
+ self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"])
+ nl2.replace_xpath("url", "@href")
+ self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"])
def test_nested_ordering(self):
loader = ItemLoader(selector=self.selector)
- nl1 = loader.nested_xpath('//footer')
- nl2 = nl1.nested_xpath('a')
-
- nl1.add_xpath('url', 'img/@src')
- loader.add_xpath('url', '//footer/a/@href')
- nl2.add_xpath('url', 'text()')
- loader.add_xpath('url', '//footer/a/@href')
-
- self.assertEqual(loader.get_output_value('url'), [
- '/images/logo.png',
- 'http://www.scrapy.org',
- 'homepage',
- 'http://www.scrapy.org',
- ])
+ nl1 = loader.nested_xpath("//footer")
+ nl2 = nl1.nested_xpath("a")
+
+ nl1.add_xpath("url", "img/@src")
+ loader.add_xpath("url", "//footer/a/@href")
+ nl2.add_xpath("url", "text()")
+ loader.add_xpath("url", "//footer/a/@href")
+
+ self.assertEqual(
+ loader.get_output_value("url"),
+ [
+ "/images/logo.png",
+ "http://www.scrapy.org",
+ "homepage",
+ "http://www.scrapy.org",
+ ],
+ )
def test_nested_load_item(self):
loader = ItemLoader(selector=self.selector)
- nl1 = loader.nested_xpath('//footer')
- nl2 = nl1.nested_xpath('img')
+ nl1 = loader.nested_xpath("//footer")
+ nl2 = nl1.nested_xpath("img")
- loader.add_xpath('name', '//header/div/text()')
- nl1.add_xpath('url', 'a/@href')
- nl2.add_xpath('image', '@src')
+ loader.add_xpath("name", "//header/div/text()")
+ nl1.add_xpath("url", "a/@href")
+ nl2.add_xpath("image", "@src")
item = loader.load_item()
@@ -95,6 +112,6 @@ class SubselectorLoaderTest(unittest.TestCase):
assert item is nl1.item
assert item is nl2.item
- self.assertEqual(item['name'], ['marta'])
- self.assertEqual(item['url'], ['http://www.scrapy.org'])
- self.assertEqual(item['image'], ['/images/logo.png'])
+ self.assertEqual(item["name"], ["marta"])
+ self.assertEqual(item["url"], ["http://www.scrapy.org"])
+ self.assertEqual(item["image"], ["/images/logo.png"])
diff --git a/tests/test_output_processor.py b/tests/test_output_processor.py
index 54bb1fe..f4aa387 100644
--- a/tests/test_output_processor.py
+++ b/tests/test_output_processor.py
@@ -1,16 +1,15 @@
import unittest
from itemloaders import ItemLoader
-from itemloaders.processors import Identity, Compose, TakeFirst
+from itemloaders.processors import Compose, Identity, TakeFirst
class TestOutputProcessorDict(unittest.TestCase):
def test_output_processor(self):
-
class TempDict(dict):
def __init__(self, *args, **kwargs):
super(TempDict, self).__init__(self, *args, **kwargs)
- self.setdefault('temp', 0.3)
+ self.setdefault("temp", 0.3)
class TempLoader(ItemLoader):
default_item_class = TempDict
@@ -20,7 +19,7 @@ class TestOutputProcessorDict(unittest.TestCase):
loader = TempLoader()
item = loader.load_item()
self.assertIsInstance(item, TempDict)
- self.assertEqual(dict(item), {'temp': 0.3})
+ self.assertEqual(dict(item), {"temp": 0.3})
class TestOutputProcessorItem(unittest.TestCase):
@@ -29,9 +28,9 @@ class TestOutputProcessorItem(unittest.TestCase):
default_input_processor = Identity()
default_output_processor = Compose(TakeFirst())
- item = dict()
- item.setdefault('temp', 0.3)
+ item = {}
+ item.setdefault("temp", 0.3)
loader = TempLoader(item=item)
item = loader.load_item()
self.assertIsInstance(item, dict)
- self.assertEqual(dict(item), {'temp': 0.3})
+ self.assertEqual(dict(item), {"temp": 0.3})
diff --git a/tests/test_processors.py b/tests/test_processors.py
index 769597d..55a0c9e 100644
--- a/tests/test_processors.py
+++ b/tests/test_processors.py
@@ -1,47 +1,49 @@
import unittest
-from itemloaders.processors import (Compose, Identity, Join,
- MapCompose, TakeFirst)
+from itemloaders.processors import Compose, Identity, Join, MapCompose, TakeFirst
class ProcessorsTest(unittest.TestCase):
-
def test_take_first(self):
proc = TakeFirst()
- self.assertEqual(proc([None, '', 'hello', 'world']), 'hello')
- self.assertEqual(proc([None, '', 0, 'hello', 'world']), 0)
+ self.assertEqual(proc([None, "", "hello", "world"]), "hello")
+ self.assertEqual(proc([None, "", 0, "hello", "world"]), 0)
def test_identity(self):
proc = Identity()
- self.assertEqual(proc([None, '', 'hello', 'world']),
- [None, '', 'hello', 'world'])
+ self.assertEqual(
+ proc([None, "", "hello", "world"]), [None, "", "hello", "world"]
+ )
def test_join(self):
proc = Join()
- self.assertRaises(TypeError, proc, [None, '', 'hello', 'world'])
- self.assertEqual(proc(['', 'hello', 'world']), u' hello world')
- self.assertEqual(proc(['hello', 'world']), u'hello world')
- self.assertIsInstance(proc(['hello', 'world']), str)
+ self.assertRaises(TypeError, proc, [None, "", "hello", "world"])
+ self.assertEqual(proc(["", "hello", "world"]), " hello world")
+ self.assertEqual(proc(["hello", "world"]), "hello world")
+ self.assertIsInstance(proc(["hello", "world"]), str)
def test_compose(self):
proc = Compose(lambda v: v[0], str.upper)
- self.assertEqual(proc(['hello', 'world']), 'HELLO')
+ self.assertEqual(proc(["hello", "world"]), "HELLO")
proc = Compose(str.upper)
self.assertEqual(proc(None), None)
proc = Compose(str.upper, stop_on_none=False)
self.assertRaises(ValueError, proc, None)
proc = Compose(str.upper, lambda x: x + 1)
- self.assertRaises(ValueError, proc, 'hello')
+ self.assertRaises(ValueError, proc, "hello")
def test_mapcompose(self):
def filter_world(x):
- return None if x == 'world' else x
+ return None if x == "world" else x
+
proc = MapCompose(filter_world, str.upper)
- self.assertEqual(proc([u'hello', u'world', u'this', u'is', u'scrapy']),
- [u'HELLO', u'THIS', u'IS', u'SCRAPY'])
+ self.assertEqual(
+ proc(["hello", "world", "this", "is", "scrapy"]),
+ ["HELLO", "THIS", "IS", "SCRAPY"],
+ )
proc = MapCompose(filter_world, str.upper)
self.assertEqual(proc(None), [])
proc = MapCompose(filter_world, str.upper)
self.assertRaises(ValueError, proc, [1])
proc = MapCompose(filter_world, lambda x: x + 1)
- self.assertRaises(ValueError, proc, 'hello')
+ self.assertRaises(ValueError, proc, "hello")
diff --git a/tests/test_select_jmes.py b/tests/test_select_jmes.py
index d3c8cc7..1754863 100644
--- a/tests/test_select_jmes.py
+++ b/tests/test_select_jmes.py
@@ -5,24 +5,22 @@ from itemloaders.processors import SelectJmes
class SelectJmesTestCase(unittest.TestCase):
test_list_equals = {
- 'simple': ('foo.bar', {"foo": {"bar": "baz"}}, "baz"),
- 'invalid': ('foo.bar.baz', {"foo": {"bar": "baz"}}, None),
- 'top_level': ('foo', {"foo": {"bar": "baz"}}, {"bar": "baz"}),
- 'double_vs_single_quote_string': ('foo.bar', {"foo": {"bar": "baz"}}, "baz"),
- 'dict': (
- 'foo.bar[*].name',
+ "simple": ("foo.bar", {"foo": {"bar": "baz"}}, "baz"),
+ "invalid": ("foo.bar.baz", {"foo": {"bar": "baz"}}, None),
+ "top_level": ("foo", {"foo": {"bar": "baz"}}, {"bar": "baz"}),
+ "double_vs_single_quote_string": ("foo.bar", {"foo": {"bar": "baz"}}, "baz"),
+ "dict": (
+ "foo.bar[*].name",
{"foo": {"bar": [{"name": "one"}, {"name": "two"}]}},
- ['one', 'two']
+ ["one", "two"],
),
- 'list': ('[1]', [1, 2], 2)
+ "list": ("[1]", [1, 2], 2),
}
def test_output(self):
- for l in self.test_list_equals:
- expr, test_list, expected = self.test_list_equals[l]
+ for key in self.test_list_equals:
+ expr, test_list, expected = self.test_list_equals[key]
test = SelectJmes(expr)(test_list)
self.assertEqual(
- test,
- expected,
- msg='test "{}" got {} expected {}'.format(l, test, expected)
+ test, expected, msg=f"test {key!r} got {test} expected {expected}"
)
diff --git a/tests/test_selector_loader.py b/tests/test_selector_loader.py
index 170b56f..484c239 100644
--- a/tests/test_selector_loader.py
+++ b/tests/test_selector_loader.py
@@ -1,5 +1,6 @@
import re
import unittest
+from unittest.mock import MagicMock
from parsel import Selector
@@ -12,7 +13,8 @@ class CustomItemLoader(ItemLoader):
class SelectortemLoaderTest(unittest.TestCase):
- selector = Selector(text="""
+ selector = Selector(
+ text="""
<html>
<body>
<div id="id">marta</div>
@@ -21,7 +23,22 @@ class SelectortemLoaderTest(unittest.TestCase):
<img src="/images/logo.png" width="244" height="65" alt="Scrapy">
</body>
</html>
- """)
+ """
+ )
+
+ jmes_selector = Selector(
+ text="""
+ {
+ "name": "marta",
+ "description": "paragraph",
+ "website": {
+ "url": "http://www.scrapy.org",
+ "name": "homepage"
+ },
+ "logo": "/images/logo.png"
+ }
+ """
+ )
def test_init_method(self):
loader = CustomItemLoader()
@@ -29,146 +46,230 @@ class SelectortemLoaderTest(unittest.TestCase):
def test_init_method_errors(self):
loader = CustomItemLoader()
- self.assertRaises(RuntimeError, loader.add_xpath, 'url', '//a/@href')
- self.assertRaises(RuntimeError, loader.replace_xpath, 'url', '//a/@href')
- self.assertRaises(RuntimeError, loader.get_xpath, '//a/@href')
- self.assertRaises(RuntimeError, loader.add_css, 'name', '#name::text')
- self.assertRaises(RuntimeError, loader.replace_css, 'name', '#name::text')
- self.assertRaises(RuntimeError, loader.get_css, '#name::text')
+ self.assertRaises(RuntimeError, loader.add_xpath, "url", "//a/@href")
+ self.assertRaises(RuntimeError, loader.replace_xpath, "url", "//a/@href")
+ self.assertRaises(RuntimeError, loader.get_xpath, "//a/@href")
+ self.assertRaises(RuntimeError, loader.add_css, "name", "#name::text")
+ self.assertRaises(RuntimeError, loader.replace_css, "name", "#name::text")
+ self.assertRaises(RuntimeError, loader.get_css, "#name::text")
def test_init_method_with_selector(self):
loader = CustomItemLoader(selector=self.selector)
self.assertTrue(loader.selector)
- loader.add_xpath('name', '//div/text()')
- self.assertEqual(loader.get_output_value('name'), ['Marta'])
+ loader.add_xpath("name", "//div/text()")
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
def test_init_method_with_selector_css(self):
loader = CustomItemLoader(selector=self.selector)
self.assertTrue(loader.selector)
- loader.add_css('name', 'div::text')
- self.assertEqual(loader.get_output_value('name'), [u'Marta'])
+ loader.add_css("name", "div::text")
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
- loader.add_css('url', 'a::attr(href)')
- self.assertEqual(loader.get_output_value('url'), [u'http://www.scrapy.org'])
+ loader.add_css("url", "a::attr(href)")
+ self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"])
# combining/accumulating CSS selectors and XPath expressions
- loader.add_xpath('name', '//div/text()')
- self.assertEqual(loader.get_output_value('name'), [u'Marta', u'Marta'])
+ loader.add_xpath("name", "//div/text()")
+ self.assertEqual(loader.get_output_value("name"), ["Marta", "Marta"])
- loader.add_xpath('url', '//img/@src')
- self.assertEqual(loader.get_output_value('url'), [u'http://www.scrapy.org', u'/images/logo.png'])
+ loader.add_xpath("url", "//img/@src")
+ self.assertEqual(
+ loader.get_output_value("url"),
+ ["http://www.scrapy.org", "/images/logo.png"],
+ )
def test_add_xpath_re(self):
loader = CustomItemLoader(selector=self.selector)
- loader.add_xpath('name', '//div/text()', re='ma')
- self.assertEqual(loader.get_output_value('name'), ['Ma'])
+ loader.add_xpath("name", "//div/text()", re="ma")
+ self.assertEqual(loader.get_output_value("name"), ["Ma"])
loader = CustomItemLoader(selector=self.selector)
- loader.add_xpath('name', '//div/text()', re=re.compile('ma'))
- self.assertEqual(loader.get_output_value('name'), ['Ma'])
-
+ loader.add_xpath("name", "//div/text()", re=re.compile("ma"))
+ self.assertEqual(loader.get_output_value("name"), ["Ma"])
def test_add_xpath_variables(self):
loader = CustomItemLoader(selector=self.selector)
- loader.add_xpath('name', 'id($id)/text()', id="id")
- self.assertEqual(loader.get_output_value('name'), ['Marta'])
+ loader.add_xpath("name", "id($id)/text()", id="id")
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
loader = CustomItemLoader(selector=self.selector)
- loader.add_xpath('name', 'id($id)/text()', id="id2")
- self.assertEqual(loader.get_output_value('name'), [])
+ loader.add_xpath("name", "id($id)/text()", id="id2")
+ self.assertEqual(loader.get_output_value("name"), [])
def test_replace_xpath(self):
loader = CustomItemLoader(selector=self.selector)
self.assertTrue(loader.selector)
- loader.add_xpath('name', '//div/text()')
- self.assertEqual(loader.get_output_value('name'), ['Marta'])
- loader.replace_xpath('name', '//p/text()')
- self.assertEqual(loader.get_output_value('name'), ['Paragraph'])
+ loader.add_xpath("name", "//div/text()")
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
+ loader.replace_xpath("name", "//p/text()")
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph"])
- loader.replace_xpath('name', ['//p/text()', '//div/text()'])
- self.assertEqual(loader.get_output_value('name'), ['Paragraph', 'Marta'])
+ loader.replace_xpath("name", ["//p/text()", "//div/text()"])
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph", "Marta"])
def test_get_xpath(self):
loader = CustomItemLoader(selector=self.selector)
- self.assertEqual(loader.get_xpath('//p/text()'), ['paragraph'])
- self.assertEqual(loader.get_xpath('//p/text()', TakeFirst()), 'paragraph')
- self.assertEqual(loader.get_xpath('//p/text()', TakeFirst(), re='pa'), 'pa')
+ self.assertEqual(loader.get_xpath("//p/text()"), ["paragraph"])
+ self.assertEqual(loader.get_xpath("//p/text()", TakeFirst()), "paragraph")
+ self.assertEqual(loader.get_xpath("//p/text()", TakeFirst(), re="pa"), "pa")
- self.assertEqual(loader.get_xpath(['//p/text()', '//div/text()']), ['paragraph', 'marta'])
+ self.assertEqual(
+ loader.get_xpath(["//p/text()", "//div/text()"]), ["paragraph", "marta"]
+ )
def test_replace_xpath_multi_fields(self):
loader = CustomItemLoader(selector=self.selector)
- loader.add_xpath(None, '//div/text()', TakeFirst(), lambda x: {'name': x})
- self.assertEqual(loader.get_output_value('name'), ['Marta'])
- loader.replace_xpath(None, '//p/text()', TakeFirst(), lambda x: {'name': x})
- self.assertEqual(loader.get_output_value('name'), ['Paragraph'])
+ loader.add_xpath(None, "//div/text()", TakeFirst(), lambda x: {"name": x})
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
+ loader.replace_xpath(None, "//p/text()", TakeFirst(), lambda x: {"name": x})
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph"])
def test_replace_xpath_re(self):
loader = CustomItemLoader(selector=self.selector)
self.assertTrue(loader.selector)
- loader.add_xpath('name', '//div/text()')
- self.assertEqual(loader.get_output_value('name'), ['Marta'])
- loader.replace_xpath('name', '//div/text()', re='ma')
- self.assertEqual(loader.get_output_value('name'), ['Ma'])
+ loader.add_xpath("name", "//div/text()")
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
+ loader.replace_xpath("name", "//div/text()", re="ma")
+ self.assertEqual(loader.get_output_value("name"), ["Ma"])
def test_add_css_re(self):
loader = CustomItemLoader(selector=self.selector)
- loader.add_css('name', 'div::text', re='ma')
- self.assertEqual(loader.get_output_value('name'), ['Ma'])
+ loader.add_css("name", "div::text", re="ma")
+ self.assertEqual(loader.get_output_value("name"), ["Ma"])
- loader.add_css('url', 'a::attr(href)', re='http://(.+)')
- self.assertEqual(loader.get_output_value('url'), ['www.scrapy.org'])
+ loader.add_css("url", "a::attr(href)", re="http://(.+)")
+ self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"])
loader = CustomItemLoader(selector=self.selector)
- loader.add_css('name', 'div::text', re=re.compile('ma'))
- self.assertEqual(loader.get_output_value('name'), ['Ma'])
+ loader.add_css("name", "div::text", re=re.compile("ma"))
+ self.assertEqual(loader.get_output_value("name"), ["Ma"])
- loader.add_css('url', 'a::attr(href)', re=re.compile('http://(.+)'))
- self.assertEqual(loader.get_output_value('url'), ['www.scrapy.org'])
+ loader.add_css("url", "a::attr(href)", re=re.compile("http://(.+)"))
+ self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"])
def test_replace_css(self):
loader = CustomItemLoader(selector=self.selector)
self.assertTrue(loader.selector)
- loader.add_css('name', 'div::text')
- self.assertEqual(loader.get_output_value('name'), ['Marta'])
- loader.replace_css('name', 'p::text')
- self.assertEqual(loader.get_output_value('name'), ['Paragraph'])
+ loader.add_css("name", "div::text")
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
+ loader.replace_css("name", "p::text")
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph"])
- loader.replace_css('name', ['p::text', 'div::text'])
- self.assertEqual(loader.get_output_value('name'), ['Paragraph', 'Marta'])
+ loader.replace_css("name", ["p::text", "div::text"])
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph", "Marta"])
- loader.add_css('url', 'a::attr(href)', re='http://(.+)')
- self.assertEqual(loader.get_output_value('url'), ['www.scrapy.org'])
- loader.replace_css('url', 'img::attr(src)')
- self.assertEqual(loader.get_output_value('url'), ['/images/logo.png'])
+ loader.add_css("url", "a::attr(href)", re="http://(.+)")
+ self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"])
+ loader.replace_css("url", "img::attr(src)")
+ self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"])
def test_get_css(self):
loader = CustomItemLoader(selector=self.selector)
- self.assertEqual(loader.get_css('p::text'), [u'paragraph'])
- self.assertEqual(loader.get_css('p::text', TakeFirst()), 'paragraph')
- self.assertEqual(loader.get_css('p::text', TakeFirst(), re='pa'), u'pa')
-
- self.assertEqual(loader.get_css(['p::text', 'div::text']), ['paragraph', 'marta'])
- self.assertEqual(loader.get_css(['a::attr(href)', 'img::attr(src)']),
- [u'http://www.scrapy.org', '/images/logo.png'])
+ self.assertEqual(loader.get_css("p::text"), ["paragraph"])
+ self.assertEqual(loader.get_css("p::text", TakeFirst()), "paragraph")
+ self.assertEqual(loader.get_css("p::text", TakeFirst(), re="pa"), "pa")
+
+ self.assertEqual(
+ loader.get_css(["p::text", "div::text"]), ["paragraph", "marta"]
+ )
+ self.assertEqual(
+ loader.get_css(["a::attr(href)", "img::attr(src)"]),
+ ["http://www.scrapy.org", "/images/logo.png"],
+ )
def test_replace_css_multi_fields(self):
loader = CustomItemLoader(selector=self.selector)
- loader.add_css(None, 'div::text', TakeFirst(), lambda x: {'name': x})
- self.assertEqual(loader.get_output_value('name'), ['Marta'])
- loader.replace_css(None, 'p::text', TakeFirst(), lambda x: {'name': x})
- self.assertEqual(loader.get_output_value('name'), ['Paragraph'])
+ loader.add_css(None, "div::text", TakeFirst(), lambda x: {"name": x})
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
+ loader.replace_css(None, "p::text", TakeFirst(), lambda x: {"name": x})
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph"])
- loader.add_css(None, 'a::attr(href)', TakeFirst(), lambda x: {'url': x})
- self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org'])
- loader.replace_css(None, 'img::attr(src)', TakeFirst(), lambda x: {'url': x})
- self.assertEqual(loader.get_output_value('url'), ['/images/logo.png'])
+ loader.add_css(None, "a::attr(href)", TakeFirst(), lambda x: {"url": x})
+ self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"])
+ loader.replace_css(None, "img::attr(src)", TakeFirst(), lambda x: {"url": x})
+ self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"])
def test_replace_css_re(self):
loader = CustomItemLoader(selector=self.selector)
self.assertTrue(loader.selector)
- loader.add_css('url', 'a::attr(href)')
- self.assertEqual(loader.get_output_value('url'), ['http://www.scrapy.org'])
- loader.replace_css('url', 'a::attr(href)', re=r'http://www\.(.+)')
- self.assertEqual(loader.get_output_value('url'), ['scrapy.org'])
+ loader.add_css("url", "a::attr(href)")
+ self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"])
+ loader.replace_css("url", "a::attr(href)", re=r"http://www\.(.+)")
+ self.assertEqual(loader.get_output_value("url"), ["scrapy.org"])
+
+ def test_jmes_not_installed(self):
+ selector = MagicMock(spec=Selector)
+ del selector.jmespath
+ loader = CustomItemLoader(selector=selector)
+ with self.assertRaises(AttributeError) as err:
+ loader.add_jmes("name", "name", re="ma")
+
+ self.assertEqual(
+ str(err.exception), "Please install parsel >= 1.8.1 to get jmespath support"
+ )
+
+ def test_add_jmes_re(self):
+ loader = CustomItemLoader(selector=self.jmes_selector)
+ loader.add_jmes("name", "name", re="ma")
+ self.assertEqual(loader.get_output_value("name"), ["Ma"])
+
+ loader.add_jmes("url", "website.url", re="http://(.+)")
+ self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"])
+
+ loader = CustomItemLoader(selector=self.jmes_selector)
+ loader.add_jmes("name", "name", re=re.compile("ma"))
+ self.assertEqual(loader.get_output_value("name"), ["Ma"])
+
+ loader.add_jmes("url", "website.url", re=re.compile("http://(.+)"))
+ self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"])
+
+ def test_get_jmes(self):
+ loader = CustomItemLoader(selector=self.jmes_selector)
+ self.assertEqual(loader.get_jmes("description"), ["paragraph"])
+ self.assertEqual(loader.get_jmes("description", TakeFirst()), "paragraph")
+ self.assertEqual(loader.get_jmes("description", TakeFirst(), re="pa"), "pa")
+
+ self.assertEqual(
+ loader.get_jmes(["description", "name"]), ["paragraph", "marta"]
+ )
+ self.assertEqual(
+ loader.get_jmes(["website.url", "logo"]),
+ ["http://www.scrapy.org", "/images/logo.png"],
+ )
+
+ def test_replace_jmes(self):
+ loader = CustomItemLoader(selector=self.jmes_selector)
+ self.assertTrue(loader.selector)
+ loader.add_jmes("name", "name")
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
+ loader.replace_jmes("name", "description")
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph"])
+
+ loader.replace_jmes("name", ["description", "name"])
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph", "Marta"])
+
+ loader.add_jmes("url", "website.url", re="http://(.+)")
+ self.assertEqual(loader.get_output_value("url"), ["www.scrapy.org"])
+ loader.replace_jmes("url", "logo")
+ self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"])
+
+ def test_replace_jmes_multi_fields(self):
+ loader = CustomItemLoader(selector=self.jmes_selector)
+ loader.add_jmes(None, "name", TakeFirst(), lambda x: {"name": x})
+ self.assertEqual(loader.get_output_value("name"), ["Marta"])
+ loader.replace_jmes(None, "description", TakeFirst(), lambda x: {"name": x})
+ self.assertEqual(loader.get_output_value("name"), ["Paragraph"])
+
+ loader.add_jmes(None, "website.url", TakeFirst(), lambda x: {"url": x})
+ self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"])
+ loader.replace_jmes(None, "logo", TakeFirst(), lambda x: {"url": x})
+ self.assertEqual(loader.get_output_value("url"), ["/images/logo.png"])
+
+ def test_replace_jmes_re(self):
+ loader = CustomItemLoader(selector=self.jmes_selector)
+ self.assertTrue(loader.selector)
+ loader.add_jmes("url", "website.url")
+ self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"])
+ loader.replace_jmes("url", "website.url", re=r"http://www\.(.+)")
+ self.assertEqual(loader.get_output_value("url"), ["scrapy.org"])
diff --git a/tests/test_utils_misc.py b/tests/test_utils_misc.py
index 36f7c80..6c9e9b6 100644
--- a/tests/test_utils_misc.py
+++ b/tests/test_utils_misc.py
@@ -4,20 +4,19 @@ from itemloaders.utils import arg_to_iter
class UtilsMiscTestCase(unittest.TestCase):
-
def test_arg_to_iter(self):
- assert hasattr(arg_to_iter(None), '__iter__')
- assert hasattr(arg_to_iter(100), '__iter__')
- assert hasattr(arg_to_iter('lala'), '__iter__')
- assert hasattr(arg_to_iter([1, 2, 3]), '__iter__')
- assert hasattr(arg_to_iter(l for l in 'abcd'), '__iter__')
+ assert hasattr(arg_to_iter(None), "__iter__")
+ assert hasattr(arg_to_iter(100), "__iter__")
+ assert hasattr(arg_to_iter("lala"), "__iter__")
+ assert hasattr(arg_to_iter([1, 2, 3]), "__iter__")
+ assert hasattr(arg_to_iter(letter for letter in "abcd"), "__iter__")
self.assertEqual(list(arg_to_iter(None)), [])
- self.assertEqual(list(arg_to_iter('lala')), ['lala'])
+ self.assertEqual(list(arg_to_iter("lala")), ["lala"])
self.assertEqual(list(arg_to_iter(100)), [100])
- self.assertEqual(list(arg_to_iter(l for l in 'abc')), ['a', 'b', 'c'])
+ self.assertEqual(list(arg_to_iter(letter for letter in "abc")), ["a", "b", "c"])
self.assertEqual(list(arg_to_iter([1, 2, 3])), [1, 2, 3])
- self.assertEqual(list(arg_to_iter({'a': 1})), [{'a': 1}])
+ self.assertEqual(list(arg_to_iter({"a": 1})), [{"a": 1}])
if __name__ == "__main__":
diff --git a/tests/test_utils_python.py b/tests/test_utils_python.py
index 0547c95..cd7cf85 100644
--- a/tests/test_utils_python.py
+++ b/tests/test_utils_python.py
@@ -2,13 +2,11 @@ import functools
import operator
import platform
import unittest
-from datetime import datetime
from itemloaders.utils import get_func_args
class UtilsPythonTestCase(unittest.TestCase):
-
def test_get_func_args(self):
def f1(a, b, c):
pass
@@ -27,7 +25,6 @@ class UtilsPythonTestCase(unittest.TestCase):
pass
class Callable:
-
def __call__(self, a, b, c):
pass
@@ -37,31 +34,26 @@ class UtilsPythonTestCase(unittest.TestCase):
partial_f2 = functools.partial(f1, b=None)
partial_f3 = functools.partial(partial_f2, None)
- self.assertEqual(get_func_args(f1), ['a', 'b', 'c'])
- self.assertEqual(get_func_args(f2), ['a', 'b', 'c'])
- self.assertEqual(get_func_args(f3), ['a', 'b', 'c'])
- self.assertEqual(get_func_args(A), ['a', 'b', 'c'])
- self.assertEqual(get_func_args(a.method), ['a', 'b', 'c'])
- self.assertEqual(get_func_args(partial_f1), ['b', 'c'])
- self.assertEqual(get_func_args(partial_f2), ['a', 'c'])
- self.assertEqual(get_func_args(partial_f3), ['c'])
- self.assertEqual(get_func_args(cal), ['a', 'b', 'c'])
+ self.assertEqual(get_func_args(f1), ["a", "b", "c"])
+ self.assertEqual(get_func_args(f2), ["a", "b", "c"])
+ self.assertEqual(get_func_args(f3), ["a", "b", "c"])
+ self.assertEqual(get_func_args(A), ["a", "b", "c"])
+ self.assertEqual(get_func_args(a.method), ["a", "b", "c"])
+ self.assertEqual(get_func_args(partial_f1), ["b", "c"])
+ self.assertEqual(get_func_args(partial_f2), ["a", "c"])
+ self.assertEqual(get_func_args(partial_f3), ["c"])
+ self.assertEqual(get_func_args(cal), ["a", "b", "c"])
self.assertEqual(get_func_args(object), [])
+ self.assertEqual(get_func_args(str.split, stripself=True), ["sep", "maxsplit"])
+ self.assertEqual(get_func_args(" ".join, stripself=True), ["iterable"])
- if platform.python_implementation() == 'CPython':
- # TODO: how do we fix this to return the actual argument names?
- self.assertEqual(get_func_args(str.split), [])
- self.assertEqual(get_func_args(" ".join), [])
+ if platform.python_implementation() == "CPython":
+ # doesn't work on CPython: https://bugs.python.org/issue42785
self.assertEqual(get_func_args(operator.itemgetter(2)), [])
- elif platform.python_implementation() == 'PyPy':
- self.assertEqual(get_func_args(str.split, stripself=True), ['sep', 'maxsplit'])
- self.assertEqual(get_func_args(operator.itemgetter(2), stripself=True), ['obj'])
-
- build_date = datetime.strptime(platform.python_build()[1], '%b %d %Y')
- if build_date >= datetime(2020, 4, 7): # PyPy 3.6-v7.3.1
- self.assertEqual(get_func_args(" ".join, stripself=True), ['iterable'])
- else:
- self.assertEqual(get_func_args(" ".join, stripself=True), ['list'])
+ elif platform.python_implementation() == "PyPy":
+ self.assertEqual(
+ get_func_args(operator.itemgetter(2), stripself=True), ["obj"]
+ )
if __name__ == "__main__":
diff --git a/tox.ini b/tox.ini
index bf4e668..1feaf18 100644
--- a/tox.ini
+++ b/tox.ini
@@ -36,3 +36,12 @@ deps = {[docs]deps}
setenv = {[docs]setenv}
commands =
sphinx-build -W -b html . {envtmpdir}/html
+
+[testenv:twinecheck]
+basepython = python3
+deps =
+ twine==4.0.2
+ build==0.10.0
+commands =
+ python -m build --sdist
+ twine check dist/*